diff options
37 files changed, 675 insertions, 613 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer index d1f67bb81d5d..5ed284523956 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats +++ b/Documentation/ABI/testing/sysfs-bus-pci-devices-aer @@ -117,3 +117,47 @@ Date: July 2018 KernelVersion: 4.19.0 Contact: linux-pci@vger.kernel.org, rajatja@google.com Description: Total number of ERR_NONFATAL messages reported to rootport. + +PCIe AER ratelimits +------------------- + +These attributes show up under all the devices that are AER capable. +They represent configurable ratelimits of logs per error type. + +See Documentation/PCI/pcieaer-howto.rst for more info on ratelimits. + +What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_interval_ms +Date: May 2025 +KernelVersion: 6.16.0 +Contact: linux-pci@vger.kernel.org +Description: Writing 0 disables AER correctable error log ratelimiting. + Writing a positive value sets the ratelimit interval in ms. + Default is DEFAULT_RATELIMIT_INTERVAL (5000 ms). + +What: /sys/bus/pci/devices/<dev>/aer/correctable_ratelimit_burst +Date: May 2025 +KernelVersion: 6.16.0 +Contact: linux-pci@vger.kernel.org +Description: Ratelimit burst for correctable error logs. Writing a value + changes the number of errors (burst) allowed per interval + before ratelimiting. Reading gets the current ratelimit + burst. Default is DEFAULT_RATELIMIT_BURST (10). + +What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_interval_ms +Date: May 2025 +KernelVersion: 6.16.0 +Contact: linux-pci@vger.kernel.org +Description: Writing 0 disables AER non-fatal uncorrectable error log + ratelimiting. Writing a positive value sets the ratelimit + interval in ms. Default is DEFAULT_RATELIMIT_INTERVAL + (5000 ms). 
+ +What: /sys/bus/pci/devices/<dev>/aer/nonfatal_ratelimit_burst +Date: May 2025 +KernelVersion: 6.16.0 +Contact: linux-pci@vger.kernel.org +Description: Ratelimit burst for non-fatal uncorrectable error logs. + Writing a value changes the number of errors (burst) + allowed per interval before ratelimiting. Reading gets the + current ratelimit burst. Default is DEFAULT_RATELIMIT_BURST + (10). diff --git a/Documentation/PCI/pcieaer-howto.rst b/Documentation/PCI/pcieaer-howto.rst index f013f3b27c82..4b71e2f43ca7 100644 --- a/Documentation/PCI/pcieaer-howto.rst +++ b/Documentation/PCI/pcieaer-howto.rst @@ -85,12 +85,27 @@ In the example, 'Requester ID' means the ID of the device that sent the error message to the Root Port. Please refer to PCIe specs for other fields. +AER Ratelimits +-------------- + +Since error messages can be generated for each transaction, we may see +large volumes of errors reported. To prevent spammy devices from flooding +the console/stalling execution, messages are throttled by device and error +type (correctable vs. non-fatal uncorrectable). Fatal errors, including +DPC errors, are not ratelimited. + +AER uses the default ratelimit of DEFAULT_RATELIMIT_BURST (10 events) over +DEFAULT_RATELIMIT_INTERVAL (5 seconds). + +Ratelimits are exposed in the form of sysfs attributes and configurable. +See Documentation/ABI/testing/sysfs-bus-pci-devices-aer. + AER Statistics / Counters ------------------------- When PCIe AER errors are captured, the counters / statistics are also exposed in the form of sysfs attributes which are documented at -Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats +Documentation/ABI/testing/sysfs-bus-pci-devices-aer. 
Developer Guide =============== diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst index d75728eb05f8..3d56f94ac2ee 100644 --- a/Documentation/driver-api/driver-model/devres.rst +++ b/Documentation/driver-api/driver-model/devres.rst @@ -391,12 +391,11 @@ PCI devm_pci_remap_cfgspace() : ioremap PCI configuration space devm_pci_remap_cfg_resource() : ioremap PCI configuration space resource - pcim_enable_device() : after success, some PCI ops become managed + pcim_enable_device() : after success, the PCI device gets disabled automatically on driver detach pcim_iomap() : do iomap() on a single BAR pcim_iomap_regions() : do request_region() and iomap() on multiple BARs pcim_iomap_table() : array of mapped addresses indexed by BAR pcim_iounmap() : do iounmap() on a single BAR - pcim_iounmap_regions() : do iounmap() and release_region() on multiple BARs pcim_pin_device() : keep PCI device enabled after release pcim_set_mwi() : enable Memory-Write-Invalidate PCI transaction diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 0d619df03fa9..66ce6b81c7d9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -3717,7 +3717,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, rv = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (rv) { dev_warn(&pdev->dev, "64-bit DMA enable failed\n"); - goto setmask_err; + goto iomap_err; } /* Copy the info we may need later into the private data structure. 
*/ @@ -3733,7 +3733,7 @@ static int mtip_pci_probe(struct pci_dev *pdev, if (!dd->isr_workq) { dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance); rv = -ENOMEM; - goto setmask_err; + goto iomap_err; } memset(cpu_list, 0, sizeof(cpu_list)); @@ -3830,8 +3830,6 @@ msi_initialize_err: drop_cpu(dd->work[1].cpu_binding); drop_cpu(dd->work[2].cpu_binding); } -setmask_err: - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); iomap_err: kfree(dd); @@ -3907,7 +3905,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) pci_disable_msi(pdev); - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); put_disk(dd->disk); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index dd9e26b7b718..33b6e12c7d24 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -2030,9 +2030,6 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!iommu->dev) return -ENODEV; - /* Prevent binding other PCI device drivers to IOMMU devices */ - iommu->dev->match_driver = false; - /* ACPI _PRT won't have an IRQ for IOMMU */ iommu->dev->irq_managed = 1; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index b6851101ac36..69048869ef1c 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -369,7 +369,9 @@ void pci_bus_add_device(struct pci_dev *dev) pdev->name); } - dev->match_driver = !dn || of_device_is_available(dn); + if (!dn || of_device_is_available(dn)) + pci_dev_allow_binding(dev); + retval = device_attach(&dev->dev); if (retval < 0 && retval != -EPROBE_DEFER) pci_warn(dev, "device attach failed (%d)\n", retval); diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index ecc33f6789e3..d1cd48efad43 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -227,7 +227,7 @@ static const struct irq_domain_ops dw_pcie_msi_domain_ops = { int dw_pcie_allocate_domains(struct dw_pcie_rp *pp) { struct dw_pcie *pci = 
to_dw_pcie_from_pp(pp); - struct fwnode_handle *fwnode = of_node_to_fwnode(pci->dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(pci->dev->of_node); pp->irq_domain = irq_domain_create_linear(fwnode, pp->num_vectors, &dw_pcie_msi_domain_ops, pp); diff --git a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c index 0e088e74155d..6628eed9d26e 100644 --- a/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c +++ b/drivers/pci/controller/mobiveil/pcie-mobiveil-host.c @@ -435,7 +435,7 @@ static const struct irq_domain_ops msi_domain_ops = { static int mobiveil_allocate_msi_domains(struct mobiveil_pcie *pcie) { struct device *dev = &pcie->pdev->dev; - struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); struct mobiveil_msi *msi = &pcie->rp.msi; mutex_init(&msi->lock); diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 7bce327897c9..69a9c0a87639 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -247,7 +247,7 @@ static int xgene_allocate_domains(struct xgene_msi *msi) if (!msi->inner_domain) return -ENOMEM; - msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(msi->node), + msi->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(msi->node), &xgene_msi_domain_info, msi->inner_domain); diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index e1cee3c0575f..5fb3a2e0017e 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -164,7 +164,7 @@ static const struct irq_domain_ops msi_domain_ops = { static int altera_allocate_domains(struct altera_msi *msi) { - struct fwnode_handle *fwnode = of_node_to_fwnode(msi->pdev->dev.of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(msi->pdev->dev.of_node); msi->inner_domain = 
irq_domain_add_linear(NULL, msi->num_of_vectors, &msi_domain_ops, msi); diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index e19628e13898..924a81e073c0 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -581,7 +581,7 @@ static const struct irq_domain_ops msi_domain_ops = { static int brcm_allocate_domains(struct brcm_msi *msi) { - struct fwnode_handle *fwnode = of_node_to_fwnode(msi->np); + struct fwnode_handle *fwnode = of_fwnode_handle(msi->np); struct device *dev = msi->dev; msi->inner_domain = irq_domain_add_linear(NULL, msi->nr, &msi_domain_ops, msi); diff --git a/drivers/pci/controller/pcie-iproc-msi.c b/drivers/pci/controller/pcie-iproc-msi.c index 649fcb449f34..804b3a5787c5 100644 --- a/drivers/pci/controller/pcie-iproc-msi.c +++ b/drivers/pci/controller/pcie-iproc-msi.c @@ -451,7 +451,7 @@ static int iproc_msi_alloc_domains(struct device_node *node, if (!msi->inner_domain) return -ENOMEM; - msi->msi_domain = pci_msi_create_irq_domain(of_node_to_fwnode(node), + msi->msi_domain = pci_msi_create_irq_domain(of_fwnode_handle(node), &iproc_msi_domain_info, msi->inner_domain); if (!msi->msi_domain) { diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index 811a8b4acd50..efcc4a7c17be 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -485,7 +485,7 @@ static struct msi_domain_info mtk_msi_domain_info = { static int mtk_pcie_allocate_msi_domains(struct mtk_pcie_port *port) { - struct fwnode_handle *fwnode = of_node_to_fwnode(port->pcie->dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(port->pcie->dev->of_node); mutex_init(&port->lock); diff --git a/drivers/pci/controller/pcie-xilinx-dma-pl.c b/drivers/pci/controller/pcie-xilinx-dma-pl.c index dd117f07fc95..71cf13ae51c7 100644 --- a/drivers/pci/controller/pcie-xilinx-dma-pl.c +++ 
b/drivers/pci/controller/pcie-xilinx-dma-pl.c @@ -470,7 +470,7 @@ static int xilinx_pl_dma_pcie_init_msi_irq_domain(struct pl_dma_pcie *port) struct device *dev = port->dev; struct xilinx_msi *msi = &port->msi; int size = BITS_TO_LONGS(XILINX_NUM_MSI_IRQS) * sizeof(long); - struct fwnode_handle *fwnode = of_node_to_fwnode(port->dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(port->dev->of_node); msi->dev_domain = irq_domain_add_linear(NULL, XILINX_NUM_MSI_IRQS, &dev_msi_domain_ops, port); diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 8d6e2a89b067..9cf8a96f7bc4 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -495,7 +495,7 @@ static int nwl_pcie_init_msi_irq_domain(struct nwl_pcie *pcie) { #ifdef CONFIG_PCI_MSI struct device *dev = pcie->dev; - struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); struct nwl_msi *msi = &pcie->msi; msi->dev_domain = irq_domain_add_linear(NULL, INT_PCI_MSI_NR, diff --git a/drivers/pci/controller/plda/pcie-plda-host.c b/drivers/pci/controller/plda/pcie-plda-host.c index 4153214ca410..4c7a9fa311e3 100644 --- a/drivers/pci/controller/plda/pcie-plda-host.c +++ b/drivers/pci/controller/plda/pcie-plda-host.c @@ -150,7 +150,7 @@ static struct msi_domain_info plda_msi_domain_info = { static int plda_allocate_msi_domains(struct plda_pcie_rp *port) { struct device *dev = port->dev; - struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node); + struct fwnode_handle *fwnode = of_fwnode_handle(dev->of_node); struct plda_msi *msi = &port->msi; mutex_init(&port->msi.lock); diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c index 73047316889e..9f4190501395 100644 --- a/drivers/pci/devres.c +++ b/drivers/pci/devres.c @@ -6,30 +6,13 @@ /* * On the state of PCI's devres implementation: * - * The older devres API for PCI has two significant 
problems: + * The older PCI devres API has one significant problem: * - * 1. It is very strongly tied to the statically allocated mapping table in - * struct pcim_iomap_devres below. This is mostly solved in the sense of the - * pcim_ functions in this file providing things like ranged mapping by - * bypassing this table, whereas the functions that were present in the old - * API still enter the mapping addresses into the table for users of the old - * API. - * - * 2. The region-request-functions in pci.c do become managed IF the device has - * been enabled with pcim_enable_device() instead of pci_enable_device(). - * This resulted in the API becoming inconsistent: Some functions have an - * obviously managed counter-part (e.g., pci_iomap() <-> pcim_iomap()), - * whereas some don't and are never managed, while others don't and are - * _sometimes_ managed (e.g. pci_request_region()). - * - * Consequently, in the new API, region requests performed by the pcim_ - * functions are automatically cleaned up through the devres callback - * pcim_addr_resource_release(). - * - * Users of pcim_enable_device() + pci_*region*() are redirected in - * pci.c to the managed functions here in this file. This isn't exactly - * perfect, but the only alternative way would be to port ALL drivers - * using said combination to pcim_ functions. + * It is very strongly tied to the statically allocated mapping table in struct + * pcim_iomap_devres below. This is mostly solved in the sense of the pcim_ + * functions in this file providing things like ranged mapping by bypassing + * this table, whereas the functions that were present in the old API still + * enter the mapping addresses into the table for users of the old API. 
* * TODO: * Remove the legacy table entirely once all calls to pcim_iomap_table() in @@ -87,104 +70,6 @@ static inline void pcim_addr_devres_clear(struct pcim_addr_devres *res) res->bar = -1; } -/* - * The following functions, __pcim_*_region*, exist as counterparts to the - * versions from pci.c - which, unfortunately, can be in "hybrid mode", i.e., - * sometimes managed, sometimes not. - * - * To separate the APIs cleanly, we define our own, simplified versions here. - */ - -/** - * __pcim_request_region_range - Request a ranged region - * @pdev: PCI device the region belongs to - * @bar: BAR the range is within - * @offset: offset from the BAR's start address - * @maxlen: length in bytes, beginning at @offset - * @name: name of the driver requesting the resource - * @req_flags: flags for the request, e.g., for kernel-exclusive requests - * - * Returns: 0 on success, a negative error code on failure. - * - * Request a range within a device's PCI BAR. Sanity check the input. - */ -static int __pcim_request_region_range(struct pci_dev *pdev, int bar, - unsigned long offset, - unsigned long maxlen, - const char *name, int req_flags) -{ - resource_size_t start = pci_resource_start(pdev, bar); - resource_size_t len = pci_resource_len(pdev, bar); - unsigned long dev_flags = pci_resource_flags(pdev, bar); - - if (start == 0 || len == 0) /* Unused BAR. */ - return 0; - if (len <= offset) - return -EINVAL; - - start += offset; - len -= offset; - - if (len > maxlen && maxlen != 0) - len = maxlen; - - if (dev_flags & IORESOURCE_IO) { - if (!request_region(start, len, name)) - return -EBUSY; - } else if (dev_flags & IORESOURCE_MEM) { - if (!__request_mem_region(start, len, name, req_flags)) - return -EBUSY; - } else { - /* That's not a device we can request anything on. 
*/ - return -ENODEV; - } - - return 0; -} - -static void __pcim_release_region_range(struct pci_dev *pdev, int bar, - unsigned long offset, - unsigned long maxlen) -{ - resource_size_t start = pci_resource_start(pdev, bar); - resource_size_t len = pci_resource_len(pdev, bar); - unsigned long flags = pci_resource_flags(pdev, bar); - - if (len <= offset || start == 0) - return; - - if (len == 0 || maxlen == 0) /* This an unused BAR. Do nothing. */ - return; - - start += offset; - len -= offset; - - if (len > maxlen) - len = maxlen; - - if (flags & IORESOURCE_IO) - release_region(start, len); - else if (flags & IORESOURCE_MEM) - release_mem_region(start, len); -} - -static int __pcim_request_region(struct pci_dev *pdev, int bar, - const char *name, int flags) -{ - unsigned long offset = 0; - unsigned long len = pci_resource_len(pdev, bar); - - return __pcim_request_region_range(pdev, bar, offset, len, name, flags); -} - -static void __pcim_release_region(struct pci_dev *pdev, int bar) -{ - unsigned long offset = 0; - unsigned long len = pci_resource_len(pdev, bar); - - __pcim_release_region_range(pdev, bar, offset, len); -} - static void pcim_addr_resource_release(struct device *dev, void *resource_raw) { struct pci_dev *pdev = to_pci_dev(dev); @@ -192,11 +77,11 @@ static void pcim_addr_resource_release(struct device *dev, void *resource_raw) switch (res->type) { case PCIM_ADDR_DEVRES_TYPE_REGION: - __pcim_release_region(pdev, res->bar); + pci_release_region(pdev, res->bar); break; case PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING: pci_iounmap(pdev, res->baseaddr); - __pcim_release_region(pdev, res->bar); + pci_release_region(pdev, res->bar); break; case PCIM_ADDR_DEVRES_TYPE_MAPPING: pci_iounmap(pdev, res->baseaddr); @@ -735,7 +620,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, res->type = PCIM_ADDR_DEVRES_TYPE_REGION_MAPPING; res->bar = bar; - ret = __pcim_request_region(pdev, bar, name, 0); + ret = pci_request_region(pdev, bar, name); if (ret != 0) 
goto err_region; @@ -749,7 +634,7 @@ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar, return res->baseaddr; err_iomap: - __pcim_release_region(pdev, bar); + pci_release_region(pdev, bar); err_region: pcim_addr_devres_free(res); @@ -823,8 +708,20 @@ err: } EXPORT_SYMBOL(pcim_iomap_regions); -static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, - int request_flags) +/** + * pcim_request_region - Request a PCI BAR + * @pdev: PCI device to request region for + * @bar: Index of BAR to request + * @name: Name of the driver requesting the resource + * + * Returns: 0 on success, a negative error code on failure. + * + * Request region specified by @bar. + * + * The region will automatically be released on driver detach. If desired, + * release manually only with pcim_release_region(). + */ +int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) { int ret; struct pcim_addr_devres *res; @@ -838,7 +735,7 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, res->type = PCIM_ADDR_DEVRES_TYPE_REGION; res->bar = bar; - ret = __pcim_request_region(pdev, bar, name, request_flags); + ret = pci_request_region(pdev, bar, name); if (ret != 0) { pcim_addr_devres_free(res); return ret; @@ -847,45 +744,9 @@ static int _pcim_request_region(struct pci_dev *pdev, int bar, const char *name, devres_add(&pdev->dev, res); return 0; } - -/** - * pcim_request_region - Request a PCI BAR - * @pdev: PCI device to request region for - * @bar: Index of BAR to request - * @name: Name of the driver requesting the resource - * - * Returns: 0 on success, a negative error code on failure. - * - * Request region specified by @bar. - * - * The region will automatically be released on driver detach. If desired, - * release manually only with pcim_release_region(). 
- */ -int pcim_request_region(struct pci_dev *pdev, int bar, const char *name) -{ - return _pcim_request_region(pdev, bar, name, 0); -} EXPORT_SYMBOL(pcim_request_region); /** - * pcim_request_region_exclusive - Request a PCI BAR exclusively - * @pdev: PCI device to request region for - * @bar: Index of BAR to request - * @name: Name of the driver requesting the resource - * - * Returns: 0 on success, a negative error code on failure. - * - * Request region specified by @bar exclusively. - * - * The region will automatically be released on driver detach. If desired, - * release manually only with pcim_release_region(). - */ -int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *name) -{ - return _pcim_request_region(pdev, bar, name, IORESOURCE_EXCLUSIVE); -} - -/** * pcim_release_region - Release a PCI BAR * @pdev: PCI device to operate on * @bar: Index of BAR to release @@ -893,7 +754,7 @@ int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, const char *nam * Release a region manually that was previously requested by * pcim_request_region(). */ -void pcim_release_region(struct pci_dev *pdev, int bar) +static void pcim_release_region(struct pci_dev *pdev, int bar) { struct pcim_addr_devres res_searched; @@ -956,30 +817,6 @@ err: EXPORT_SYMBOL(pcim_request_all_regions); /** - * pcim_iounmap_regions - Unmap and release PCI BARs (DEPRECATED) - * @pdev: PCI device to map IO resources for - * @mask: Mask of BARs to unmap and release - * - * Unmap and release regions specified by @mask. - * - * This function is DEPRECATED. Do not use it in new code. - * Use pcim_iounmap_region() instead. 
- */ -void pcim_iounmap_regions(struct pci_dev *pdev, int mask) -{ - int i; - - for (i = 0; i < PCI_STD_NUM_BARS; i++) { - if (!mask_contains_bar(mask, i)) - continue; - - pcim_iounmap_region(pdev, i); - pcim_remove_bar_from_legacy_table(pdev, i); - } -} -EXPORT_SYMBOL(pcim_iounmap_regions); - -/** * pcim_iomap_range - Create a ranged __iomap mapping within a PCI BAR * @pdev: PCI device to map IO resources for * @bar: Index of the BAR diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index d30f1316c98e..fadcf98a8a66 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -20,13 +20,9 @@ #include <linux/types.h> #include <linux/kobject.h> #include <linux/sysfs.h> -#include <linux/pagemap.h> #include <linux/init.h> -#include <linux/mount.h> -#include <linux/namei.h> #include <linux/pci.h> #include <linux/pci_hotplug.h> -#include <linux/uaccess.h> #include "../pci.h" #include "cpci_hotplug.h" @@ -492,6 +488,75 @@ void pci_hp_destroy(struct hotplug_slot *slot) } EXPORT_SYMBOL_GPL(pci_hp_destroy); +static DECLARE_WAIT_QUEUE_HEAD(pci_hp_link_change_wq); + +/** + * pci_hp_ignore_link_change - begin code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the beginning of a code section causing spurious link changes on the + * Secondary Bus of @pdev, e.g. as a side effect of a Secondary Bus Reset, + * D3cold transition, firmware update or FPGA reconfiguration. + * + * Hotplug drivers can thus check whether such a code section is executing + * concurrently, await it with pci_hp_spurious_link_change() and ignore the + * resulting link change events. + * + * Must be paired with pci_hp_unignore_link_change(). May be called both + * from the PCI core and from Endpoint drivers. May be called for bridges + * which are not hotplug-capable, in which case it has no effect because + * no hotplug driver is bound to the bridge. 
+ */ +void pci_hp_ignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + smp_mb__after_atomic(); /* pairs with implied barrier of wait_event() */ +} + +/** + * pci_hp_unignore_link_change - end code section causing spurious link changes + * @pdev: PCI hotplug bridge + * + * Mark the end of a code section causing spurious link changes on the + * Secondary Bus of @pdev. Must be paired with pci_hp_ignore_link_change(). + */ +void pci_hp_unignore_link_change(struct pci_dev *pdev) +{ + set_bit(PCI_LINK_CHANGED, &pdev->priv_flags); + mb(); /* ensure pci_hp_spurious_link_change() sees either bit set */ + clear_bit(PCI_LINK_CHANGING, &pdev->priv_flags); + wake_up_all(&pci_hp_link_change_wq); +} + +/** + * pci_hp_spurious_link_change - check for spurious link changes + * @pdev: PCI hotplug bridge + * + * Check whether a code section is executing concurrently which is causing + * spurious link changes on the Secondary Bus of @pdev. Await the end of the + * code section if so. + * + * May be called by hotplug drivers to check whether a link change is spurious + * and can be ignored. + * + * Because a genuine link change may have occurred in-between a spurious link + * change and the invocation of this function, hotplug drivers should perform + * sanity checks such as retrieving the current link state and bringing down + * the slot if the link is down. + * + * Return: %true if such a code section has been executing concurrently, + * otherwise %false. Also return %true if such a code section has not been + * executing concurrently, but at least once since the last invocation of this + * function. 
+ */ +bool pci_hp_spurious_link_change(struct pci_dev *pdev) +{ + wait_event(pci_hp_link_change_wq, + !test_bit(PCI_LINK_CHANGING, &pdev->priv_flags)); + + return test_and_clear_bit(PCI_LINK_CHANGED, &pdev->priv_flags); +} + static int __init pci_hotplug_init(void) { int result; diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 273dd8c66f4e..debc79b0adfb 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -187,6 +187,7 @@ int pciehp_card_present(struct controller *ctrl); int pciehp_card_present_or_link_active(struct controller *ctrl); int pciehp_check_link_status(struct controller *ctrl); int pciehp_check_link_active(struct controller *ctrl); +bool pciehp_device_replaced(struct controller *ctrl); void pciehp_release_ctrl(struct controller *ctrl); int pciehp_sysfs_enable_slot(struct hotplug_slot *hotplug_slot); diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index 997841c69893..f59baa912970 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -284,35 +284,6 @@ static int pciehp_suspend(struct pcie_device *dev) return 0; } -static bool pciehp_device_replaced(struct controller *ctrl) -{ - struct pci_dev *pdev __free(pci_dev_put) = NULL; - u32 reg; - - if (pci_dev_is_disconnected(ctrl->pcie->port)) - return false; - - pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); - if (!pdev) - return true; - - if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || - reg != (pdev->vendor | (pdev->device << 16)) || - pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || - reg != (pdev->revision | (pdev->class << 8))) - return true; - - if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && - (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || - reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) - return true; - - if (pci_get_dsn(pdev) != ctrl->dsn) - return true; - - return false; -} - static int pciehp_resume_noirq(struct 
pcie_device *dev) { struct controller *ctrl = get_service_data(dev); diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index d603a7aa7483..bcc938d4420f 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -131,7 +131,7 @@ static void remove_board(struct controller *ctrl, bool safe_removal) INDICATOR_NOOP); /* Don't carry LBMS indications across */ - pcie_reset_lbms_count(ctrl->pcie->port); + pcie_reset_lbms(ctrl->pcie->port); } static int pciehp_enable_slot(struct controller *ctrl); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 8a09fb6083e2..ebd342bda235 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -563,20 +563,50 @@ void pciehp_power_off_slot(struct controller *ctrl) PCI_EXP_SLTCTL_PWR_OFF); } -static void pciehp_ignore_dpc_link_change(struct controller *ctrl, - struct pci_dev *pdev, int irq) +bool pciehp_device_replaced(struct controller *ctrl) +{ + struct pci_dev *pdev __free(pci_dev_put) = NULL; + u32 reg; + + if (pci_dev_is_disconnected(ctrl->pcie->port)) + return false; + + pdev = pci_get_slot(ctrl->pcie->port->subordinate, PCI_DEVFN(0, 0)); + if (!pdev) + return true; + + if (pci_read_config_dword(pdev, PCI_VENDOR_ID, &reg) || + reg != (pdev->vendor | (pdev->device << 16)) || + pci_read_config_dword(pdev, PCI_CLASS_REVISION, &reg) || + reg != (pdev->revision | (pdev->class << 8))) + return true; + + if (pdev->hdr_type == PCI_HEADER_TYPE_NORMAL && + (pci_read_config_dword(pdev, PCI_SUBSYSTEM_VENDOR_ID, &reg) || + reg != (pdev->subsystem_vendor | (pdev->subsystem_device << 16)))) + return true; + + if (pci_get_dsn(pdev) != ctrl->dsn) + return true; + + return false; +} + +static void pciehp_ignore_link_change(struct controller *ctrl, + struct pci_dev *pdev, int irq, + u16 ignored_events) { /* * Ignore link changes which occurred while waiting for DPC recovery. 
* Could be several if DPC triggered multiple times consecutively. + * Also ignore link changes caused by Secondary Bus Reset, etc. */ synchronize_hardirq(irq); - atomic_and(~PCI_EXP_SLTSTA_DLLSC, &ctrl->pending_events); + atomic_and(~ignored_events, &ctrl->pending_events); if (pciehp_poll_mode) pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, - PCI_EXP_SLTSTA_DLLSC); - ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored (recovered by DPC)\n", - slot_name(ctrl)); + ignored_events); + ctrl_info(ctrl, "Slot(%s): Link Down/Up ignored\n", slot_name(ctrl)); /* * If the link is unexpectedly down after successful recovery, @@ -584,8 +614,8 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * Synthesize it to ensure that it is acted on. */ down_read_nested(&ctrl->reset_lock, ctrl->depth); - if (!pciehp_check_link_active(ctrl)) - pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); + if (!pciehp_check_link_active(ctrl) || pciehp_device_replaced(ctrl)) + pciehp_request(ctrl, ignored_events); up_read(&ctrl->reset_lock); } @@ -732,12 +762,19 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) /* * Ignore Link Down/Up events caused by Downstream Port Containment - * if recovery from the error succeeded. + * if recovery succeeded, or caused by Secondary Bus Reset, + * suspend to D3cold, firmware update, FPGA reconfiguration, etc. 
*/ - if ((events & PCI_EXP_SLTSTA_DLLSC) && pci_dpc_recovered(pdev) && + if ((events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) && + (pci_dpc_recovered(pdev) || pci_hp_spurious_link_change(pdev)) && ctrl->state == ON_STATE) { - events &= ~PCI_EXP_SLTSTA_DLLSC; - pciehp_ignore_dpc_link_change(ctrl, pdev, irq); + u16 ignored_events = PCI_EXP_SLTSTA_DLLSC; + + if (!ctrl->inband_presence_disabled) + ignored_events |= events & PCI_EXP_SLTSTA_PDC; + + events &= ~ignored_events; + pciehp_ignore_link_change(ctrl, pdev, irq, ignored_events); } /* @@ -902,7 +939,6 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) { struct controller *ctrl = to_ctrl(hotplug_slot); struct pci_dev *pdev = ctrl_dev(ctrl); - u16 stat_mask = 0, ctrl_mask = 0; int rc; if (probe) @@ -910,23 +946,11 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, bool probe) down_write_nested(&ctrl->reset_lock, ctrl->depth); - if (!ATTN_BUTTN(ctrl)) { - ctrl_mask |= PCI_EXP_SLTCTL_PDCE; - stat_mask |= PCI_EXP_SLTSTA_PDC; - } - ctrl_mask |= PCI_EXP_SLTCTL_DLLSCE; - stat_mask |= PCI_EXP_SLTSTA_DLLSC; - - pcie_write_cmd(ctrl, 0, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0); + pci_hp_ignore_link_change(pdev); rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port); - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); - pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); - ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, - pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); + pci_hp_unignore_link_change(pdev); up_write(&ctrl->reset_lock); return rc; diff --git a/drivers/pci/iomap.c b/drivers/pci/iomap.c index fe706ed946df..ea86c282a386 100644 --- a/drivers/pci/iomap.c +++ b/drivers/pci/iomap.c @@ -25,10 +25,6 @@ * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR from offset to the end, pass %0 here. 
- * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). * */ void __iomem *pci_iomap_range(struct pci_dev *dev, int bar, @@ -76,10 +72,6 @@ EXPORT_SYMBOL(pci_iomap_range); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR from offset to the end, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). * */ void __iomem *pci_iomap_wc_range(struct pci_dev *dev, int bar, @@ -127,10 +119,6 @@ EXPORT_SYMBOL_GPL(pci_iomap_wc_range); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR without checking for its length first, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). If you need automatic cleanup, use pcim_iomap(). * */ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { @@ -152,10 +140,6 @@ EXPORT_SYMBOL(pci_iomap); * * @maxlen specifies the maximum length to map. If you want to get access to * the complete BAR without checking for its length first, pass %0 here. - * - * NOTE: - * This function is never managed, even if you initialized with - * pcim_enable_device(). 
* */ void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen) { diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index af370628e583..b78e0e417324 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -1676,24 +1676,19 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) return NULL; root_ops = kzalloc(sizeof(*root_ops), GFP_KERNEL); - if (!root_ops) { - kfree(ri); - return NULL; - } + if (!root_ops) + goto free_ri; ri->cfg = pci_acpi_setup_ecam_mapping(root); - if (!ri->cfg) { - kfree(ri); - kfree(root_ops); - return NULL; - } + if (!ri->cfg) + goto free_root_ops; root_ops->release_info = pci_acpi_generic_release_info; root_ops->prepare_resources = pci_acpi_root_prepare_resources; root_ops->pci_ops = (struct pci_ops *)&ri->cfg->ops->pci_ops; bus = acpi_pci_root_create(root, root_ops, &ri->common, ri->cfg); if (!bus) - return NULL; + goto free_cfg; /* If we must preserve the resource configuration, claim now */ host = pci_find_host_bridge(bus); @@ -1710,6 +1705,14 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) pcie_bus_configure_settings(child); return bus; + +free_cfg: + pci_ecam_free(ri->cfg); +free_root_ops: + kfree(root_ops); +free_ri: + kfree(ri); + return NULL; } void pcibios_add_bus(struct pci_bus *bus) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 082918ce03d8..67db34fd10ee 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -1501,7 +1501,7 @@ static int pci_bus_match(struct device *dev, const struct device_driver *drv) struct pci_driver *pci_drv; const struct pci_device_id *found_id; - if (!pci_dev->match_driver) + if (pci_dev_binding_disallowed(pci_dev)) return 0; pci_drv = (struct pci_driver *)to_pci_driver(drv); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 71a36f57ef57..268c69daa4d5 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1808,6 +1808,7 @@ const struct attribute_group 
*pci_dev_attr_groups[] = { &pcie_dev_attr_group, #ifdef CONFIG_PCIEAER &aer_stats_attr_group, + &aer_attr_group, #endif #ifdef CONFIG_PCIEASPM &aspm_ctrl_attr_group, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index cd51c6454d1b..da6bb9ef6dfc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3944,16 +3944,6 @@ void pci_release_region(struct pci_dev *pdev, int bar) if (!pci_bar_index_is_valid(bar)) return; - /* - * This is done for backwards compatibility, because the old PCI devres - * API had a mode in which the function became managed if it had been - * enabled with pcim_enable_device() instead of pci_enable_device(). - */ - if (pci_is_managed(pdev)) { - pcim_release_region(pdev, bar); - return; - } - if (pci_resource_len(pdev, bar) == 0) return; if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) @@ -3991,13 +3981,6 @@ static int __pci_request_region(struct pci_dev *pdev, int bar, if (!pci_bar_index_is_valid(bar)) return -EINVAL; - if (pci_is_managed(pdev)) { - if (exclusive == IORESOURCE_EXCLUSIVE) - return pcim_request_region_exclusive(pdev, bar, name); - - return pcim_request_region(pdev, bar, name); - } - if (pci_resource_len(pdev, bar) == 0) return 0; @@ -4034,11 +4017,6 @@ err_out: * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_region(struct pci_dev *pdev, int bar, const char *name) { @@ -4091,11 +4069,6 @@ err_out: * @name: Name of the driver requesting the resources * * Returns: 0 on success, negative error code on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! 
If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_selected_regions(struct pci_dev *pdev, int bars, const char *name) @@ -4111,11 +4084,6 @@ EXPORT_SYMBOL(pci_request_selected_regions); * @name: name of the driver requesting the resources * * Returns: 0 on success, negative error code on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_selected_regions_exclusive(struct pci_dev *pdev, int bars, const char *name) @@ -4151,11 +4119,6 @@ EXPORT_SYMBOL(pci_release_regions); * * Returns 0 on success, or %EBUSY on error. A warning * message is also printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_regions(struct pci_dev *pdev, const char *name) { @@ -4180,11 +4143,6 @@ EXPORT_SYMBOL(pci_request_regions); * * Returns 0 on success, or %EBUSY on error. A warning message is also * printed on failure. - * - * NOTE: - * This is a "hybrid" function: It's normally unmanaged, but becomes managed - * when pcim_enable_device() has been called in advance. This hybrid feature is - * DEPRECATED! If you want managed cleanup, use the pcim_* functions instead. */ int pci_request_regions_exclusive(struct pci_dev *pdev, const char *name) { @@ -4725,6 +4683,11 @@ static int pcie_wait_for_link_status(struct pci_dev *pdev, * @pdev: Device whose link to retrain. * @use_lt: Use the LT bit if TRUE, or the DLLLA bit if FALSE, for status. * + * Trigger retraining of the PCIe Link and wait for the completion of the + * retraining. 
As link retraining is known to assert LBMS and may change + * the Link Speed, LBMS is cleared after the retraining and the Link Speed + * of the subordinate bus is updated. + * * Retrain completion status is retrieved from the Link Status Register * according to @use_lt. It is not verified whether the use of the DLLLA * bit is valid. @@ -4764,7 +4727,19 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt) * to track link speed or width changes made by hardware itself * in attempt to correct unreliable link operation. */ - pcie_reset_lbms_count(pdev); + pcie_reset_lbms(pdev); + + /* + * Ensure the Link Speed updates after retraining in case the Link + * Speed was changed because of the retraining. While the bwctrl's + * IRQ handler normally picks up the new Link Speed, clearing LBMS + * races with the IRQ handler reading the Link Status register and + * can result in the handler returning early without updating the + * Link Speed. + */ + if (pdev->subordinate) + pcie_update_link_speed(pdev->subordinate); + return rc; } @@ -4961,7 +4936,7 @@ int pci_bridge_wait_for_secondary_bus(struct pci_dev *dev, char *reset_type) delay); if (!pcie_wait_for_link_delay(dev, true, delay)) { /* Did not train, no need to wait any further */ - pci_info(dev, "Data Link Layer Link Active not set in 1000 msec\n"); + pci_info(dev, "Data Link Layer Link Active not set in %d msec\n", delay); return -ENOTTY; } @@ -6813,11 +6788,6 @@ int __weak pci_ext_cfg_avail(void) return 1; } -void __weak pci_fixup_cardbus(struct pci_bus *bus) -{ -} -EXPORT_SYMBOL(pci_fixup_cardbus); - static int __init pci_setup(char *str) { while (str) { diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 49165b739138..5cf429e0016e 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -228,6 +228,7 @@ static inline int pci_proc_detach_bus(struct pci_bus *bus) { return 0; } /* Functions for PCI Hotplug drivers to use */ int pci_hp_add_bridge(struct pci_dev *dev); +bool pci_hp_spurious_link_change(struct
pci_dev *pdev); #if defined(CONFIG_SYSFS) && defined(HAVE_PCI_LEGACY) void pci_create_legacy_files(struct pci_bus *bus); @@ -558,6 +559,10 @@ static inline int pci_dev_set_disconnected(struct pci_dev *dev, void *unused) #define PCI_DPC_RECOVERED 1 #define PCI_DPC_RECOVERING 2 #define PCI_DEV_REMOVED 3 +#define PCI_LINK_CHANGED 4 +#define PCI_LINK_CHANGING 5 +#define PCI_LINK_LBMS_SEEN 6 +#define PCI_DEV_ALLOW_BINDING 7 static inline void pci_dev_assign_added(struct pci_dev *dev) { @@ -581,6 +586,16 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) return test_and_set_bit(PCI_DEV_REMOVED, &dev->priv_flags); } +static inline void pci_dev_allow_binding(struct pci_dev *dev) +{ + set_bit(PCI_DEV_ALLOW_BINDING, &dev->priv_flags); +} + +static inline bool pci_dev_binding_disallowed(struct pci_dev *dev) +{ + return !test_bit(PCI_DEV_ALLOW_BINDING, &dev->priv_flags); +} + #ifdef CONFIG_PCIEAER #include <linux/aer.h> @@ -588,12 +603,15 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev) struct aer_err_info { struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES]; + int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES]; int error_dev_num; + const char *level; /* printk level */ unsigned int id:16; unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */ - unsigned int __pad1:5; + unsigned int root_ratelimit_print:1; /* 0=skip, 1=print */ + unsigned int __pad1:4; unsigned int multi_error_valid:1; unsigned int first_error:5; @@ -605,15 +623,16 @@ struct aer_err_info { struct pcie_tlp_log tlp; /* TLP Header */ }; -int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info); -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); +int aer_get_device_error_info(struct aer_err_info *info, int i); +void aer_print_error(struct aer_err_info *info, int i); int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, unsigned int tlp_len, bool flit, struct pcie_tlp_log *log); unsigned int aer_tlp_log_len(struct pci_dev 
*dev, u32 aercc); void pcie_print_tlp_log(const struct pci_dev *dev, - const struct pcie_tlp_log *log, const char *pfx); + const struct pcie_tlp_log *log, const char *level, + const char *pfx); #endif /* CONFIG_PCIEAER */ #ifdef CONFIG_PCIEPORTBUS @@ -825,14 +844,9 @@ static inline void pcie_ecrc_get_policy(char *str) { } #endif #ifdef CONFIG_PCIEPORTBUS -void pcie_reset_lbms_count(struct pci_dev *port); -int pcie_lbms_count(struct pci_dev *port, unsigned long *val); +void pcie_reset_lbms(struct pci_dev *port); #else -static inline void pcie_reset_lbms_count(struct pci_dev *port) {} -static inline int pcie_lbms_count(struct pci_dev *port, unsigned long *val) -{ - return -EOPNOTSUPP; -} +static inline void pcie_reset_lbms(struct pci_dev *port) {} #endif struct pci_dev_reset_methods { @@ -962,6 +976,7 @@ void pci_no_aer(void); void pci_aer_init(struct pci_dev *dev); void pci_aer_exit(struct pci_dev *dev); extern const struct attribute_group aer_stats_attr_group; +extern const struct attribute_group aer_attr_group; void pci_aer_clear_fatal_status(struct pci_dev *dev); int pci_aer_clear_status(struct pci_dev *dev); int pci_aer_raw_clear_status(struct pci_dev *dev); @@ -1060,11 +1075,6 @@ static inline pci_power_t mid_pci_get_power_state(struct pci_dev *pdev) } #endif -int pcim_intx(struct pci_dev *dev, int enable); -int pcim_request_region_exclusive(struct pci_dev *pdev, int bar, - const char *name); -void pcim_release_region(struct pci_dev *pdev, int bar); - /* * Config Address for PCI Configuration Mechanism #1 * diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index a1cf8c7ef628..70ac66188367 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -28,6 +28,7 @@ #include <linux/interrupt.h> #include <linux/delay.h> #include <linux/kfifo.h> +#include <linux/ratelimit.h> #include <linux/slab.h> #include <acpi/apei.h> #include <acpi/ghes.h> @@ -54,8 +55,8 @@ struct aer_rpc { DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX); 
}; -/* AER stats for the device */ -struct aer_stats { +/* AER info for the device */ +struct aer_info { /* * Fields for all AER capable devices. They indicate the errors @@ -88,6 +89,10 @@ struct aer_stats { u64 rootport_total_cor_errs; u64 rootport_total_fatal_errs; u64 rootport_total_nonfatal_errs; + + /* Ratelimits for errors */ + struct ratelimit_state correctable_ratelimit; + struct ratelimit_state nonfatal_ratelimit; }; #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ @@ -377,7 +382,12 @@ void pci_aer_init(struct pci_dev *dev) if (!dev->aer_cap) return; - dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); + dev->aer_info = kzalloc(sizeof(*dev->aer_info), GFP_KERNEL); + + ratelimit_state_init(&dev->aer_info->correctable_ratelimit, + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); + ratelimit_state_init(&dev->aer_info->nonfatal_ratelimit, + DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); /* * We save/restore PCI_ERR_UNCOR_MASK, PCI_ERR_UNCOR_SEVER, @@ -398,8 +408,8 @@ void pci_aer_init(struct pci_dev *dev) void pci_aer_exit(struct pci_dev *dev) { - kfree(dev->aer_stats); - dev->aer_stats = NULL; + kfree(dev->aer_info); + dev->aer_info = NULL; } #define AER_AGENT_RECEIVER 0 @@ -537,10 +547,10 @@ static const char *aer_agent_string[] = { { \ unsigned int i; \ struct pci_dev *pdev = to_pci_dev(dev); \ - u64 *stats = pdev->aer_stats->stats_array; \ + u64 *stats = pdev->aer_info->stats_array; \ size_t len = 0; \ \ - for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\ + for (i = 0; i < ARRAY_SIZE(pdev->aer_info->stats_array); i++) { \ if (strings_array[i]) \ len += sysfs_emit_at(buf, len, "%s %llu\n", \ strings_array[i], \ @@ -551,7 +561,7 @@ static const char *aer_agent_string[] = { i, stats[i]); \ } \ len += sysfs_emit_at(buf, len, "TOTAL_%s %llu\n", total_string, \ - pdev->aer_stats->total_field); \ + pdev->aer_info->total_field); \ return len; \ } \ static DEVICE_ATTR_RO(name) @@ -572,7 +582,7 @@ 
aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs, char *buf) \ { \ struct pci_dev *pdev = to_pci_dev(dev); \ - return sysfs_emit(buf, "%llu\n", pdev->aer_stats->field); \ + return sysfs_emit(buf, "%llu\n", pdev->aer_info->field); \ } \ static DEVICE_ATTR_RO(name) @@ -599,7 +609,7 @@ static umode_t aer_stats_attrs_are_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct pci_dev *pdev = to_pci_dev(dev); - if (!pdev->aer_stats) + if (!pdev->aer_info) return 0; if ((a == &dev_attr_aer_rootport_total_err_cor.attr || @@ -617,31 +627,136 @@ const struct attribute_group aer_stats_attr_group = { .is_visible = aer_stats_attrs_are_visible, }; +/* + * Ratelimit interval + * <=0: disabled with ratelimit.interval = 0 + * >0: enabled with ratelimit.interval in ms + */ +#define aer_ratelimit_interval_attr(name, ratelimit) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + \ + return sysfs_emit(buf, "%d\n", \ + pdev->aer_info->ratelimit.interval); \ + } \ + \ + static ssize_t \ + name##_store(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t count) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + int interval; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + if (kstrtoint(buf, 0, &interval) < 0) \ + return -EINVAL; \ + \ + if (interval <= 0) \ + interval = 0; \ + else \ + interval = msecs_to_jiffies(interval); \ + \ + pdev->aer_info->ratelimit.interval = interval; \ + \ + return count; \ + } \ + static DEVICE_ATTR_RW(name); + +#define aer_ratelimit_burst_attr(name, ratelimit) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *attr, \ + char *buf) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + \ + return sysfs_emit(buf, "%d\n", \ + pdev->aer_info->ratelimit.burst); \ + } \ + \ + static ssize_t \ + name##_store(struct device *dev, struct device_attribute *attr, \ + 
const char *buf, size_t count) \ + { \ + struct pci_dev *pdev = to_pci_dev(dev); \ + int burst; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + if (kstrtoint(buf, 0, &burst) < 0) \ + return -EINVAL; \ + \ + pdev->aer_info->ratelimit.burst = burst; \ + \ + return count; \ + } \ + static DEVICE_ATTR_RW(name); + +#define aer_ratelimit_attrs(name) \ + aer_ratelimit_interval_attr(name##_ratelimit_interval_ms, \ + name##_ratelimit) \ + aer_ratelimit_burst_attr(name##_ratelimit_burst, \ + name##_ratelimit) + +aer_ratelimit_attrs(correctable) +aer_ratelimit_attrs(nonfatal) + +static struct attribute *aer_attrs[] = { + &dev_attr_correctable_ratelimit_interval_ms.attr, + &dev_attr_correctable_ratelimit_burst.attr, + &dev_attr_nonfatal_ratelimit_interval_ms.attr, + &dev_attr_nonfatal_ratelimit_burst.attr, + NULL +}; + +static umode_t aer_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = kobj_to_dev(kobj); + struct pci_dev *pdev = to_pci_dev(dev); + + if (!pdev->aer_info) + return 0; + + return a->mode; +} + +const struct attribute_group aer_attr_group = { + .name = "aer", + .attrs = aer_attrs, + .is_visible = aer_attrs_are_visible, +}; + static void pci_dev_aer_stats_incr(struct pci_dev *pdev, struct aer_err_info *info) { unsigned long status = info->status & ~info->mask; int i, max = -1; u64 *counter = NULL; - struct aer_stats *aer_stats = pdev->aer_stats; + struct aer_info *aer_info = pdev->aer_info; - if (!aer_stats) + if (!aer_info) return; switch (info->severity) { case AER_CORRECTABLE: - aer_stats->dev_total_cor_errs++; - counter = &aer_stats->dev_cor_errs[0]; + aer_info->dev_total_cor_errs++; + counter = &aer_info->dev_cor_errs[0]; max = AER_MAX_TYPEOF_COR_ERRS; break; case AER_NONFATAL: - aer_stats->dev_total_nonfatal_errs++; - counter = &aer_stats->dev_nonfatal_errs[0]; + aer_info->dev_total_nonfatal_errs++; + counter = &aer_info->dev_nonfatal_errs[0]; max = AER_MAX_TYPEOF_UNCOR_ERRS; break; case 
AER_FATAL: - aer_stats->dev_total_fatal_errs++; - counter = &aer_stats->dev_fatal_errs[0]; + aer_info->dev_total_fatal_errs++; + counter = &aer_info->dev_fatal_errs[0]; max = AER_MAX_TYPEOF_UNCOR_ERRS; break; } @@ -653,37 +768,46 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev, static void pci_rootport_aer_stats_incr(struct pci_dev *pdev, struct aer_err_source *e_src) { - struct aer_stats *aer_stats = pdev->aer_stats; + struct aer_info *aer_info = pdev->aer_info; - if (!aer_stats) + if (!aer_info) return; if (e_src->status & PCI_ERR_ROOT_COR_RCV) - aer_stats->rootport_total_cor_errs++; + aer_info->rootport_total_cor_errs++; if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - aer_stats->rootport_total_fatal_errs++; + aer_info->rootport_total_fatal_errs++; else - aer_stats->rootport_total_nonfatal_errs++; + aer_info->rootport_total_nonfatal_errs++; + } +} + +static int aer_ratelimit(struct pci_dev *dev, unsigned int severity) +{ + switch (severity) { + case AER_NONFATAL: + return __ratelimit(&dev->aer_info->nonfatal_ratelimit); + case AER_CORRECTABLE: + return __ratelimit(&dev->aer_info->correctable_ratelimit); + default: + return 1; /* Don't ratelimit fatal errors */ } } -static void __aer_print_error(struct pci_dev *dev, - struct aer_err_info *info) +static void __aer_print_error(struct pci_dev *dev, struct aer_err_info *info) { const char **strings; unsigned long status = info->status & ~info->mask; - const char *level, *errmsg; + const char *level = info->level; + const char *errmsg; int i; - if (info->severity == AER_CORRECTABLE) { + if (info->severity == AER_CORRECTABLE) strings = aer_correctable_error_string; - level = KERN_WARNING; - } else { + else strings = aer_uncorrectable_error_string; - level = KERN_ERR; - } for_each_set_bit(i, &status, 32) { errmsg = strings[i]; @@ -693,14 +817,39 @@ static void __aer_print_error(struct pci_dev *dev, aer_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg, 
info->first_error == i ? " (First)" : ""); } - pci_dev_aer_stats_incr(dev, info); } -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) +static void aer_print_source(struct pci_dev *dev, struct aer_err_info *info, + bool found) +{ + u16 source = info->id; + + pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d%s\n", + info->multi_error_valid ? "Multiple " : "", + aer_error_severity_string[info->severity], + pci_domain_nr(dev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source), + found ? "" : " (no details found"); +} + +void aer_print_error(struct aer_err_info *info, int i) { - int layer, agent; - int id = pci_dev_id(dev); - const char *level; + struct pci_dev *dev; + int layer, agent, id; + const char *level = info->level; + + if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES)) + return; + + dev = info->dev[i]; + id = pci_dev_id(dev); + + pci_dev_aer_stats_incr(dev, info); + trace_aer_event(pci_name(dev), (info->status & ~info->mask), + info->severity, info->tlp_header_valid, &info->tlp); + + if (!info->ratelimit_print[i]) + return; if (!info->status) { pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n", @@ -711,8 +860,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) layer = AER_GET_LAYER_ERROR(info->severity, info->status); agent = AER_GET_AGENT(info->severity, info->status); - level = (info->severity == AER_CORRECTABLE) ? 
KERN_WARNING : KERN_ERR; - aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n", aer_error_severity_string[info->severity], aer_error_layer[layer], aer_agent_string[agent]); @@ -723,26 +870,11 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info) __aer_print_error(dev, info); if (info->tlp_header_valid) - pcie_print_tlp_log(dev, &info->tlp, dev_fmt(" ")); + pcie_print_tlp_log(dev, &info->tlp, level, dev_fmt(" ")); out: if (info->id && info->error_dev_num > 1 && info->id == id) pci_err(dev, " Error of this Agent is reported first\n"); - - trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask), - info->severity, info->tlp_header_valid, &info->tlp); -} - -static void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info) -{ - u8 bus = info->id >> 8; - u8 devfn = info->id & 0xff; - - pci_info(dev, "%s%s error message received from %04x:%02x:%02x.%d\n", - info->multi_error_valid ? "Multiple " : "", - aer_error_severity_string[info->severity], - pci_domain_nr(dev->bus), bus, PCI_SLOT(devfn), - PCI_FUNC(devfn)); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -765,40 +897,48 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity, { int layer, agent, tlp_header_valid = 0; u32 status, mask; - struct aer_err_info info; + struct aer_err_info info = { + .severity = aer_severity, + .first_error = PCI_ERR_CAP_FEP(aer->cap_control), + }; if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; + info.level = KERN_WARNING; } else { status = aer->uncor_status; mask = aer->uncor_mask; + info.level = KERN_ERR; tlp_header_valid = status & AER_LOG_TLP_MASKS; } - layer = AER_GET_LAYER_ERROR(aer_severity, status); - agent = AER_GET_AGENT(aer_severity, status); - - memset(&info, 0, sizeof(info)); - info.severity = aer_severity; info.status = status; info.mask = mask; - info.first_error = PCI_ERR_CAP_FEP(aer->cap_control); - pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask); + 
pci_dev_aer_stats_incr(dev, &info); + trace_aer_event(pci_name(dev), (status & ~mask), + aer_severity, tlp_header_valid, &aer->header_log); + + if (!aer_ratelimit(dev, info.severity)) + return; + + layer = AER_GET_LAYER_ERROR(aer_severity, status); + agent = AER_GET_AGENT(aer_severity, status); + + aer_printk(info.level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", + status, mask); __aer_print_error(dev, &info); - pci_err(dev, "aer_layer=%s, aer_agent=%s\n", - aer_error_layer[layer], aer_agent_string[agent]); + aer_printk(info.level, dev, "aer_layer=%s, aer_agent=%s\n", + aer_error_layer[layer], aer_agent_string[agent]); if (aer_severity != AER_CORRECTABLE) - pci_err(dev, "aer_uncor_severity: 0x%08x\n", - aer->uncor_severity); + aer_printk(info.level, dev, "aer_uncor_severity: 0x%08x\n", + aer->uncor_severity); if (tlp_header_valid) - pcie_print_tlp_log(dev, &aer->header_log, dev_fmt(" ")); - - trace_aer_event(dev_name(&dev->dev), (status & ~mask), - aer_severity, tlp_header_valid, &aer->header_log); + pcie_print_tlp_log(dev, &aer->header_log, info.level, + dev_fmt(" ")); } EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); @@ -809,12 +949,27 @@ EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL"); */ static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { - if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { - e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); - e_info->error_dev_num++; - return 0; + int i = e_info->error_dev_num; + + if (i >= AER_MAX_MULTI_ERR_DEVICES) + return -ENOSPC; + + e_info->dev[i] = pci_dev_get(dev); + e_info->error_dev_num++; + + /* + * Ratelimit AER log messages. "dev" is either the source + * identified by the root's Error Source ID or it has an unmasked + * error logged in its own AER Capability. Messages are emitted + * when "ratelimit_print[i]" is non-zero. If we will print detail + * for a downstream device, make sure we print the Error Source ID + * from the root as well. 
+ */ + if (aer_ratelimit(dev, e_info->severity)) { + e_info->ratelimit_print[i] = 1; + e_info->root_ratelimit_print = 1; } - return -ENOSPC; + return 0; } /** @@ -908,7 +1063,7 @@ static int find_device_iter(struct pci_dev *dev, void *data) * e_info->error_dev_num and e_info->dev[], based on the given information. */ static bool find_source_device(struct pci_dev *parent, - struct aer_err_info *e_info) + struct aer_err_info *e_info) { struct pci_dev *dev = parent; int result; @@ -926,15 +1081,8 @@ static bool find_source_device(struct pci_dev *parent, else pci_walk_bus(parent->subordinate, find_device_iter, e_info); - if (!e_info->error_dev_num) { - u8 bus = e_info->id >> 8; - u8 devfn = e_info->id & 0xff; - - pci_info(parent, "found no error details for %04x:%02x:%02x.%d\n", - pci_domain_nr(parent->bus), bus, PCI_SLOT(devfn), - PCI_FUNC(devfn)); + if (!e_info->error_dev_num) return false; - } return true; } @@ -1141,9 +1289,10 @@ static void aer_recover_work_func(struct work_struct *work) pdev = pci_get_domain_bus_and_slot(entry.domain, entry.bus, entry.devfn); if (!pdev) { - pr_err("no pci_dev for %04x:%02x:%02x.%x\n", - entry.domain, entry.bus, - PCI_SLOT(entry.devfn), PCI_FUNC(entry.devfn)); + pr_err_ratelimited("%04x:%02x:%02x.%x: no pci_dev found\n", + entry.domain, entry.bus, + PCI_SLOT(entry.devfn), + PCI_FUNC(entry.devfn)); continue; } pci_print_aer(pdev, entry.severity, entry.regs); @@ -1199,19 +1348,26 @@ EXPORT_SYMBOL_GPL(aer_recover_queue); /** * aer_get_device_error_info - read error status from dev and store it to info - * @dev: pointer to the device expected to have an error record * @info: pointer to structure to store the error record + * @i: index into info->dev[] * * Return: 1 on success, 0 on error. * * Note that @info is reused among all error devices. Clear fields properly. 
*/ -int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) +int aer_get_device_error_info(struct aer_err_info *info, int i) { - int type = pci_pcie_type(dev); - int aer = dev->aer_cap; + struct pci_dev *dev; + int type, aer; u32 aercc; + if (i >= AER_MAX_MULTI_ERR_DEVICES) + return 0; + + dev = info->dev[i]; + aer = dev->aer_cap; + type = pci_pcie_type(dev); + /* Must reset in this function */ info->status = 0; info->tlp_header_valid = 0; @@ -1263,63 +1419,87 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info) /* Report all before handling them, to not lose records by reset etc. */ for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (aer_get_device_error_info(e_info->dev[i], e_info)) - aer_print_error(e_info->dev[i], e_info); + if (aer_get_device_error_info(e_info, i)) + aer_print_error(e_info, i); } for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) { - if (aer_get_device_error_info(e_info->dev[i], e_info)) + if (aer_get_device_error_info(e_info, i)) handle_error_source(e_info->dev[i], e_info); } } /** - * aer_isr_one_error - consume an error detected by Root Port - * @rpc: pointer to the Root Port which holds an error - * @e_src: pointer to an error source + * aer_isr_one_error_type - consume a Correctable or Uncorrectable Error + * detected by Root Port or RCEC + * @root: pointer to Root Port or RCEC that signaled AER interrupt + * @info: pointer to AER error info */ -static void aer_isr_one_error(struct aer_rpc *rpc, - struct aer_err_source *e_src) +static void aer_isr_one_error_type(struct pci_dev *root, + struct aer_err_info *info) { - struct pci_dev *pdev = rpc->rpd; - struct aer_err_info e_info; + bool found; - pci_rootport_aer_stats_incr(pdev, e_src); + found = find_source_device(root, info); /* - * There is a possibility that both correctable error and - * uncorrectable error being logged. Report correctable error first. 
+ * If we're going to log error messages, we've already set + * "info->root_ratelimit_print" and "info->ratelimit_print[i]" to + * non-zero (which enables printing) because this is either an + * ERR_FATAL or we found a device with an error logged in its AER + * Capability. + * + * If we didn't find the Error Source device, at least log the + * Requester ID from the ERR_* Message received by the Root Port or + * RCEC, ratelimited by the RP or RCEC. */ - if (e_src->status & PCI_ERR_ROOT_COR_RCV) { - e_info.id = ERR_COR_ID(e_src->id); - e_info.severity = AER_CORRECTABLE; - - if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) - e_info.multi_error_valid = 1; - else - e_info.multi_error_valid = 0; - aer_print_port_info(pdev, &e_info); + if (info->root_ratelimit_print || + (!found && aer_ratelimit(root, info->severity))) + aer_print_source(root, info, found); - if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); - } - - if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { - e_info.id = ERR_UNCOR_ID(e_src->id); + if (found) + aer_process_err_devices(info); +} - if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - e_info.severity = AER_FATAL; - else - e_info.severity = AER_NONFATAL; +/** + * aer_isr_one_error - consume error(s) signaled by an AER interrupt from + * Root Port or RCEC + * @root: pointer to Root Port or RCEC that signaled AER interrupt + * @e_src: pointer to an error source + */ +static void aer_isr_one_error(struct pci_dev *root, + struct aer_err_source *e_src) +{ + u32 status = e_src->status; - if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) - e_info.multi_error_valid = 1; - else - e_info.multi_error_valid = 0; + pci_rootport_aer_stats_incr(root, e_src); - aer_print_port_info(pdev, &e_info); + /* + * Both a correctable error and an uncorrectable error may be + * logged at the same time. Report the correctable error first.
+ */ + if (status & PCI_ERR_ROOT_COR_RCV) { + int multi = status & PCI_ERR_ROOT_MULTI_COR_RCV; + struct aer_err_info e_info = { + .id = ERR_COR_ID(e_src->id), + .severity = AER_CORRECTABLE, + .level = KERN_WARNING, + .multi_error_valid = multi ? 1 : 0, + }; + + aer_isr_one_error_type(root, &e_info); + } - if (find_source_device(pdev, &e_info)) - aer_process_err_devices(&e_info); + if (status & PCI_ERR_ROOT_UNCOR_RCV) { + int fatal = status & PCI_ERR_ROOT_FATAL_RCV; + int multi = status & PCI_ERR_ROOT_MULTI_UNCOR_RCV; + struct aer_err_info e_info = { + .id = ERR_UNCOR_ID(e_src->id), + .severity = fatal ? AER_FATAL : AER_NONFATAL, + .level = KERN_ERR, + .multi_error_valid = multi ? 1 : 0, + }; + + aer_isr_one_error_type(root, &e_info); } } @@ -1340,7 +1520,7 @@ static irqreturn_t aer_isr(int irq, void *context) return IRQ_NONE; while (kfifo_get(&rpc->aer_fifo, &e_src)) - aer_isr_one_error(rpc, &e_src); + aer_isr_one_error(rpc->rpd, &e_src); return IRQ_HANDLED; } diff --git a/drivers/pci/pcie/bwctrl.c b/drivers/pci/pcie/bwctrl.c index d8d2aa85a229..36f939f23d34 100644 --- a/drivers/pci/pcie/bwctrl.c +++ b/drivers/pci/pcie/bwctrl.c @@ -38,24 +38,14 @@ /** * struct pcie_bwctrl_data - PCIe bandwidth controller * @set_speed_mutex: Serializes link speed changes - * @lbms_count: Count for LBMS (since last reset) * @cdev: Thermal cooling device associated with the port */ struct pcie_bwctrl_data { struct mutex set_speed_mutex; - atomic_t lbms_count; struct thermal_cooling_device *cdev; }; -/* - * Prevent port removal during LBMS count accessors and Link Speed changes. - * - * These have to be differentiated because pcie_bwctrl_change_speed() calls - * pcie_retrain_link() which uses LBMS count reset accessor on success - * (using just one rwsem triggers "possible recursive locking detected" - * warning). - */ -static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem); +/* Prevent port removal during Link Speed changes. 
*/ static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem); static bool pcie_valid_speed(enum pci_bus_speed speed) @@ -127,18 +117,7 @@ static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool if (ret != PCIBIOS_SUCCESSFUL) return pcibios_err_to_errno(ret); - ret = pcie_retrain_link(port, use_lt); - if (ret < 0) - return ret; - - /* - * Ensure link speed updates also with platforms that have problems - * with notifications. - */ - if (port->subordinate) - pcie_update_link_speed(port->subordinate); - - return 0; + return pcie_retrain_link(port, use_lt); } /** @@ -202,15 +181,14 @@ int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req, static void pcie_bwnotif_enable(struct pcie_device *srv) { - struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status; int ret; - /* Count LBMS seen so far as one */ + /* Note if LBMS has been seen so far */ ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS) - atomic_inc(&data->lbms_count); + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); pcie_capability_set_word(port, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE); @@ -233,7 +211,6 @@ static void pcie_bwnotif_disable(struct pci_dev *port) static irqreturn_t pcie_bwnotif_irq(int irq, void *context) { struct pcie_device *srv = context; - struct pcie_bwctrl_data *data = srv->port->link_bwctrl; struct pci_dev *port = srv->port; u16 link_status, events; int ret; @@ -247,7 +224,7 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) return IRQ_NONE; if (events & PCI_EXP_LNKSTA_LBMS) - atomic_inc(&data->lbms_count); + set_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); @@ -262,31 +239,10 @@ static irqreturn_t pcie_bwnotif_irq(int irq, void *context) return IRQ_HANDLED; } -void pcie_reset_lbms_count(struct pci_dev *port) +void 
pcie_reset_lbms(struct pci_dev *port) { - struct pcie_bwctrl_data *data; - - guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); - data = port->link_bwctrl; - if (data) - atomic_set(&data->lbms_count, 0); - else - pcie_capability_write_word(port, PCI_EXP_LNKSTA, - PCI_EXP_LNKSTA_LBMS); -} - -int pcie_lbms_count(struct pci_dev *port, unsigned long *val) -{ - struct pcie_bwctrl_data *data; - - guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem); - data = port->link_bwctrl; - if (!data) - return -ENOTTY; - - *val = atomic_read(&data->lbms_count); - - return 0; + clear_bit(PCI_LINK_LBMS_SEEN, &port->priv_flags); + pcie_capability_write_word(port, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); } static int pcie_bwnotif_probe(struct pcie_device *srv) @@ -308,18 +264,16 @@ static int pcie_bwnotif_probe(struct pcie_device *srv) return ret; scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - port->link_bwctrl = data; - - ret = request_irq(srv->irq, pcie_bwnotif_irq, - IRQF_SHARED, "PCIe bwctrl", srv); - if (ret) { - port->link_bwctrl = NULL; - return ret; - } + port->link_bwctrl = data; - pcie_bwnotif_enable(srv); + ret = request_irq(srv->irq, pcie_bwnotif_irq, + IRQF_SHARED, "PCIe bwctrl", srv); + if (ret) { + port->link_bwctrl = NULL; + return ret; } + + pcie_bwnotif_enable(srv); } pci_dbg(port, "enabled with IRQ %d\n", srv->irq); @@ -339,13 +293,11 @@ static void pcie_bwnotif_remove(struct pcie_device *srv) pcie_cooling_device_unregister(data->cdev); scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) { - scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) { - pcie_bwnotif_disable(srv->port); + pcie_bwnotif_disable(srv->port); - free_irq(srv->irq, srv); + free_irq(srv->irq, srv); - srv->port->link_bwctrl = NULL; - } + srv->port->link_bwctrl = NULL; } } diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c index df42f15c9829..fc18349614d7 100644 --- a/drivers/pci/pcie/dpc.c +++ b/drivers/pci/pcie/dpc.c @@ -222,7 +222,7 @@ 
static void dpc_process_rp_pio_error(struct pci_dev *pdev) dpc_tlp_log_len(pdev), pdev->subordinate->flit_mode, &tlp_log); - pcie_print_tlp_log(pdev, &tlp_log, dev_fmt("")); + pcie_print_tlp_log(pdev, &tlp_log, KERN_ERR, dev_fmt("")); if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1) goto clear_status; @@ -252,46 +252,59 @@ static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev, else info->severity = AER_NONFATAL; + info->level = KERN_ERR; + + info->dev[0] = dev; + info->error_dev_num = 1; + return 1; } void dpc_process_error(struct pci_dev *pdev) { u16 cap = pdev->dpc_cap, status, source, reason, ext_reason; - struct aer_err_info info; + struct aer_err_info info = {}; pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status); - pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source); - - pci_info(pdev, "containment event, status:%#06x source:%#06x\n", - status, source); reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN; - ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; - pci_warn(pdev, "%s detected\n", - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR) ? - "unmasked uncorrectable error" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE) ? - "ERR_NONFATAL" : - (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? - "ERR_FATAL" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? - "RP PIO error" : - (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? 
- "software trigger" : - "reserved error"); - - /* show RP PIO error detail information */ - if (pdev->dpc_rp_extensions && - reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT && - ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) - dpc_process_rp_pio_error(pdev); - else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR && - dpc_get_aer_uncorrect_severity(pdev, &info) && - aer_get_device_error_info(pdev, &info)) { - aer_print_error(pdev, &info); - pci_aer_clear_nonfatal_status(pdev); - pci_aer_clear_fatal_status(pdev); + + switch (reason) { + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR: + pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n", + status); + if (dpc_get_aer_uncorrect_severity(pdev, &info) && + aer_get_device_error_info(&info, 0)) { + aer_print_error(&info, 0); + pci_aer_clear_nonfatal_status(pdev); + pci_aer_clear_fatal_status(pdev); + } + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE: + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE: + pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, + &source); + pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n", + status, + (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ? + "ERR_FATAL" : "ERR_NONFATAL", + pci_domain_nr(pdev->bus), PCI_BUS_NUM(source), + PCI_SLOT(source), PCI_FUNC(source)); + break; + case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT: + ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT; + pci_warn(pdev, "containment event, status:%#06x: %s detected\n", + status, + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ? + "RP PIO error" : + (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ? 
+ "software trigger" : + "reserved error"); + /* show RP PIO error detail information */ + if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO && + pdev->dpc_rp_extensions) + dpc_process_rp_pio_error(pdev); + break; } } diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 31090770fffc..de6381c690f5 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -271,7 +271,6 @@ failed: pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); - /* TODO: Should kernel panic here? */ pci_info(bridge, "device recovery failed\n"); return status; diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c index 890d5391d7f5..71f8fc9ea2ed 100644 --- a/drivers/pci/pcie/tlp.c +++ b/drivers/pci/pcie/tlp.c @@ -98,12 +98,14 @@ int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2, * pcie_print_tlp_log - Print TLP Header / Prefix Log contents * @dev: PCIe device * @log: TLP Log structure + * @level: Printk log level * @pfx: String prefix * * Prints TLP Header and Prefix Log information held by @log. */ void pcie_print_tlp_log(const struct pci_dev *dev, - const struct pcie_tlp_log *log, const char *pfx) + const struct pcie_tlp_log *log, const char *level, + const char *pfx) { /* EE_PREFIX_STR fits the extended DW space needed for the Flit mode */ char buf[11 * PCIE_STD_MAX_TLP_HEADERLOG + 1]; @@ -130,6 +132,6 @@ void pcie_print_tlp_log(const struct pci_dev *dev, } } - pci_err(dev, "%sTLP Header%s: %s\n", pfx, + dev_printk(level, &dev->dev, "%sTLP Header%s: %s\n", pfx, log->flit ? 
" (Flit)" : "", buf); } diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 364fa2a514f8..4b8693ec9e4c 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2058,7 +2058,7 @@ int pci_setup_device(struct pci_dev *dev) if (class == PCI_CLASS_BRIDGE_PCI) goto bad; pci_read_irq(dev); - pci_read_bases(dev, 6, PCI_ROM_ADDRESS); + pci_read_bases(dev, PCI_STD_NUM_BARS, PCI_ROM_ADDRESS); pci_subsystem_ids(dev, &dev->subsystem_vendor, &dev->subsystem_device); @@ -2711,7 +2711,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) pci_set_msi_domain(dev); /* Notifier could use PCI capabilities */ - dev->match_driver = false; ret = device_add(&dev->dev); WARN_ON(ret < 0); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8d610c17e0f2..64ac1ee944d3 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -38,14 +38,10 @@ static bool pcie_lbms_seen(struct pci_dev *dev, u16 lnksta) { - unsigned long count; - int ret; - - ret = pcie_lbms_count(dev, &count); - if (ret < 0) - return lnksta & PCI_EXP_LNKSTA_LBMS; + if (test_bit(PCI_LINK_LBMS_SEEN, &dev->priv_flags)) + return true; - return count > 0; + return lnksta & PCI_EXP_LNKSTA_LBMS; } /* diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index 45c8252c8edc..5e5cf2c3e2c8 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -72,7 +72,6 @@ int __ref cb_alloc(struct pcmcia_socket *s) pci_lock_rescan_remove(); s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); - pci_fixup_cardbus(bus); max = bus->busn_res.start; for (pass = 0; pass < 2; pass++) diff --git a/include/linux/pci.h b/include/linux/pci.h index 0e8e3fd77e96..b07e77637082 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -346,7 +346,7 @@ struct pci_dev { u8 hdr_type; /* PCI header type (`multi' flag masked out) */ #ifdef CONFIG_PCIEAER u16 aer_cap; /* AER capability offset */ - struct aer_stats *aer_stats; /* AER stats for this device */ + struct aer_info *aer_info; /* AER info 
for this device */ #endif #ifdef CONFIG_PCIEPORTBUS struct rcec_ea *rcec_ea; /* RCEC cached endpoint association */ @@ -423,8 +423,6 @@ struct pci_dev { struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ struct resource driver_exclusive_resource; /* driver exclusive resource ranges */ - bool match_driver; /* Skip attaching driver */ - unsigned int transparent:1; /* Subtractive decode bridge */ unsigned int io_window:1; /* Bridge has I/O window */ unsigned int pref_window:1; /* Bridge has pref mem window */ @@ -1139,9 +1137,6 @@ resource_size_t pcibios_align_resource(void *, const struct resource *, resource_size_t, resource_size_t); -/* Weak but can be overridden by arch */ -void pci_fixup_cardbus(struct pci_bus *); - /* Generic PCI functions used internally */ void pcibios_resource_to_bus(struct pci_bus *bus, struct pci_bus_region *region, @@ -1848,6 +1843,14 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } static inline bool pcie_aspm_enabled(struct pci_dev *pdev) { return false; } #endif +#ifdef CONFIG_HOTPLUG_PCI +void pci_hp_ignore_link_change(struct pci_dev *pdev); +void pci_hp_unignore_link_change(struct pci_dev *pdev); +#else +static inline void pci_hp_ignore_link_change(struct pci_dev *pdev) { } +static inline void pci_hp_unignore_link_change(struct pci_dev *pdev) { } +#endif + #ifdef CONFIG_PCIEAER bool pci_aer_available(void); #else @@ -2322,7 +2325,6 @@ void pcim_iounmap(struct pci_dev *pdev, void __iomem *addr); void __iomem * const *pcim_iomap_table(struct pci_dev *pdev); int pcim_request_region(struct pci_dev *pdev, int bar, const char *name); int pcim_iomap_regions(struct pci_dev *pdev, int mask, const char *name); -void pcim_iounmap_regions(struct pci_dev *pdev, int mask); void __iomem *pcim_iomap_range(struct pci_dev *pdev, int bar, unsigned long offset, unsigned long len); |