diff options
Diffstat (limited to 'drivers/pci/pcie/aer.c')
-rw-r--r-- | drivers/pci/pcie/aer.c | 239 |
1 files changed, 60 insertions, 179 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 83180edd6ed4..a90a9194ac4a 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -30,7 +30,7 @@ #include "../pci.h" #include "portdrv.h" -#define AER_ERROR_SOURCES_MAX 100 +#define AER_ERROR_SOURCES_MAX 128 #define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ #define AER_MAX_TYPEOF_UNCOR_ERRS 26 /* as per PCI_ERR_UNCOR_STATUS*/ @@ -42,21 +42,7 @@ struct aer_err_source { struct aer_rpc { struct pci_dev *rpd; /* Root Port device */ - struct work_struct dpc_handler; - struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; - struct aer_err_info e_info; - unsigned short prod_idx; /* Error Producer Index */ - unsigned short cons_idx; /* Error Consumer Index */ - int isr; - spinlock_t e_lock; /* - * Lock access to Error Status/ID Regs - * and error producer/consumer index - */ - struct mutex rpc_mutex; /* - * only one thread could do - * recovery on the same - * root port hierarchy - */ + DECLARE_KFIFO(aer_fifo, struct aer_err_source, AER_ERROR_SOURCES_MAX); }; /* AER stats for the device */ @@ -866,7 +852,7 @@ void cper_print_aer(struct pci_dev *dev, int aer_severity, static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev) { if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) { - e_info->dev[e_info->error_dev_num] = dev; + e_info->dev[e_info->error_dev_num] = pci_dev_get(dev); e_info->error_dev_num++; return 0; } @@ -1010,9 +996,12 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info) info->status); pci_aer_clear_device_status(dev); } else if (info->severity == AER_NONFATAL) - pcie_do_nonfatal_recovery(dev); + pcie_do_recovery(dev, pci_channel_io_normal, + PCIE_PORT_SERVICE_AER); else if (info->severity == AER_FATAL) - pcie_do_fatal_recovery(dev, PCIE_PORT_SERVICE_AER); + pcie_do_recovery(dev, pci_channel_io_frozen, + PCIE_PORT_SERVICE_AER); + pci_dev_put(dev); } #ifdef CONFIG_ACPI_APEI_PCIEAER @@ -1047,9 +1036,11 @@ static void aer_recover_work_func(struct work_struct *work) } cper_print_aer(pdev, entry.severity, entry.regs); if (entry.severity == AER_NONFATAL) - pcie_do_nonfatal_recovery(pdev); + pcie_do_recovery(pdev, pci_channel_io_normal, + PCIE_PORT_SERVICE_AER); else if (entry.severity == AER_FATAL) - pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_AER); + pcie_do_recovery(pdev, pci_channel_io_frozen, + PCIE_PORT_SERVICE_AER); pci_dev_put(pdev); } } @@ -1065,7 +1056,6 @@ static DECLARE_WORK(aer_recover_work, aer_recover_work_func); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, int severity, struct aer_capability_regs *aer_regs) { - unsigned long flags; struct aer_recover_entry entry = { .bus = bus, .devfn = devfn, @@ -1074,13 +1064,12 @@ void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, .regs = aer_regs, }; - spin_lock_irqsave(&aer_recover_ring_lock, flags); - if (kfifo_put(&aer_recover_ring, entry)) + if (kfifo_in_spinlocked(&aer_recover_ring, &entry, sizeof(entry), + &aer_recover_ring_lock)) schedule_work(&aer_recover_work); else pr_err("AER recover: Buffer overflow when recovering AER for %04x:%02x:%02x:%x\n", domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); - spin_unlock_irqrestore(&aer_recover_ring_lock, flags); } EXPORT_SYMBOL_GPL(aer_recover_queue); #endif @@ -1115,8 +1104,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) &info->mask); if (!(info->status & ~info->mask)) return 0; - } else if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE || - info->severity == AER_NONFATAL) { + } else if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM || + info->severity == AER_NONFATAL) { /* Link is still healthy for IO reads */ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, @@ -1170,7 +1160,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc, struct aer_err_source *e_src) { struct pci_dev *pdev = rpc->rpd; - struct aer_err_info *e_info = &rpc->e_info; + struct aer_err_info e_info; pci_rootport_aer_stats_incr(pdev, e_src); @@ -1179,83 +1169,57 @@ static void aer_isr_one_error(struct aer_rpc *rpc, * uncorrectable error being logged. Report correctable error first. */ if (e_src->status & PCI_ERR_ROOT_COR_RCV) { - e_info->id = ERR_COR_ID(e_src->id); - e_info->severity = AER_CORRECTABLE; + e_info.id = ERR_COR_ID(e_src->id); + e_info.severity = AER_CORRECTABLE; if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV) - e_info->multi_error_valid = 1; + e_info.multi_error_valid = 1; else - e_info->multi_error_valid = 0; - aer_print_port_info(pdev, e_info); + e_info.multi_error_valid = 0; + aer_print_port_info(pdev, &e_info); - if (find_source_device(pdev, e_info)) - aer_process_err_devices(e_info); + if (find_source_device(pdev, &e_info)) + aer_process_err_devices(&e_info); } if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) { - e_info->id = ERR_UNCOR_ID(e_src->id); + e_info.id = ERR_UNCOR_ID(e_src->id); if (e_src->status & PCI_ERR_ROOT_FATAL_RCV) - e_info->severity = AER_FATAL; + e_info.severity = AER_FATAL; else - e_info->severity = AER_NONFATAL; + e_info.severity = AER_NONFATAL; if (e_src->status & PCI_ERR_ROOT_MULTI_UNCOR_RCV) - e_info->multi_error_valid = 1; + e_info.multi_error_valid = 1; else - e_info->multi_error_valid = 0; + e_info.multi_error_valid = 0; - aer_print_port_info(pdev, e_info); + aer_print_port_info(pdev, &e_info); - if (find_source_device(pdev, e_info)) - aer_process_err_devices(e_info); + if (find_source_device(pdev, &e_info)) + aer_process_err_devices(&e_info); } } /** - * get_e_source - retrieve an error source - * @rpc: pointer to the root port which holds an error - * @e_src: pointer to store retrieved error source - * - * Return 1 if an error source is retrieved, otherwise 0. - * - * Invoked by DPC handler to consume an error. - */ -static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) -{ - unsigned long flags; - - /* Lock access to Root error producer/consumer index */ - spin_lock_irqsave(&rpc->e_lock, flags); - if (rpc->prod_idx == rpc->cons_idx) { - spin_unlock_irqrestore(&rpc->e_lock, flags); - return 0; - } - - *e_src = rpc->e_sources[rpc->cons_idx]; - rpc->cons_idx++; - if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) - rpc->cons_idx = 0; - spin_unlock_irqrestore(&rpc->e_lock, flags); - - return 1; -} - -/** * aer_isr - consume errors detected by root port * @work: definition of this work item * * Invoked, as DPC, when root port records new detected error */ -static void aer_isr(struct work_struct *work) +static irqreturn_t aer_isr(int irq, void *context) { - struct aer_rpc *rpc = container_of(work, struct aer_rpc, dpc_handler); + struct pcie_device *dev = (struct pcie_device *)context; + struct aer_rpc *rpc = get_service_data(dev); struct aer_err_source uninitialized_var(e_src); - mutex_lock(&rpc->rpc_mutex); - while (get_e_source(rpc, &e_src)) + if (kfifo_is_empty(&rpc->aer_fifo)) + return IRQ_NONE; + + while (kfifo_get(&rpc->aer_fifo, &e_src)) aer_isr_one_error(rpc, &e_src); - mutex_unlock(&rpc->rpc_mutex); + return IRQ_HANDLED; } /** @@ -1265,56 +1229,26 @@ static void aer_isr(struct work_struct *work) * * Invoked when Root Port detects AER messages. */ -irqreturn_t aer_irq(int irq, void *context) +static irqreturn_t aer_irq(int irq, void *context) { - unsigned int status, id; struct pcie_device *pdev = (struct pcie_device *)context; struct aer_rpc *rpc = get_service_data(pdev); - int next_prod_idx; - unsigned long flags; - int pos; - - pos = pdev->port->aer_cap; - /* - * Must lock access to Root Error Status Reg, Root Error ID Reg, - * and Root error producer/consumer index - */ - spin_lock_irqsave(&rpc->e_lock, flags); + struct pci_dev *rp = rpc->rpd; + struct aer_err_source e_src = {}; + int pos = rp->aer_cap; - /* Read error status */ - pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, &status); - if (!(status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) { - spin_unlock_irqrestore(&rpc->e_lock, flags); + pci_read_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, &e_src.status); + if (!(e_src.status & (PCI_ERR_ROOT_UNCOR_RCV|PCI_ERR_ROOT_COR_RCV))) return IRQ_NONE; - } - /* Read error source and clear error status */ - pci_read_config_dword(pdev->port, pos + PCI_ERR_ROOT_ERR_SRC, &id); - pci_write_config_dword(pdev->port, pos + PCI_ERR_ROOT_STATUS, status); + pci_read_config_dword(rp, pos + PCI_ERR_ROOT_ERR_SRC, &e_src.id); + pci_write_config_dword(rp, pos + PCI_ERR_ROOT_STATUS, e_src.status); - /* Store error source for later DPC handler */ - next_prod_idx = rpc->prod_idx + 1; - if (next_prod_idx == AER_ERROR_SOURCES_MAX) - next_prod_idx = 0; - if (next_prod_idx == rpc->cons_idx) { - /* - * Error Storm Condition - possibly the same error occurred. - * Drop the error. - */ - spin_unlock_irqrestore(&rpc->e_lock, flags); + if (!kfifo_put(&rpc->aer_fifo, e_src)) return IRQ_HANDLED; - } - rpc->e_sources[rpc->prod_idx].status = status; - rpc->e_sources[rpc->prod_idx].id = id; - rpc->prod_idx = next_prod_idx; - spin_unlock_irqrestore(&rpc->e_lock, flags); - - /* Invoke DPC handler */ - schedule_work(&rpc->dpc_handler); - return IRQ_HANDLED; + return IRQ_WAKE_THREAD; } -EXPORT_SYMBOL_GPL(aer_irq); static int set_device_error_reporting(struct pci_dev *dev, void *data) { @@ -1423,33 +1357,6 @@ static void aer_disable_rootport(struct aer_rpc *rpc) } /** - * aer_alloc_rpc - allocate Root Port data structure - * @dev: pointer to the pcie_dev data structure - * - * Invoked when Root Port's AER service is loaded. - */ -static struct aer_rpc *aer_alloc_rpc(struct pcie_device *dev) -{ - struct aer_rpc *rpc; - - rpc = kzalloc(sizeof(struct aer_rpc), GFP_KERNEL); - if (!rpc) - return NULL; - - /* Initialize Root lock access, e_lock, to Root Error Status Reg */ - spin_lock_init(&rpc->e_lock); - - rpc->rpd = dev->port; - INIT_WORK(&rpc->dpc_handler, aer_isr); - mutex_init(&rpc->rpc_mutex); - - /* Use PCIe bus function to store rpc into PCIe device */ - set_service_data(dev, rpc); - - return rpc; -} - -/** * aer_remove - clean up resources * @dev: pointer to the pcie_dev data structure * @@ -1459,16 +1366,7 @@ static void aer_remove(struct pcie_device *dev) { struct aer_rpc *rpc = get_service_data(dev); - if (rpc) { - /* If register interrupt service, it must be free. */ - if (rpc->isr) - free_irq(dev->irq, dev); - - flush_work(&rpc->dpc_handler); - aer_disable_rootport(rpc); - kfree(rpc); - set_service_data(dev, NULL); - } + aer_disable_rootport(rpc); } /** @@ -1481,27 +1379,24 @@ static int aer_probe(struct pcie_device *dev) { int status; struct aer_rpc *rpc; - struct device *device = &dev->port->dev; + struct device *device = &dev->device; - /* Alloc rpc data structure */ - rpc = aer_alloc_rpc(dev); + rpc = devm_kzalloc(device, sizeof(struct aer_rpc), GFP_KERNEL); if (!rpc) { dev_printk(KERN_DEBUG, device, "alloc AER rpc failed\n"); - aer_remove(dev); return -ENOMEM; } + rpc->rpd = dev->port; + set_service_data(dev, rpc); - /* Request IRQ ISR */ - status = request_irq(dev->irq, aer_irq, IRQF_SHARED, "aerdrv", dev); + status = devm_request_threaded_irq(device, dev->irq, aer_irq, aer_isr, + IRQF_SHARED, "aerdrv", dev); if (status) { dev_printk(KERN_DEBUG, device, "request AER IRQ %d failed\n", dev->irq); - aer_remove(dev); return status; } - rpc->isr = 1; - aer_enable_rootport(rpc); dev_info(device, "AER enabled with IRQ %d\n", dev->irq); return 0; @@ -1526,7 +1421,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - rc = pci_bridge_secondary_bus_reset(dev); + rc = pci_bus_error_reset(dev); pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); /* Clear Root Error Status */ @@ -1541,18 +1436,6 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } -/** - * aer_error_resume - clean up corresponding error status bits - * @dev: pointer to Root Port's pci_dev data structure - * - * Invoked by Port Bus driver during nonfatal recovery. - */ -static void aer_error_resume(struct pci_dev *dev) -{ - pci_aer_clear_device_status(dev); - pci_cleanup_aer_uncorrect_error_status(dev); -} - static struct pcie_port_service_driver aerdriver = { .name = "aer", .port_type = PCI_EXP_TYPE_ROOT_PORT, @@ -1560,7 +1443,6 @@ static struct pcie_port_service_driver aerdriver = { .probe = aer_probe, .remove = aer_remove, - .error_resume = aer_error_resume, .reset_link = aer_root_reset, }; @@ -1569,10 +1451,9 @@ static struct pcie_port_service_driver aerdriver = { * * Invoked when AER root service driver is loaded. */ -static int __init aer_service_init(void) +int __init pcie_aer_init(void) { if (!pci_aer_available() || aer_acpi_firmware_first()) return -ENXIO; return pcie_port_service_register(&aerdriver); } -device_initcall(aer_service_init); |