summaryrefslogtreecommitdiff
path: root/drivers/pci/pcie
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/pcie')
-rw-r--r--drivers/pci/pcie/aer.c336
-rw-r--r--drivers/pci/pcie/aspm.c8
-rw-r--r--drivers/pci/pcie/dpc.c107
-rw-r--r--drivers/pci/pcie/err.c17
-rw-r--r--drivers/pci/pcie/portdrv_pci.c25
5 files changed, 321 insertions, 172 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index a2e88386af28..4e823ae051a7 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -31,26 +31,9 @@
#include "portdrv.h"
#define AER_ERROR_SOURCES_MAX 100
-#define AER_MAX_MULTI_ERR_DEVICES 5 /* Not likely to have more */
-struct aer_err_info {
- struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
- int error_dev_num;
-
- unsigned int id:16;
-
- unsigned int severity:2; /* 0:NONFATAL | 1:FATAL | 2:COR */
- unsigned int __pad1:5;
- unsigned int multi_error_valid:1;
-
- unsigned int first_error:5;
- unsigned int __pad2:2;
- unsigned int tlp_header_valid:1;
-
- unsigned int status; /* COR/UNCOR Error Status */
- unsigned int mask; /* COR/UNCOR Error Mask */
- struct aer_header_log_regs tlp; /* TLP Header */
-};
+#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */
+#define AER_MAX_TYPEOF_UNCOR_ERRS 26 /* as per PCI_ERR_UNCOR_STATUS*/
struct aer_err_source {
unsigned int status;
@@ -76,6 +59,42 @@ struct aer_rpc {
*/
};
+/* AER stats for the device */
+struct aer_stats {
+
+ /*
+ * Fields for all AER capable devices. They indicate the errors
+ * "as seen by this device". Note that this may mean that if an
+ * end point is causing problems, the AER counters may increment
+ * at its link partner (e.g. root port) because the errors will be
+ * "seen" by the link partner and not the the problematic end point
+ * itself (which may report all counters as 0 as it never saw any
+ * problems).
+ */
+ /* Counters for different type of correctable errors */
+ u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS];
+ /* Counters for different type of fatal uncorrectable errors */
+ u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+ /* Counters for different type of nonfatal uncorrectable errors */
+ u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS];
+ /* Total number of ERR_COR sent by this device */
+ u64 dev_total_cor_errs;
+ /* Total number of ERR_FATAL sent by this device */
+ u64 dev_total_fatal_errs;
+ /* Total number of ERR_NONFATAL sent by this device */
+ u64 dev_total_nonfatal_errs;
+
+ /*
+ * Fields for Root ports & root complex event collectors only, these
+ * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL
+ * messages received by the root port / event collector, INCLUDING the
+ * ones that are generated internally (by the rootport itself)
+ */
+ u64 rootport_total_cor_errs;
+ u64 rootport_total_fatal_errs;
+ u64 rootport_total_nonfatal_errs;
+};
+
#define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \
PCI_ERR_UNC_ECRC| \
PCI_ERR_UNC_UNSUP| \
@@ -303,12 +322,13 @@ int pcie_aer_get_firmware_first(struct pci_dev *dev)
if (!pci_is_pcie(dev))
return 0;
+ if (pcie_ports_native)
+ return 0;
+
if (!dev->__aer_firmware_first_valid)
aer_set_firmware_first(dev);
return dev->__aer_firmware_first;
}
-#define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \
- PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE)
static bool aer_firmware_first;
@@ -323,6 +343,9 @@ bool aer_acpi_firmware_first(void)
.firmware_first = 0,
};
+ if (pcie_ports_native)
+ return false;
+
if (!parsed) {
apei_hest_parse(aer_hest_parse, &info);
aer_firmware_first = info.firmware_first;
@@ -357,16 +380,30 @@ int pci_disable_pcie_error_reporting(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_disable_pcie_error_reporting);
+void pci_aer_clear_device_status(struct pci_dev *dev)
+{
+ u16 sta;
+
+ pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &sta);
+ pcie_capability_write_word(dev, PCI_EXP_DEVSTA, sta);
+}
+
int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
{
int pos;
- u32 status;
+ u32 status, sev;
pos = dev->aer_cap;
if (!pos)
return -EIO;
+ if (pcie_aer_get_firmware_first(dev))
+ return -EIO;
+
+ /* Clear status bits for ERR_NONFATAL errors only */
pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+ status &= ~sev;
if (status)
pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
@@ -374,6 +411,26 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
}
EXPORT_SYMBOL_GPL(pci_cleanup_aer_uncorrect_error_status);
+void pci_aer_clear_fatal_status(struct pci_dev *dev)
+{
+ int pos;
+ u32 status, sev;
+
+ pos = dev->aer_cap;
+ if (!pos)
+ return;
+
+ if (pcie_aer_get_firmware_first(dev))
+ return;
+
+ /* Clear status bits for ERR_FATAL errors only */
+ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
+ pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
+ status &= sev;
+ if (status)
+ pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
+}
+
int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
{
int pos;
@@ -387,6 +444,9 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
if (!pos)
return -EIO;
+ if (pcie_aer_get_firmware_first(dev))
+ return -EIO;
+
port_type = pci_pcie_type(dev);
if (port_type == PCI_EXP_TYPE_ROOT_PORT) {
pci_read_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, &status);
@@ -402,10 +462,20 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev)
return 0;
}
-int pci_aer_init(struct pci_dev *dev)
+void pci_aer_init(struct pci_dev *dev)
{
dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR);
- return pci_cleanup_aer_error_status_regs(dev);
+
+ if (dev->aer_cap)
+ dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL);
+
+ pci_cleanup_aer_error_status_regs(dev);
+}
+
+void pci_aer_exit(struct pci_dev *dev)
+{
+ kfree(dev->aer_stats);
+ dev->aer_stats = NULL;
}
#define AER_AGENT_RECEIVER 0
@@ -458,52 +528,52 @@ static const char *aer_error_layer[] = {
"Transaction Layer"
};
-static const char *aer_correctable_error_string[] = {
- "Receiver Error", /* Bit Position 0 */
+static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = {
+ "RxErr", /* Bit Position 0 */
NULL,
NULL,
NULL,
NULL,
NULL,
- "Bad TLP", /* Bit Position 6 */
- "Bad DLLP", /* Bit Position 7 */
- "RELAY_NUM Rollover", /* Bit Position 8 */
+ "BadTLP", /* Bit Position 6 */
+ "BadDLLP", /* Bit Position 7 */
+ "Rollover", /* Bit Position 8 */
NULL,
NULL,
NULL,
- "Replay Timer Timeout", /* Bit Position 12 */
- "Advisory Non-Fatal", /* Bit Position 13 */
- "Corrected Internal Error", /* Bit Position 14 */
- "Header Log Overflow", /* Bit Position 15 */
+ "Timeout", /* Bit Position 12 */
+ "NonFatalErr", /* Bit Position 13 */
+ "CorrIntErr", /* Bit Position 14 */
+ "HeaderOF", /* Bit Position 15 */
};
-static const char *aer_uncorrectable_error_string[] = {
+static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = {
"Undefined", /* Bit Position 0 */
NULL,
NULL,
NULL,
- "Data Link Protocol", /* Bit Position 4 */
- "Surprise Down Error", /* Bit Position 5 */
+ "DLP", /* Bit Position 4 */
+ "SDES", /* Bit Position 5 */
NULL,
NULL,
NULL,
NULL,
NULL,
NULL,
- "Poisoned TLP", /* Bit Position 12 */
- "Flow Control Protocol", /* Bit Position 13 */
- "Completion Timeout", /* Bit Position 14 */
- "Completer Abort", /* Bit Position 15 */
- "Unexpected Completion", /* Bit Position 16 */
- "Receiver Overflow", /* Bit Position 17 */
- "Malformed TLP", /* Bit Position 18 */
+ "TLP", /* Bit Position 12 */
+ "FCP", /* Bit Position 13 */
+ "CmpltTO", /* Bit Position 14 */
+ "CmpltAbrt", /* Bit Position 15 */
+ "UnxCmplt", /* Bit Position 16 */
+ "RxOF", /* Bit Position 17 */
+ "MalfTLP", /* Bit Position 18 */
"ECRC", /* Bit Position 19 */
- "Unsupported Request", /* Bit Position 20 */
- "ACS Violation", /* Bit Position 21 */
- "Uncorrectable Internal Error", /* Bit Position 22 */
- "MC Blocked TLP", /* Bit Position 23 */
- "AtomicOp Egress Blocked", /* Bit Position 24 */
- "TLP Prefix Blocked Error", /* Bit Position 25 */
+ "UnsupReq", /* Bit Position 20 */
+ "ACSViol", /* Bit Position 21 */
+ "UncorrIntErr", /* Bit Position 22 */
+ "BlockedTLP", /* Bit Position 23 */
+ "AtomicOpBlocked", /* Bit Position 24 */
+ "TLPBlockedErr", /* Bit Position 25 */
};
static const char *aer_agent_string[] = {
@@ -513,6 +583,144 @@ static const char *aer_agent_string[] = {
"Transmitter ID"
};
+#define aer_stats_dev_attr(name, stats_array, strings_array, \
+ total_string, total_field) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+{ \
+ unsigned int i; \
+ char *str = buf; \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ u64 *stats = pdev->aer_stats->stats_array; \
+ \
+ for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \
+ if (strings_array[i]) \
+ str += sprintf(str, "%s %llu\n", \
+ strings_array[i], stats[i]); \
+ else if (stats[i]) \
+ str += sprintf(str, #stats_array "_bit[%d] %llu\n",\
+ i, stats[i]); \
+ } \
+ str += sprintf(str, "TOTAL_%s %llu\n", total_string, \
+ pdev->aer_stats->total_field); \
+ return str-buf; \
+} \
+static DEVICE_ATTR_RO(name)
+
+aer_stats_dev_attr(aer_dev_correctable, dev_cor_errs,
+ aer_correctable_error_string, "ERR_COR",
+ dev_total_cor_errs);
+aer_stats_dev_attr(aer_dev_fatal, dev_fatal_errs,
+ aer_uncorrectable_error_string, "ERR_FATAL",
+ dev_total_fatal_errs);
+aer_stats_dev_attr(aer_dev_nonfatal, dev_nonfatal_errs,
+ aer_uncorrectable_error_string, "ERR_NONFATAL",
+ dev_total_nonfatal_errs);
+
+#define aer_stats_rootport_attr(name, field) \
+ static ssize_t \
+ name##_show(struct device *dev, struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct pci_dev *pdev = to_pci_dev(dev); \
+ return sprintf(buf, "%llu\n", pdev->aer_stats->field); \
+} \
+static DEVICE_ATTR_RO(name)
+
+aer_stats_rootport_attr(aer_rootport_total_err_cor,
+ rootport_total_cor_errs);
+aer_stats_rootport_attr(aer_rootport_total_err_fatal,
+ rootport_total_fatal_errs);
+aer_stats_rootport_attr(aer_rootport_total_err_nonfatal,
+ rootport_total_nonfatal_errs);
+
+static struct attribute *aer_stats_attrs[] __ro_after_init = {
+ &dev_attr_aer_dev_correctable.attr,
+ &dev_attr_aer_dev_fatal.attr,
+ &dev_attr_aer_dev_nonfatal.attr,
+ &dev_attr_aer_rootport_total_err_cor.attr,
+ &dev_attr_aer_rootport_total_err_fatal.attr,
+ &dev_attr_aer_rootport_total_err_nonfatal.attr,
+ NULL
+};
+
+static umode_t aer_stats_attrs_are_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ if (!pdev->aer_stats)
+ return 0;
+
+ if ((a == &dev_attr_aer_rootport_total_err_cor.attr ||
+ a == &dev_attr_aer_rootport_total_err_fatal.attr ||
+ a == &dev_attr_aer_rootport_total_err_nonfatal.attr) &&
+ pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT)
+ return 0;
+
+ return a->mode;
+}
+
+const struct attribute_group aer_stats_attr_group = {
+ .attrs = aer_stats_attrs,
+ .is_visible = aer_stats_attrs_are_visible,
+};
+
+static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
+ struct aer_err_info *info)
+{
+ int status, i, max = -1;
+ u64 *counter = NULL;
+ struct aer_stats *aer_stats = pdev->aer_stats;
+
+ if (!aer_stats)
+ return;
+
+ switch (info->severity) {
+ case AER_CORRECTABLE:
+ aer_stats->dev_total_cor_errs++;
+ counter = &aer_stats->dev_cor_errs[0];
+ max = AER_MAX_TYPEOF_COR_ERRS;
+ break;
+ case AER_NONFATAL:
+ aer_stats->dev_total_nonfatal_errs++;
+ counter = &aer_stats->dev_nonfatal_errs[0];
+ max = AER_MAX_TYPEOF_UNCOR_ERRS;
+ break;
+ case AER_FATAL:
+ aer_stats->dev_total_fatal_errs++;
+ counter = &aer_stats->dev_fatal_errs[0];
+ max = AER_MAX_TYPEOF_UNCOR_ERRS;
+ break;
+ }
+
+ status = (info->status & ~info->mask);
+ for (i = 0; i < max; i++)
+ if (status & (1 << i))
+ counter[i]++;
+}
+
+static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
+ struct aer_err_source *e_src)
+{
+ struct aer_stats *aer_stats = pdev->aer_stats;
+
+ if (!aer_stats)
+ return;
+
+ if (e_src->status & PCI_ERR_ROOT_COR_RCV)
+ aer_stats->rootport_total_cor_errs++;
+
+ if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
+ if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
+ aer_stats->rootport_total_fatal_errs++;
+ else
+ aer_stats->rootport_total_nonfatal_errs++;
+ }
+}
+
static void __print_tlp_header(struct pci_dev *dev,
struct aer_header_log_regs *t)
{
@@ -545,9 +753,10 @@ static void __aer_print_error(struct pci_dev *dev,
pci_err(dev, " [%2d] Unknown Error Bit%s\n",
i, info->first_error == i ? " (First)" : "");
}
+ pci_dev_aer_stats_incr(dev, info);
}
-static void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
+void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
{
int layer, agent;
int id = ((dev->bus->number << 8) | dev->devfn);
@@ -799,6 +1008,7 @@ static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
if (pos)
pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS,
info->status);
+ pci_aer_clear_device_status(dev);
} else if (info->severity == AER_NONFATAL)
pcie_do_nonfatal_recovery(dev);
else if (info->severity == AER_FATAL)
@@ -876,7 +1086,7 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
#endif
/**
- * get_device_error_info - read error status from dev and store it to info
+ * aer_get_device_error_info - read error status from dev and store it to info
* @dev: pointer to the device expected to have a error record
* @info: pointer to structure to store the error record
*
@@ -884,7 +1094,7 @@ EXPORT_SYMBOL_GPL(aer_recover_queue);
*
* Note that @info is reused among all error devices. Clear fields properly.
*/
-static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
+int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
{
int pos, temp;
@@ -942,11 +1152,11 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info)
/* Report all before handle them, not to lost records by reset etc. */
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info->dev[i], e_info))
aer_print_error(e_info->dev[i], e_info);
}
for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
- if (get_device_error_info(e_info->dev[i], e_info))
+ if (aer_get_device_error_info(e_info->dev[i], e_info))
handle_error_source(e_info->dev[i], e_info);
}
}
@@ -962,6 +1172,8 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
struct pci_dev *pdev = rpc->rpd;
struct aer_err_info *e_info = &rpc->e_info;
+ pci_rootport_aer_stats_incr(pdev, e_src);
+
/*
* There is a possibility that both correctable error and
* uncorrectable error being logged. Report correctable error first.
@@ -1336,20 +1548,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
*/
static void aer_error_resume(struct pci_dev *dev)
{
- int pos;
- u32 status, mask;
- u16 reg16;
-
- /* Clean up Root device status */
- pcie_capability_read_word(dev, PCI_EXP_DEVSTA, &reg16);
- pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16);
-
- /* Clean AER Root Error Status */
- pos = dev->aer_cap;
- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
- pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask);
- status &= ~mask; /* Clear corresponding nonfatal bits */
- pci_write_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, status);
+ pci_aer_clear_device_status(dev);
+ pci_cleanup_aer_uncorrect_error_status(dev);
}
static struct pcie_port_service_driver aerdriver = {
diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c
index c687c817b47d..5326916715d2 100644
--- a/drivers/pci/pcie/aspm.c
+++ b/drivers/pci/pcie/aspm.c
@@ -1127,11 +1127,9 @@ static int pcie_aspm_set_policy(const char *val,
if (aspm_disabled)
return -EPERM;
- for (i = 0; i < ARRAY_SIZE(policy_str); i++)
- if (!strncmp(val, policy_str[i], strlen(policy_str[i])))
- break;
- if (i >= ARRAY_SIZE(policy_str))
- return -EINVAL;
+ i = sysfs_match_string(policy_str, val);
+ if (i < 0)
+ return i;
if (i == aspm_policy)
return 0;
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 921ed979109d..f03279fc87cd 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -6,6 +6,7 @@
* Copyright (C) 2016 Intel Corp.
*/
+#include <linux/aer.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/init.h>
@@ -16,10 +17,8 @@
struct dpc_dev {
struct pcie_device *dev;
- struct work_struct work;
u16 cap_pos;
bool rp_extensions;
- u32 rp_pio_status;
u8 rp_log_size;
};
@@ -65,19 +64,13 @@ static int dpc_wait_rp_inactive(struct dpc_dev *dpc)
return 0;
}
-static void dpc_wait_link_inactive(struct dpc_dev *dpc)
-{
- struct pci_dev *pdev = dpc->dev->port;
-
- pcie_wait_for_link(pdev, false);
-}
-
static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{
struct dpc_dev *dpc;
struct pcie_device *pciedev;
struct device *devdpc;
- u16 cap, ctl;
+
+ u16 cap;
/*
* DPC disables the Link automatically in hardware, so it has
@@ -92,34 +85,17 @@ static pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
* Wait until the Link is inactive, then clear DPC Trigger Status
* to allow the Port to leave DPC.
*/
- dpc_wait_link_inactive(dpc);
+ pcie_wait_for_link(pdev, false);
if (dpc->rp_extensions && dpc_wait_rp_inactive(dpc))
return PCI_ERS_RESULT_DISCONNECT;
- if (dpc->rp_extensions && dpc->rp_pio_status) {
- pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS,
- dpc->rp_pio_status);
- dpc->rp_pio_status = 0;
- }
pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
PCI_EXP_DPC_STATUS_TRIGGER);
- pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl);
- pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL,
- ctl | PCI_EXP_DPC_CTL_INT_EN);
-
return PCI_ERS_RESULT_RECOVERED;
}
-static void dpc_work(struct work_struct *work)
-{
- struct dpc_dev *dpc = container_of(work, struct dpc_dev, work);
- struct pci_dev *pdev = dpc->dev->port;
-
- /* We configure DPC so it only triggers on ERR_FATAL */
- pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC);
-}
static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
{
@@ -134,8 +110,6 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
dev_err(dev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n",
status, mask);
- dpc->rp_pio_status = status;
-
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev);
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr);
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc);
@@ -146,15 +120,14 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status);
first_error = (dpc_status & 0x1f00) >> 8;
- status &= ~mask;
for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
- if (status & (1 << i))
+ if ((status & ~mask) & (1 << i))
dev_err(dev, "[%2d] %s%s\n", i, rp_pio_error_string[i],
first_error == i ? " (First)" : "");
}
if (dpc->rp_log_size < 4)
- return;
+ goto clear_status;
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
&dw0);
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG + 4,
@@ -167,7 +140,7 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
dw0, dw1, dw2, dw3);
if (dpc->rp_log_size < 5)
- return;
+ goto clear_status;
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
dev_err(dev, "RP PIO ImpSpec Log %#010x\n", log);
@@ -176,43 +149,26 @@ static void dpc_process_rp_pio_error(struct dpc_dev *dpc)
cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG, &prefix);
dev_err(dev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
}
+ clear_status:
+ pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
}
-static irqreturn_t dpc_irq(int irq, void *context)
+static irqreturn_t dpc_handler(int irq, void *context)
{
- struct dpc_dev *dpc = (struct dpc_dev *)context;
+ struct aer_err_info info;
+ struct dpc_dev *dpc = context;
struct pci_dev *pdev = dpc->dev->port;
struct device *dev = &dpc->dev->device;
- u16 cap = dpc->cap_pos, ctl, status, source, reason, ext_reason;
-
- pci_read_config_word(pdev, cap + PCI_EXP_DPC_CTL, &ctl);
-
- if (!(ctl & PCI_EXP_DPC_CTL_INT_EN) || ctl == (u16)(~0))
- return IRQ_NONE;
+ u16 cap = dpc->cap_pos, status, source, reason, ext_reason;
pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
-
- if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT))
- return IRQ_NONE;
-
- if (!(status & PCI_EXP_DPC_STATUS_TRIGGER)) {
- pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
- PCI_EXP_DPC_STATUS_INTERRUPT);
- return IRQ_HANDLED;
- }
-
- pci_write_config_word(pdev, cap + PCI_EXP_DPC_CTL,
- ctl & ~PCI_EXP_DPC_CTL_INT_EN);
-
- pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID,
- &source);
+ pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID, &source);
dev_info(dev, "DPC containment event, status:%#06x source:%#06x\n",
- status, source);
+ status, source);
reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN) >> 1;
ext_reason = (status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT) >> 5;
-
dev_warn(dev, "DPC %s detected, remove downstream devices\n",
(reason == 0) ? "unmasked uncorrectable error" :
(reason == 1) ? "ERR_NONFATAL" :
@@ -220,15 +176,36 @@ static irqreturn_t dpc_irq(int irq, void *context)
(ext_reason == 0) ? "RP PIO error" :
(ext_reason == 1) ? "software trigger" :
"reserved error");
+
/* show RP PIO error detail information */
if (dpc->rp_extensions && reason == 3 && ext_reason == 0)
dpc_process_rp_pio_error(dpc);
+ else if (reason == 0 && aer_get_device_error_info(pdev, &info)) {
+ aer_print_error(pdev, &info);
+ pci_cleanup_aer_uncorrect_error_status(pdev);
+ }
- pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
- PCI_EXP_DPC_STATUS_INTERRUPT);
+ /* We configure DPC so it only triggers on ERR_FATAL */
+ pcie_do_fatal_recovery(pdev, PCIE_PORT_SERVICE_DPC);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t dpc_irq(int irq, void *context)
+{
+ struct dpc_dev *dpc = (struct dpc_dev *)context;
+ struct pci_dev *pdev = dpc->dev->port;
+ u16 cap = dpc->cap_pos, status;
+
+ pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
- schedule_work(&dpc->work);
+ if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || status == (u16)(~0))
+ return IRQ_NONE;
+ pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
+ PCI_EXP_DPC_STATUS_INTERRUPT);
+ if (status & PCI_EXP_DPC_STATUS_TRIGGER)
+ return IRQ_WAKE_THREAD;
return IRQ_HANDLED;
}
@@ -250,11 +227,11 @@ static int dpc_probe(struct pcie_device *dev)
dpc->cap_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
dpc->dev = dev;
- INIT_WORK(&dpc->work, dpc_work);
set_service_data(dev, dpc);
- status = devm_request_irq(device, dev->irq, dpc_irq, IRQF_SHARED,
- "pcie-dpc", dpc);
+ status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
+ dpc_handler, IRQF_SHARED,
+ "pcie-dpc", dpc);
if (status) {
dev_warn(device, "request IRQ%d failed: %d\n", dev->irq,
status);
diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c
index f7ce0cb0b0b7..674984a9277a 100644
--- a/drivers/pci/pcie/err.c
+++ b/drivers/pci/pcie/err.c
@@ -252,6 +252,7 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
dev->error_state = state;
pci_walk_bus(dev->subordinate, cb, &result_data);
if (cb == report_resume) {
+ pci_aer_clear_device_status(dev);
pci_cleanup_aer_uncorrect_error_status(dev);
dev->error_state = pci_channel_io_normal;
}
@@ -259,15 +260,10 @@ static pci_ers_result_t broadcast_error_message(struct pci_dev *dev,
/*
* If the error is reported by an end point, we think this
* error is related to the upstream link of the end point.
+ * The error is non fatal so the bus is ok; just invoke
+ * the callback for the function that logged the error.
*/
- if (state == pci_channel_io_normal)
- /*
- * the error is non fatal so the bus is ok, just invoke
- * the callback for the function that logged the error.
- */
- cb(dev, &result_data);
- else
- pci_walk_bus(dev->bus, cb, &result_data);
+ cb(dev, &result_data);
}
return result_data.result;
@@ -295,6 +291,7 @@ void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
parent = udev->subordinate;
pci_lock_rescan_remove();
+ pci_dev_get(dev);
list_for_each_entry_safe_reverse(pdev, temp, &parent->devices,
bus_list) {
pci_dev_get(pdev);
@@ -316,7 +313,8 @@ void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
* do error recovery on all subordinates of the bridge instead
* of the bridge and clear the error status of the bridge.
*/
- pci_cleanup_aer_uncorrect_error_status(dev);
+ pci_aer_clear_fatal_status(dev);
+ pci_aer_clear_device_status(dev);
}
if (result == PCI_ERS_RESULT_RECOVERED) {
@@ -328,6 +326,7 @@ void pcie_do_fatal_recovery(struct pci_dev *dev, u32 service)
pci_info(dev, "Device recovery from fatal error failed\n");
}
+ pci_dev_put(dev);
pci_unlock_rescan_remove();
}
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
index 5e0f02c68faa..eef22dc29140 100644
--- a/drivers/pci/pcie/portdrv_pci.c
+++ b/drivers/pci/pcie/portdrv_pci.c
@@ -42,17 +42,6 @@ __setup("pcie_ports=", pcie_port_setup);
/* global data */
-static int pcie_portdrv_restore_config(struct pci_dev *dev)
-{
- int retval;
-
- retval = pci_enable_device(dev);
- if (retval)
- return retval;
- pci_set_master(dev);
- return 0;
-}
-
#ifdef CONFIG_PM
static int pcie_port_runtime_suspend(struct device *dev)
{
@@ -162,19 +151,6 @@ static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
return PCI_ERS_RESULT_RECOVERED;
}
-static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
-{
- /* If fatal, restore cfg space for possible link reset at upstream */
- if (dev->error_state == pci_channel_io_frozen) {
- dev->state_saved = true;
- pci_restore_state(dev);
- pcie_portdrv_restore_config(dev);
- pci_enable_pcie_error_reporting(dev);
- }
-
- return PCI_ERS_RESULT_RECOVERED;
-}
-
static int resume_iter(struct device *device, void *data)
{
struct pcie_device *pcie_device;
@@ -210,7 +186,6 @@ static const struct pci_device_id port_pci_ids[] = { {
static const struct pci_error_handlers pcie_portdrv_err_handler = {
.error_detected = pcie_portdrv_error_detected,
.mmio_enabled = pcie_portdrv_mmio_enabled,
- .slot_reset = pcie_portdrv_slot_reset,
.resume = pcie_portdrv_err_resume,
};