summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig5
-rw-r--r--drivers/edac/altera_edac.c412
-rw-r--r--drivers/edac/altera_edac.h128
-rw-r--r--drivers/edac/amd64_edac.c129
-rw-r--r--drivers/edac/amd64_edac.h2
-rw-r--r--drivers/edac/edac_mc.c2
-rw-r--r--drivers/edac/edac_mc_sysfs.c3
-rw-r--r--drivers/edac/i7core_edac.c83
-rw-r--r--drivers/edac/ie31200_edac.c121
-rw-r--r--drivers/edac/mce_amd.c9
-rw-r--r--drivers/edac/sb_edac.c254
11 files changed, 685 insertions, 463 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 37755e63cc28..6ca7474baf4a 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -378,12 +378,11 @@ config EDAC_ALTERA
config EDAC_ALTERA_L2C
bool "Altera L2 Cache ECC"
- depends on EDAC_ALTERA=y
- select CACHE_L2X0
+ depends on EDAC_ALTERA=y && CACHE_L2X0
help
Support for error detection and correction on the
Altera L2 cache Memory for Altera SoCs. This option
- requires L2 cache so it will force that selection.
+ requires L2 cache.
config EDAC_ALTERA_OCRAM
bool "Altera On-Chip RAM ECC"
diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
index 63e42098726d..5b4d223d6d68 100644
--- a/drivers/edac/altera_edac.c
+++ b/drivers/edac/altera_edac.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
+#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
@@ -78,27 +79,6 @@ static const struct altr_sdram_prv_data a10_data = {
.ue_set_mask = A10_DIAGINT_TDERRA_MASK,
};
-/************************** EDAC Device Defines **************************/
-
-/* OCRAM ECC Management Group Defines */
-#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
-#define ALTR_OCR_ECC_EN BIT(0)
-#define ALTR_OCR_ECC_INJS BIT(1)
-#define ALTR_OCR_ECC_INJD BIT(2)
-#define ALTR_OCR_ECC_SERR BIT(3)
-#define ALTR_OCR_ECC_DERR BIT(4)
-
-/* L2 ECC Management Group Defines */
-#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
-#define ALTR_L2_ECC_EN BIT(0)
-#define ALTR_L2_ECC_INJS BIT(1)
-#define ALTR_L2_ECC_INJD BIT(2)
-
-#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
-#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
-#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
-#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
-
/*********************** EDAC Memory Controller Functions ****************/
/* The SDRAM controller uses the EDAC Memory Controller framework. */
@@ -252,8 +232,8 @@ static unsigned long get_total_mem(void)
}
static const struct of_device_id altr_sdram_ctrl_of_match[] = {
- { .compatible = "altr,sdram-edac", .data = (void *)&c5_data},
- { .compatible = "altr,sdram-edac-a10", .data = (void *)&a10_data},
+ { .compatible = "altr,sdram-edac", .data = &c5_data},
+ { .compatible = "altr,sdram-edac-a10", .data = &a10_data},
{},
};
MODULE_DEVICE_TABLE(of, altr_sdram_ctrl_of_match);
@@ -570,28 +550,8 @@ module_platform_driver(altr_edac_driver);
const struct edac_device_prv_data ocramecc_data;
const struct edac_device_prv_data l2ecc_data;
-
-struct edac_device_prv_data {
- int (*setup)(struct platform_device *pdev, void __iomem *base);
- int ce_clear_mask;
- int ue_clear_mask;
- char dbgfs_name[20];
- void * (*alloc_mem)(size_t size, void **other);
- void (*free_mem)(void *p, size_t size, void *other);
- int ecc_enable_mask;
- int ce_set_mask;
- int ue_set_mask;
- int trig_alloc_sz;
-};
-
-struct altr_edac_device_dev {
- void __iomem *base;
- int sb_irq;
- int db_irq;
- const struct edac_device_prv_data *data;
- struct dentry *debugfs_dir;
- char *edac_dev_name;
-};
+const struct edac_device_prv_data a10_ocramecc_data;
+const struct edac_device_prv_data a10_l2ecc_data;
static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
{
@@ -665,8 +625,9 @@ static ssize_t altr_edac_device_trig(struct file *file,
if (ACCESS_ONCE(ptemp[i]))
result = -1;
/* Toggle Error bit (it is latched), leave ECC enabled */
- writel(error_mask, drvdata->base);
- writel(priv->ecc_enable_mask, drvdata->base);
+ writel(error_mask, (drvdata->base + priv->set_err_ofst));
+ writel(priv->ecc_enable_mask, (drvdata->base +
+ priv->set_err_ofst));
ptemp[i] = i;
}
/* Ensure it has been written out */
@@ -694,6 +655,16 @@ static const struct file_operations altr_edac_device_inject_fops = {
.llseek = generic_file_llseek,
};
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos);
+
+static const struct file_operations altr_edac_a10_device_inject_fops = {
+ .open = simple_open,
+ .write = altr_edac_a10_device_trig,
+ .llseek = generic_file_llseek,
+};
+
static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
const struct edac_device_prv_data *priv)
{
@@ -708,17 +679,18 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci,
if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR,
drvdata->debugfs_dir, edac_dci,
- &altr_edac_device_inject_fops))
+ priv->inject_fops))
debugfs_remove_recursive(drvdata->debugfs_dir);
}
static const struct of_device_id altr_edac_device_of_match[] = {
#ifdef CONFIG_EDAC_ALTERA_L2C
- { .compatible = "altr,socfpga-l2-ecc", .data = (void *)&l2ecc_data },
+ { .compatible = "altr,socfpga-l2-ecc", .data = &l2ecc_data },
+ { .compatible = "altr,socfpga-a10-l2-ecc", .data = &a10_l2ecc_data },
#endif
#ifdef CONFIG_EDAC_ALTERA_OCRAM
- { .compatible = "altr,socfpga-ocram-ecc",
- .data = (void *)&ocramecc_data },
+ { .compatible = "altr,socfpga-ocram-ecc", .data = &ocramecc_data },
+ { .compatible = "altr,socfpga-a10-ocram-ecc", .data = &a10_ocramecc_data },
#endif
{},
};
@@ -789,7 +761,7 @@ static int altr_edac_device_probe(struct platform_device *pdev)
/* Check specific dependencies for the module */
if (drvdata->data->setup) {
- res = drvdata->data->setup(pdev, drvdata->base);
+ res = drvdata->data->setup(drvdata);
if (res)
goto fail1;
}
@@ -856,6 +828,25 @@ module_platform_driver(altr_edac_device_driver);
/*********************** OCRAM EDAC Device Functions *********************/
#ifdef CONFIG_EDAC_ALTERA_OCRAM
+/*
+ * Test for memory's ECC dependencies upon entry because platform specific
+ * startup should have initialized the memory and enabled the ECC.
+ * Can't turn on ECC here because accessing un-initialized memory will
+ * cause CE/UE errors possibly causing an ABORT.
+ */
+static int altr_check_ecc_deps(struct altr_edac_device_dev *device)
+{
+ void __iomem *base = device->base;
+ const struct edac_device_prv_data *prv = device->data;
+
+ if (readl(base + prv->ecc_en_ofst) & prv->ecc_enable_mask)
+ return 0;
+
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: No ECC present or ECC disabled.\n",
+ device->edac_dev_name);
+ return -ENODEV;
+}
static void *ocram_alloc_mem(size_t size, void **other)
{
@@ -891,36 +882,53 @@ static void ocram_free_mem(void *p, size_t size, void *other)
gen_pool_free((struct gen_pool *)other, (u32)p, size);
}
-/*
- * altr_ocram_check_deps()
- * Test for OCRAM cache ECC dependencies upon entry because
- * platform specific startup should have initialized the
- * On-Chip RAM memory and enabled the ECC.
- * Can't turn on ECC here because accessing un-initialized
- * memory will cause CE/UE errors possibly causing an ABORT.
- */
-static int altr_ocram_check_deps(struct platform_device *pdev,
- void __iomem *base)
+static irqreturn_t altr_edac_a10_ecc_irq(struct altr_edac_device_dev *dci,
+ bool sberr)
{
- if (readl(base) & ALTR_OCR_ECC_EN)
- return 0;
+ void __iomem *base = dci->base;
- edac_printk(KERN_ERR, EDAC_DEVICE,
- "OCRAM: No ECC present or ECC disabled.\n");
- return -ENODEV;
+ if (sberr) {
+ writel(ALTR_A10_ECC_SERRPENA,
+ base + ALTR_A10_ECC_INTSTAT_OFST);
+ edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ } else {
+ writel(ALTR_A10_ECC_DERRPENA,
+ base + ALTR_A10_ECC_INTSTAT_OFST);
+ edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+ }
+ return IRQ_HANDLED;
}
const struct edac_device_prv_data ocramecc_data = {
- .setup = altr_ocram_check_deps,
+ .setup = altr_check_ecc_deps,
.ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR),
.ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR),
.dbgfs_name = "altr_ocram_trigger",
.alloc_mem = ocram_alloc_mem,
.free_mem = ocram_free_mem,
.ecc_enable_mask = ALTR_OCR_ECC_EN,
+ .ecc_en_ofst = ALTR_OCR_ECC_REG_OFFSET,
.ce_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJS),
.ue_set_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_INJD),
+ .set_err_ofst = ALTR_OCR_ECC_REG_OFFSET,
.trig_alloc_sz = ALTR_TRIG_OCRAM_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_ocramecc_data = {
+ .setup = altr_check_ecc_deps,
+ .ce_clear_mask = ALTR_A10_ECC_SERRPENA,
+ .ue_clear_mask = ALTR_A10_ECC_DERRPENA,
+ .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM,
+ .dbgfs_name = "altr_ocram_trigger",
+ .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL,
+ .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST,
+ .ce_set_mask = ALTR_A10_ECC_TSERRA,
+ .ue_set_mask = ALTR_A10_ECC_TDERRA,
+ .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST,
+ .ecc_irq_handler = altr_edac_a10_ecc_irq,
+ .inject_fops = &altr_edac_a10_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_OCRAM */
@@ -966,10 +974,13 @@ static void l2_free_mem(void *p, size_t size, void *other)
* Bail if ECC is not enabled.
* Note that L2 Cache Enable is forced at build time.
*/
-static int altr_l2_check_deps(struct platform_device *pdev,
- void __iomem *base)
+static int altr_l2_check_deps(struct altr_edac_device_dev *device)
{
- if (readl(base) & ALTR_L2_ECC_EN)
+ void __iomem *base = device->base;
+ const struct edac_device_prv_data *prv = device->data;
+
+ if ((readl(base) & prv->ecc_enable_mask) ==
+ prv->ecc_enable_mask)
return 0;
edac_printk(KERN_ERR, EDAC_DEVICE,
@@ -977,6 +988,24 @@ static int altr_l2_check_deps(struct platform_device *pdev,
return -ENODEV;
}
+static irqreturn_t altr_edac_a10_l2_irq(struct altr_edac_device_dev *dci,
+ bool sberr)
+{
+ if (sberr) {
+ regmap_write(dci->edac->ecc_mgr_map,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
+ edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ } else {
+ regmap_write(dci->edac->ecc_mgr_map,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
+ A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
+ edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
+ panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
+ }
+ return IRQ_HANDLED;
+}
+
const struct edac_device_prv_data l2ecc_data = {
.setup = altr_l2_check_deps,
.ce_clear_mask = 0,
@@ -987,11 +1016,252 @@ const struct edac_device_prv_data l2ecc_data = {
.ecc_enable_mask = ALTR_L2_ECC_EN,
.ce_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJS),
.ue_set_mask = (ALTR_L2_ECC_EN | ALTR_L2_ECC_INJD),
+ .set_err_ofst = ALTR_L2_ECC_REG_OFFSET,
+ .trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
+};
+
+const struct edac_device_prv_data a10_l2ecc_data = {
+ .setup = altr_l2_check_deps,
+ .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR,
+ .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR,
+ .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2,
+ .dbgfs_name = "altr_l2_trigger",
+ .alloc_mem = l2_alloc_mem,
+ .free_mem = l2_free_mem,
+ .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL,
+ .ce_set_mask = ALTR_A10_L2_ECC_CE_INJ_MASK,
+ .ue_set_mask = ALTR_A10_L2_ECC_UE_INJ_MASK,
+ .set_err_ofst = ALTR_A10_L2_ECC_INJ_OFST,
+ .ecc_irq_handler = altr_edac_a10_l2_irq,
.trig_alloc_sz = ALTR_TRIG_L2C_BYTE_SIZE,
+ .inject_fops = &altr_edac_device_inject_fops,
};
#endif /* CONFIG_EDAC_ALTERA_L2C */
+/********************* Arria10 EDAC Device Functions *************************/
+
+/*
+ * The Arria10 EDAC Device Functions differ from the Cyclone5/Arria5
+ * because 2 IRQs are shared among the all ECC peripherals. The ECC
+ * manager manages the IRQs and the children.
+ * Based on xgene_edac.c peripheral code.
+ */
+
+static ssize_t altr_edac_a10_device_trig(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct edac_device_ctl_info *edac_dci = file->private_data;
+ struct altr_edac_device_dev *drvdata = edac_dci->pvt_info;
+ const struct edac_device_prv_data *priv = drvdata->data;
+ void __iomem *set_addr = (drvdata->base + priv->set_err_ofst);
+ unsigned long flags;
+ u8 trig_type;
+
+ if (!user_buf || get_user(trig_type, user_buf))
+ return -EFAULT;
+
+ local_irq_save(flags);
+ if (trig_type == ALTR_UE_TRIGGER_CHAR)
+ writel(priv->ue_set_mask, set_addr);
+ else
+ writel(priv->ce_set_mask, set_addr);
+ /* Ensure the interrupt test bits are set */
+ wmb();
+ local_irq_restore(flags);
+
+ return count;
+}
+
+static irqreturn_t altr_edac_a10_irq_handler(int irq, void *dev_id)
+{
+ irqreturn_t rc = IRQ_NONE;
+ struct altr_arria10_edac *edac = dev_id;
+ struct altr_edac_device_dev *dci;
+ int irq_status;
+ bool sberr = (irq == edac->sb_irq) ? 1 : 0;
+ int sm_offset = sberr ? A10_SYSMGR_ECC_INTSTAT_SERR_OFST :
+ A10_SYSMGR_ECC_INTSTAT_DERR_OFST;
+
+ regmap_read(edac->ecc_mgr_map, sm_offset, &irq_status);
+
+ if ((irq != edac->sb_irq) && (irq != edac->db_irq)) {
+ WARN_ON(1);
+ } else {
+ list_for_each_entry(dci, &edac->a10_ecc_devices, next) {
+ if (irq_status & dci->data->irq_status_mask)
+ rc = dci->data->ecc_irq_handler(dci, sberr);
+ }
+ }
+
+ return rc;
+}
+
+static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
+ struct device_node *np)
+{
+ struct edac_device_ctl_info *dci;
+ struct altr_edac_device_dev *altdev;
+ char *ecc_name = (char *)np->name;
+ struct resource res;
+ int edac_idx;
+ int rc = 0;
+ const struct edac_device_prv_data *prv;
+ /* Get matching node and check for valid result */
+ const struct of_device_id *pdev_id =
+ of_match_node(altr_edac_device_of_match, np);
+ if (IS_ERR_OR_NULL(pdev_id))
+ return -ENODEV;
+
+ /* Get driver specific data for this EDAC device */
+ prv = pdev_id->data;
+ if (IS_ERR_OR_NULL(prv))
+ return -ENODEV;
+
+ if (!devres_open_group(edac->dev, altr_edac_a10_device_add, GFP_KERNEL))
+ return -ENOMEM;
+
+ rc = of_address_to_resource(np, 0, &res);
+ if (rc < 0) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: no resource address\n", ecc_name);
+ goto err_release_group;
+ }
+
+ edac_idx = edac_device_alloc_index();
+ dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name,
+ 1, ecc_name, 1, 0, NULL, 0,
+ edac_idx);
+
+ if (!dci) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s: Unable to allocate EDAC device\n", ecc_name);
+ rc = -ENOMEM;
+ goto err_release_group;
+ }
+
+ altdev = dci->pvt_info;
+ dci->dev = edac->dev;
+ altdev->edac_dev_name = ecc_name;
+ altdev->edac_idx = edac_idx;
+ altdev->edac = edac;
+ altdev->edac_dev = dci;
+ altdev->data = prv;
+ altdev->ddev = *edac->dev;
+ dci->dev = &altdev->ddev;
+ dci->ctl_name = "Altera ECC Manager";
+ dci->mod_name = ecc_name;
+ dci->dev_name = ecc_name;
+
+ altdev->base = devm_ioremap_resource(edac->dev, &res);
+ if (IS_ERR(altdev->base)) {
+ rc = PTR_ERR(altdev->base);
+ goto err_release_group1;
+ }
+
+ /* Check specific dependencies for the module */
+ if (altdev->data->setup) {
+ rc = altdev->data->setup(altdev);
+ if (rc)
+ goto err_release_group1;
+ }
+
+ rc = edac_device_add_device(dci);
+ if (rc) {
+ dev_err(edac->dev, "edac_device_add_device failed\n");
+ rc = -ENOMEM;
+ goto err_release_group1;
+ }
+
+ altr_create_edacdev_dbgfs(dci, prv);
+
+ list_add(&altdev->next, &edac->a10_ecc_devices);
+
+ devres_remove_group(edac->dev, altr_edac_a10_device_add);
+
+ return 0;
+
+err_release_group1:
+ edac_device_free_ctl_info(dci);
+err_release_group:
+ edac_printk(KERN_ALERT, EDAC_DEVICE, "%s: %d\n", __func__, __LINE__);
+ devres_release_group(edac->dev, NULL);
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Error setting up EDAC device: %d\n", ecc_name, rc);
+
+ return rc;
+}
+
+static int altr_edac_a10_probe(struct platform_device *pdev)
+{
+ struct altr_arria10_edac *edac;
+ struct device_node *child;
+ int rc;
+
+ edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL);
+ if (!edac)
+ return -ENOMEM;
+
+ edac->dev = &pdev->dev;
+ platform_set_drvdata(pdev, edac);
+ INIT_LIST_HEAD(&edac->a10_ecc_devices);
+
+ edac->ecc_mgr_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+ "altr,sysmgr-syscon");
+ if (IS_ERR(edac->ecc_mgr_map)) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to get syscon altr,sysmgr-syscon\n");
+ return PTR_ERR(edac->ecc_mgr_map);
+ }
+
+ edac->sb_irq = platform_get_irq(pdev, 0);
+ rc = devm_request_irq(&pdev->dev, edac->sb_irq,
+ altr_edac_a10_irq_handler,
+ IRQF_SHARED, dev_name(&pdev->dev), edac);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n");
+ return rc;
+ }
+
+ edac->db_irq = platform_get_irq(pdev, 1);
+ rc = devm_request_irq(&pdev->dev, edac->db_irq,
+ altr_edac_a10_irq_handler,
+ IRQF_SHARED, dev_name(&pdev->dev), edac);
+ if (rc) {
+ edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
+ return rc;
+ }
+
+ for_each_child_of_node(pdev->dev.of_node, child) {
+ if (!of_device_is_available(child))
+ continue;
+ if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc"))
+ altr_edac_a10_device_add(edac, child);
+ else if (of_device_is_compatible(child,
+ "altr,socfpga-a10-ocram-ecc"))
+ altr_edac_a10_device_add(edac, child);
+ }
+
+ return 0;
+}
+
+static const struct of_device_id altr_edac_a10_of_match[] = {
+ { .compatible = "altr,socfpga-a10-ecc-manager" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_a10_of_match);
+
+static struct platform_driver altr_edac_a10_driver = {
+ .probe = altr_edac_a10_probe,
+ .driver = {
+ .name = "socfpga_a10_ecc_manager",
+ .of_match_table = altr_edac_a10_of_match,
+ },
+};
+module_platform_driver(altr_edac_a10_driver);
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h
index 953077d3e4f3..42090f36ba6e 100644
--- a/drivers/edac/altera_edac.h
+++ b/drivers/edac/altera_edac.h
@@ -195,4 +195,132 @@ struct altr_sdram_mc_data {
const struct altr_sdram_prv_data *data;
};
+/************************** EDAC Device Defines **************************/
+/***** General Device Trigger Defines *****/
+#define ALTR_UE_TRIGGER_CHAR 'U' /* Trigger for UE */
+#define ALTR_TRIGGER_READ_WRD_CNT 32 /* Line size x 4 */
+#define ALTR_TRIG_OCRAM_BYTE_SIZE 128 /* Line size x 4 */
+#define ALTR_TRIG_L2C_BYTE_SIZE 4096 /* Full Page */
+
+/******* Cyclone5 and Arria5 Defines *******/
+/* OCRAM ECC Management Group Defines */
+#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04
+#define ALTR_OCR_ECC_REG_OFFSET 0x00
+#define ALTR_OCR_ECC_EN BIT(0)
+#define ALTR_OCR_ECC_INJS BIT(1)
+#define ALTR_OCR_ECC_INJD BIT(2)
+#define ALTR_OCR_ECC_SERR BIT(3)
+#define ALTR_OCR_ECC_DERR BIT(4)
+
+/* L2 ECC Management Group Defines */
+#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00
+#define ALTR_L2_ECC_REG_OFFSET 0x00
+#define ALTR_L2_ECC_EN BIT(0)
+#define ALTR_L2_ECC_INJS BIT(1)
+#define ALTR_L2_ECC_INJD BIT(2)
+
+/* Arria10 General ECC Block Module Defines */
+#define ALTR_A10_ECC_CTRL_OFST 0x08
+#define ALTR_A10_ECC_EN BIT(0)
+#define ALTR_A10_ECC_INITA BIT(16)
+#define ALTR_A10_ECC_INITB BIT(24)
+
+#define ALTR_A10_ECC_INITSTAT_OFST 0x0C
+#define ALTR_A10_ECC_INITCOMPLETEA BIT(0)
+#define ALTR_A10_ECC_INITCOMPLETEB BIT(8)
+
+#define ALTR_A10_ECC_ERRINTEN_OFST 0x10
+#define ALTR_A10_ECC_SERRINTEN BIT(0)
+
+#define ALTR_A10_ECC_INTSTAT_OFST 0x20
+#define ALTR_A10_ECC_SERRPENA BIT(0)
+#define ALTR_A10_ECC_DERRPENA BIT(8)
+#define ALTR_A10_ECC_ERRPENA_MASK (ALTR_A10_ECC_SERRPENA | \
+ ALTR_A10_ECC_DERRPENA)
+#define ALTR_A10_ECC_SERRPENB BIT(16)
+#define ALTR_A10_ECC_DERRPENB BIT(24)
+#define ALTR_A10_ECC_ERRPENB_MASK (ALTR_A10_ECC_SERRPENB | \
+ ALTR_A10_ECC_DERRPENB)
+
+#define ALTR_A10_ECC_INTTEST_OFST 0x24
+#define ALTR_A10_ECC_TSERRA BIT(0)
+#define ALTR_A10_ECC_TDERRA BIT(8)
+
+/* ECC Manager Defines */
+#define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94
+#define A10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
+#define A10_SYSMGR_ECC_INTMASK_OCRAM BIT(1)
+
+#define A10_SYSMGR_ECC_INTSTAT_SERR_OFST 0x9C
+#define A10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0
+#define A10_SYSMGR_ECC_INTSTAT_L2 BIT(0)
+#define A10_SYSMGR_ECC_INTSTAT_OCRAM BIT(1)
+
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST 0xA8
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_SB BIT(15)
+#define A10_SYSGMR_MPU_CLEAR_L2_ECC_MB BIT(31)
+
+/* Arria 10 L2 ECC Management Group Defines */
+#define ALTR_A10_L2_ECC_CTL_OFST 0x0
+#define ALTR_A10_L2_ECC_EN_CTL BIT(0)
+
+#define ALTR_A10_L2_ECC_STATUS 0xFFD060A4
+#define ALTR_A10_L2_ECC_STAT_OFST 0xA4
+#define ALTR_A10_L2_ECC_SERR_PEND BIT(0)
+#define ALTR_A10_L2_ECC_MERR_PEND BIT(0)
+
+#define ALTR_A10_L2_ECC_CLR_OFST 0x4
+#define ALTR_A10_L2_ECC_SERR_CLR BIT(15)
+#define ALTR_A10_L2_ECC_MERR_CLR BIT(31)
+
+#define ALTR_A10_L2_ECC_INJ_OFST ALTR_A10_L2_ECC_CTL_OFST
+#define ALTR_A10_L2_ECC_CE_INJ_MASK 0x00000101
+#define ALTR_A10_L2_ECC_UE_INJ_MASK 0x00010101
+
+/* Arria 10 OCRAM ECC Management Group Defines */
+#define ALTR_A10_OCRAM_ECC_EN_CTL (BIT(1) | BIT(0))
+
+struct altr_edac_device_dev;
+
+struct edac_device_prv_data {
+ int (*setup)(struct altr_edac_device_dev *device);
+ int ce_clear_mask;
+ int ue_clear_mask;
+ int irq_status_mask;
+ char dbgfs_name[20];
+ void * (*alloc_mem)(size_t size, void **other);
+ void (*free_mem)(void *p, size_t size, void *other);
+ int ecc_enable_mask;
+ int ecc_en_ofst;
+ int ce_set_mask;
+ int ue_set_mask;
+ int set_err_ofst;
+ irqreturn_t (*ecc_irq_handler)(struct altr_edac_device_dev *dci,
+ bool sb);
+ int trig_alloc_sz;
+ const struct file_operations *inject_fops;
+};
+
+struct altr_edac_device_dev {
+ struct list_head next;
+ void __iomem *base;
+ int sb_irq;
+ int db_irq;
+ const struct edac_device_prv_data *data;
+ struct dentry *debugfs_dir;
+ char *edac_dev_name;
+ struct altr_arria10_edac *edac;
+ struct edac_device_ctl_info *edac_dev;
+ struct device ddev;
+ int edac_idx;
+};
+
+struct altr_arria10_edac {
+ struct device *dev;
+ struct regmap *ecc_mgr_map;
+ int sb_irq;
+ int db_irq;
+ struct list_head a10_ecc_devices;
+};
+
#endif /* #ifndef _ALTERA_EDAC_H */
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 930089e9e411..46784eb2edc6 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,11 +15,6 @@ module_param(ecc_enable_override, int, 0644);
static struct msr __percpu *msrs;
-/*
- * count successfully initialized driver instances for setup_pci_device()
- */
-static atomic_t drv_instances = ATOMIC_INIT(0);
-
/* Per-node stuff */
static struct ecc_settings **ecc_stngs;
@@ -1918,7 +1913,7 @@ static struct amd64_family_type family_types[] = {
[K8_CPUS] = {
.ctl_name = "K8",
.f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
- .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+ .f2_id = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL,
.ops = {
.early_channel_count = k8_early_channel_count,
.map_sysaddr_to_csrow = k8_map_sysaddr_to_csrow,
@@ -1928,7 +1923,7 @@ static struct amd64_family_type family_types[] = {
[F10_CPUS] = {
.ctl_name = "F10h",
.f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
- .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
+ .f2_id = PCI_DEVICE_ID_AMD_10H_NB_DRAM,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1938,7 +1933,7 @@ static struct amd64_family_type family_types[] = {
[F15_CPUS] = {
.ctl_name = "F15h",
.f1_id = PCI_DEVICE_ID_AMD_15H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1948,7 +1943,7 @@ static struct amd64_family_type family_types[] = {
[F15_M30H_CPUS] = {
.ctl_name = "F15h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1958,7 +1953,7 @@ static struct amd64_family_type family_types[] = {
[F15_M60H_CPUS] = {
.ctl_name = "F15h_M60h",
.f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1968,7 +1963,7 @@ static struct amd64_family_type family_types[] = {
[F16_CPUS] = {
.ctl_name = "F16h",
.f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_16H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_16H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -1978,7 +1973,7 @@ static struct amd64_family_type family_types[] = {
[F16_M30H_CPUS] = {
.ctl_name = "F16h_M30h",
.f1_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F1,
- .f3_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F3,
+ .f2_id = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2,
.ops = {
.early_channel_count = f1x_early_channel_count,
.map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow,
@@ -2227,13 +2222,13 @@ static inline void decode_bus_error(int node_id, struct mce *m)
}
/*
- * Use pvt->F2 which contains the F2 CPU PCI device to get the related
- * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
+ * Use pvt->F3 which contains the F3 CPU PCI device to get the related
+ * F1 (AddrMap) and F2 (Dct) devices. Return negative value on error.
*/
-static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f2_id)
{
/* Reserve the ADDRESS MAP Device */
- pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+ pvt->F1 = pci_get_related_function(pvt->F3->vendor, f1_id, pvt->F3);
if (!pvt->F1) {
amd64_err("error address map device not found: "
"vendor %x device 0x%x (broken BIOS?)\n",
@@ -2241,15 +2236,15 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
return -ENODEV;
}
- /* Reserve the MISC Device */
- pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
- if (!pvt->F3) {
+ /* Reserve the DCT Device */
+ pvt->F2 = pci_get_related_function(pvt->F3->vendor, f2_id, pvt->F3);
+ if (!pvt->F2) {
pci_dev_put(pvt->F1);
pvt->F1 = NULL;
- amd64_err("error F3 device not found: "
+ amd64_err("error F2 device not found: "
"vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, f3_id);
+ PCI_VENDOR_ID_AMD, f2_id);
return -ENODEV;
}
@@ -2263,7 +2258,7 @@ static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
pci_dev_put(pvt->F1);
- pci_dev_put(pvt->F3);
+ pci_dev_put(pvt->F2);
}
/*
@@ -2778,14 +2773,14 @@ static const struct attribute_group *amd64_edac_attr_groups[] = {
NULL
};
-static int init_one_instance(struct pci_dev *F2)
+static int init_one_instance(unsigned int nid)
{
- struct amd64_pvt *pvt = NULL;
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct amd64_family_type *fam_type = NULL;
struct mem_ctl_info *mci = NULL;
struct edac_mc_layer layers[2];
+ struct amd64_pvt *pvt = NULL;
int err = 0, ret;
- u16 nid = amd_pci_dev_to_node_id(F2);
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
@@ -2793,7 +2788,7 @@ static int init_one_instance(struct pci_dev *F2)
goto err_ret;
pvt->mc_node_id = nid;
- pvt->F2 = F2;
+ pvt->F3 = F3;
ret = -EINVAL;
fam_type = per_family_init(pvt);
@@ -2801,7 +2796,7 @@ static int init_one_instance(struct pci_dev *F2)
goto err_free;
ret = -ENODEV;
- err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
+ err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f2_id);
if (err)
goto err_free;
@@ -2836,7 +2831,7 @@ static int init_one_instance(struct pci_dev *F2)
goto err_siblings;
mci->pvt_info = pvt;
- mci->pdev = &pvt->F2->dev;
+ mci->pdev = &pvt->F3->dev;
setup_mci_misc_attrs(mci, fam_type);
@@ -2855,8 +2850,6 @@ static int init_one_instance(struct pci_dev *F2)
amd_register_ecc_decoder(decode_bus_error);
- atomic_inc(&drv_instances);
-
return 0;
err_add_mc:
@@ -2872,19 +2865,11 @@ err_ret:
return ret;
}
-static int probe_one_instance(struct pci_dev *pdev,
- const struct pci_device_id *mc_type)
+static int probe_one_instance(unsigned int nid)
{
- u16 nid = amd_pci_dev_to_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s;
- int ret = 0;
-
- ret = pci_enable_device(pdev);
- if (ret < 0) {
- edac_dbg(0, "ret=%d\n", ret);
- return -EIO;
- }
+ int ret;
ret = -ENOMEM;
s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
@@ -2905,7 +2890,7 @@ static int probe_one_instance(struct pci_dev *pdev,
goto err_enable;
}
- ret = init_one_instance(pdev);
+ ret = init_one_instance(nid);
if (ret < 0) {
amd64_err("Error probing instance: %d\n", nid);
restore_ecc_error_reporting(s, nid, F3);
@@ -2921,19 +2906,18 @@ err_out:
return ret;
}
-static void remove_one_instance(struct pci_dev *pdev)
+static void remove_one_instance(unsigned int nid)
{
- struct mem_ctl_info *mci;
- struct amd64_pvt *pvt;
- u16 nid = amd_pci_dev_to_node_id(pdev);
struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
struct ecc_settings *s = ecc_stngs[nid];
+ struct mem_ctl_info *mci;
+ struct amd64_pvt *pvt;
- mci = find_mci_by_dev(&pdev->dev);
+ mci = find_mci_by_dev(&F3->dev);
WARN_ON(!mci);
/* Remove from EDAC CORE tracking list */
- mci = edac_mc_del_mc(&pdev->dev);
+ mci = edac_mc_del_mc(&F3->dev);
if (!mci)
return;
@@ -2957,31 +2941,6 @@ static void remove_one_instance(struct pci_dev *pdev)
edac_mc_free(mci);
}
-/*
- * This table is part of the interface for loading drivers for PCI devices. The
- * PCI core identifies what devices are on a system during boot, and then
- * inquiry this table to see if this driver is for a given device found.
- */
-static const struct pci_device_id amd64_pci_table[] = {
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) },
- { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) },
- {0, }
-};
-MODULE_DEVICE_TABLE(pci, amd64_pci_table);
-
-static struct pci_driver amd64_pci_driver = {
- .name = EDAC_MOD_STR,
- .probe = probe_one_instance,
- .remove = remove_one_instance,
- .id_table = amd64_pci_table,
- .driver.probe_type = PROBE_FORCE_SYNCHRONOUS,
-};
-
static void setup_pci_device(void)
{
struct mem_ctl_info *mci;
@@ -3005,8 +2964,7 @@ static void setup_pci_device(void)
static int __init amd64_edac_init(void)
{
int err = -ENODEV;
-
- printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
+ int i;
opstate_init();
@@ -3022,13 +2980,14 @@ static int __init amd64_edac_init(void)
if (!msrs)
goto err_free;
- err = pci_register_driver(&amd64_pci_driver);
- if (err)
- goto err_pci;
+ for (i = 0; i < amd_nb_num(); i++)
+ if (probe_one_instance(i)) {
+ /* unwind properly */
+ while (--i >= 0)
+ remove_one_instance(i);
- err = -ENODEV;
- if (!atomic_read(&drv_instances))
- goto err_no_instances;
+ goto err_pci;
+ }
setup_pci_device();
@@ -3036,10 +2995,9 @@ static int __init amd64_edac_init(void)
amd64_err("%s on 32-bit is unsupported. USE AT YOUR OWN RISK!\n", EDAC_MOD_STR);
#endif
- return 0;
+ printk(KERN_INFO "AMD64 EDAC driver v%s\n", EDAC_AMD64_VERSION);
-err_no_instances:
- pci_unregister_driver(&amd64_pci_driver);
+ return 0;
err_pci:
msrs_free(msrs);
@@ -3055,10 +3013,13 @@ err_ret:
static void __exit amd64_edac_exit(void)
{
+ int i;
+
if (pci_ctl)
edac_pci_release_generic_ctl(pci_ctl);
- pci_unregister_driver(&amd64_pci_driver);
+ for (i = 0; i < amd_nb_num(); i++)
+ remove_one_instance(i);
kfree(ecc_stngs);
ecc_stngs = NULL;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index c0f248f3aaf9..c08870479054 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -422,7 +422,7 @@ struct low_ops {
struct amd64_family_type {
const char *ctl_name;
- u16 f1_id, f3_id;
+ u16 f1_id, f2_id;
struct low_ops ops;
};
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 1472f48c8ac6..6aa256b0a1ed 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -923,7 +923,7 @@ static void edac_inc_ue_error(struct mem_ctl_info *mci,
mci->ue_mc += count;
if (!enable_per_layer_report) {
- mci->ce_noinfo_count += count;
+ mci->ue_noinfo_count += count;
return;
}
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index 26e65ab5932a..10c305b4a2e1 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -998,11 +998,12 @@ void edac_remove_sysfs_mci_device(struct mem_ctl_info *mci)
void edac_unregister_sysfs(struct mem_ctl_info *mci)
{
+ struct bus_type *bus = mci->bus;
const char *name = mci->bus->name;
edac_dbg(1, "Unregistering device %s\n", dev_name(&mci->dev));
device_unregister(&mci->dev);
- bus_unregister(mci->bus);
+ bus_unregister(bus);
kfree(name);
}
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 01087a38da22..8a68a5e943ea 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -271,16 +271,6 @@ struct i7core_pvt {
bool is_registered, enable_scrub;
- /* Fifo double buffers */
- struct mce mce_entry[MCE_LOG_LEN];
- struct mce mce_outentry[MCE_LOG_LEN];
-
- /* Fifo in/out counters */
- unsigned mce_in, mce_out;
-
- /* Count indicator to show errors not got */
- unsigned mce_overrun;
-
/* DCLK Frequency used for computing scrub rate */
int dclk_freq;
@@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
* i7core_check_error Retrieve and process errors reported by the
* hardware. Called by the Core module.
*/
-static void i7core_check_error(struct mem_ctl_info *mci)
+static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- int i;
- unsigned count = 0;
- struct mce *m;
-
- /*
- * MCE first step: Copy all mce errors into a temporary buffer
- * We use a double buffering here, to reduce the risk of
- * losing an error.
- */
- smp_rmb();
- count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
- % MCE_LOG_LEN;
- if (!count)
- goto check_ce_error;
-
- m = pvt->mce_outentry;
- if (pvt->mce_in + count > MCE_LOG_LEN) {
- unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
- smp_wmb();
- pvt->mce_in = 0;
- count -= l;
- m += l;
- }
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
- smp_wmb();
- pvt->mce_in += count;
-
- smp_rmb();
- if (pvt->mce_overrun) {
- i7core_printk(KERN_ERR, "Lost %d memory errors\n",
- pvt->mce_overrun);
- smp_wmb();
- pvt->mce_overrun = 0;
- }
- /*
- * MCE second step: parse errors and display
- */
- for (i = 0; i < count; i++)
- i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
+ i7core_mce_output_error(mci, m);
/*
* Now, let's increment CE error counts
*/
-check_ce_error:
if (!pvt->is_registered)
i7core_udimm_check_mc_ecc_err(mci);
else
@@ -1849,12 +1798,8 @@ check_ce_error:
}
/*
- * i7core_mce_check_error Replicates mcelog routine to get errors
- * This routine simply queues mcelog errors, and
- * return. The error itself should be handled later
- * by i7core_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
*/
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
@@ -1866,7 +1811,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
i7_dev = get_i7core_dev(mce->socketid);
if (!i7_dev)
- return NOTIFY_BAD;
+ return NOTIFY_DONE;
mci = i7_dev->mci;
pvt = mci->pvt_info;
@@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
if (mce->bank != 8)
return NOTIFY_DONE;
- smp_rmb();
- if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
- smp_wmb();
- pvt->mce_overrun++;
- return NOTIFY_DONE;
- }
-
- /* Copy memory error at the ringbuffer */
- memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
- smp_wmb();
- pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
- /* Handle fatal errors immediately */
- if (mce->mcgstatus & 1)
- i7core_check_error(mci);
+ i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */
return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
get_dimm_config(mci);
/* record ptr to the generic device */
mci->pdev = &i7core_dev->pdev[0]->dev;
- /* Set the function pointer to an actual operation function */
- mci->edac_check = i7core_check_error;
/* Enable scrubrate setting */
if (pvt->enable_scrub)
diff --git a/drivers/edac/ie31200_edac.c b/drivers/edac/ie31200_edac.c
index 18d77ace4813..1c88d9707495 100644
--- a/drivers/edac/ie31200_edac.c
+++ b/drivers/edac/ie31200_edac.c
@@ -17,6 +17,7 @@
* 015c: Xeon E3-1200 v2/3rd Gen Core processor DRAM Controller
* 0c04: Xeon E3-1200 v3/4th Gen Core Processor DRAM Controller
* 0c08: Xeon E3-1200 v3 Processor DRAM Controller
+ * 1918: Xeon E3-1200 v5 Skylake Host Bridge/DRAM Registers
*
* Based on Intel specification:
* http://www.intel.com/content/dam/www/public/us/en/documents/datasheets/xeon-e3-1200v3-vol-2-datasheet.pdf
@@ -55,6 +56,7 @@
#define PCI_DEVICE_ID_INTEL_IE31200_HB_5 0x015c
#define PCI_DEVICE_ID_INTEL_IE31200_HB_6 0x0c04
#define PCI_DEVICE_ID_INTEL_IE31200_HB_7 0x0c08
+#define PCI_DEVICE_ID_INTEL_IE31200_HB_8 0x1918
#define IE31200_DIMMS 4
#define IE31200_RANKS 8
@@ -105,8 +107,11 @@
* 1 Multiple Bit Error Status (MERRSTS)
* 0 Correctable Error Status (CERRSTS)
*/
+
#define IE31200_C0ECCERRLOG 0x40c8
#define IE31200_C1ECCERRLOG 0x44c8
+#define IE31200_C0ECCERRLOG_SKL 0x4048
+#define IE31200_C1ECCERRLOG_SKL 0x4448
#define IE31200_ECCERRLOG_CE BIT(0)
#define IE31200_ECCERRLOG_UE BIT(1)
#define IE31200_ECCERRLOG_RANK_BITS GENMASK_ULL(28, 27)
@@ -123,17 +128,28 @@
#define IE31200_CAPID0_DDPCD BIT(6)
#define IE31200_CAPID0_ECC BIT(1)
-#define IE31200_MAD_DIMM_0_OFFSET 0x5004
-#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
-#define IE31200_MAD_DIMM_A_RANK BIT(17)
-#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
-
-#define IE31200_PAGES(n) (n << (28 - PAGE_SHIFT))
+#define IE31200_MAD_DIMM_0_OFFSET 0x5004
+#define IE31200_MAD_DIMM_0_OFFSET_SKL 0x500C
+#define IE31200_MAD_DIMM_SIZE GENMASK_ULL(7, 0)
+#define IE31200_MAD_DIMM_A_RANK BIT(17)
+#define IE31200_MAD_DIMM_A_RANK_SHIFT 17
+#define IE31200_MAD_DIMM_A_RANK_SKL BIT(10)
+#define IE31200_MAD_DIMM_A_RANK_SKL_SHIFT 10
+#define IE31200_MAD_DIMM_A_WIDTH BIT(19)
+#define IE31200_MAD_DIMM_A_WIDTH_SHIFT 19
+#define IE31200_MAD_DIMM_A_WIDTH_SKL GENMASK_ULL(9, 8)
+#define IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT 8
+
+/* Skylake reports 1GB increments, everything else is 256MB */
+#define IE31200_PAGES(n, skl) \
+ (n << (28 + (2 * skl) - PAGE_SHIFT))
static int nr_channels;
struct ie31200_priv {
void __iomem *window;
+ void __iomem *c0errlog;
+ void __iomem *c1errlog;
};
enum ie31200_chips {
@@ -157,9 +173,9 @@ static const struct ie31200_dev_info ie31200_devs[] = {
};
struct dimm_data {
- u8 size; /* in 256MB multiples */
+ u8 size; /* in multiples of 256MB, except Skylake is 1GB */
u8 dual_rank : 1,
- x16_width : 1; /* 0 means x8 width */
+ x16_width : 2; /* 0 means x8 width */
};
static int how_many_channels(struct pci_dev *pdev)
@@ -197,11 +213,10 @@ static bool ecc_capable(struct pci_dev *pdev)
return true;
}
-static int eccerrlog_row(int channel, u64 log)
+static int eccerrlog_row(u64 log)
{
- int rank = ((log & IE31200_ECCERRLOG_RANK_BITS) >>
- IE31200_ECCERRLOG_RANK_SHIFT);
- return rank | (channel * IE31200_RANKS_PER_CHANNEL);
+ return ((log & IE31200_ECCERRLOG_RANK_BITS) >>
+ IE31200_ECCERRLOG_RANK_SHIFT);
}
static void ie31200_clear_error_info(struct mem_ctl_info *mci)
@@ -219,7 +234,6 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
{
struct pci_dev *pdev;
struct ie31200_priv *priv = mci->pvt_info;
- void __iomem *window = priv->window;
pdev = to_pci_dev(mci->pdev);
@@ -232,9 +246,9 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
if (!(info->errsts & IE31200_ERRSTS_BITS))
return;
- info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
if (nr_channels == 2)
- info->eccerrlog[1] = lo_hi_readq(window + IE31200_C1ECCERRLOG);
+ info->eccerrlog[1] = lo_hi_readq(priv->c1errlog);
pci_read_config_word(pdev, IE31200_ERRSTS, &info->errsts2);
@@ -245,10 +259,10 @@ static void ie31200_get_and_clear_error_info(struct mem_ctl_info *mci,
* should be UE info.
*/
if ((info->errsts ^ info->errsts2) & IE31200_ERRSTS_BITS) {
- info->eccerrlog[0] = lo_hi_readq(window + IE31200_C0ECCERRLOG);
+ info->eccerrlog[0] = lo_hi_readq(priv->c0errlog);
if (nr_channels == 2)
info->eccerrlog[1] =
- lo_hi_readq(window + IE31200_C1ECCERRLOG);
+ lo_hi_readq(priv->c1errlog);
}
ie31200_clear_error_info(mci);
@@ -274,14 +288,14 @@ static void ie31200_process_error_info(struct mem_ctl_info *mci,
if (log & IE31200_ECCERRLOG_UE) {
edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1,
0, 0, 0,
- eccerrlog_row(channel, log),
+ eccerrlog_row(log),
channel, -1,
"ie31200 UE", "");
} else if (log & IE31200_ECCERRLOG_CE) {
edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1,
0, 0,
IE31200_ECCERRLOG_SYNDROME(log),
- eccerrlog_row(channel, log),
+ eccerrlog_row(log),
channel, -1,
"ie31200 CE", "");
}
@@ -326,6 +340,33 @@ static void __iomem *ie31200_map_mchbar(struct pci_dev *pdev)
return window;
}
+static void __skl_populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+ int chan)
+{
+ dd->size = (addr_decode >> (chan << 4)) & IE31200_MAD_DIMM_SIZE;
+ dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK_SKL << (chan << 4))) ? 1 : 0;
+ dd->x16_width = ((addr_decode & (IE31200_MAD_DIMM_A_WIDTH_SKL << (chan << 4))) >>
+ (IE31200_MAD_DIMM_A_WIDTH_SKL_SHIFT + (chan << 4)));
+}
+
+static void __populate_dimm_info(struct dimm_data *dd, u32 addr_decode,
+ int chan)
+{
+ dd->size = (addr_decode >> (chan << 3)) & IE31200_MAD_DIMM_SIZE;
+ dd->dual_rank = (addr_decode & (IE31200_MAD_DIMM_A_RANK << chan)) ? 1 : 0;
+ dd->x16_width = (addr_decode & (IE31200_MAD_DIMM_A_WIDTH << chan)) ? 1 : 0;
+}
+
+static void populate_dimm_info(struct dimm_data *dd, u32 addr_decode, int chan,
+ bool skl)
+{
+ if (skl)
+ __skl_populate_dimm_info(dd, addr_decode, chan);
+ else
+ __populate_dimm_info(dd, addr_decode, chan);
+}
+
+
static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
{
int i, j, ret;
@@ -334,7 +375,8 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
struct dimm_data dimm_info[IE31200_CHANNELS][IE31200_DIMMS_PER_CHANNEL];
void __iomem *window;
struct ie31200_priv *priv;
- u32 addr_decode;
+ u32 addr_decode, mad_offset;
+ bool skl = (pdev->device == PCI_DEVICE_ID_INTEL_IE31200_HB_8);
edac_dbg(0, "MC:\n");
@@ -363,7 +405,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
edac_dbg(3, "MC: init mci\n");
mci->pdev = &pdev->dev;
- mci->mtype_cap = MEM_FLAG_DDR3;
+ if (skl)
+ mci->mtype_cap = MEM_FLAG_DDR4;
+ else
+ mci->mtype_cap = MEM_FLAG_DDR3;
mci->edac_ctl_cap = EDAC_FLAG_SECDED;
mci->edac_cap = EDAC_FLAG_SECDED;
mci->mod_name = EDAC_MOD_STR;
@@ -374,19 +419,24 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
mci->ctl_page_to_phys = NULL;
priv = mci->pvt_info;
priv->window = window;
+ if (skl) {
+ priv->c0errlog = window + IE31200_C0ECCERRLOG_SKL;
+ priv->c1errlog = window + IE31200_C1ECCERRLOG_SKL;
+ mad_offset = IE31200_MAD_DIMM_0_OFFSET_SKL;
+ } else {
+ priv->c0errlog = window + IE31200_C0ECCERRLOG;
+ priv->c1errlog = window + IE31200_C1ECCERRLOG;
+ mad_offset = IE31200_MAD_DIMM_0_OFFSET;
+ }
/* populate DIMM info */
for (i = 0; i < IE31200_CHANNELS; i++) {
- addr_decode = readl(window + IE31200_MAD_DIMM_0_OFFSET +
+ addr_decode = readl(window + mad_offset +
(i * 4));
edac_dbg(0, "addr_decode: 0x%x\n", addr_decode);
for (j = 0; j < IE31200_DIMMS_PER_CHANNEL; j++) {
- dimm_info[i][j].size = (addr_decode >> (j * 8)) &
- IE31200_MAD_DIMM_SIZE;
- dimm_info[i][j].dual_rank = (addr_decode &
- (IE31200_MAD_DIMM_A_RANK << j)) ? 1 : 0;
- dimm_info[i][j].x16_width = (addr_decode &
- (IE31200_MAD_DIMM_A_WIDTH << j)) ? 1 : 0;
+ populate_dimm_info(&dimm_info[i][j], addr_decode, j,
+ skl);
edac_dbg(0, "size: 0x%x, rank: %d, width: %d\n",
dimm_info[i][j].size,
dimm_info[i][j].dual_rank,
@@ -405,7 +455,7 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
struct dimm_info *dimm;
unsigned long nr_pages;
- nr_pages = IE31200_PAGES(dimm_info[j][i].size);
+ nr_pages = IE31200_PAGES(dimm_info[j][i].size, skl);
if (nr_pages == 0)
continue;
@@ -417,7 +467,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* just a guess */
- dimm->mtype = MEM_DDR3;
+ if (skl)
+ dimm->mtype = MEM_DDR4;
+ else
+ dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
@@ -426,7 +479,10 @@ static int ie31200_probe1(struct pci_dev *pdev, int dev_idx)
dimm->nr_pages = nr_pages;
edac_dbg(0, "set nr pages: 0x%lx\n", nr_pages);
dimm->grain = 8; /* same guess */
- dimm->mtype = MEM_DDR3;
+ if (skl)
+ dimm->mtype = MEM_DDR4;
+ else
+ dimm->mtype = MEM_DDR3;
dimm->dtype = DEV_UNKNOWN;
dimm->edac_mode = EDAC_UNKNOWN;
}
@@ -501,6 +557,9 @@ static const struct pci_device_id ie31200_pci_tbl[] = {
PCI_VEND_DEV(INTEL, IE31200_HB_7), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
IE31200},
{
+ PCI_VEND_DEV(INTEL, IE31200_HB_8), PCI_ANY_ID, PCI_ANY_ID, 0, 0,
+ IE31200},
+ {
0,
} /* 0 terminated list. */
};
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 49768c08ac07..9b6800a79c7f 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -1052,7 +1052,6 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
struct mce *m = (struct mce *)data;
struct cpuinfo_x86 *c = &cpu_data(m->extcpu);
int ecc;
- u32 ebx = cpuid_ebx(0x80000007);
if (amd_filter_mce(m))
return NOTIFY_STOP;
@@ -1075,7 +1074,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
((m->status & MCI_STATUS_POISON) ? "Poison" : "-"));
- if (!!(ebx & BIT(3))) {
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
@@ -1094,7 +1093,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (m->status & MCI_STATUS_ADDRV)
pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr);
- if (!!(ebx & BIT(3))) {
+ if (boot_cpu_has(X86_FEATURE_SMCA)) {
decode_smca_errors(m);
goto err_code;
}
@@ -1149,7 +1148,6 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- u32 ebx;
if (c->x86_vendor != X86_VENDOR_AMD)
return -ENODEV;
@@ -1205,9 +1203,8 @@ static int __init mce_amd_init(void)
break;
case 0x17:
- ebx = cpuid_ebx(0x80000007);
xec_mask = 0x3f;
- if (!(ebx & BIT(3))) {
+ if (!boot_cpu_has(X86_FEATURE_SMCA)) {
printk(KERN_WARNING "Decoding supported only on Scalable MCA processors.\n");
goto err_out;
}
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index 93f0d4120289..b4d0bf6534cf 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -21,6 +21,8 @@
#include <linux/smp.h>
#include <linux/bitmap.h>
#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
+#include <asm/cpu_device_id.h>
#include <asm/processor.h>
#include <asm/mce.h>
@@ -28,8 +30,6 @@
/* Static vars */
static LIST_HEAD(sbridge_edac_list);
-static DEFINE_MUTEX(sbridge_edac_lock);
-static int probed;
/*
* Alter this version for the module when modifications are made
@@ -362,16 +362,7 @@ struct sbridge_pvt {
/* Memory type detection */
bool is_mirrored, is_lockstep, is_close_pg;
-
- /* Fifo double buffers */
- struct mce mce_entry[MCE_LOG_LEN];
- struct mce mce_outentry[MCE_LOG_LEN];
-
- /* Fifo in/out counters */
- unsigned mce_in, mce_out;
-
- /* Count indicator to show errors not got */
- unsigned mce_overrun;
+ bool is_chan_hash;
/* Memory description */
u64 tolm, tohm;
@@ -661,18 +652,6 @@ static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
{0,} /* 0 terminated list. */
};
-/*
- * pci_device_id table for which devices we are looking for
- */
-static const struct pci_device_id sbridge_pci_tbl[] = {
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0)},
- {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0)},
- {0,} /* 0 terminated list. */
-};
-
/****************************************************************************
Ancillary status routines
@@ -1060,6 +1039,20 @@ static inline u8 sad_pkg_ha(u8 pkg)
return (pkg >> 2) & 0x1;
}
+static int haswell_chan_hash(int idx, u64 addr)
+{
+ int i;
+
+ /*
+ * XOR even bits from 12:26 to bit0 of idx,
+ * odd bits from 13:27 to bit1
+ */
+ for (i = 12; i < 28; i += 2)
+ idx ^= (addr >> i) & 3;
+
+ return idx;
+}
+
/****************************************************************************
Memory check routines
****************************************************************************/
@@ -1616,6 +1609,10 @@ static int get_dimm_config(struct mem_ctl_info *mci)
KNL_MAX_CHANNELS : NUM_CHANNELS;
u64 knl_mc_sizes[KNL_MAX_CHANNELS];
+ if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL) {
+ pci_read_config_dword(pvt->pci_ha0, HASWELL_HASYSDEFEATURE2, &reg);
+ pvt->is_chan_hash = GET_BITFIELD(reg, 21, 21);
+ }
if (pvt->info.type == HASWELL || pvt->info.type == BROADWELL ||
pvt->info.type == KNIGHTS_LANDING)
pci_read_config_dword(pvt->pci_sad1, SAD_TARGET, &reg);
@@ -2118,12 +2115,15 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
}
ch_way = TAD_CH(reg) + 1;
- sck_way = 1 << TAD_SOCK(reg);
+ sck_way = TAD_SOCK(reg);
if (ch_way == 3)
idx = addr >> 6;
- else
+ else {
idx = (addr >> (6 + sck_way + shiftup)) & 0x3;
+ if (pvt->is_chan_hash)
+ idx = haswell_chan_hash(idx, addr);
+ }
idx = idx % ch_way;
/*
@@ -2157,7 +2157,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
switch(ch_way) {
case 2:
case 4:
- sck_xch = 1 << sck_way * (ch_way >> 1);
+ sck_xch = (1 << sck_way) * (ch_way >> 1);
break;
default:
sprintf(msg, "Invalid mirror set. Can't decode addr");
@@ -2193,7 +2193,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
ch_addr = addr - offset;
ch_addr >>= (6 + shiftup);
- ch_addr /= ch_way * sck_way;
+ ch_addr /= sck_xch;
ch_addr <<= (6 + shiftup);
ch_addr |= addr & ((1 << (6 + shiftup)) - 1);
@@ -3075,63 +3075,8 @@ err_parsing:
}
/*
- * sbridge_check_error Retrieve and process errors reported by the
- * hardware. Called by the Core module.
- */
-static void sbridge_check_error(struct mem_ctl_info *mci)
-{
- struct sbridge_pvt *pvt = mci->pvt_info;
- int i;
- unsigned count = 0;
- struct mce *m;
-
- /*
- * MCE first step: Copy all mce errors into a temporary buffer
- * We use a double buffering here, to reduce the risk of
- * loosing an error.
- */
- smp_rmb();
- count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
- % MCE_LOG_LEN;
- if (!count)
- return;
-
- m = pvt->mce_outentry;
- if (pvt->mce_in + count > MCE_LOG_LEN) {
- unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
- smp_wmb();
- pvt->mce_in = 0;
- count -= l;
- m += l;
- }
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
- smp_wmb();
- pvt->mce_in += count;
-
- smp_rmb();
- if (pvt->mce_overrun) {
- sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
- pvt->mce_overrun);
- smp_wmb();
- pvt->mce_overrun = 0;
- }
-
- /*
- * MCE second step: parse errors and display
- */
- for (i = 0; i < count; i++)
- sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
-}
-
-/*
- * sbridge_mce_check_error Replicates mcelog routine to get errors
- * This routine simply queues mcelog errors, and
- * return. The error itself should be handled later
- * by sbridge_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
*/
static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
@@ -3146,7 +3091,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
mci = get_mci_for_node_id(mce->socketid);
if (!mci)
- return NOTIFY_BAD;
+ return NOTIFY_DONE;
pvt = mci->pvt_info;
/*
@@ -3176,21 +3121,7 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
"%u APIC %x\n", mce->cpuvendor, mce->cpuid,
mce->time, mce->socketid, mce->apicid);
- smp_rmb();
- if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
- smp_wmb();
- pvt->mce_overrun++;
- return NOTIFY_DONE;
- }
-
- /* Copy memory error at the ringbuffer */
- memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
- smp_wmb();
- pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
- /* Handle fatal errors immediately */
- if (mce->mcgstatus & 1)
- sbridge_check_error(mci);
+ sbridge_mce_output_error(mci, mce);
/* Advice mcelog that the error were handled */
return NOTIFY_STOP;
@@ -3276,9 +3207,6 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev, enum type type)
mci->dev_name = pci_name(pdev);
mci->ctl_page_to_phys = NULL;
- /* Set the function pointer to an actual operation function */
- mci->edac_check = sbridge_check_error;
-
pvt->info.type = type;
switch (type) {
case IVY_BRIDGE:
@@ -3426,62 +3354,40 @@ fail0:
return rc;
}
+#define ICPU(model, table) \
+ { X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
+
+/* Order here must match "enum type" */
+static const struct x86_cpu_id sbridge_cpuids[] = {
+ ICPU(0x2d, pci_dev_descr_sbridge_table), /* SANDY_BRIDGE */
+ ICPU(0x3e, pci_dev_descr_ibridge_table), /* IVY_BRIDGE */
+ ICPU(0x3f, pci_dev_descr_haswell_table), /* HASWELL */
+ ICPU(0x4f, pci_dev_descr_broadwell_table), /* BROADWELL */
+ ICPU(0x57, pci_dev_descr_knl_table), /* KNIGHTS_LANDING */
+ { }
+};
+MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids);
+
/*
- * sbridge_probe Probe for ONE instance of device to see if it is
+ * sbridge_probe Get all devices and register memory controllers
* present.
* return:
* 0 for FOUND a device
* < 0 for error code
*/
-static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static int sbridge_probe(const struct x86_cpu_id *id)
{
int rc = -ENODEV;
u8 mc, num_mc = 0;
struct sbridge_dev *sbridge_dev;
- enum type type = SANDY_BRIDGE;
+ struct pci_id_table *ptable = (struct pci_id_table *)id->driver_data;
/* get the pci devices we want to reserve for our use */
- mutex_lock(&sbridge_edac_lock);
-
- /*
- * All memory controllers are allocated at the first pass.
- */
- if (unlikely(probed >= 1)) {
- mutex_unlock(&sbridge_edac_lock);
- return -ENODEV;
- }
- probed++;
+ rc = sbridge_get_all_devices(&num_mc, ptable);
- switch (pdev->device) {
- case PCI_DEVICE_ID_INTEL_IBRIDGE_IMC_HA0_TA:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_ibridge_table);
- type = IVY_BRIDGE;
- break;
- case PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_sbridge_table);
- type = SANDY_BRIDGE;
- break;
- case PCI_DEVICE_ID_INTEL_HASWELL_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_haswell_table);
- type = HASWELL;
- break;
- case PCI_DEVICE_ID_INTEL_BROADWELL_IMC_HA0:
- rc = sbridge_get_all_devices(&num_mc,
- pci_dev_descr_broadwell_table);
- type = BROADWELL;
- break;
- case PCI_DEVICE_ID_INTEL_KNL_IMC_SAD0:
- rc = sbridge_get_all_devices_knl(&num_mc,
- pci_dev_descr_knl_table);
- type = KNIGHTS_LANDING;
- break;
- }
if (unlikely(rc < 0)) {
- edac_dbg(0, "couldn't get all devices for 0x%x\n", pdev->device);
+ edac_dbg(0, "couldn't get all devices\n");
goto fail0;
}
@@ -3492,14 +3398,13 @@ static int sbridge_probe(struct pci_dev *pdev, const struct pci_device_id *id)
mc, mc + 1, num_mc);
sbridge_dev->mc = mc++;
- rc = sbridge_register_mci(sbridge_dev, type);
+ rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
if (unlikely(rc < 0))
goto fail1;
}
sbridge_printk(KERN_INFO, "%s\n", SBRIDGE_REVISION);
- mutex_unlock(&sbridge_edac_lock);
return 0;
fail1:
@@ -3508,74 +3413,47 @@ fail1:
sbridge_put_all_devices();
fail0:
- mutex_unlock(&sbridge_edac_lock);
return rc;
}
/*
- * sbridge_remove destructor for one instance of device
+ * sbridge_remove cleanup
*
*/
-static void sbridge_remove(struct pci_dev *pdev)
+static void sbridge_remove(void)
{
struct sbridge_dev *sbridge_dev;
edac_dbg(0, "\n");
- /*
- * we have a trouble here: pdev value for removal will be wrong, since
- * it will point to the X58 register used to detect that the machine
- * is a Nehalem or upper design. However, due to the way several PCI
- * devices are grouped together to provide MC functionality, we need
- * to use a different method for releasing the devices
- */
-
- mutex_lock(&sbridge_edac_lock);
-
- if (unlikely(!probed)) {
- mutex_unlock(&sbridge_edac_lock);
- return;
- }
-
list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
sbridge_unregister_mci(sbridge_dev);
/* Release PCI resources */
sbridge_put_all_devices();
-
- probed--;
-
- mutex_unlock(&sbridge_edac_lock);
}
-MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
-
-/*
- * sbridge_driver pci_driver structure for this module
- *
- */
-static struct pci_driver sbridge_driver = {
- .name = "sbridge_edac",
- .probe = sbridge_probe,
- .remove = sbridge_remove,
- .id_table = sbridge_pci_tbl,
-};
-
/*
* sbridge_init Module entry function
* Try to initialize this module for its devices
*/
static int __init sbridge_init(void)
{
- int pci_rc;
+ const struct x86_cpu_id *id;
+ int rc;
edac_dbg(2, "\n");
+ id = x86_match_cpu(sbridge_cpuids);
+ if (!id)
+ return -ENODEV;
+
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
- pci_rc = pci_register_driver(&sbridge_driver);
- if (pci_rc >= 0) {
+ rc = sbridge_probe(id);
+
+ if (rc >= 0) {
mce_register_decode_chain(&sbridge_mce_dec);
if (get_edac_report_status() == EDAC_REPORTING_DISABLED)
sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n");
@@ -3583,9 +3461,9 @@ static int __init sbridge_init(void)
}
sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
- pci_rc);
+ rc);
- return pci_rc;
+ return rc;
}
/*
@@ -3595,7 +3473,7 @@ static int __init sbridge_init(void)
static void __exit sbridge_exit(void)
{
edac_dbg(2, "\n");
- pci_unregister_driver(&sbridge_driver);
+ sbridge_remove();
mce_unregister_decode_chain(&sbridge_mce_dec);
}