diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 50 | ||||
-rw-r--r-- | drivers/edac/Makefile | 9 | ||||
-rw-r--r-- | drivers/edac/altera_edac.c | 346 | ||||
-rw-r--r-- | drivers/edac/altera_edac.h | 6 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 24 | ||||
-rw-r--r-- | drivers/edac/fsl_ddr_edac.c | 633 | ||||
-rw-r--r-- | drivers/edac/fsl_ddr_edac.h | 79 | ||||
-rw-r--r-- | drivers/edac/layerscape_edac.c | 73 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 244 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.c | 685 | ||||
-rw-r--r-- | drivers/edac/mpc85xx_edac.h | 66 | ||||
-rw-r--r-- | drivers/edac/mv64x60_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/ppc4xx_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 20 | ||||
-rw-r--r-- | drivers/edac/skx_edac.c | 1121 | ||||
-rw-r--r-- | drivers/edac/wq.c | 2 |
16 files changed, 2441 insertions, 925 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index d0c1dab9b435..82d85cce81f8 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -251,6 +251,14 @@ config EDAC_SBRIDGE Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. +config EDAC_SKX + tristate "Intel Skylake server Integrated MC" + depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI_MMCONFIG + help + Support for error detection and correction the Intel + Skylake server Integrated Memory Controllers. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC @@ -258,6 +266,13 @@ config EDAC_MPC85XX Support for error detection and correction on the Freescale MPC8349, MPC8560, MPC8540, MPC8548, T4240 +config EDAC_LAYERSCAPE + tristate "Freescale Layerscape DDR" + depends on EDAC_MM_EDAC && ARCH_LAYERSCAPE + help + Support for error detection and correction on Freescale memory + controllers on Layerscape SoCs. + config EDAC_MV64X60 tristate "Marvell MV64x60" depends on EDAC_MM_EDAC && MV64X60 @@ -398,6 +413,41 @@ config EDAC_ALTERA_ETHERNET Support for error detection and correction on the Altera Ethernet FIFO Memory for Altera SoCs. +config EDAC_ALTERA_NAND + bool "Altera NAND FIFO ECC" + depends on EDAC_ALTERA=y && MTD_NAND_DENALI + help + Support for error detection and correction on the + Altera NAND FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_DMA + bool "Altera DMA FIFO ECC" + depends on EDAC_ALTERA=y && PL330_DMA=y + help + Support for error detection and correction on the + Altera DMA FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_USB + bool "Altera USB FIFO ECC" + depends on EDAC_ALTERA=y && USB_DWC2 + help + Support for error detection and correction on the + Altera USB FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_QSPI + bool "Altera QSPI FIFO ECC" + depends on EDAC_ALTERA=y && SPI_CADENCE_QUADSPI + help + Support for error detection and correction on the + Altera QSPI FIFO Memory for Altera SoCs. + +config EDAC_ALTERA_SDMMC + bool "Altera SDMMC FIFO ECC" + depends on EDAC_ALTERA=y && MMC_DW + help + Support for error detection and correction on the + Altera SDMMC FIFO Memory for Altera SoCs. + config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" depends on EDAC_MM_EDAC && ARCH_ZYNQ diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index f9e4a3e0e6e9..88e472e8b9a9 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o @@ -50,7 +51,13 @@ amd64_edac_mod-$(CONFIG_EDAC_AMD64_ERROR_INJECTION) += amd64_edac_inj.o obj-$(CONFIG_EDAC_AMD64) += amd64_edac_mod.o obj-$(CONFIG_EDAC_PASEMI) += pasemi_edac.o -obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac.o + +mpc85xx_edac_mod-y := fsl_ddr_edac.o mpc85xx_edac.o +obj-$(CONFIG_EDAC_MPC85XX) += mpc85xx_edac_mod.o + +layerscape_edac_mod-y := fsl_ddr_edac.o layerscape_edac.o +obj-$(CONFIG_EDAC_LAYERSCAPE) += layerscape_edac_mod.o + obj-$(CONFIG_EDAC_MV64X60) += mv64x60_edac.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o obj-$(CONFIG_EDAC_PPC4XX) += ppc4xx_edac.o diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 2398d0701f5b..58d3e2b39b5b 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -203,7 +203,7 @@ static void altr_sdr_mc_create_debugfs_nodes(struct mem_ctl_info *mci) if (!mci->debugfs) return; - edac_debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, + edac_debugfs_create_file("altr_trigger", S_IWUSR, mci->debugfs, mci, &altr_sdr_mc_debug_inject_fops); } @@ -680,7 +680,7 @@ static void altr_create_edacdev_dbgfs(struct edac_device_ctl_info *edac_dci, if (!drvdata->debugfs_dir) return; - if (!edac_debugfs_create_file(priv->dbgfs_name, S_IWUSR, + if (!edac_debugfs_create_file("altr_trigger", S_IWUSR, drvdata->debugfs_dir, edac_dci, priv->inject_fops)) debugfs_remove_recursive(drvdata->debugfs_dir); @@ -1108,7 +1108,6 @@ static const struct edac_device_prv_data ocramecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_SERR), .ue_clear_mask = (ALTR_OCR_ECC_EN | ALTR_OCR_ECC_DERR), - .dbgfs_name = "altr_ocram_trigger", .alloc_mem = ocram_alloc_mem, .free_mem = ocram_free_mem, .ecc_enable_mask = ALTR_OCR_ECC_EN, @@ -1125,7 +1124,6 @@ static const struct edac_device_prv_data a10_ocramecc_data = { .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_OCRAM, - .dbgfs_name = "altr_ocram_trigger", .ecc_enable_mask = ALTR_A10_OCRAM_ECC_EN_CTL, .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, .ce_set_mask = ALTR_A10_ECC_TSERRA, @@ -1228,7 +1226,6 @@ static const struct edac_device_prv_data l2ecc_data = { .setup = altr_l2_check_deps, .ce_clear_mask = 0, .ue_clear_mask = 0, - .dbgfs_name = "altr_l2_trigger", .alloc_mem = l2_alloc_mem, .free_mem = l2_free_mem, .ecc_enable_mask = ALTR_L2_ECC_EN, @@ -1244,7 +1241,6 @@ static const struct edac_device_prv_data a10_l2ecc_data = { .ce_clear_mask = ALTR_A10_L2_ECC_SERR_CLR, .ue_clear_mask = ALTR_A10_L2_ECC_MERR_CLR, .irq_status_mask = A10_SYSMGR_ECC_INTSTAT_L2, - .dbgfs_name = "altr_l2_trigger", .alloc_mem = l2_alloc_mem, .free_mem = l2_free_mem, .ecc_enable_mask = ALTR_A10_L2_ECC_EN_CTL, @@ -1266,7 +1262,6 @@ static const struct edac_device_prv_data a10_enetecc_data = { .setup = altr_check_ecc_deps, .ce_clear_mask = ALTR_A10_ECC_SERRPENA, .ue_clear_mask = ALTR_A10_ECC_DERRPENA, - .dbgfs_name = "altr_trigger", .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, .ce_set_mask = ALTR_A10_ECC_TSERRA, @@ -1285,6 +1280,292 @@ early_initcall(socfpga_init_ethernet_ecc); #endif /* CONFIG_EDAC_ALTERA_ETHERNET */ +/********************** NAND Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_NAND + +static const struct edac_device_prv_data a10_nandecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_nand_ecc(void) +{ + return altr_init_a10_ecc_device_type("altr,socfpga-nand-ecc"); +} + +early_initcall(socfpga_init_nand_ecc); + +#endif /* CONFIG_EDAC_ALTERA_NAND */ + +/********************** DMA Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_DMA + +static const struct edac_device_prv_data a10_dmaecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_dma_ecc(void) +{ + return altr_init_a10_ecc_device_type("altr,socfpga-dma-ecc"); +} + +early_initcall(socfpga_init_dma_ecc); + +#endif /* CONFIG_EDAC_ALTERA_DMA */ + +/********************** USB Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_USB + +static const struct edac_device_prv_data a10_usbecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_usb_ecc(void) +{ + return altr_init_a10_ecc_device_type("altr,socfpga-usb-ecc"); +} + +early_initcall(socfpga_init_usb_ecc); + +#endif /* CONFIG_EDAC_ALTERA_USB */ + +/********************** QSPI Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_QSPI + +static const struct edac_device_prv_data a10_qspiecc_data = { + .setup = altr_check_ecc_deps, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRA, + .ue_set_mask = ALTR_A10_ECC_TDERRA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_qspi_ecc(void) +{ + return altr_init_a10_ecc_device_type("altr,socfpga-qspi-ecc"); +} + +early_initcall(socfpga_init_qspi_ecc); + +#endif /* CONFIG_EDAC_ALTERA_QSPI */ + +/********************* SDMMC Device Functions **********************/ + +#ifdef CONFIG_EDAC_ALTERA_SDMMC + +static const struct edac_device_prv_data a10_sdmmceccb_data; +static int altr_portb_setup(struct altr_edac_device_dev *device) +{ + struct edac_device_ctl_info *dci; + struct altr_edac_device_dev *altdev; + char *ecc_name = "sdmmcb-ecc"; + int edac_idx, rc; + struct device_node *np; + const struct edac_device_prv_data *prv = &a10_sdmmceccb_data; + + rc = altr_check_ecc_deps(device); + if (rc) + return rc; + + np = of_find_compatible_node(NULL, NULL, "altr,socfpga-sdmmc-ecc"); + if (!np) { + edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); + return -ENODEV; + } + + /* Create the PortB EDAC device */ + edac_idx = edac_device_alloc_index(); + dci = edac_device_alloc_ctl_info(sizeof(*altdev), ecc_name, 1, + ecc_name, 1, 0, NULL, 0, edac_idx); + if (!dci) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s: Unable to allocate PortB EDAC device\n", + ecc_name); + return -ENOMEM; + } + + /* Initialize the PortB EDAC device structure from PortA structure */ + altdev = dci->pvt_info; + *altdev = *device; + + if (!devres_open_group(&altdev->ddev, altr_portb_setup, GFP_KERNEL)) + return -ENOMEM; + + /* Update PortB specific values */ + altdev->edac_dev_name = ecc_name; + altdev->edac_idx = edac_idx; + altdev->edac_dev = dci; + altdev->data = prv; + dci->dev = &altdev->ddev; + dci->ctl_name = "Altera ECC Manager"; + dci->mod_name = ecc_name; + dci->dev_name = ecc_name; + + /* Update the IRQs for PortB */ + altdev->sb_irq = irq_of_parse_and_map(np, 2); + if (!altdev->sb_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB SBIRQ alloc\n"); + rc = -ENODEV; + goto err_release_group_1; + } + rc = devm_request_irq(&altdev->ddev, altdev->sb_irq, + prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "PortB SBERR IRQ error\n"); + goto err_release_group_1; + } + + altdev->db_irq = irq_of_parse_and_map(np, 3); + if (!altdev->db_irq) { + edac_printk(KERN_ERR, EDAC_DEVICE, "Error PortB DBIRQ alloc\n"); + rc = -ENODEV; + goto err_release_group_1; + } + rc = devm_request_irq(&altdev->ddev, altdev->db_irq, + prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, "PortB DBERR IRQ error\n"); + goto err_release_group_1; + } + + rc = edac_device_add_device(dci); + if (rc) { + edac_printk(KERN_ERR, EDAC_DEVICE, + "edac_device_add_device portB failed\n"); + rc = -ENOMEM; + goto err_release_group_1; + } + altr_create_edacdev_dbgfs(dci, prv); + + list_add(&altdev->next, &altdev->edac->a10_ecc_devices); + + devres_remove_group(&altdev->ddev, altr_portb_setup); + + return 0; + +err_release_group_1: + edac_device_free_ctl_info(dci); + devres_release_group(&altdev->ddev, altr_portb_setup); + edac_printk(KERN_ERR, EDAC_DEVICE, + "%s:Error setting up EDAC device: %d\n", ecc_name, rc); + return rc; +} + +static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id) +{ + struct altr_edac_device_dev *ad = dev_id; + void __iomem *base = ad->base; + const struct edac_device_prv_data *priv = ad->data; + + if (irq == ad->sb_irq) { + writel(priv->ce_clear_mask, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ce(ad->edac_dev, 0, 0, ad->edac_dev_name); + return IRQ_HANDLED; + } else if (irq == ad->db_irq) { + writel(priv->ue_clear_mask, + base + ALTR_A10_ECC_INTSTAT_OFST); + edac_device_handle_ue(ad->edac_dev, 0, 0, ad->edac_dev_name); + return IRQ_HANDLED; + } + + WARN_ONCE(1, "Unhandled IRQ%d on Port B.", irq); + + return IRQ_NONE; +} + +static const struct edac_device_prv_data a10_sdmmcecca_data = { + .setup = altr_portb_setup, + .ce_clear_mask = ALTR_A10_ECC_SERRPENA, + .ue_clear_mask = ALTR_A10_ECC_DERRPENA, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_SERRPENA, + .ue_set_mask = ALTR_A10_ECC_DERRPENA, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static const struct edac_device_prv_data a10_sdmmceccb_data = { + .setup = altr_portb_setup, + .ce_clear_mask = ALTR_A10_ECC_SERRPENB, + .ue_clear_mask = ALTR_A10_ECC_DERRPENB, + .ecc_enable_mask = ALTR_A10_COMMON_ECC_EN_CTL, + .ecc_en_ofst = ALTR_A10_ECC_CTRL_OFST, + .ce_set_mask = ALTR_A10_ECC_TSERRB, + .ue_set_mask = ALTR_A10_ECC_TDERRB, + .set_err_ofst = ALTR_A10_ECC_INTTEST_OFST, + .ecc_irq_handler = altr_edac_a10_ecc_irq_portb, + .inject_fops = &altr_edac_a10_device_inject_fops, +}; + +static int __init socfpga_init_sdmmc_ecc(void) +{ + int rc = -ENODEV; + struct device_node *child = of_find_compatible_node(NULL, NULL, + "altr,socfpga-sdmmc-ecc"); + if (!child) { + edac_printk(KERN_WARNING, EDAC_DEVICE, "SDMMC node not found\n"); + return -ENODEV; + } + + if (!of_device_is_available(child)) + goto exit; + + if (validate_parent_available(child)) + goto exit; + + rc = altr_init_a10_ecc_block(child, ALTR_A10_SDMMC_IRQ_MASK, + a10_sdmmcecca_data.ecc_enable_mask, 1); +exit: + of_node_put(child); + return rc; +} + +early_initcall(socfpga_init_sdmmc_ecc); + +#endif /* CONFIG_EDAC_ALTERA_SDMMC */ + /********************* Arria10 EDAC Device Functions *************************/ static const struct of_device_id altr_edac_a10_device_of_match[] = { #ifdef CONFIG_EDAC_ALTERA_L2C @@ -1298,6 +1579,21 @@ static const struct of_device_id altr_edac_a10_device_of_match[] = { { .compatible = "altr,socfpga-eth-mac-ecc", .data = &a10_enetecc_data }, #endif +#ifdef CONFIG_EDAC_ALTERA_NAND + { .compatible = "altr,socfpga-nand-ecc", .data = &a10_nandecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_DMA + { .compatible = "altr,socfpga-dma-ecc", .data = &a10_dmaecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_USB + { .compatible = "altr,socfpga-usb-ecc", .data = &a10_usbecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_QSPI + { .compatible = "altr,socfpga-qspi-ecc", .data = &a10_qspiecc_data }, +#endif +#ifdef CONFIG_EDAC_ALTERA_SDMMC + { .compatible = "altr,socfpga-sdmmc-ecc", .data = &a10_sdmmcecca_data }, +#endif {}, }; MODULE_DEVICE_TABLE(of, altr_edac_a10_device_of_match); @@ -1451,11 +1747,11 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, rc = -ENODEV; goto err_release_group1; } - rc = devm_request_irq(edac->dev, altdev->sb_irq, - prv->ecc_irq_handler, - IRQF_SHARED, ecc_name, altdev); + rc = devm_request_irq(edac->dev, altdev->sb_irq, prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); if (rc) { - edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); + edac_printk(KERN_ERR, EDAC_DEVICE, "No SBERR IRQ resource\n"); goto err_release_group1; } @@ -1465,9 +1761,9 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac, rc = -ENODEV; goto err_release_group1; } - rc = devm_request_irq(edac->dev, altdev->db_irq, - prv->ecc_irq_handler, - IRQF_SHARED, ecc_name, altdev); + rc = devm_request_irq(edac->dev, altdev->db_irq, prv->ecc_irq_handler, + IRQF_ONESHOT | IRQF_TRIGGER_HIGH, + ecc_name, altdev); if (rc) { edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n"); goto err_release_group1; @@ -1526,7 +1822,7 @@ static int a10_eccmgr_irqdomain_map(struct irq_domain *d, unsigned int irq, return 0; } -struct irq_domain_ops a10_eccmgr_ic_ops = { +static struct irq_domain_ops a10_eccmgr_ic_ops = { .map = a10_eccmgr_irqdomain_map, .xlate = irq_domain_xlate_twocell, }; @@ -1584,15 +1880,19 @@ static int altr_edac_a10_probe(struct platform_device *pdev) for_each_child_of_node(pdev->dev.of_node, child) { if (!of_device_is_available(child)) continue; - if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc")) - altr_edac_a10_device_add(edac, child); - else if ((of_device_is_compatible(child, - "altr,socfpga-a10-ocram-ecc")) || - (of_device_is_compatible(child, - "altr,socfpga-eth-mac-ecc"))) + + if (of_device_is_compatible(child, "altr,socfpga-a10-l2-ecc") || + of_device_is_compatible(child, "altr,socfpga-a10-ocram-ecc") || + of_device_is_compatible(child, "altr,socfpga-eth-mac-ecc") || + of_device_is_compatible(child, "altr,socfpga-nand-ecc") || + of_device_is_compatible(child, "altr,socfpga-dma-ecc") || + of_device_is_compatible(child, "altr,socfpga-usb-ecc") || + of_device_is_compatible(child, "altr,socfpga-qspi-ecc") || + of_device_is_compatible(child, "altr,socfpga-sdmmc-ecc")) + altr_edac_a10_device_add(edac, child); - else if (of_device_is_compatible(child, - "altr,sdram-edac-a10")) + + else if (of_device_is_compatible(child, "altr,sdram-edac-a10")) of_platform_populate(pdev->dev.of_node, altr_sdram_ctrl_of_match, NULL, &pdev->dev); diff --git a/drivers/edac/altera_edac.h b/drivers/edac/altera_edac.h index 687d8e754d36..cbc96290f743 100644 --- a/drivers/edac/altera_edac.h +++ b/drivers/edac/altera_edac.h @@ -250,6 +250,8 @@ struct altr_sdram_mc_data { #define ALTR_A10_ECC_INTTEST_OFST 0x24 #define ALTR_A10_ECC_TSERRA BIT(0) #define ALTR_A10_ECC_TDERRA BIT(8) +#define ALTR_A10_ECC_TSERRB BIT(16) +#define ALTR_A10_ECC_TDERRB BIT(24) /* ECC Manager Defines */ #define A10_SYSMGR_ECC_INTMASK_SET_OFST 0x94 @@ -288,6 +290,9 @@ struct altr_sdram_mc_data { /* Arria 10 Ethernet ECC Management Group Defines */ #define ALTR_A10_COMMON_ECC_EN_CTL BIT(0) +/* Arria 10 SDMMC ECC Management Group Defines */ +#define ALTR_A10_SDMMC_IRQ_MASK (BIT(16) | BIT(15)) + /* A10 ECC Controller memory initialization timeout */ #define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000 @@ -298,7 +303,6 @@ struct edac_device_prv_data { int ce_clear_mask; int ue_clear_mask; int irq_status_mask; - char dbgfs_name[20]; void * (*alloc_mem)(size_t size, void **other); void (*free_mem)(void *p, size_t size, void *other); int ecc_enable_mask; diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 8c0ec2128907..ee181c53626f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1425,11 +1425,17 @@ static u8 f1x_determine_channel(struct amd64_pvt *pvt, u64 sys_addr, if (intlv_addr & 0x2) { u8 shift = intlv_addr & 0x1 ? 9 : 6; - u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) % 2; + u32 temp = hweight_long((u32) ((sys_addr >> 16) & 0x1F)) & 1; return ((sys_addr >> shift) & 1) ^ temp; } + if (intlv_addr & 0x4) { + u8 shift = intlv_addr & 0x1 ? 9 : 8; + + return (sys_addr >> shift) & 1; + } + return (sys_addr >> (12 + hweight8(intlv_en))) & 1; } @@ -1726,8 +1732,11 @@ static int f15_m30h_match_to_this_node(struct amd64_pvt *pvt, unsigned range, if (!(num_dcts_intlv % 2 == 0) || (num_dcts_intlv > 4)) return -EINVAL; - channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, - num_dcts_intlv, dct_sel); + if (pvt->model >= 0x60) + channel = f1x_determine_channel(pvt, sys_addr, false, intlv_en); + else + channel = f15_m30h_determine_channel(pvt, sys_addr, intlv_en, + num_dcts_intlv, dct_sel); /* Verify we stay within the MAX number of channels allowed */ if (channel > 3) @@ -2961,6 +2970,15 @@ static void setup_pci_device(void) } } +static const struct x86_cpu_id amd64_cpuids[] = { + { X86_VENDOR_AMD, 0xF, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x10, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x15, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { X86_VENDOR_AMD, 0x16, X86_MODEL_ANY, X86_FEATURE_ANY, 0 }, + { } +}; +MODULE_DEVICE_TABLE(x86cpu, amd64_cpuids); + static int __init amd64_edac_init(void) { int err = -ENODEV; diff --git a/drivers/edac/fsl_ddr_edac.c b/drivers/edac/fsl_ddr_edac.c new file mode 100644 index 000000000000..9774f52f0c3e --- /dev/null +++ b/drivers/edac/fsl_ddr_edac.c @@ -0,0 +1,633 @@ +/* + * Freescale Memory Controller kernel module + * + * Support Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and + * ARM-based Layerscape SoCs including LS2xxx. Originally split + * out from mpc85xx_edac EDAC driver. + * + * Parts Copyrighted (c) 2013 by Freescale Semiconductor, Inc. + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#include <linux/module.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ctype.h> +#include <linux/io.h> +#include <linux/mod_devicetable.h> +#include <linux/edac.h> +#include <linux/smp.h> +#include <linux/gfp.h> + +#include <linux/of_platform.h> +#include <linux/of_device.h> +#include <linux/of_address.h> +#include "edac_module.h" +#include "edac_core.h" +#include "fsl_ddr_edac.h" + +#define EDAC_MOD_STR "fsl_ddr_edac" + +static int edac_mc_idx; + +static u32 orig_ddr_err_disable; +static u32 orig_ddr_err_sbe; +static bool little_endian; + +static inline u32 ddr_in32(void __iomem *addr) +{ + return little_endian ? ioread32(addr) : ioread32be(addr); +} + +static inline void ddr_out32(void __iomem *addr, u32 value) +{ + if (little_endian) + iowrite32(value, addr); + else + iowrite32be(value, addr); +} + +/************************ MC SYSFS parts ***********************************/ + +#define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +static ssize_t fsl_mc_inject_data_hi_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI)); +} + +static ssize_t fsl_mc_inject_data_lo_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO)); +} + +static ssize_t fsl_mc_inject_ctrl_show(struct device *dev, + struct device_attribute *mattr, + char *data) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + return sprintf(data, "0x%08x", + ddr_in32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT)); +} + +static ssize_t fsl_mc_inject_data_hi_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_HI, val); + return count; + } + return 0; +} + +static ssize_t fsl_mc_inject_data_lo_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata->mc_vbase + FSL_MC_DATA_ERR_INJECT_LO, val); + return count; + } + return 0; +} + +static ssize_t fsl_mc_inject_ctrl_store(struct device *dev, + struct device_attribute *mattr, + const char *data, size_t count) +{ + struct mem_ctl_info *mci = to_mci(dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + unsigned long val; + int rc; + + if (isdigit(*data)) { + rc = kstrtoul(data, 0, &val); + if (rc) + return rc; + + ddr_out32(pdata->mc_vbase + FSL_MC_ECC_ERR_INJECT, val); + return count; + } + return 0; +} + +DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR, + fsl_mc_inject_data_hi_show, fsl_mc_inject_data_hi_store); +DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR, + fsl_mc_inject_data_lo_show, fsl_mc_inject_data_lo_store); +DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR, + fsl_mc_inject_ctrl_show, fsl_mc_inject_ctrl_store); + +static struct attribute *fsl_ddr_dev_attrs[] = { + &dev_attr_inject_data_hi.attr, + &dev_attr_inject_data_lo.attr, + &dev_attr_inject_ctrl.attr, + NULL +}; + +ATTRIBUTE_GROUPS(fsl_ddr_dev); + +/**************************** MC Err device ***************************/ + +/* + * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the + * MPC8572 User's Manual. Each line represents a syndrome bit column as a + * 64-bit value, but split into an upper and lower 32-bit chunk. The labels + * below correspond to Freescale's manuals. + */ +static unsigned int ecc_table[16] = { + /* MSB LSB */ + /* [0:31] [32:63] */ + 0xf00fe11e, 0xc33c0ff7, /* Syndrome bit 7 */ + 0x00ff00ff, 0x00fff0ff, + 0x0f0f0f0f, 0x0f0fff00, + 0x11113333, 0x7777000f, + 0x22224444, 0x8888222f, + 0x44448888, 0xffff4441, + 0x8888ffff, 0x11118882, + 0xffff1111, 0x22221114, /* Syndrome bit 0 */ +}; + +/* + * Calculate the correct ECC value for a 64-bit value specified by high:low + */ +static u8 calculate_ecc(u32 high, u32 low) +{ + u32 mask_low; + u32 mask_high; + int bit_cnt; + u8 ecc = 0; + int i; + int j; + + for (i = 0; i < 8; i++) { + mask_high = ecc_table[i * 2]; + mask_low = ecc_table[i * 2 + 1]; + bit_cnt = 0; + + for (j = 0; j < 32; j++) { + if ((mask_high >> j) & 1) + bit_cnt ^= (high >> j) & 1; + if ((mask_low >> j) & 1) + bit_cnt ^= (low >> j) & 1; + } + + ecc |= bit_cnt << i; + } + + return ecc; +} + +/* + * Create the syndrome code which is generated if the data line specified by + * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641 + * User's Manual and 9-61 in the MPC8572 User's Manual. + */ +static u8 syndrome_from_bit(unsigned int bit) { + int i; + u8 syndrome = 0; + + /* + * Cycle through the upper or lower 32-bit portion of each value in + * ecc_table depending on if 'bit' is in the upper or lower half of + * 64-bit data. + */ + for (i = bit < 32; i < 16; i += 2) + syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2); + + return syndrome; +} + +/* + * Decode data and ecc syndrome to determine what went wrong + * Note: This can only decode single-bit errors + */ +static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, + int *bad_data_bit, int *bad_ecc_bit) +{ + int i; + u8 syndrome; + + *bad_data_bit = -1; + *bad_ecc_bit = -1; + + /* + * Calculate the ECC of the captured data and XOR it with the captured + * ECC to find an ECC syndrome value we can search for + */ + syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc; + + /* Check if a data line is stuck... */ + for (i = 0; i < 64; i++) { + if (syndrome == syndrome_from_bit(i)) { + *bad_data_bit = i; + return; + } + } + + /* If data is correct, check ECC bits for errors... */ + for (i = 0; i < 8; i++) { + if ((syndrome >> i) & 0x1) { + *bad_ecc_bit = i; + return; + } + } +} + +#define make64(high, low) (((u64)(high) << 32) | (low)) + +static void fsl_mc_check(struct mem_ctl_info *mci) +{ + struct fsl_mc_pdata *pdata = mci->pvt_info; + struct csrow_info *csrow; + u32 bus_width; + u32 err_detect; + u32 syndrome; + u64 err_addr; + u32 pfn; + int row_index; + u32 cap_high; + u32 cap_low; + int bad_data_bit; + int bad_ecc_bit; + + err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + if (!err_detect) + return; + + fsl_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n", + err_detect); + + /* no more processing if not ECC bit errors */ + if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); + return; + } + + syndrome = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ECC); + + /* Mask off appropriate bits of syndrome based on bus width */ + bus_width = (ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG) & + DSC_DBW_MASK) ? 32 : 64; + if (bus_width == 64) + syndrome &= 0xff; + else + syndrome &= 0xffff; + + err_addr = make64( + ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_EXT_ADDRESS), + ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_ADDRESS)); + pfn = err_addr >> PAGE_SHIFT; + + for (row_index = 0; row_index < mci->nr_csrows; row_index++) { + csrow = mci->csrows[row_index]; + if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page)) + break; + } + + cap_high = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_HI); + cap_low = ddr_in32(pdata->mc_vbase + FSL_MC_CAPTURE_DATA_LO); + + /* + * Analyze single-bit errors on 64-bit wide buses + * TODO: Add support for 32-bit wide buses + */ + if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { + sbe_ecc_decode(cap_high, cap_low, syndrome, + &bad_data_bit, &bad_ecc_bit); + + if (bad_data_bit != -1) + fsl_mc_printk(mci, KERN_ERR, + "Faulty Data bit: %d\n", bad_data_bit); + if (bad_ecc_bit != -1) + fsl_mc_printk(mci, KERN_ERR, + "Faulty ECC bit: %d\n", bad_ecc_bit); + + fsl_mc_printk(mci, KERN_ERR, + "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + cap_high ^ (1 << (bad_data_bit - 32)), + cap_low ^ (1 << bad_data_bit), + syndrome ^ (1 << bad_ecc_bit)); + } + + fsl_mc_printk(mci, KERN_ERR, + "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", + cap_high, cap_low, syndrome); + fsl_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr); + fsl_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); + + /* we are out of range */ + if (row_index == mci->nr_csrows) + fsl_mc_printk(mci, KERN_ERR, "PFN out of range!\n"); + + if (err_detect & DDR_EDE_SBE) + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); + + if (err_detect & DDR_EDE_MBE) + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + pfn, err_addr & ~PAGE_MASK, syndrome, + row_index, 0, -1, + mci->ctl_name, ""); + + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, err_detect); +} + +static irqreturn_t fsl_mc_isr(int irq, void *dev_id) +{ + struct mem_ctl_info *mci = dev_id; + struct fsl_mc_pdata *pdata = mci->pvt_info; + u32 err_detect; + + err_detect = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DETECT); + if (!err_detect) + return IRQ_NONE; + + fsl_mc_check(mci); + + return IRQ_HANDLED; +} + +static void fsl_ddr_init_csrows(struct mem_ctl_info *mci) +{ + struct fsl_mc_pdata *pdata = mci->pvt_info; + struct csrow_info *csrow; + struct dimm_info *dimm; + u32 sdram_ctl; + u32 sdtype; + enum mem_type mtype; + u32 cs_bnds; + int index; + + sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); + + sdtype = sdram_ctl & DSC_SDTYPE_MASK; + if (sdram_ctl & DSC_RD_EN) { + switch (sdtype) { + case 0x02000000: + mtype = MEM_RDDR; + break; + case 0x03000000: + mtype = MEM_RDDR2; + break; + case 0x07000000: + mtype = MEM_RDDR3; + break; + case 0x05000000: + mtype = MEM_RDDR4; + break; + default: + mtype = MEM_UNKNOWN; + break; + } + } else { + switch (sdtype) { + case 0x02000000: + mtype = MEM_DDR; + break; + case 0x03000000: + mtype = MEM_DDR2; + break; + case 0x07000000: + mtype = MEM_DDR3; + break; + case 0x05000000: + mtype = MEM_DDR4; + break; + default: + mtype = MEM_UNKNOWN; + break; + } + } + + for (index = 0; index < mci->nr_csrows; index++) { + u32 start; + u32 end; + + csrow = mci->csrows[index]; + dimm = csrow->channels[0]->dimm; + + cs_bnds = ddr_in32(pdata->mc_vbase + FSL_MC_CS_BNDS_0 + + (index * FSL_MC_CS_BNDS_OFS)); + + start = (cs_bnds & 0xffff0000) >> 16; + end = (cs_bnds & 0x0000ffff); + + if (start == end) + continue; /* not populated */ + + start <<= (24 - PAGE_SHIFT); + end <<= (24 - PAGE_SHIFT); + end |= (1 << (24 - PAGE_SHIFT)) - 1; + + csrow->first_page = start; + csrow->last_page = end; + + dimm->nr_pages = end + 1 - start; + dimm->grain = 8; + dimm->mtype = mtype; + dimm->dtype = DEV_UNKNOWN; + if (sdram_ctl & DSC_X32_EN) + dimm->dtype = DEV_X32; + dimm->edac_mode = EDAC_SECDED; + } +} + +int fsl_mc_err_probe(struct platform_device *op) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct fsl_mc_pdata *pdata; + struct resource r; + u32 sdram_ctl; + int res; + + if (!devres_open_group(&op->dev, fsl_mc_err_probe, GFP_KERNEL)) + return -ENOMEM; + + layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; + layers[0].size = 4; + layers[0].is_virt_csrow = true; + layers[1].type = EDAC_MC_LAYER_CHANNEL; + layers[1].size = 1; + layers[1].is_virt_csrow = false; + mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, + sizeof(*pdata)); + if (!mci) { + devres_release_group(&op->dev, fsl_mc_err_probe); + return -ENOMEM; + } + + pdata = mci->pvt_info; + pdata->name = "fsl_mc_err"; + mci->pdev = &op->dev; + pdata->edac_idx = edac_mc_idx++; + dev_set_drvdata(mci->pdev, mci); + mci->ctl_name = pdata->name; + mci->dev_name = pdata->name; + + /* + * Get the endianness of DDR controller registers. + * Default is big endian. + */ + little_endian = of_property_read_bool(op->dev.of_node, "little-endian"); + + res = of_address_to_resource(op->dev.of_node, 0, &r); + if (res) { + pr_err("%s: Unable to get resource for MC err regs\n", + __func__); + goto err; + } + + if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), + pdata->name)) { + pr_err("%s: Error while requesting mem region\n", + __func__); + res = -EBUSY; + goto err; + } + + pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); + if (!pdata->mc_vbase) { + pr_err("%s: Unable to setup MC err regs\n", __func__); + res = -ENOMEM; + goto err; + } + + sdram_ctl = ddr_in32(pdata->mc_vbase + FSL_MC_DDR_SDRAM_CFG); + if (!(sdram_ctl & DSC_ECC_EN)) { + /* no ECC */ + pr_warn("%s: No ECC DIMMs discovered\n", __func__); + res = -ENODEV; + goto err; + } + + edac_dbg(3, "init mci\n"); + mci->mtype_cap = MEM_FLAG_DDR | MEM_FLAG_RDDR | + MEM_FLAG_DDR2 | MEM_FLAG_RDDR2 | + MEM_FLAG_DDR3 | MEM_FLAG_RDDR3 | + MEM_FLAG_DDR4 | MEM_FLAG_RDDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; + mci->edac_cap = EDAC_FLAG_SECDED; + mci->mod_name = EDAC_MOD_STR; + + if (edac_op_state == EDAC_OPSTATE_POLL) + mci->edac_check = fsl_mc_check; + + mci->ctl_page_to_phys = NULL; + + mci->scrub_mode = SCRUB_SW_SRC; + + fsl_ddr_init_csrows(mci); + + /* store the original error disable bits */ + orig_ddr_err_disable = ddr_in32(pdata->mc_vbase + FSL_MC_ERR_DISABLE); + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, 0); + + /* clear all error bits */ + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DETECT, ~0); + + res = edac_mc_add_mc_with_groups(mci, fsl_ddr_dev_groups); + if (res) { + edac_dbg(3, "failed edac_mc_add_mc()\n"); + goto err; + } + + if (edac_op_state == EDAC_OPSTATE_INT) { + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, + DDR_EIE_MBEE | DDR_EIE_SBEE); + + /* store the original error management threshold */ + orig_ddr_err_sbe = ddr_in32(pdata->mc_vbase + + FSL_MC_ERR_SBE) & 0xff0000; + + /* set threshold to 1 error per interrupt */ + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, 0x10000); + + /* register interrupts */ + pdata->irq = platform_get_irq(op, 0); + res = devm_request_irq(&op->dev, pdata->irq, + fsl_mc_isr, + IRQF_SHARED, + "[EDAC] MC err", mci); + if (res < 0) { + pr_err("%s: Unable to request irq %d for FSL DDR DRAM ERR\n", + __func__, pdata->irq); + res = -ENODEV; + goto err2; + } + + pr_info(EDAC_MOD_STR " acquired irq %d for MC\n", + pdata->irq); + } + + devres_remove_group(&op->dev, fsl_mc_err_probe); + edac_dbg(3, "success\n"); + pr_info(EDAC_MOD_STR " MC err registered\n"); + + return 0; + +err2: + edac_mc_del_mc(&op->dev); +err: + devres_release_group(&op->dev, fsl_mc_err_probe); + edac_mc_free(mci); + return res; +} + +int fsl_mc_err_remove(struct platform_device *op) +{ + struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); + struct fsl_mc_pdata *pdata = mci->pvt_info; + + edac_dbg(0, "\n"); + + if (edac_op_state == EDAC_OPSTATE_INT) { + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_INT_EN, 0); + } + + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_DISABLE, + orig_ddr_err_disable); + ddr_out32(pdata->mc_vbase + FSL_MC_ERR_SBE, orig_ddr_err_sbe); + + edac_mc_del_mc(&op->dev); + edac_mc_free(mci); + return 0; +} diff --git a/drivers/edac/fsl_ddr_edac.h b/drivers/edac/fsl_ddr_edac.h new file mode 100644 index 000000000000..4ccee292eff1 --- /dev/null +++ b/drivers/edac/fsl_ddr_edac.h @@ -0,0 +1,79 @@ +/* + * Freescale Memory Controller kernel module + * + * Support Power-based SoCs including MPC85xx, MPC86xx, MPC83xx and + * ARM-based Layerscape SoCs including LS2xxx. Originally split + * out from mpc85xx_edac EDAC driver. + * + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + * + */ +#ifndef _FSL_DDR_EDAC_H_ +#define _FSL_DDR_EDAC_H_ + +#define fsl_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "FSL_DDR", fmt, ##arg) + +/* + * DRAM error defines + */ + +/* DDR_SDRAM_CFG */ +#define FSL_MC_DDR_SDRAM_CFG 0x0110 +#define FSL_MC_CS_BNDS_0 0x0000 +#define FSL_MC_CS_BNDS_OFS 0x0008 + +#define FSL_MC_DATA_ERR_INJECT_HI 0x0e00 +#define FSL_MC_DATA_ERR_INJECT_LO 0x0e04 +#define FSL_MC_ECC_ERR_INJECT 0x0e08 +#define FSL_MC_CAPTURE_DATA_HI 0x0e20 +#define FSL_MC_CAPTURE_DATA_LO 0x0e24 +#define FSL_MC_CAPTURE_ECC 0x0e28 +#define FSL_MC_ERR_DETECT 0x0e40 +#define FSL_MC_ERR_DISABLE 0x0e44 +#define FSL_MC_ERR_INT_EN 0x0e48 +#define FSL_MC_CAPTURE_ATRIBUTES 0x0e4c +#define FSL_MC_CAPTURE_ADDRESS 0x0e50 +#define FSL_MC_CAPTURE_EXT_ADDRESS 0x0e54 +#define FSL_MC_ERR_SBE 0x0e58 + +#define DSC_MEM_EN 0x80000000 +#define DSC_ECC_EN 0x20000000 +#define DSC_RD_EN 0x10000000 +#define DSC_DBW_MASK 0x00180000 +#define DSC_DBW_32 0x00080000 +#define DSC_DBW_64 0x00000000 + +#define DSC_SDTYPE_MASK 0x07000000 +#define DSC_X32_EN 0x00000020 + +/* Err_Int_En */ +#define DDR_EIE_MSEE 0x1 /* memory select */ +#define DDR_EIE_SBEE 0x4 /* single-bit ECC error */ +#define DDR_EIE_MBEE 0x8 /* multi-bit ECC error */ + +/* Err_Detect */ +#define DDR_EDE_MSE 0x1 /* memory select */ +#define DDR_EDE_SBE 0x4 /* single-bit ECC error */ +#define DDR_EDE_MBE 0x8 /* multi-bit ECC error */ +#define DDR_EDE_MME 0x80000000 /* multiple memory errors */ + +/* Err_Disable */ +#define DDR_EDI_MSED 0x1 /* memory select disable */ +#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ +#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ + +struct fsl_mc_pdata { + char *name; + int edac_idx; + void __iomem *mc_vbase; + int irq; +}; +int fsl_mc_err_probe(struct platform_device *op); +int fsl_mc_err_remove(struct platform_device *op); +#endif diff --git a/drivers/edac/layerscape_edac.c b/drivers/edac/layerscape_edac.c new file mode 100644 index 000000000000..6c59d897ad12 --- /dev/null +++ b/drivers/edac/layerscape_edac.c @@ -0,0 +1,73 @@ +/* + * Freescale Memory Controller kernel module + * + * Author: York Sun <york.sun@nxp.com> + * + * Copyright 2016 NXP Semiconductor + * + * Derived from mpc85xx_edac.c + * Author: Dave Jiang <djiang@mvista.com> + * + * 2006-2007 (c) MontaVista Software, Inc. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "edac_core.h" +#include "fsl_ddr_edac.h" + +static const struct of_device_id fsl_ddr_mc_err_of_match[] = { + { .compatible = "fsl,qoriq-memory-controller", }, + {}, +}; +MODULE_DEVICE_TABLE(of, fsl_ddr_mc_err_of_match); + +static struct platform_driver fsl_ddr_mc_err_driver = { + .probe = fsl_mc_err_probe, + .remove = fsl_mc_err_remove, + .driver = { + .name = "fsl_ddr_mc_err", + .of_match_table = fsl_ddr_mc_err_of_match, + }, +}; + +static int __init fsl_ddr_mc_init(void) +{ + int res; + + /* make sure error reporting method is sane */ + switch (edac_op_state) { + case EDAC_OPSTATE_POLL: + case EDAC_OPSTATE_INT: + break; + default: + edac_op_state = EDAC_OPSTATE_INT; + break; + } + + res = platform_driver_register(&fsl_ddr_mc_err_driver); + if (res) { + pr_err("MC fails to register\n"); + return res; + } + + return 0; +} + +module_init(fsl_ddr_mc_init); + +static void __exit fsl_ddr_mc_exit(void) +{ + platform_driver_unregister(&fsl_ddr_mc_err_driver); +} + +module_exit(fsl_ddr_mc_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("NXP Semiconductor"); +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, + "EDAC Error Reporting state: 0=Poll, 2=Interrupt"); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 9b6800a79c7f..daaac2c79ca7 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -148,12 +148,12 @@ static const char * const mc6_mce_desc[] = { }; /* Scalable MCA error strings */ -static const char * const f17h_ls_mce_desc[] = { +static const char * const smca_ls_mce_desc[] = { "Load queue parity", "Store queue parity", "Miss address buffer payload parity", "L1 TLB parity", - "", /* reserved */ + "Reserved", "DC tag error type 6", "DC tag error type 1", "Internal error type 1", @@ -172,7 +172,7 @@ static const char * const f17h_ls_mce_desc[] = { "L2 fill data error", }; -static const char * const f17h_if_mce_desc[] = { +static const char * const smca_if_mce_desc[] = { "microtag probe port parity error", "IC microtag or full tag multi-hit error", "IC full tag parity", @@ -185,19 +185,22 @@ static const char * const f17h_if_mce_desc[] = { "BPQ snoop parity on Thread 1", "L1 BTB multi-match error", "L2 BTB multi-match error", + "L2 Cache Response Poison error", + "System Read Data error", }; -static const char * const f17h_l2_mce_desc[] = { +static const char * const smca_l2_mce_desc[] = { "L2M tag multi-way-hit error", "L2M tag ECC error", "L2M data ECC error", "HW assert", }; -static const char * const f17h_de_mce_desc[] = { +static const char * const smca_de_mce_desc[] = { "uop cache tag parity error", "uop cache data parity error", "Insn buffer parity error", + "uop queue parity error", "Insn dispatch queue parity error", "Fetch address FIFO parity", "Patch RAM data parity", @@ -205,7 +208,7 @@ static const char * const f17h_de_mce_desc[] = { "uop buffer parity" }; -static const char * const f17h_ex_mce_desc[] = { +static const char * const smca_ex_mce_desc[] = { "Watchdog timeout error", "Phy register file parity", "Flag register file parity", @@ -214,18 +217,22 @@ static const char * const f17h_ex_mce_desc[] = { "EX payload parity", "Checkpoint queue parity", "Retire dispatch queue parity", + "Retire status queue parity error", + "Scheduling queue parity error", + "Branch buffer queue parity error", }; -static const char * const f17h_fp_mce_desc[] = { +static const char * const smca_fp_mce_desc[] = { "Physical register file parity", "Freelist parity error", "Schedule queue parity", "NSQ parity error", "Retire queue parity", "Status register file parity", + "Hardware assertion", }; -static const char * const f17h_l3_mce_desc[] = { +static const char * const smca_l3_mce_desc[] = { "Shadow tag macro ECC error", "Shadow tag macro multi-way-hit error", "L3M tag ECC error", @@ -236,7 +243,7 @@ static const char * const f17h_l3_mce_desc[] = { "L3 HW assert", }; -static const char * const f17h_cs_mce_desc[] = { +static const char * const smca_cs_mce_desc[] = { "Illegal request from transport layer", "Address violation", "Security violation", @@ -248,14 +255,14 @@ static const char * const f17h_cs_mce_desc[] = { "ECC error on probe filter access", }; -static const char * const f17h_pie_mce_desc[] = { +static const char * const smca_pie_mce_desc[] = { "HW assert", "Internal PIE register security violation", "Error on GMI link", "Poison data written to internal PIE register", }; -static const char * const f17h_umc_mce_desc[] = { +static const char * const smca_umc_mce_desc[] = { "DRAM ECC error", "Data poison error on DRAM", "SDP parity error", @@ -264,18 +271,39 @@ static const char * const f17h_umc_mce_desc[] = { "Write data CRC error", }; -static const char * const f17h_pb_mce_desc[] = { +static const char * const smca_pb_mce_desc[] = { "Parameter Block RAM ECC error", }; -static const char * const f17h_psp_mce_desc[] = { +static const char * const smca_psp_mce_desc[] = { "PSP RAM ECC or parity error", }; -static const char * const f17h_smu_mce_desc[] = { +static const char * const smca_smu_mce_desc[] = { "SMU RAM ECC or parity error", }; +struct smca_mce_desc { + const char * const *descs; + unsigned int num_descs; +}; + +static struct smca_mce_desc smca_mce_descs[] = { + [SMCA_LS] = { smca_ls_mce_desc, ARRAY_SIZE(smca_ls_mce_desc) }, + [SMCA_IF] = { smca_if_mce_desc, ARRAY_SIZE(smca_if_mce_desc) }, + [SMCA_L2_CACHE] = { smca_l2_mce_desc, ARRAY_SIZE(smca_l2_mce_desc) }, + [SMCA_DE] = { smca_de_mce_desc, ARRAY_SIZE(smca_de_mce_desc) }, + [SMCA_EX] = { smca_ex_mce_desc, ARRAY_SIZE(smca_ex_mce_desc) }, + [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, + [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, + [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, + [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, + [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, + [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, + [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, + [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -820,175 +848,35 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "Corrupted MC6 MCE info?\n"); } -static void decode_f17h_core_errors(const char *ip_name, u8 xec, - unsigned int mca_type) -{ - const char * const *error_desc_array; - size_t len; - - pr_emerg(HW_ERR "%s Error: ", ip_name); - - switch (mca_type) { - case SMCA_LS: - error_desc_array = f17h_ls_mce_desc; - len = ARRAY_SIZE(f17h_ls_mce_desc) - 1; - - if (xec == 0x4) { - pr_cont("Unrecognized LS MCA error code.\n"); - return; - } - break; - - case SMCA_IF: - error_desc_array = f17h_if_mce_desc; - len = ARRAY_SIZE(f17h_if_mce_desc) - 1; - break; - - case SMCA_L2_CACHE: - error_desc_array = f17h_l2_mce_desc; - len = ARRAY_SIZE(f17h_l2_mce_desc) - 1; - break; - - case SMCA_DE: - error_desc_array = f17h_de_mce_desc; - len = ARRAY_SIZE(f17h_de_mce_desc) - 1; - break; - - case SMCA_EX: - error_desc_array = f17h_ex_mce_desc; - len = ARRAY_SIZE(f17h_ex_mce_desc) - 1; - break; - - case SMCA_FP: - error_desc_array = f17h_fp_mce_desc; - len = ARRAY_SIZE(f17h_fp_mce_desc) - 1; - break; - - case SMCA_L3_CACHE: - error_desc_array = f17h_l3_mce_desc; - len = ARRAY_SIZE(f17h_l3_mce_desc) - 1; - break; - - default: - pr_cont("Corrupted MCA core error info.\n"); - return; - } - - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", - amd_core_mcablock_names[mca_type]); - return; - } - - pr_cont("%s.\n", error_desc_array[xec]); -} - -static void decode_df_errors(u8 xec, unsigned int mca_type) -{ - const char * const *error_desc_array; - size_t len; - - pr_emerg(HW_ERR "Data Fabric Error: "); - - switch (mca_type) { - case SMCA_CS: - error_desc_array = f17h_cs_mce_desc; - len = ARRAY_SIZE(f17h_cs_mce_desc) - 1; - break; - - case SMCA_PIE: - error_desc_array = f17h_pie_mce_desc; - len = ARRAY_SIZE(f17h_pie_mce_desc) - 1; - break; - - default: - pr_cont("Corrupted MCA Data Fabric info.\n"); - return; - } - - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", - amd_df_mcablock_names[mca_type]); - return; - } - - pr_cont("%s.\n", error_desc_array[xec]); -} - /* Decode errors according to Scalable MCA specification */ static void decode_smca_errors(struct mce *m) { - u32 addr = MSR_AMD64_SMCA_MCx_IPID(m->bank); - unsigned int hwid, mca_type, i; - u8 xec = XEC(m->status, xec_mask); - const char * const *error_desc_array; + struct smca_hwid_mcatype *type; + unsigned int bank_type; const char *ip_name; - u32 low, high; - size_t len; + u8 xec = XEC(m->status, xec_mask); - if (rdmsr_safe(addr, &low, &high)) { - pr_emerg("Invalid IP block specified, error information is unreliable.\n"); + if (m->bank >= ARRAY_SIZE(smca_banks)) return; - } - - hwid = high & MCI_IPID_HWID; - mca_type = (high & MCI_IPID_MCATYPE) >> 16; - - pr_emerg(HW_ERR "MC%d IPID value: 0x%08x%08x\n", m->bank, high, low); - - /* - * Based on hwid and mca_type values, decode errors from respective IPs. - * Note: mca_type values make sense only in the context of an hwid. - */ - for (i = 0; i < ARRAY_SIZE(amd_hwids); i++) - if (amd_hwids[i].hwid == hwid) - break; - - switch (i) { - case SMCA_F17H_CORE: - ip_name = (mca_type == SMCA_L3_CACHE) ? - "L3 Cache" : "F17h Core"; - return decode_f17h_core_errors(ip_name, xec, mca_type); - break; - case SMCA_DF: - return decode_df_errors(xec, mca_type); - break; - - case SMCA_UMC: - error_desc_array = f17h_umc_mce_desc; - len = ARRAY_SIZE(f17h_umc_mce_desc) - 1; - break; - - case SMCA_PB: - error_desc_array = f17h_pb_mce_desc; - len = ARRAY_SIZE(f17h_pb_mce_desc) - 1; - break; + if (boot_cpu_data.x86 >= 0x17 && m->bank == 4) + pr_emerg(HW_ERR "Bank 4 is reserved on Fam17h.\n"); - case SMCA_PSP: - error_desc_array = f17h_psp_mce_desc; - len = ARRAY_SIZE(f17h_psp_mce_desc) - 1; - break; - - case SMCA_SMU: - error_desc_array = f17h_smu_mce_desc; - len = ARRAY_SIZE(f17h_smu_mce_desc) - 1; - break; - - default: - pr_emerg(HW_ERR "HWID:%d does not match any existing IPs.\n", hwid); + type = smca_banks[m->bank].type; + if (!type) return; - } - ip_name = amd_hwids[i].name; - pr_emerg(HW_ERR "%s Error: ", ip_name); + bank_type = type->bank_type; + ip_name = smca_bank_names[bank_type].long_name; - if (xec > len) { - pr_cont("Unrecognized %s MCA bank error code.\n", ip_name); - return; - } + pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); - pr_cont("%s.\n", error_desc_array[xec]); + /* Only print the decode of valid error codes */ + if (xec < smca_mce_descs[bank_type].num_descs && + (type->xec_bitmap & BIT_ULL(xec))) { + pr_emerg(HW_ERR "%s Error: ", ip_name); + pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]); + } } static inline void amd_decode_err_code(u16 ec) @@ -1078,6 +966,8 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) u32 low, high; u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); + pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-")); + if (!rdmsr_safe(addr, &low, &high) && (low & MCI_CONFIG_MCAX)) pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-")); @@ -1091,12 +981,20 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr); + pr_emerg(HW_ERR "Error Addr: 0x%016llx", m->addr); if (boot_cpu_has(X86_FEATURE_SMCA)) { + if (m->status & MCI_STATUS_SYNDV) + pr_cont(", Syndrome: 0x%016llx", m->synd); + + pr_cont(", IPID: 0x%016llx", m->ipid); + + pr_cont("\n"); + decode_smca_errors(m); goto err_code; - } + } else + pr_cont("\n"); if (!fam_ops) goto err_code; diff --git a/drivers/edac/mpc85xx_edac.c b/drivers/edac/mpc85xx_edac.c index ca63d0da8889..ff0567526ee3 100644 --- a/drivers/edac/mpc85xx_edac.c +++ b/drivers/edac/mpc85xx_edac.c @@ -27,15 +27,12 @@ #include "edac_module.h" #include "edac_core.h" #include "mpc85xx_edac.h" +#include "fsl_ddr_edac.h" static int edac_dev_idx; #ifdef CONFIG_PCI static int edac_pci_idx; #endif -static int edac_mc_idx; - -static u32 orig_ddr_err_disable; -static u32 orig_ddr_err_sbe; /* * PCI Err defines @@ -46,103 +43,6 @@ static u32 orig_pci_err_en; #endif static u32 orig_l2_err_disable; -#ifdef CONFIG_FSL_SOC_BOOKE -static u32 orig_hid1[2]; -#endif - -/************************ MC SYSFS parts ***********************************/ - -#define to_mci(k) container_of(k, struct mem_ctl_info, dev) - -static ssize_t mpc85xx_mc_inject_data_hi_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + - MPC85XX_MC_DATA_ERR_INJECT_HI)); -} - -static ssize_t mpc85xx_mc_inject_data_lo_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + - MPC85XX_MC_DATA_ERR_INJECT_LO)); -} - -static ssize_t mpc85xx_mc_inject_ctrl_show(struct device *dev, - struct device_attribute *mattr, - char *data) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - return sprintf(data, "0x%08x", - in_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT)); -} - -static ssize_t mpc85xx_mc_inject_data_hi_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_HI, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -static ssize_t mpc85xx_mc_inject_data_lo_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_DATA_ERR_INJECT_LO, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -static ssize_t mpc85xx_mc_inject_ctrl_store(struct device *dev, - struct device_attribute *mattr, - const char *data, size_t count) -{ - struct mem_ctl_info *mci = to_mci(dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - if (isdigit(*data)) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ECC_ERR_INJECT, - simple_strtoul(data, NULL, 0)); - return count; - } - return 0; -} - -DEVICE_ATTR(inject_data_hi, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_data_hi_show, mpc85xx_mc_inject_data_hi_store); -DEVICE_ATTR(inject_data_lo, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_data_lo_show, mpc85xx_mc_inject_data_lo_store); -DEVICE_ATTR(inject_ctrl, S_IRUGO | S_IWUSR, - mpc85xx_mc_inject_ctrl_show, mpc85xx_mc_inject_ctrl_store); - -static struct attribute *mpc85xx_dev_attrs[] = { - &dev_attr_inject_data_hi.attr, - &dev_attr_inject_data_lo.attr, - &dev_attr_inject_ctrl.attr, - NULL -}; - -ATTRIBUTE_GROUPS(mpc85xx_dev); /**************************** PCI Err device ***************************/ #ifdef CONFIG_PCI @@ -160,18 +60,18 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) return; } - printk(KERN_ERR "PCI error(s) detected\n"); - printk(KERN_ERR "PCI/X ERR_DR register: %#08x\n", err_detect); + pr_err("PCI error(s) detected\n"); + pr_err("PCI/X ERR_DR register: %#08x\n", err_detect); - printk(KERN_ERR "PCI/X ERR_ATTRIB register: %#08x\n", + pr_err("PCI/X ERR_ATTRIB register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ATTRIB)); - printk(KERN_ERR "PCI/X ERR_ADDR register: %#08x\n", + pr_err("PCI/X ERR_ADDR register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_ADDR)); - printk(KERN_ERR "PCI/X ERR_EXT_ADDR register: %#08x\n", + pr_err("PCI/X ERR_EXT_ADDR register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_EXT_ADDR)); - printk(KERN_ERR "PCI/X ERR_DL register: %#08x\n", + pr_err("PCI/X ERR_DL register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DL)); - printk(KERN_ERR "PCI/X ERR_DH register: %#08x\n", + pr_err("PCI/X ERR_DH register: %#08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DH)); /* clear error bits */ @@ -187,14 +87,14 @@ static void mpc85xx_pci_check(struct edac_pci_ctl_info *pci) static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) { struct mpc85xx_pci_pdata *pdata = pci->pvt_info; - u32 err_detect; + u32 err_detect, err_cap_stat; err_detect = in_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR); + err_cap_stat = in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR); pr_err("PCIe error(s) detected\n"); pr_err("PCIe ERR_DR register: 0x%08x\n", err_detect); - pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", - in_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR)); + pr_err("PCIe ERR_CAP_STAT register: 0x%08x\n", err_cap_stat); pr_err("PCIe ERR_CAP_R0 register: 0x%08x\n", in_be32(pdata->pci_vbase + MPC85XX_PCIE_ERR_CAP_R0)); pr_err("PCIe ERR_CAP_R1 register: 0x%08x\n", @@ -206,6 +106,9 @@ static void mpc85xx_pcie_check(struct edac_pci_ctl_info *pci) /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, err_detect); + + /* reset error capture */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, err_cap_stat | 0x1); } static int mpc85xx_pcie_find_capability(struct device_node *np) @@ -267,7 +170,6 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) pdata = pci->pvt_info; pdata->name = "mpc85xx_pci_err"; - pdata->irq = NO_IRQ; plat_data = op->dev.platform_data; if (!plat_data) { @@ -297,8 +199,7 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) res = of_address_to_resource(of_node, 0, &r); if (res) { - printk(KERN_ERR "%s: Unable to get resource for " - "PCI err regs\n", __func__); + pr_err("%s: Unable to get resource for PCI err regs\n", __func__); goto err; } @@ -307,15 +208,14 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); + pr_err("%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } pdata->pci_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); if (!pdata->pci_vbase) { - printk(KERN_ERR "%s: Unable to setup PCI err regs\n", __func__); + pr_err("%s: Unable to setup PCI err regs\n", __func__); res = -ENOMEM; goto err; } @@ -344,6 +244,9 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) /* clear error bits */ out_be32(pdata->pci_vbase + MPC85XX_PCI_ERR_DR, ~0); + /* reset error capture */ + out_be32(pdata->pci_vbase + MPC85XX_PCI_GAS_TIMR, 0x1); + if (edac_pci_add_device(pci, pdata->edac_idx) > 0) { edac_dbg(3, "failed edac_pci_add_device()\n"); goto err; @@ -356,15 +259,14 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) IRQF_SHARED, "[EDAC] PCI err", pci); if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for " - "MPC85xx PCI err\n", __func__, pdata->irq); + pr_err("%s: Unable to request irq %d for MPC85xx PCI err\n", + __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; goto err2; } - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for PCI Err\n", + pr_info(EDAC_MOD_STR " acquired irq %d for PCI Err\n", pdata->irq); } @@ -386,7 +288,7 @@ static int mpc85xx_pci_err_probe(struct platform_device *op) devres_remove_group(&op->dev, mpc85xx_pci_err_probe); edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " PCI err registered\n"); + pr_info(EDAC_MOD_STR " PCI err registered\n"); return 0; @@ -529,17 +431,17 @@ static void mpc85xx_l2_check(struct edac_device_ctl_info *edac_dev) if (!(err_detect & L2_EDE_MASK)) return; - printk(KERN_ERR "ECC Error in CPU L2 cache\n"); - printk(KERN_ERR "L2 Error Detect Register: 0x%08x\n", err_detect); - printk(KERN_ERR "L2 Error Capture Data High Register: 0x%08x\n", + pr_err("ECC Error in CPU L2 cache\n"); + pr_err("L2 Error Detect Register: 0x%08x\n", err_detect); + pr_err("L2 Error Capture Data High Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATAHI)); - printk(KERN_ERR "L2 Error Capture Data Lo Register: 0x%08x\n", + pr_err("L2 Error Capture Data Lo Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTDATALO)); - printk(KERN_ERR "L2 Error Syndrome Register: 0x%08x\n", + pr_err("L2 Error Syndrome Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_CAPTECC)); - printk(KERN_ERR "L2 Error Attributes Capture Register: 0x%08x\n", + pr_err("L2 Error Attributes Capture Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_ERRATTR)); - printk(KERN_ERR "L2 Error Address Capture Register: 0x%08x\n", + pr_err("L2 Error Address Capture Register: 0x%08x\n", in_be32(pdata->l2_vbase + MPC85XX_L2_ERRADDR)); /* clear error detect register */ @@ -588,7 +490,6 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) pdata = edac_dev->pvt_info; pdata->name = "mpc85xx_l2_err"; - pdata->irq = NO_IRQ; edac_dev->dev = &op->dev; dev_set_drvdata(edac_dev->dev, edac_dev); edac_dev->ctl_name = pdata->name; @@ -596,8 +497,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) res = of_address_to_resource(op->dev.of_node, 0, &r); if (res) { - printk(KERN_ERR "%s: Unable to get resource for " - "L2 err regs\n", __func__); + pr_err("%s: Unable to get resource for L2 err regs\n", __func__); goto err; } @@ -606,15 +506,14 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); + pr_err("%s: Error while requesting mem region\n", __func__); res = -EBUSY; goto err; } pdata->l2_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); if (!pdata->l2_vbase) { - printk(KERN_ERR "%s: Unable to setup L2 err regs\n", __func__); + pr_err("%s: Unable to setup L2 err regs\n", __func__); res = -ENOMEM; goto err; } @@ -646,16 +545,14 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) mpc85xx_l2_isr, IRQF_SHARED, "[EDAC] L2 err", edac_dev); if (res < 0) { - printk(KERN_ERR - "%s: Unable to request irq %d for " - "MPC85xx L2 err\n", __func__, pdata->irq); + pr_err("%s: Unable to request irq %d for MPC85xx L2 err\n", + __func__, pdata->irq); irq_dispose_mapping(pdata->irq); res = -ENODEV; goto err2; } - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for L2 Err\n", - pdata->irq); + pr_info(EDAC_MOD_STR " acquired irq %d for L2 Err\n", pdata->irq); edac_dev->op_state = OP_RUNNING_INTERRUPT; @@ -665,7 +562,7 @@ static int mpc85xx_l2_err_probe(struct platform_device *op) devres_remove_group(&op->dev, mpc85xx_l2_err_probe); edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " L2 err registered\n"); + pr_info(EDAC_MOD_STR " L2 err registered\n"); return 0; @@ -729,466 +626,6 @@ static struct platform_driver mpc85xx_l2_err_driver = { }, }; -/**************************** MC Err device ***************************/ - -/* - * Taken from table 8-55 in the MPC8641 User's Manual and/or 9-61 in the - * MPC8572 User's Manual. Each line represents a syndrome bit column as a - * 64-bit value, but split into an upper and lower 32-bit chunk. The labels - * below correspond to Freescale's manuals. - */ -static unsigned int ecc_table[16] = { - /* MSB LSB */ - /* [0:31] [32:63] */ - 0xf00fe11e, 0xc33c0ff7, /* Syndrome bit 7 */ - 0x00ff00ff, 0x00fff0ff, - 0x0f0f0f0f, 0x0f0fff00, - 0x11113333, 0x7777000f, - 0x22224444, 0x8888222f, - 0x44448888, 0xffff4441, - 0x8888ffff, 0x11118882, - 0xffff1111, 0x22221114, /* Syndrome bit 0 */ -}; - -/* - * Calculate the correct ECC value for a 64-bit value specified by high:low - */ -static u8 calculate_ecc(u32 high, u32 low) -{ - u32 mask_low; - u32 mask_high; - int bit_cnt; - u8 ecc = 0; - int i; - int j; - - for (i = 0; i < 8; i++) { - mask_high = ecc_table[i * 2]; - mask_low = ecc_table[i * 2 + 1]; - bit_cnt = 0; - - for (j = 0; j < 32; j++) { - if ((mask_high >> j) & 1) - bit_cnt ^= (high >> j) & 1; - if ((mask_low >> j) & 1) - bit_cnt ^= (low >> j) & 1; - } - - ecc |= bit_cnt << i; - } - - return ecc; -} - -/* - * Create the syndrome code which is generated if the data line specified by - * 'bit' failed. Eg generate an 8-bit codes seen in Table 8-55 in the MPC8641 - * User's Manual and 9-61 in the MPC8572 User's Manual. - */ -static u8 syndrome_from_bit(unsigned int bit) { - int i; - u8 syndrome = 0; - - /* - * Cycle through the upper or lower 32-bit portion of each value in - * ecc_table depending on if 'bit' is in the upper or lower half of - * 64-bit data. - */ - for (i = bit < 32; i < 16; i += 2) - syndrome |= ((ecc_table[i] >> (bit % 32)) & 1) << (i / 2); - - return syndrome; -} - -/* - * Decode data and ecc syndrome to determine what went wrong - * Note: This can only decode single-bit errors - */ -static void sbe_ecc_decode(u32 cap_high, u32 cap_low, u32 cap_ecc, - int *bad_data_bit, int *bad_ecc_bit) -{ - int i; - u8 syndrome; - - *bad_data_bit = -1; - *bad_ecc_bit = -1; - - /* - * Calculate the ECC of the captured data and XOR it with the captured - * ECC to find an ECC syndrome value we can search for - */ - syndrome = calculate_ecc(cap_high, cap_low) ^ cap_ecc; - - /* Check if a data line is stuck... */ - for (i = 0; i < 64; i++) { - if (syndrome == syndrome_from_bit(i)) { - *bad_data_bit = i; - return; - } - } - - /* If data is correct, check ECC bits for errors... */ - for (i = 0; i < 8; i++) { - if ((syndrome >> i) & 0x1) { - *bad_ecc_bit = i; - return; - } - } -} - -#define make64(high, low) (((u64)(high) << 32) | (low)) - -static void mpc85xx_mc_check(struct mem_ctl_info *mci) -{ - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - struct csrow_info *csrow; - u32 bus_width; - u32 err_detect; - u32 syndrome; - u64 err_addr; - u32 pfn; - int row_index; - u32 cap_high; - u32 cap_low; - int bad_data_bit; - int bad_ecc_bit; - - err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); - if (!err_detect) - return; - - mpc85xx_mc_printk(mci, KERN_ERR, "Err Detect Register: %#8.8x\n", - err_detect); - - /* no more processing if not ECC bit errors */ - if (!(err_detect & (DDR_EDE_SBE | DDR_EDE_MBE))) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect); - return; - } - - syndrome = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ECC); - - /* Mask off appropriate bits of syndrome based on bus width */ - bus_width = (in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG) & - DSC_DBW_MASK) ? 32 : 64; - if (bus_width == 64) - syndrome &= 0xff; - else - syndrome &= 0xffff; - - err_addr = make64( - in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_EXT_ADDRESS), - in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_ADDRESS)); - pfn = err_addr >> PAGE_SHIFT; - - for (row_index = 0; row_index < mci->nr_csrows; row_index++) { - csrow = mci->csrows[row_index]; - if ((pfn >= csrow->first_page) && (pfn <= csrow->last_page)) - break; - } - - cap_high = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_HI); - cap_low = in_be32(pdata->mc_vbase + MPC85XX_MC_CAPTURE_DATA_LO); - - /* - * Analyze single-bit errors on 64-bit wide buses - * TODO: Add support for 32-bit wide buses - */ - if ((err_detect & DDR_EDE_SBE) && (bus_width == 64)) { - sbe_ecc_decode(cap_high, cap_low, syndrome, - &bad_data_bit, &bad_ecc_bit); - - if (bad_data_bit != -1) - mpc85xx_mc_printk(mci, KERN_ERR, - "Faulty Data bit: %d\n", bad_data_bit); - if (bad_ecc_bit != -1) - mpc85xx_mc_printk(mci, KERN_ERR, - "Faulty ECC bit: %d\n", bad_ecc_bit); - - mpc85xx_mc_printk(mci, KERN_ERR, - "Expected Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high ^ (1 << (bad_data_bit - 32)), - cap_low ^ (1 << bad_data_bit), - syndrome ^ (1 << bad_ecc_bit)); - } - - mpc85xx_mc_printk(mci, KERN_ERR, - "Captured Data / ECC:\t%#8.8x_%08x / %#2.2x\n", - cap_high, cap_low, syndrome); - mpc85xx_mc_printk(mci, KERN_ERR, "Err addr: %#8.8llx\n", err_addr); - mpc85xx_mc_printk(mci, KERN_ERR, "PFN: %#8.8x\n", pfn); - - /* we are out of range */ - if (row_index == mci->nr_csrows) - mpc85xx_mc_printk(mci, KERN_ERR, "PFN out of range!\n"); - - if (err_detect & DDR_EDE_SBE) - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, - pfn, err_addr & ~PAGE_MASK, syndrome, - row_index, 0, -1, - mci->ctl_name, ""); - - if (err_detect & DDR_EDE_MBE) - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, - pfn, err_addr & ~PAGE_MASK, syndrome, - row_index, 0, -1, - mci->ctl_name, ""); - - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, err_detect); -} - -static irqreturn_t mpc85xx_mc_isr(int irq, void *dev_id) -{ - struct mem_ctl_info *mci = dev_id; - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - u32 err_detect; - - err_detect = in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT); - if (!err_detect) - return IRQ_NONE; - - mpc85xx_mc_check(mci); - - return IRQ_HANDLED; -} - -static void mpc85xx_init_csrows(struct mem_ctl_info *mci) -{ - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - struct csrow_info *csrow; - struct dimm_info *dimm; - u32 sdram_ctl; - u32 sdtype; - enum mem_type mtype; - u32 cs_bnds; - int index; - - sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG); - - sdtype = sdram_ctl & DSC_SDTYPE_MASK; - if (sdram_ctl & DSC_RD_EN) { - switch (sdtype) { - case DSC_SDTYPE_DDR: - mtype = MEM_RDDR; - break; - case DSC_SDTYPE_DDR2: - mtype = MEM_RDDR2; - break; - case DSC_SDTYPE_DDR3: - mtype = MEM_RDDR3; - break; - default: - mtype = MEM_UNKNOWN; - break; - } - } else { - switch (sdtype) { - case DSC_SDTYPE_DDR: - mtype = MEM_DDR; - break; - case DSC_SDTYPE_DDR2: - mtype = MEM_DDR2; - break; - case DSC_SDTYPE_DDR3: - mtype = MEM_DDR3; - break; - default: - mtype = MEM_UNKNOWN; - break; - } - } - - for (index = 0; index < mci->nr_csrows; index++) { - u32 start; - u32 end; - - csrow = mci->csrows[index]; - dimm = csrow->channels[0]->dimm; - - cs_bnds = in_be32(pdata->mc_vbase + MPC85XX_MC_CS_BNDS_0 + - (index * MPC85XX_MC_CS_BNDS_OFS)); - - start = (cs_bnds & 0xffff0000) >> 16; - end = (cs_bnds & 0x0000ffff); - - if (start == end) - continue; /* not populated */ - - start <<= (24 - PAGE_SHIFT); - end <<= (24 - PAGE_SHIFT); - end |= (1 << (24 - PAGE_SHIFT)) - 1; - - csrow->first_page = start; - csrow->last_page = end; - - dimm->nr_pages = end + 1 - start; - dimm->grain = 8; - dimm->mtype = mtype; - dimm->dtype = DEV_UNKNOWN; - if (sdram_ctl & DSC_X32_EN) - dimm->dtype = DEV_X32; - dimm->edac_mode = EDAC_SECDED; - } -} - -static int mpc85xx_mc_err_probe(struct platform_device *op) -{ - struct mem_ctl_info *mci; - struct edac_mc_layer layers[2]; - struct mpc85xx_mc_pdata *pdata; - struct resource r; - u32 sdram_ctl; - int res; - - if (!devres_open_group(&op->dev, mpc85xx_mc_err_probe, GFP_KERNEL)) - return -ENOMEM; - - layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; - layers[0].size = 4; - layers[0].is_virt_csrow = true; - layers[1].type = EDAC_MC_LAYER_CHANNEL; - layers[1].size = 1; - layers[1].is_virt_csrow = false; - mci = edac_mc_alloc(edac_mc_idx, ARRAY_SIZE(layers), layers, - sizeof(*pdata)); - if (!mci) { - devres_release_group(&op->dev, mpc85xx_mc_err_probe); - return -ENOMEM; - } - - pdata = mci->pvt_info; - pdata->name = "mpc85xx_mc_err"; - pdata->irq = NO_IRQ; - mci->pdev = &op->dev; - pdata->edac_idx = edac_mc_idx++; - dev_set_drvdata(mci->pdev, mci); - mci->ctl_name = pdata->name; - mci->dev_name = pdata->name; - - res = of_address_to_resource(op->dev.of_node, 0, &r); - if (res) { - printk(KERN_ERR "%s: Unable to get resource for MC err regs\n", - __func__); - goto err; - } - - if (!devm_request_mem_region(&op->dev, r.start, resource_size(&r), - pdata->name)) { - printk(KERN_ERR "%s: Error while requesting mem region\n", - __func__); - res = -EBUSY; - goto err; - } - - pdata->mc_vbase = devm_ioremap(&op->dev, r.start, resource_size(&r)); - if (!pdata->mc_vbase) { - printk(KERN_ERR "%s: Unable to setup MC err regs\n", __func__); - res = -ENOMEM; - goto err; - } - - sdram_ctl = in_be32(pdata->mc_vbase + MPC85XX_MC_DDR_SDRAM_CFG); - if (!(sdram_ctl & DSC_ECC_EN)) { - /* no ECC */ - printk(KERN_WARNING "%s: No ECC DIMMs discovered\n", __func__); - res = -ENODEV; - goto err; - } - - edac_dbg(3, "init mci\n"); - mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | - MEM_FLAG_DDR | MEM_FLAG_DDR2; - mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; - mci->edac_cap = EDAC_FLAG_SECDED; - mci->mod_name = EDAC_MOD_STR; - mci->mod_ver = MPC85XX_REVISION; - - if (edac_op_state == EDAC_OPSTATE_POLL) - mci->edac_check = mpc85xx_mc_check; - - mci->ctl_page_to_phys = NULL; - - mci->scrub_mode = SCRUB_SW_SRC; - - mpc85xx_init_csrows(mci); - - /* store the original error disable bits */ - orig_ddr_err_disable = - in_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE); - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE, 0); - - /* clear all error bits */ - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DETECT, ~0); - - if (edac_mc_add_mc_with_groups(mci, mpc85xx_dev_groups)) { - edac_dbg(3, "failed edac_mc_add_mc()\n"); - goto err; - } - - if (edac_op_state == EDAC_OPSTATE_INT) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, - DDR_EIE_MBEE | DDR_EIE_SBEE); - - /* store the original error management threshold */ - orig_ddr_err_sbe = in_be32(pdata->mc_vbase + - MPC85XX_MC_ERR_SBE) & 0xff0000; - - /* set threshold to 1 error per interrupt */ - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, 0x10000); - - /* register interrupts */ - pdata->irq = irq_of_parse_and_map(op->dev.of_node, 0); - res = devm_request_irq(&op->dev, pdata->irq, - mpc85xx_mc_isr, - IRQF_SHARED, - "[EDAC] MC err", mci); - if (res < 0) { - printk(KERN_ERR "%s: Unable to request irq %d for " - "MPC85xx DRAM ERR\n", __func__, pdata->irq); - irq_dispose_mapping(pdata->irq); - res = -ENODEV; - goto err2; - } - - printk(KERN_INFO EDAC_MOD_STR " acquired irq %d for MC\n", - pdata->irq); - } - - devres_remove_group(&op->dev, mpc85xx_mc_err_probe); - edac_dbg(3, "success\n"); - printk(KERN_INFO EDAC_MOD_STR " MC err registered\n"); - - return 0; - -err2: - edac_mc_del_mc(&op->dev); -err: - devres_release_group(&op->dev, mpc85xx_mc_err_probe); - edac_mc_free(mci); - return res; -} - -static int mpc85xx_mc_err_remove(struct platform_device *op) -{ - struct mem_ctl_info *mci = dev_get_drvdata(&op->dev); - struct mpc85xx_mc_pdata *pdata = mci->pvt_info; - - edac_dbg(0, "\n"); - - if (edac_op_state == EDAC_OPSTATE_INT) { - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_INT_EN, 0); - irq_dispose_mapping(pdata->irq); - } - - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_DISABLE, - orig_ddr_err_disable); - out_be32(pdata->mc_vbase + MPC85XX_MC_ERR_SBE, orig_ddr_err_sbe); - - edac_mc_del_mc(&op->dev); - edac_mc_free(mci); - return 0; -} - static const struct of_device_id mpc85xx_mc_err_of_match[] = { /* deprecate the fsl,85.. forms in the future, 2.6.30? */ { .compatible = "fsl,8540-memory-controller", }, @@ -1217,22 +654,14 @@ static const struct of_device_id mpc85xx_mc_err_of_match[] = { MODULE_DEVICE_TABLE(of, mpc85xx_mc_err_of_match); static struct platform_driver mpc85xx_mc_err_driver = { - .probe = mpc85xx_mc_err_probe, - .remove = mpc85xx_mc_err_remove, + .probe = fsl_mc_err_probe, + .remove = fsl_mc_err_remove, .driver = { .name = "mpc85xx_mc_err", .of_match_table = mpc85xx_mc_err_of_match, }, }; -#ifdef CONFIG_FSL_SOC_BOOKE -static void __init mpc85xx_mc_clear_rfxe(void *data) -{ - orig_hid1[smp_processor_id()] = mfspr(SPRN_HID1); - mtspr(SPRN_HID1, (orig_hid1[smp_processor_id()] & ~HID1_RFXE)); -} -#endif - static struct platform_driver * const drivers[] = { &mpc85xx_mc_err_driver, &mpc85xx_l2_err_driver, @@ -1246,8 +675,7 @@ static int __init mpc85xx_mc_init(void) int res = 0; u32 __maybe_unused pvr = 0; - printk(KERN_INFO "Freescale(R) MPC85xx EDAC driver, " - "(C) 2006 Montavista Software\n"); + pr_info("Freescale(R) MPC85xx EDAC driver, (C) 2006 Montavista Software\n"); /* make sure error reporting method is sane */ switch (edac_op_state) { @@ -1261,44 +689,15 @@ static int __init mpc85xx_mc_init(void) res = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); if (res) - printk(KERN_WARNING EDAC_MOD_STR "drivers fail to register\n"); - -#ifdef CONFIG_FSL_SOC_BOOKE - pvr = mfspr(SPRN_PVR); - - if ((PVR_VER(pvr) == PVR_VER_E500V1) || - (PVR_VER(pvr) == PVR_VER_E500V2)) { - /* - * need to clear HID1[RFXE] to disable machine check int - * so we can catch it - */ - if (edac_op_state == EDAC_OPSTATE_INT) - on_each_cpu(mpc85xx_mc_clear_rfxe, NULL, 0); - } -#endif + pr_warn(EDAC_MOD_STR "drivers fail to register\n"); return 0; } module_init(mpc85xx_mc_init); -#ifdef CONFIG_FSL_SOC_BOOKE -static void __exit mpc85xx_mc_restore_hid1(void *data) -{ - mtspr(SPRN_HID1, orig_hid1[smp_processor_id()]); -} -#endif - static void __exit mpc85xx_mc_exit(void) { -#ifdef CONFIG_FSL_SOC_BOOKE - u32 pvr = mfspr(SPRN_PVR); - - if ((PVR_VER(pvr) == PVR_VER_E500V1) || - (PVR_VER(pvr) == PVR_VER_E500V2)) { - on_each_cpu(mpc85xx_mc_restore_hid1, NULL, 0); - } -#endif platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); } diff --git a/drivers/edac/mpc85xx_edac.h b/drivers/edac/mpc85xx_edac.h index 9352e88d53e5..3f6fb16ad34f 100644 --- a/drivers/edac/mpc85xx_edac.h +++ b/drivers/edac/mpc85xx_edac.h @@ -17,65 +17,6 @@ #define mpc85xx_printk(level, fmt, arg...) \ edac_printk(level, "MPC85xx", fmt, ##arg) -#define mpc85xx_mc_printk(mci, level, fmt, arg...) \ - edac_mc_chipset_printk(mci, level, "MPC85xx", fmt, ##arg) - -/* - * DRAM error defines - */ - -/* DDR_SDRAM_CFG */ -#define MPC85XX_MC_DDR_SDRAM_CFG 0x0110 -#define MPC85XX_MC_CS_BNDS_0 0x0000 -#define MPC85XX_MC_CS_BNDS_1 0x0008 -#define MPC85XX_MC_CS_BNDS_2 0x0010 -#define MPC85XX_MC_CS_BNDS_3 0x0018 -#define MPC85XX_MC_CS_BNDS_OFS 0x0008 - -#define MPC85XX_MC_DATA_ERR_INJECT_HI 0x0e00 -#define MPC85XX_MC_DATA_ERR_INJECT_LO 0x0e04 -#define MPC85XX_MC_ECC_ERR_INJECT 0x0e08 -#define MPC85XX_MC_CAPTURE_DATA_HI 0x0e20 -#define MPC85XX_MC_CAPTURE_DATA_LO 0x0e24 -#define MPC85XX_MC_CAPTURE_ECC 0x0e28 -#define MPC85XX_MC_ERR_DETECT 0x0e40 -#define MPC85XX_MC_ERR_DISABLE 0x0e44 -#define MPC85XX_MC_ERR_INT_EN 0x0e48 -#define MPC85XX_MC_CAPTURE_ATRIBUTES 0x0e4c -#define MPC85XX_MC_CAPTURE_ADDRESS 0x0e50 -#define MPC85XX_MC_CAPTURE_EXT_ADDRESS 0x0e54 -#define MPC85XX_MC_ERR_SBE 0x0e58 - -#define DSC_MEM_EN 0x80000000 -#define DSC_ECC_EN 0x20000000 -#define DSC_RD_EN 0x10000000 -#define DSC_DBW_MASK 0x00180000 -#define DSC_DBW_32 0x00080000 -#define DSC_DBW_64 0x00000000 - -#define DSC_SDTYPE_MASK 0x07000000 - -#define DSC_SDTYPE_DDR 0x02000000 -#define DSC_SDTYPE_DDR2 0x03000000 -#define DSC_SDTYPE_DDR3 0x07000000 -#define DSC_X32_EN 0x00000020 - -/* Err_Int_En */ -#define DDR_EIE_MSEE 0x1 /* memory select */ -#define DDR_EIE_SBEE 0x4 /* single-bit ECC error */ -#define DDR_EIE_MBEE 0x8 /* multi-bit ECC error */ - -/* Err_Detect */ -#define DDR_EDE_MSE 0x1 /* memory select */ -#define DDR_EDE_SBE 0x4 /* single-bit ECC error */ -#define DDR_EDE_MBE 0x8 /* multi-bit ECC error */ -#define DDR_EDE_MME 0x80000000 /* multiple memory errors */ - -/* Err_Disable */ -#define DDR_EDI_MSED 0x1 /* memory select disable */ -#define DDR_EDI_SBED 0x4 /* single-bit ECC error disable */ -#define DDR_EDI_MBED 0x8 /* multi-bit ECC error disable */ - /* * L2 Err defines */ @@ -149,13 +90,6 @@ #define MPC85XX_PCIE_ERR_CAP_R2 0x0030 #define MPC85XX_PCIE_ERR_CAP_R3 0x0034 -struct mpc85xx_mc_pdata { - char *name; - int edac_idx; - void __iomem *mc_vbase; - int irq; -}; - struct mpc85xx_l2_pdata { char *name; int edac_idx; diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 6c54127e6eae..cb9b8577acbc 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -118,7 +118,6 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev) pdata->pci_hose = pdev->id; pdata->name = "mpc85xx_pci_err"; - pdata->irq = NO_IRQ; platform_set_drvdata(pdev, pci); pci->dev = &pdev->dev; pci->dev_name = dev_name(&pdev->dev); @@ -291,7 +290,6 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev) pdata = edac_dev->pvt_info; pdata->name = "mv64x60_sram_err"; - pdata->irq = NO_IRQ; edac_dev->dev = &pdev->dev; platform_set_drvdata(pdev, edac_dev); edac_dev->dev_name = dev_name(&pdev->dev); @@ -459,7 +457,6 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev) pdata = edac_dev->pvt_info; pdata->name = "mv64x60_cpu_err"; - pdata->irq = NO_IRQ; edac_dev->dev = &pdev->dev; platform_set_drvdata(pdev, edac_dev); edac_dev->dev_name = dev_name(&pdev->dev); @@ -727,7 +724,6 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) mci->pdev = &pdev->dev; platform_set_drvdata(pdev, mci); pdata->name = "mv64x60_mc_err"; - pdata->irq = NO_IRQ; mci->dev_name = dev_name(&pdev->dev); pdata->edac_idx = edac_mc_idx++; diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index d3a64ba61fa3..691ce25e9010 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1029,8 +1029,6 @@ static int ppc4xx_edac_mc_init(struct mem_ctl_info *mci, pdata = mci->pvt_info; pdata->dcr_host = *dcr_host; - pdata->irqs.sec = NO_IRQ; - pdata->irqs.ded = NO_IRQ; /* Initialize controller capabilities and configuration */ @@ -1111,7 +1109,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op, ded_irq = irq_of_parse_and_map(np, INTMAP_ECCDED_INDEX); sec_irq = irq_of_parse_and_map(np, INTMAP_ECCSEC_INDEX); - if (ded_irq == NO_IRQ || sec_irq == NO_IRQ) { + if (!ded_irq || !sec_irq) { ppc4xx_edac_mc_printk(KERN_ERR, mci, "Unable to map interrupts.\n"); status = -ENODEV; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4fb2eb7c800d..54775221a01f 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = { /* Knight's Landing Support */ /* * KNL's memory channels are swizzled between memory controllers. - * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2 + * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2 */ -#define knl_channel_remap(channel) ((channel + 3) % 6) +#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3) /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */ #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840 @@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg) mc = GET_BITFIELD(reg, entry*3, (entry*3)+2); chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1); - return knl_channel_remap(mc*3 + chan); + return knl_channel_remap(mc, chan); } /* @@ -2474,7 +2474,7 @@ static int sbridge_mci_bind_devs(struct mem_ctl_info *mci, /* Check if everything were registered */ if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 || - !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta) + !pvt->pci_ras || !pvt->pci_ta) goto enodev; if (saw_chan_mask != 0x0f) @@ -2563,8 +2563,7 @@ static int ibridge_mci_bind_devs(struct mem_ctl_info *mci, /* Check if everything were registered */ if (!pvt->pci_sad0 || !pvt->pci_ha0 || !pvt->pci_br0 || - !pvt->pci_br1 || !pvt->pci_tad || !pvt->pci_ras || - !pvt->pci_ta) + !pvt->pci_br1 || !pvt->pci_ras || !pvt->pci_ta) goto enodev; if (saw_chan_mask != 0x0f && /* -EN */ @@ -2997,8 +2996,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { char A = *("A"); - channel = knl_channel_remap(channel); + /* + * Reported channel is in range 0-2, so we can't map it + * back to mc. To figure out mc we check machine check + * bank register that reported this error. + * bank15 means mc0 and bank16 means mc1. + */ + channel = knl_channel_remap(m->bank == 16, channel); channel_mask = 1 << channel; + snprintf(msg, sizeof(msg), "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", overflow ? " OVERFLOW" : "", diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c new file mode 100644 index 000000000000..0ff4878c2aa1 --- /dev/null +++ b/drivers/edac/skx_edac.c @@ -0,0 +1,1121 @@ +/* + * EDAC driver for Intel(R) Xeon(R) Skylake processors + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/edac.h> +#include <linux/mmzone.h> +#include <linux/smp.h> +#include <linux/bitmap.h> +#include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <asm/cpu_device_id.h> +#include <asm/processor.h> +#include <asm/mce.h> + +#include "edac_core.h" + +#define SKX_REVISION " Ver: 1.0 " + +/* + * Debug macros + */ +#define skx_printk(level, fmt, arg...) \ + edac_printk(level, "skx", fmt, ##arg) + +#define skx_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) + +/* + * Get a bit field at register value <v>, from bit <lo> to bit <hi> + */ +#define GET_BITFIELD(v, lo, hi) \ + (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) + +static LIST_HEAD(skx_edac_list); + +static u64 skx_tolm, skx_tohm; + +#define NUM_IMC 2 /* memory controllers per socket */ +#define NUM_CHANNELS 3 /* channels per memory controller */ +#define NUM_DIMMS 2 /* Max DIMMS per channel */ + +#define MASK26 0x3FFFFFF /* Mask for 2^26 */ +#define MASK29 0x1FFFFFFF /* Mask for 2^29 */ + +/* + * Each cpu socket contains some pci devices that provide global + * information, and also some that are local to each of the two + * memory controllers on the die. + */ +struct skx_dev { + struct list_head list; + u8 bus[4]; + struct pci_dev *sad_all; + struct pci_dev *util_all; + u32 mcroute; + struct skx_imc { + struct mem_ctl_info *mci; + u8 mc; /* system wide mc# */ + u8 lmc; /* socket relative mc# */ + u8 src_id, node_id; + struct skx_channel { + struct pci_dev *cdev; + struct skx_dimm { + u8 close_pg; + u8 bank_xor_enable; + u8 fine_grain_bank; + u8 rowbits; + u8 colbits; + } dimms[NUM_DIMMS]; + } chan[NUM_CHANNELS]; + } imc[NUM_IMC]; +}; +static int skx_num_sockets; + +struct skx_pvt { + struct skx_imc *imc; +}; + +struct decoded_addr { + struct skx_dev *dev; + u64 addr; + int socket; + int imc; + int channel; + u64 chan_addr; + int sktways; + int chanways; + int dimm; + int rank; + int channel_rank; + u64 rank_address; + int row; + int column; + int bank_address; + int bank_group; +}; + +static struct skx_dev *get_skx_dev(u8 bus, u8 idx) +{ + struct skx_dev *d; + + list_for_each_entry(d, &skx_edac_list, list) { + if (d->bus[idx] == bus) + return d; + } + + return NULL; +} + +enum munittype { + CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD +}; + +struct munit { + u16 did; + u16 devfn[NUM_IMC]; + u8 busidx; + u8 per_socket; + enum munittype mtype; +}; + +/* + * List of PCI device ids that we need together with some device + * number and function numbers to tell which memory controller the + * device belongs to. + */ +static const struct munit skx_all_munits[] = { + { 0x2054, { }, 1, 1, SAD_ALL }, + { 0x2055, { }, 1, 1, UTIL_ALL }, + { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 }, + { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 }, + { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 }, + { 0x208e, { }, 1, 0, SAD }, + { } +}; + +/* + * We use the per-socket device 0x2016 to count how many sockets are present, + * and to detemine which PCI buses are associated with each socket. Allocate + * and build the full list of all the skx_dev structures that we need here. + */ +static int get_all_bus_mappings(void) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev); + if (!pdev) + break; + ndev++; + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + pci_dev_put(pdev); + return -ENOMEM; + } + pci_read_config_dword(pdev, 0xCC, ®); + d->bus[0] = GET_BITFIELD(reg, 0, 7); + d->bus[1] = GET_BITFIELD(reg, 8, 15); + d->bus[2] = GET_BITFIELD(reg, 16, 23); + d->bus[3] = GET_BITFIELD(reg, 24, 31); + edac_dbg(2, "busses: %x, %x, %x, %x\n", + d->bus[0], d->bus[1], d->bus[2], d->bus[3]); + list_add_tail(&d->list, &skx_edac_list); + skx_num_sockets++; + prev = pdev; + } + + return ndev; +} + +static int get_all_munits(const struct munit *m) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int i = 0, ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev); + if (!pdev) + break; + ndev++; + if (m->per_socket == NUM_IMC) { + for (i = 0; i < NUM_IMC; i++) + if (m->devfn[i] == pdev->devfn) + break; + if (i == NUM_IMC) + goto fail; + } + d = get_skx_dev(pdev->bus->number, m->busidx); + if (!d) + goto fail; + + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + skx_printk(KERN_ERR, + "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did); + goto fail; + } + + switch (m->mtype) { + case CHAN0: case CHAN1: case CHAN2: + pci_dev_get(pdev); + d->imc[i].chan[m->mtype].cdev = pdev; + break; + case SAD_ALL: + pci_dev_get(pdev); + d->sad_all = pdev; + break; + case UTIL_ALL: + pci_dev_get(pdev); + d->util_all = pdev; + break; + case SAD: + /* + * one of these devices per core, including cores + * that don't exist on this SKU. Ignore any that + * read a route table of zero, make sure all the + * non-zero values match. + */ + pci_read_config_dword(pdev, 0xB4, ®); + if (reg != 0) { + if (d->mcroute == 0) + d->mcroute = reg; + else if (d->mcroute != reg) { + skx_printk(KERN_ERR, + "mcroute mismatch\n"); + goto fail; + } + } + ndev--; + break; + } + + prev = pdev; + } + + return ndev; +fail: + pci_dev_put(pdev); + return -ENODEV; +} + +const struct x86_cpu_id skx_cpuids[] = { + { X86_VENDOR_INTEL, 6, 0x55, 0, 0 }, /* Skylake */ + { } +}; +MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); + +static u8 get_src_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF0, ®); + + return GET_BITFIELD(reg, 12, 14); +} + +static u8 skx_get_node_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF4, ®); + + return GET_BITFIELD(reg, 0, 2); +} + +static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, + int maxval, char *name) +{ + u32 val = GET_BITFIELD(reg, lobit, hibit); + + if (val < minval || val > maxval) { + edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg); + return -EINVAL; + } + return val + add; +} + +#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15) + +#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks") +#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows") +#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols") + +static int get_width(u32 mtr) +{ + switch (GET_BITFIELD(mtr, 8, 9)) { + case 0: + return DEV_X4; + case 1: + return DEV_X8; + case 2: + return DEV_X16; + } + return DEV_UNKNOWN; +} + +static int skx_get_hi_lo(void) +{ + struct pci_dev *pdev; + u32 reg; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL); + if (!pdev) { + edac_dbg(0, "Can't get tolm/tohm\n"); + return -ENODEV; + } + + pci_read_config_dword(pdev, 0xD0, ®); + skx_tolm = reg; + pci_read_config_dword(pdev, 0xD4, ®); + skx_tohm = reg; + pci_read_config_dword(pdev, 0xD8, ®); + skx_tohm |= (u64)reg << 32; + + pci_dev_put(pdev); + edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm); + + return 0; +} + +static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, + struct skx_imc *imc, int chan, int dimmno) +{ + int banks = 16, ranks, rows, cols, npages; + u64 size; + + if (!IS_DIMM_PRESENT(mtr)) + return 0; + ranks = numrank(mtr); + rows = numrow(mtr); + cols = numcol(mtr); + + /* + * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) + */ + size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + imc->mc, chan, dimmno, size, npages, + banks, ranks, rows, cols); + + imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0); + imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9); + imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); + imc->chan[chan].dimms[dimmno].rowbits = rows; + imc->chan[chan].dimms[dimmno].colbits = cols; + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = get_width(mtr); + dimm->mtype = MEM_DDR4; + dimm->edac_mode = EDAC_SECDED; /* likely better than this */ + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); + + return 1; +} + +#define SKX_GET_MTMTR(dev, reg) \ + pci_read_config_dword((dev), 0x87c, ®) + +static bool skx_check_ecc(struct pci_dev *pdev) +{ + u32 mtmtr; + + SKX_GET_MTMTR(pdev, mtmtr); + + return !!GET_BITFIELD(mtmtr, 2, 2); +} + +static int skx_get_dimm_config(struct mem_ctl_info *mci) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + int i, j; + u32 mtr, amap; + int ndimms; + + for (i = 0; i < NUM_CHANNELS; i++) { + ndimms = 0; + pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); + for (j = 0; j < NUM_DIMMS; j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); + pci_read_config_dword(imc->chan[i].cdev, + 0x80 + 4*j, &mtr); + ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j); + } + if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { + skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); + return -ENODEV; + } + } + + return 0; +} + +static void skx_unregister_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci = imc->mci; + + if (!mci) + return; + + edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->pdev); + + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + kfree(mci->ctl_name); + edac_mc_free(mci); +} + +static int skx_register_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct pci_dev *pdev = imc->chan[0].cdev; + struct skx_pvt *pvt; + int rc; + + /* allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = NUM_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, + sizeof(struct skx_pvt)); + + if (unlikely(!mci)) + return -ENOMEM; + + edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); + + /* Associate skx_dev and mci for future usage */ + imc->mci = mci; + pvt = mci->pvt_info; + pvt->imc = imc; + + mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d", + imc->node_id, imc->lmc); + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "skx_edac.c"; + mci->dev_name = pci_name(imc->chan[0].cdev); + mci->mod_ver = SKX_REVISION; + mci->ctl_page_to_phys = NULL; + + rc = skx_get_dimm_config(mci); + if (rc < 0) + goto fail; + + /* record ptr to the generic device */ + mci->pdev = &pdev->dev; + + /* add this new MC control structure to EDAC's list of MCs */ + if (unlikely(edac_mc_add_mc(mci))) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + rc = -EINVAL; + goto fail; + } + + return 0; + +fail: + kfree(mci->ctl_name); + edac_mc_free(mci); + imc->mci = NULL; + return rc; +} + +#define SKX_MAX_SAD 24 + +#define SKX_GET_SAD(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), ®) +#define SKX_GET_ILV(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), ®) + +#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31) +#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27) +#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26) +#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6) +#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4) +#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2) +#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0) + +#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0) +#define SKX_ILV_TARGET(tgt) ((tgt) & 7) + +static bool skx_sad_decode(struct decoded_addr *res) +{ + struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list); + u64 addr = res->addr; + int i, idx, tgt, lchan, shift; + u32 sad, ilv; + u64 limit, prev_limit; + int remote = 0; + + /* Simple sanity check for I/O space or out of range */ + if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) { + edac_dbg(0, "Address %llx out of range\n", addr); + return false; + } + +restart: + prev_limit = 0; + for (i = 0; i < SKX_MAX_SAD; i++) { + SKX_GET_SAD(d, i, sad); + limit = SKX_SAD_LIMIT(sad); + if (SKX_SAD_ENABLE(sad)) { + if (addr >= prev_limit && addr <= limit) + goto sad_found; + } + prev_limit = limit + 1; + } + edac_dbg(0, "No SAD entry for %llx\n", addr); + return false; + +sad_found: + SKX_GET_ILV(d, i, ilv); + + switch (SKX_SAD_INTERLEAVE(sad)) { + case 0: + idx = GET_BITFIELD(addr, 6, 8); + break; + case 1: + idx = GET_BITFIELD(addr, 8, 10); + break; + case 2: + idx = GET_BITFIELD(addr, 12, 14); + break; + case 3: + idx = GET_BITFIELD(addr, 30, 32); + break; + } + + tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3); + + /* If point to another node, find it and start over */ + if (SKX_ILV_REMOTE(tgt)) { + if (remote) { + edac_dbg(0, "Double remote!\n"); + return false; + } + remote = 1; + list_for_each_entry(d, &skx_edac_list, list) { + if (d->imc[0].src_id == SKX_ILV_TARGET(tgt)) + goto restart; + } + edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt)); + return false; + } + + if (SKX_SAD_MOD3(sad) == 0) + lchan = SKX_ILV_TARGET(tgt); + else { + switch (SKX_SAD_MOD3MODE(sad)) { + case 0: + shift = 6; + break; + case 1: + shift = 8; + break; + case 2: + shift = 12; + break; + default: + edac_dbg(0, "illegal mod3mode\n"); + return false; + } + switch (SKX_SAD_MOD3ASMOD2(sad)) { + case 0: + lchan = (addr >> shift) % 3; + break; + case 1: + lchan = (addr >> shift) % 2; + break; + case 2: + lchan = (addr >> shift) % 2; + lchan = (lchan << 1) | ~lchan; + break; + case 3: + lchan = ((addr >> shift) % 2) << 1; + break; + } + lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1); + } + + res->dev = d; + res->socket = d->imc[0].src_id; + res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2); + res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19); + + edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n", + res->addr, res->socket, res->imc, res->channel); + return true; +} + +#define SKX_MAX_TAD 8 + +#define SKX_GET_TADBASE(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), ®) +#define SKX_GET_TADWAYNESS(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), ®) +#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), ®) + +#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26) +#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5) +#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7) +#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26) +#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26) +#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11)) +#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1) + +/* which bit used for both socket and channel interleave */ +static int skx_granularity[] = { 6, 8, 12, 30 }; + +static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits) +{ + addr >>= shift; + addr /= ways; + addr <<= shift; + + return addr | (lowbits & ((1ull << shift) - 1)); +} + +static bool skx_tad_decode(struct decoded_addr *res) +{ + int i; + u32 base, wayness, chnilvoffset; + int skt_interleave_bit, chn_interleave_bit; + u64 channel_addr; + + for (i = 0; i < SKX_MAX_TAD; i++) { + SKX_GET_TADBASE(res->dev, res->imc, i, base); + SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness); + if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness)) + goto tad_found; + } + edac_dbg(0, "No TAD entry for %llx\n", res->addr); + return false; + +tad_found: + res->sktways = SKX_TAD_SKTWAYS(wayness); + res->chanways = SKX_TAD_CHNWAYS(wayness); + skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)]; + chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)]; + + SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset); + channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset); + + if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) { + /* Must handle channel first, then socket */ + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, channel_addr); + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, channel_addr); + } else { + /* Handle socket then channel. Preserve low bits from original address */ + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, res->addr); + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, res->addr); + } + + res->chan_addr = channel_addr; + + edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n", + res->addr, res->chan_addr, res->sktways, res->chanways); + return true; +} + +#define SKX_MAX_RIR 4 + +#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x108 + 4 * (i), ®) +#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x120 + 16 * idx + 4 * (i), ®) + +#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31) +#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29) +#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29)) +#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19) +#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26)) + +static bool skx_rir_decode(struct decoded_addr *res) +{ + int i, idx, chan_rank; + int shift; + u32 rirway, rirlv; + u64 rank_addr, prev_limit = 0, limit; + + if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg) + shift = 6; + else + shift = 13; + + for (i = 0; i < SKX_MAX_RIR; i++) { + SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway); + limit = SKX_RIR_LIMIT(rirway); + if (SKX_RIR_VALID(rirway)) { + if (prev_limit <= res->chan_addr && + res->chan_addr <= limit) + goto rir_found; + } + prev_limit = limit; + } + edac_dbg(0, "No RIR entry for %llx\n", res->addr); + return false; + +rir_found: + rank_addr = res->chan_addr >> shift; + rank_addr /= SKX_RIR_WAYS(rirway); + rank_addr <<= shift; + rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0); + + res->rank_address = rank_addr; + idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway); + + SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv); + res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv); + chan_rank = SKX_RIR_CHAN_RANK(rirlv); + res->channel_rank = chan_rank; + res->dimm = chan_rank / 4; + res->rank = chan_rank % 4; + + edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n", + res->addr, res->dimm, res->rank, + res->channel_rank, res->rank_address); + return true; +} + +static u8 skx_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; +static u8 skx_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; +static u8 skx_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; +static u8 skx_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; +static u8 skx_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int skx_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool skx_mad_decode(struct decoded_addr *r) +{ + struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm]; + int bg0 = dimm->fine_grain_bank ? 6 : 13; + + if (dimm->close_pg) { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row); + r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column); + r->column |= 0x400; /* C10 is autoprecharge, always set */ + r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28); + r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21); + } else { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row); + if (dimm->fine_grain_bank) + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column); + else + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column); + r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23); + r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21); + } + r->row &= (1u << dimm->rowbits) - 1; + + edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n", + r->addr, r->row, r->column, r->bank_address, + r->bank_group); + return true; +} + +static bool skx_decode(struct decoded_addr *res) +{ + + return skx_sad_decode(res) && skx_tad_decode(res) && + skx_rir_decode(res) && skx_mad_decode(res); +} + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr. + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static struct dentry *skx_test; +static u64 skx_fake_addr; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct decoded_addr res; + + res.addr = val; + skx_decode(&res); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static struct dentry *mydebugfs_create(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_u64_wo); +} + +static void setup_skx_debug(void) +{ + skx_test = debugfs_create_dir("skx_edac_test", NULL); + mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr); +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static void setup_skx_debug(void) +{ +} + +static void teardown_skx_debug(void) +{ +} +#endif /*CONFIG_EDAC_DEBUG*/ + +static void skx_mce_output_error(struct mem_ctl_info *mci, + const struct mce *m, + struct decoded_addr *res) +{ + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256]; + bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); + bool overflow = GET_BITFIELD(m->status, 62, 62); + bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool recoverable; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + + recoverable = GET_BITFIELD(m->status, 56, 56); + + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, + res->socket, res->imc, res->rank, + res->bank_group, res->bank_address, res->row, res->column); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + res->channel, res->dimm, -1, + optype, msg); +} + +static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + struct decoded_addr res; + struct mem_ctl_info *mci; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + /* ignore unless this is memory related with an address */ + if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + return NOTIFY_DONE; + + res.addr = mce->addr; + if (!skx_decode(&res)) + return NOTIFY_DONE; + mci = res.dev->imc[res.imc].mci; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); + + skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); + + skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); + + skx_mce_output_error(mci, mce, &res); + + return NOTIFY_DONE; +} + +static struct notifier_block skx_mce_dec = { + .notifier_call = skx_mce_check_error, +}; + +static void skx_remove(void) +{ + int i, j; + struct skx_dev *d, *tmp; + + edac_dbg(0, "\n"); + + list_for_each_entry_safe(d, tmp, &skx_edac_list, list) { + list_del(&d->list); + for (i = 0; i < NUM_IMC; i++) { + skx_unregister_mci(&d->imc[i]); + for (j = 0; j < NUM_CHANNELS; j++) + pci_dev_put(d->imc[i].chan[j].cdev); + } + pci_dev_put(d->util_all); + pci_dev_put(d->sad_all); + + kfree(d); + } +} + +/* + * skx_init: + * make sure we are running on the correct cpu model + * search for all the devices we need + * check which DIMMs are present. + */ +int __init skx_init(void) +{ + const struct x86_cpu_id *id; + const struct munit *m; + int rc = 0, i; + u8 mc = 0, src_id, node_id; + struct skx_dev *d; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(skx_cpuids); + if (!id) + return -ENODEV; + + rc = skx_get_hi_lo(); + if (rc) + return rc; + + rc = get_all_bus_mappings(); + if (rc < 0) + goto fail; + if (rc == 0) { + edac_dbg(2, "No memory controllers found\n"); + return -ENODEV; + } + + for (m = skx_all_munits; m->did; m++) { + rc = get_all_munits(m); + if (rc < 0) + goto fail; + if (rc != m->per_socket * skx_num_sockets) { + edac_dbg(2, "Expected %d, got %d of %x\n", + m->per_socket * skx_num_sockets, rc, m->did); + rc = -ENODEV; + goto fail; + } + } + + list_for_each_entry(d, &skx_edac_list, list) { + src_id = get_src_id(d); + node_id = skx_get_node_id(d); + edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + for (i = 0; i < NUM_IMC; i++) { + d->imc[i].mc = mc++; + d->imc[i].lmc = i; + d->imc[i].src_id = src_id; + d->imc[i].node_id = node_id; + rc = skx_register_mci(&d->imc[i]); + if (rc < 0) + goto fail; + } + } + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + setup_skx_debug(); + + mce_register_decode_chain(&skx_mce_dec); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit skx_exit(void) +{ + edac_dbg(2, "\n"); + mce_unregister_decode_chain(&skx_mce_dec); + skx_remove(); + teardown_skx_debug(); +} + +module_init(skx_init); +module_exit(skx_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors"); diff --git a/drivers/edac/wq.c b/drivers/edac/wq.c index 1b8c07e44fd8..2a9a11ae2461 100644 --- a/drivers/edac/wq.c +++ b/drivers/edac/wq.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL_GPL(edac_stop_work); int edac_workqueue_setup(void) { - wq = create_singlethread_workqueue("edac-poller"); + wq = alloc_ordered_workqueue("edac-poller", WQ_MEM_RECLAIM); if (!wq) return -ENODEV; else |