diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 16 | ||||
-rw-r--r-- | drivers/edac/Makefile | 2 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 260 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 15 | ||||
-rw-r--r-- | drivers/edac/cpc925_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/e7xxx_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 40 | ||||
-rw-r--r-- | drivers/edac/edac_pci_sysfs.c | 5 | ||||
-rw-r--r-- | drivers/edac/ghes_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/i3000_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/i3200_edac.c | 7 | ||||
-rw-r--r-- | drivers/edac/i82443bxgx_edac.c | 3 | ||||
-rw-r--r-- | drivers/edac/i82860_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 47 | ||||
-rw-r--r-- | drivers/edac/mce_amd_inj.c | 293 | ||||
-rw-r--r-- | drivers/edac/mv64x60_edac.c | 8 | ||||
-rw-r--r-- | drivers/edac/ppc4xx_edac.c | 4 | ||||
-rw-r--r-- | drivers/edac/x38_edac.c | 3 |
18 files changed, 444 insertions, 272 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 7072c2892d63..49c265255a07 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -61,14 +61,14 @@ config EDAC_DECODE_MCE has been initialized. config EDAC_MCE_INJ - tristate "Simple MCE injection interface over /sysfs" - depends on EDAC_DECODE_MCE + tristate "Simple MCE injection interface" + depends on EDAC_DECODE_MCE && DEBUG_FS default n help - This is a simple interface to inject MCEs over /sysfs and test - the MCE decoding code in EDAC. + This is a simple debugfs interface to inject MCEs and test different + aspects of the MCE handling code. - This is currently AMD-only. + WARNING: Do not even assume this interface is staying stable! config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" @@ -105,11 +105,11 @@ config EDAC_GHES In doubt, say 'Y'. config EDAC_AMD64 - tristate "AMD64 (Opteron, Athlon64) K8, F10h" - depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE + tristate "AMD64 (Opteron, Athlon64)" + depends on EDAC_MM_EDAC && AMD_NB && EDAC_DECODE_MCE help Support for error detection and correction of DRAM ECC errors on - the AMD64 families of memory controllers (K8 and F10h) + the AMD64 families (>= K8) of memory controllers. 
config EDAC_AMD64_ERROR_INJECTION bool "Sysfs HW Error injection facilities" diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 359aa499b200..d40c69a04df7 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o -edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_pci_sysfs.o +edac_core-y := edac_mc.o edac_device.o edac_mc_sysfs.o edac_core-y += edac_module.o edac_device_sysfs.o ifdef CONFIG_PCI diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index bbd65149cdb2..17638d7cf5c2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -692,9 +692,19 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) { edac_dbg(1, "F2x%d90 (DRAM Cfg Low): 0x%08x\n", chan, dclr); - edac_dbg(1, " DIMM type: %sbuffered; all DIMMs support ECC: %s\n", - (dclr & BIT(16)) ? "un" : "", - (dclr & BIT(19)) ? "yes" : "no"); + if (pvt->dram_type == MEM_LRDDR3) { + u32 dcsm = pvt->csels[chan].csmasks[0]; + /* + * It's assumed all LRDIMMs in a DCT are going to be of + * same 'type' until proven otherwise. So, use a cs + * value of '0' here to get dcsm value. + */ + edac_dbg(1, " LRDIMM %dx rank multiply\n", (dcsm & 0x3)); + } + + edac_dbg(1, "All DIMMs support ECC:%s\n", + (dclr & BIT(19)) ? "yes" : "no"); + edac_dbg(1, " PAR/ERR parity: %s\n", (dclr & BIT(8)) ? 
"enabled" : "disabled"); @@ -756,7 +766,7 @@ static void prep_chip_selects(struct amd64_pvt *pvt) if (pvt->fam == 0xf && pvt->ext_model < K8_REV_F) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 8; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 8; - } else if (pvt->fam == 0x15 && pvt->model >= 0x30) { + } else if (pvt->fam == 0x15 && pvt->model == 0x30) { pvt->csels[0].b_cnt = pvt->csels[1].b_cnt = 4; pvt->csels[0].m_cnt = pvt->csels[1].m_cnt = 2; } else { @@ -813,25 +823,63 @@ static void read_dct_base_mask(struct amd64_pvt *pvt) } } -static enum mem_type determine_memory_type(struct amd64_pvt *pvt, int cs) +static void determine_memory_type(struct amd64_pvt *pvt) { - enum mem_type type; + u32 dram_ctrl, dcsm; - /* F15h supports only DDR3 */ - if (pvt->fam >= 0x15) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; - else if (pvt->fam == 0x10 || pvt->ext_model >= K8_REV_F) { + switch (pvt->fam) { + case 0xf: + if (pvt->ext_model >= K8_REV_F) + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; + return; + + case 0x10: if (pvt->dchr0 & DDR3_MODE) - type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; + goto ddr3; + + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR2 : MEM_RDDR2; + return; + + case 0x15: + if (pvt->model < 0x60) + goto ddr3; + + /* + * Model 0x60h needs special handling: + * + * We use a Chip Select value of '0' to obtain dcsm. + * Theoretically, it is possible to populate LRDIMMs of different + * 'Rank' value on a DCT. But this is not the common case. So, + * it's reasonable to assume all DIMMs are going to be of same + * 'type' until proven otherwise. + */ + amd64_read_dct_pci_cfg(pvt, 0, DRAM_CONTROL, &dram_ctrl); + dcsm = pvt->csels[0].csmasks[0]; + + if (((dram_ctrl >> 8) & 0x7) == 0x2) + pvt->dram_type = MEM_DDR4; + else if (pvt->dclr0 & BIT(16)) + pvt->dram_type = MEM_DDR3; + else if (dcsm & 0x3) + pvt->dram_type = MEM_LRDDR3; else - type = (pvt->dclr0 & BIT(16)) ? 
MEM_DDR2 : MEM_RDDR2; - } else { - type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR; - } + pvt->dram_type = MEM_RDDR3; - amd64_info("CS%d: %s\n", cs, edac_mem_types[type]); + return; - return type; + case 0x16: + goto ddr3; + + default: + WARN(1, KERN_ERR "%s: Family??? 0x%x\n", __func__, pvt->fam); + pvt->dram_type = MEM_EMPTY; + } + return; + +ddr3: + pvt->dram_type = (pvt->dclr0 & BIT(16)) ? MEM_DDR3 : MEM_RDDR3; } /* Get the number of DCT channels the memory controller is using. */ @@ -958,8 +1006,12 @@ static void read_dram_base_limit_regs(struct amd64_pvt *pvt, unsigned range) if (WARN_ON(!nb)) return; - pci_func = (pvt->model == 0x30) ? PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 - : PCI_DEVICE_ID_AMD_15H_NB_F1; + if (pvt->model == 0x60) + pci_func = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1; + else if (pvt->model == 0x30) + pci_func = PCI_DEVICE_ID_AMD_15H_M30H_NB_F1; + else + pci_func = PCI_DEVICE_ID_AMD_15H_NB_F1; f1 = pci_get_related_function(nb->misc->vendor, pci_func, nb->misc); if (WARN_ON(!f1)) @@ -1049,7 +1101,7 @@ static int ddr2_cs_size(unsigned i, bool dct_width) } static int k8_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? 
pvt->dclr1 : pvt->dclr0; @@ -1167,8 +1219,43 @@ static int ddr3_cs_size(unsigned i, bool dct_width) return cs_size; } +static int ddr3_lrdimm_cs_size(unsigned i, unsigned rank_multiply) +{ + unsigned shift = 0; + int cs_size = 0; + + if (i < 4 || i == 6) + cs_size = -1; + else if (i == 12) + shift = 7; + else if (!(i & 0x1)) + shift = i >> 1; + else + shift = (i + 1) >> 1; + + if (cs_size != -1) + cs_size = rank_multiply * (128 << shift); + + return cs_size; +} + +static int ddr4_cs_size(unsigned i) +{ + int cs_size = 0; + + if (i == 0) + cs_size = -1; + else if (i == 1) + cs_size = 1024; + else + /* Min cs_size = 1G */ + cs_size = 1024 * (1 << (i >> 1)); + + return cs_size; +} + static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { u32 dclr = dct ? pvt->dclr1 : pvt->dclr0; @@ -1184,18 +1271,49 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, * F15h supports only 64bit DCT interfaces */ static int f15_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); return ddr3_cs_size(cs_mode, false); } +/* F15h M60h supports DDR4 mapping as well. */ +static int f15_m60h_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode, int cs_mask_nr) +{ + int cs_size; + u32 dcsm = pvt->csels[dct].csmasks[cs_mask_nr]; + + WARN_ON(cs_mode > 12); + + if (pvt->dram_type == MEM_DDR4) { + if (cs_mode > 9) + return -1; + + cs_size = ddr4_cs_size(cs_mode); + } else if (pvt->dram_type == MEM_LRDDR3) { + unsigned rank_multiply = dcsm & 0xf; + + if (rank_multiply == 3) + rank_multiply = 4; + cs_size = ddr3_lrdimm_cs_size(cs_mode, rank_multiply); + } else { + /* Minimum cs size is 512MB for F15h M60h */ + if (cs_mode == 0x1) + return -1; + + cs_size = ddr3_cs_size(cs_mode, false); + } + + return cs_size; +} + /* * F16h and F15h model 30h have only limited cs_modes. 
*/ static int f16_dbam_to_chip_select(struct amd64_pvt *pvt, u8 dct, - unsigned cs_mode) + unsigned cs_mode, int cs_mask_nr) { WARN_ON(cs_mode > 12); @@ -1757,13 +1875,20 @@ static void debug_display_dimm_sizes(struct amd64_pvt *pvt, u8 ctrl) size0 = 0; if (dcsb[dimm*2] & DCSB_CS_ENABLE) + /* For f15m60h, need multiplier for LRDIMM cs_size + * calculation. We pass 'dimm' value to the dbam_to_cs + * mapper so we can find the multiplier from the + * corresponding DCSM. + */ size0 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); + DBAM_DIMM(dimm, dbam), + dimm); size1 = 0; if (dcsb[dimm*2 + 1] & DCSB_CS_ENABLE) size1 = pvt->ops->dbam_to_cs(pvt, ctrl, - DBAM_DIMM(dimm, dbam)); + DBAM_DIMM(dimm, dbam), + dimm); amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n", dimm * 2, size0, @@ -1812,6 +1937,16 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f16_dbam_to_chip_select, } }, + [F15_M60H_CPUS] = { + .ctl_name = "F15h_M60h", + .f1_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F1, + .f3_id = PCI_DEVICE_ID_AMD_15H_M60H_NB_F3, + .ops = { + .early_channel_count = f1x_early_channel_count, + .map_sysaddr_to_csrow = f1x_map_sysaddr_to_csrow, + .dbam_to_cs = f15_m60h_dbam_to_chip_select, + } + }, [F16_CPUS] = { .ctl_name = "F16h", .f1_id = PCI_DEVICE_ID_AMD_16H_NB_F1, @@ -2175,6 +2310,8 @@ static void read_mc_regs(struct amd64_pvt *pvt) } pvt->ecc_sym_sz = 4; + determine_memory_type(pvt); + edac_dbg(1, " DIMM type: %s\n", edac_mem_types[pvt->dram_type]); if (pvt->fam >= 0x10) { amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp); @@ -2238,7 +2375,8 @@ static u32 get_csrow_nr_pages(struct amd64_pvt *pvt, u8 dct, int csrow_nr) */ cs_mode = DBAM_DIMM(csrow_nr / 2, dbam); - nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode) << (20 - PAGE_SHIFT); + nr_pages = pvt->ops->dbam_to_cs(pvt, dct, cs_mode, (csrow_nr / 2)) + << (20 - PAGE_SHIFT); edac_dbg(0, "csrow: %d, channel: %d, DBAM idx: %d\n", csrow_nr, dct, cs_mode); @@ -2257,7 +2395,6 @@ static int init_csrows(struct 
mem_ctl_info *mci) struct csrow_info *csrow; struct dimm_info *dimm; enum edac_type edac_mode; - enum mem_type mtype; int i, j, empty = 1; int nr_pages = 0; u32 val; @@ -2302,8 +2439,6 @@ static int init_csrows(struct mem_ctl_info *mci) nr_pages += row_dct1_pages; } - mtype = determine_memory_type(pvt, i); - edac_dbg(1, "Total csrow%d pages: %u\n", i, nr_pages); /* @@ -2317,7 +2452,7 @@ static int init_csrows(struct mem_ctl_info *mci) for (j = 0; j < pvt->channel_count; j++) { dimm = csrow->channels[j]->dimm; - dimm->mtype = mtype; + dimm->mtype = pvt->dram_type; dimm->edac_mode = edac_mode; } } @@ -2604,6 +2739,10 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F15_M30H_CPUS]; pvt->ops = &family_types[F15_M30H_CPUS].ops; break; + } else if (pvt->model == 0x60) { + fam_type = &family_types[F15_M60H_CPUS]; + pvt->ops = &family_types[F15_M60H_CPUS].ops; + break; } fam_type = &family_types[F15_CPUS]; @@ -2828,55 +2967,13 @@ static void remove_one_instance(struct pci_dev *pdev) * inquiry this table to see if this driver is for a given device found. 
*/ static const struct pci_device_id amd64_pci_table[] = { - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_10H_NB_DRAM, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_15H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_15H_M30H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_16H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - { - .vendor = PCI_VENDOR_ID_AMD, - .device = PCI_DEVICE_ID_AMD_16H_M30H_NB_F2, - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - }, - + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F2) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F2) }, {0, } }; MODULE_DEVICE_TABLE(pci, amd64_pci_table); @@ -2938,6 +3035,11 @@ static int __init amd64_edac_init(void) goto err_no_instances; setup_pci_device(); + +#ifdef CONFIG_X86_32 + amd64_err("%s on 32-bit is unsupported. 
USE AT YOUR OWN RISK!\n", EDAC_MOD_STR); +#endif + return 0; err_no_instances: diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 55fb5941c6d4..d8468c667925 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -162,10 +162,12 @@ /* * PCI-defined configuration space registers */ -#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b -#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c #define PCI_DEVICE_ID_AMD_15H_NB_F1 0x1601 #define PCI_DEVICE_ID_AMD_15H_NB_F2 0x1602 +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F1 0x141b +#define PCI_DEVICE_ID_AMD_15H_M30H_NB_F2 0x141c +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F1 0x1571 +#define PCI_DEVICE_ID_AMD_15H_M60H_NB_F2 0x1572 #define PCI_DEVICE_ID_AMD_16H_NB_F1 0x1531 #define PCI_DEVICE_ID_AMD_16H_NB_F2 0x1532 #define PCI_DEVICE_ID_AMD_16H_M30H_NB_F1 0x1581 @@ -221,6 +223,8 @@ #define csrow_enabled(i, dct, pvt) ((pvt)->csels[(dct)].csbases[(i)] & DCSB_CS_ENABLE) +#define DRAM_CONTROL 0x78 + #define DBAM0 0x80 #define DBAM1 0x180 @@ -301,6 +305,7 @@ enum amd_families { F10_CPUS, F15_CPUS, F15_M30H_CPUS, + F15_M60H_CPUS, F16_CPUS, F16_M30H_CPUS, NUM_FAMILIES, @@ -379,6 +384,9 @@ struct amd64_pvt { /* place to store error injection parameters prior to issue */ struct error_injection injection; + + /* cache the dram_type */ + enum mem_type dram_type; }; enum err_codes { @@ -480,7 +488,8 @@ struct low_ops { int (*early_channel_count) (struct amd64_pvt *pvt); void (*map_sysaddr_to_csrow) (struct mem_ctl_info *mci, u64 sys_addr, struct err_info *); - int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, unsigned cs_mode); + int (*dbam_to_cs) (struct amd64_pvt *pvt, u8 dct, + unsigned cs_mode, int cs_mask_nr); }; struct amd64_family_type { diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c index df6575f1430d..682288ced4ac 100644 --- a/drivers/edac/cpc925_edac.c +++ b/drivers/edac/cpc925_edac.c @@ -562,7 +562,7 @@ static void cpc925_mc_check(struct mem_ctl_info *mci) if (apiexcp & 
UECC_EXCP_DETECTED) { cpc925_mc_printk(mci, KERN_INFO, "DRAM UECC Fault\n"); - edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, pfn, offset, 0, csrow, -1, -1, mci->ctl_name, ""); diff --git a/drivers/edac/e7xxx_edac.c b/drivers/edac/e7xxx_edac.c index 3cda79bc8b00..ece3aef16bb1 100644 --- a/drivers/edac/e7xxx_edac.c +++ b/drivers/edac/e7xxx_edac.c @@ -226,7 +226,7 @@ static void process_ce(struct mem_ctl_info *mci, struct e7xxx_error_info *info) static void process_ce_no_info(struct mem_ctl_info *mci) { edac_dbg(3, "\n"); - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, 0, -1, -1, -1, "e7xxx CE log register overflow", ""); } diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index c3893b0ddb18..1747906f10ce 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -125,27 +125,27 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci) #endif /* CONFIG_EDAC_DEBUG */ -/* - * keep those in sync with the enum mem_type - */ const char * const edac_mem_types[] = { - "Empty csrow", - "Reserved csrow type", - "Unknown csrow type", - "Fast page mode RAM", - "Extended data out RAM", - "Burst Extended data out RAM", - "Single data rate SDRAM", - "Registered single data rate SDRAM", - "Double data rate SDRAM", - "Registered Double data rate SDRAM", - "Rambus DRAM", - "Unbuffered DDR2 RAM", - "Fully buffered DDR2", - "Registered DDR2 RAM", - "Rambus XDR", - "Unbuffered DDR3 RAM", - "Registered DDR3 RAM", + [MEM_EMPTY] = "Empty csrow", + [MEM_RESERVED] = "Reserved csrow type", + [MEM_UNKNOWN] = "Unknown csrow type", + [MEM_FPM] = "Fast page mode RAM", + [MEM_EDO] = "Extended data out RAM", + [MEM_BEDO] = "Burst Extended data out RAM", + [MEM_SDR] = "Single data rate SDRAM", + [MEM_RDR] = "Registered single data rate SDRAM", + [MEM_DDR] = "Double data rate SDRAM", + [MEM_RDDR] = "Registered Double data rate 
SDRAM", + [MEM_RMBS] = "Rambus DRAM", + [MEM_DDR2] = "Unbuffered DDR2 RAM", + [MEM_FB_DDR2] = "Fully buffered DDR2", + [MEM_RDDR2] = "Registered DDR2 RAM", + [MEM_XDR] = "Rambus XDR", + [MEM_DDR3] = "Unbuffered DDR3 RAM", + [MEM_RDDR3] = "Registered DDR3 RAM", + [MEM_LRDDR3] = "Load-Reduced DDR3 RAM", + [MEM_DDR4] = "Unbuffered DDR4 RAM", + [MEM_RDDR4] = "Registered DDR4 RAM", }; EXPORT_SYMBOL_GPL(edac_mem_types); diff --git a/drivers/edac/edac_pci_sysfs.c b/drivers/edac/edac_pci_sysfs.c index e8658e451762..24d877f6e577 100644 --- a/drivers/edac/edac_pci_sysfs.c +++ b/drivers/edac/edac_pci_sysfs.c @@ -14,9 +14,6 @@ #include "edac_core.h" #include "edac_module.h" -/* Turn off this whole feature if PCI is not configured */ -#ifdef CONFIG_PCI - #define EDAC_PCI_SYMLINK "device" /* data variables exported via sysfs */ @@ -761,5 +758,3 @@ MODULE_PARM_DESC(check_pci_errors, module_param(edac_pci_panic_on_pe, int, 0644); MODULE_PARM_DESC(edac_pci_panic_on_pe, "Panic on PCI Bus Parity error: 0=off 1=on"); - -#endif /* CONFIG_PCI */ diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 8399b4e16fe0..b24681998740 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -413,8 +413,8 @@ void ghes_edac_report_mem_error(struct ghes *ghes, int sev, /* Generate the trace event */ grain_bits = fls_long(e->grain); - sprintf(pvt->detail_location, "APEI location: %s %s", - e->location, e->other_detail); + snprintf(pvt->detail_location, sizeof(pvt->detail_location), + "APEI location: %s %s", e->location, e->other_detail); trace_mc_event(type, e->msg, e->label, e->error_count, mci->mc_idx, e->top_layer, e->mid_layer, e->low_layer, PAGES_TO_MiB(e->page_frame_number) | e->offset_in_page, diff --git a/drivers/edac/i3000_edac.c b/drivers/edac/i3000_edac.c index cd28b968e5c7..5cb36a6022cc 100644 --- a/drivers/edac/i3000_edac.c +++ b/drivers/edac/i3000_edac.c @@ -542,8 +542,7 @@ fail1: pci_unregister_driver(&i3000_driver); fail0: - if (mci_pdev) - 
pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i3200_edac.c b/drivers/edac/i3200_edac.c index 022a70273ada..4ad062b0ef26 100644 --- a/drivers/edac/i3200_edac.c +++ b/drivers/edac/i3200_edac.c @@ -242,11 +242,11 @@ static void i3200_process_error_info(struct mem_ctl_info *mci, -1, -1, "i3000 UE", ""); } else if (log & I3200_ECCERRLOG_CE) { - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0, eccerrlog_syndrome(log), eccerrlog_row(channel, log), -1, -1, - "i3000 UE", ""); + "i3000 CE", ""); } } } @@ -523,8 +523,7 @@ fail1: pci_unregister_driver(&i3200_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } diff --git a/drivers/edac/i82443bxgx_edac.c b/drivers/edac/i82443bxgx_edac.c index d730e276d1a8..b4705d9366bf 100644 --- a/drivers/edac/i82443bxgx_edac.c +++ b/drivers/edac/i82443bxgx_edac.c @@ -458,8 +458,7 @@ static void __exit i82443bxgx_edacmc_exit(void) if (!i82443bxgx_registered) i82443bxgx_edacmc_remove_one(mci_pdev); - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); } module_init(i82443bxgx_edacmc_init); diff --git a/drivers/edac/i82860_edac.c b/drivers/edac/i82860_edac.c index 3382f6344e42..4382343a7c60 100644 --- a/drivers/edac/i82860_edac.c +++ b/drivers/edac/i82860_edac.c @@ -124,7 +124,7 @@ static int i82860_process_error_info(struct mem_ctl_info *mci, dimm->location[0], dimm->location[1], -1, "i82860 UE", ""); else - edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, + edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, info->eap, 0, info->derrsyn, dimm->location[0], dimm->location[1], -1, "i82860 CE", ""); diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index f78c1c54dbd5..58586d59bf8e 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -138,6 +138,15 @@ static const char * const mc5_mce_desc[] = { "Retire status queue" }; +static const char * const 
mc6_mce_desc[] = { + "Hardware Assertion", + "Free List", + "Physical Register File", + "Retire Queue", + "Scheduler table", + "Status Register File", +}; + static bool f12h_mc0_mce(u16 ec, u8 xec) { bool ret = false; @@ -432,8 +441,8 @@ static bool k8_mc2_mce(u16 ec, u8 xec) pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec)); else if (xec == 0x0) { if (TLB_ERROR(ec)) - pr_cont(": %s error in a Page Descriptor Cache or " - "Guest TLB.\n", TT_MSG(ec)); + pr_cont("%s error in a Page Descriptor Cache or Guest TLB.\n", + TT_MSG(ec)); else if (BUS_ERROR(ec)) pr_cont(": %s/ECC error in data read from NB: %s.\n", R4_MSG(ec), PP_MSG(ec)); @@ -672,38 +681,10 @@ static void decode_mc6_mce(struct mce *m) pr_emerg(HW_ERR "MC6 Error: "); - switch (xec) { - case 0x0: - pr_cont("Hardware Assertion"); - break; - - case 0x1: - pr_cont("Free List"); - break; - - case 0x2: - pr_cont("Physical Register File"); - break; - - case 0x3: - pr_cont("Retire Queue"); - break; - - case 0x4: - pr_cont("Scheduler table"); - break; - - case 0x5: - pr_cont("Status Register File"); - break; - - default: + if (xec > 0x5) goto wrong_mc6_mce; - break; - } - - pr_cont(" parity error.\n"); + pr_cont("%s parity error.\n", mc6_mce_desc[xec]); return; wrong_mc6_mce: @@ -800,7 +781,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "MC%d_ADDR: 0x%016llx\n", m->bank, m->addr); + pr_emerg(HW_ERR "MC%d Error Address: 0x%016llx\n", m->bank, m->addr); if (!fam_ops) goto err_code; diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c index 5e46a9fea31b..0bd91a802c67 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/drivers/edac/mce_amd_inj.c @@ -1,173 +1,262 @@ /* - * A simple MCE injection facility for testing the MCE decoding code. This - * driver should be built as module so that it can be loaded on production - * kernels for testing purposes. 
+ * A simple MCE injection facility for testing different aspects of the RAS + * code. This driver should be built as module so that it can be loaded + * on production kernels for testing purposes. * * This file may be distributed under the terms of the GNU General Public * License version 2. * - * Copyright (c) 2010: Borislav Petkov <bp@alien8.de> + * Copyright (c) 2010-14: Borislav Petkov <bp@alien8.de> * Advanced Micro Devices Inc. */ #include <linux/kobject.h> +#include <linux/debugfs.h> #include <linux/device.h> -#include <linux/edac.h> #include <linux/module.h> +#include <linux/cpu.h> #include <asm/mce.h> #include "mce_amd.h" -struct edac_mce_attr { - struct attribute attr; - ssize_t (*show) (struct kobject *kobj, struct edac_mce_attr *attr, char *buf); - ssize_t (*store)(struct kobject *kobj, struct edac_mce_attr *attr, - const char *buf, size_t count); -}; - -#define EDAC_MCE_ATTR(_name, _mode, _show, _store) \ -static struct edac_mce_attr mce_attr_##_name = __ATTR(_name, _mode, _show, _store) - -static struct kobject *mce_kobj; - /* * Collect all the MCi_XXX settings */ static struct mce i_mce; +static struct dentry *dfs_inj; -#define MCE_INJECT_STORE(reg) \ -static ssize_t edac_inject_##reg##_store(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - const char *data, size_t count)\ +#define MCE_INJECT_SET(reg) \ +static int inj_##reg##_set(void *data, u64 val) \ { \ - int ret = 0; \ - unsigned long value; \ - \ - ret = kstrtoul(data, 16, &value); \ - if (ret < 0) \ - printk(KERN_ERR "Error writing MCE " #reg " field.\n"); \ + struct mce *m = (struct mce *)data; \ \ - i_mce.reg = value; \ - \ - return count; \ + m->reg = val; \ + return 0; \ } -MCE_INJECT_STORE(status); -MCE_INJECT_STORE(misc); -MCE_INJECT_STORE(addr); +MCE_INJECT_SET(status); +MCE_INJECT_SET(misc); +MCE_INJECT_SET(addr); -#define MCE_INJECT_SHOW(reg) \ -static ssize_t edac_inject_##reg##_show(struct kobject *kobj, \ - struct edac_mce_attr *attr, \ - char *buf) \ +#define 
MCE_INJECT_GET(reg) \ +static int inj_##reg##_get(void *data, u64 *val) \ { \ - return sprintf(buf, "0x%016llx\n", i_mce.reg); \ + struct mce *m = (struct mce *)data; \ + \ + *val = m->reg; \ + return 0; \ } -MCE_INJECT_SHOW(status); -MCE_INJECT_SHOW(misc); -MCE_INJECT_SHOW(addr); +MCE_INJECT_GET(status); +MCE_INJECT_GET(misc); +MCE_INJECT_GET(addr); -EDAC_MCE_ATTR(status, 0644, edac_inject_status_show, edac_inject_status_store); -EDAC_MCE_ATTR(misc, 0644, edac_inject_misc_show, edac_inject_misc_store); -EDAC_MCE_ATTR(addr, 0644, edac_inject_addr_show, edac_inject_addr_store); +DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n"); /* - * This denotes into which bank we're injecting and triggers - * the injection, at the same time. + * Caller needs to make sure this CPU doesn't disappear + * from under us, i.e.: get_cpu/put_cpu. */ -static ssize_t edac_inject_bank_store(struct kobject *kobj, - struct edac_mce_attr *attr, - const char *data, size_t count) +static int toggle_hw_mce_inject(unsigned int cpu, bool enable) { - int ret = 0; - unsigned long value; + u32 l, h; + int err; - ret = kstrtoul(data, 10, &value); - if (ret < 0) { - printk(KERN_ERR "Invalid bank value!\n"); - return -EINVAL; + err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h); + if (err) { + pr_err("%s: error reading HWCR\n", __func__); + return err; } - if (value > 5) - if (boot_cpu_data.x86 != 0x15 || value > 6) { - printk(KERN_ERR "Non-existent MCE bank: %lu\n", value); - return -EINVAL; - } + enable ? 
(l |= BIT(18)) : (l &= ~BIT(18)); - i_mce.bank = value; + err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h); + if (err) + pr_err("%s: error writing HWCR\n", __func__); - amd_decode_mce(NULL, 0, &i_mce); + return err; +} - return count; +static int flags_get(void *data, u64 *val) +{ + struct mce *m = (struct mce *)data; + + *val = m->inject_flags; + + return 0; } -static ssize_t edac_inject_bank_show(struct kobject *kobj, - struct edac_mce_attr *attr, char *buf) +static int flags_set(void *data, u64 val) { - return sprintf(buf, "%d\n", i_mce.bank); + struct mce *m = (struct mce *)data; + + m->inject_flags = (u8)val; + return 0; } -EDAC_MCE_ATTR(bank, 0644, edac_inject_bank_show, edac_inject_bank_store); +DEFINE_SIMPLE_ATTRIBUTE(flags_fops, flags_get, flags_set, "%llu\n"); -static struct edac_mce_attr *sysfs_attrs[] = { &mce_attr_status, &mce_attr_misc, - &mce_attr_addr, &mce_attr_bank -}; +/* + * On which CPU to inject? + */ +MCE_INJECT_GET(extcpu); -static int __init edac_init_mce_inject(void) +static int inj_extcpu_set(void *data, u64 val) { - struct bus_type *edac_subsys = NULL; - int i, err = 0; + struct mce *m = (struct mce *)data; - edac_subsys = edac_get_sysfs_subsys(); - if (!edac_subsys) + if (val >= nr_cpu_ids || !cpu_online(val)) { + pr_err("%s: Invalid CPU: %llu\n", __func__, val); return -EINVAL; + } + m->extcpu = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n"); - mce_kobj = kobject_create_and_add("mce", &edac_subsys->dev_root->kobj); - if (!mce_kobj) { - printk(KERN_ERR "Error creating a mce kset.\n"); - err = -ENOMEM; - goto err_mce_kobj; +static void trigger_mce(void *info) +{ + asm volatile("int $18"); +} + +static void do_inject(void) +{ + u64 mcg_status = 0; + unsigned int cpu = i_mce.extcpu; + u8 b = i_mce.bank; + + if (!(i_mce.inject_flags & MCJ_EXCEPTION)) { + amd_decode_mce(NULL, 0, &i_mce); + return; } - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) { - err = sysfs_create_file(mce_kobj, 
&sysfs_attrs[i]->attr); - if (err) { - printk(KERN_ERR "Error creating %s in sysfs.\n", - sysfs_attrs[i]->attr.name); - goto err_sysfs_create; + get_online_cpus(); + if (!cpu_online(cpu)) + goto err; + + /* prep MCE global settings for the injection */ + mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV; + + if (!(i_mce.status & MCI_STATUS_PCC)) + mcg_status |= MCG_STATUS_RIPV; + + toggle_hw_mce_inject(cpu, true); + + wrmsr_on_cpu(cpu, MSR_IA32_MCG_STATUS, + (u32)mcg_status, (u32)(mcg_status >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_STATUS(b), + (u32)i_mce.status, (u32)(i_mce.status >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_ADDR(b), + (u32)i_mce.addr, (u32)(i_mce.addr >> 32)); + + wrmsr_on_cpu(cpu, MSR_IA32_MCx_MISC(b), + (u32)i_mce.misc, (u32)(i_mce.misc >> 32)); + + toggle_hw_mce_inject(cpu, false); + + smp_call_function_single(cpu, trigger_mce, NULL, 0); + +err: + put_online_cpus(); + +} + +/* + * This denotes into which bank we're injecting and triggers + * the injection, at the same time. 
+ */ +static int inj_bank_set(void *data, u64 val) +{ + struct mce *m = (struct mce *)data; + + if (val > 5) { + if (boot_cpu_data.x86 != 0x15 || val > 6) { + pr_err("Non-existent MCE bank: %llu\n", val); + return -EINVAL; } } - return 0; -err_sysfs_create: - while (--i >= 0) - sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); + m->bank = val; + do_inject(); - kobject_del(mce_kobj); + return 0; +} -err_mce_kobj: - edac_put_sysfs_subsys(); +static int inj_bank_get(void *data, u64 *val) +{ + struct mce *m = (struct mce *)data; - return err; + *val = m->bank; + return 0; } -static void __exit edac_exit_mce_inject(void) +DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n"); + +struct dfs_node { + char *name; + struct dentry *d; + const struct file_operations *fops; +} dfs_fls[] = { + { .name = "status", .fops = &status_fops }, + { .name = "misc", .fops = &misc_fops }, + { .name = "addr", .fops = &addr_fops }, + { .name = "bank", .fops = &bank_fops }, + { .name = "flags", .fops = &flags_fops }, + { .name = "cpu", .fops = &extcpu_fops }, +}; + +static int __init init_mce_inject(void) { int i; - for (i = 0; i < ARRAY_SIZE(sysfs_attrs); i++) - sysfs_remove_file(mce_kobj, &sysfs_attrs[i]->attr); + dfs_inj = debugfs_create_dir("mce-inject", NULL); + if (!dfs_inj) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) { + dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name, + S_IRUSR | S_IWUSR, + dfs_inj, + &i_mce, + dfs_fls[i].fops); + + if (!dfs_fls[i].d) + goto err_dfs_add; + } + + return 0; + +err_dfs_add: + while (--i >= 0) + debugfs_remove(dfs_fls[i].d); - kobject_del(mce_kobj); + debugfs_remove(dfs_inj); + dfs_inj = NULL; - edac_put_sysfs_subsys(); + return -ENOMEM; } -module_init(edac_init_mce_inject); -module_exit(edac_exit_mce_inject); +static void __exit exit_mce_inject(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) + debugfs_remove(dfs_fls[i].d); + + memset(&dfs_fls, 0, sizeof(dfs_fls)); + + debugfs_remove(dfs_inj); + 
dfs_inj = NULL; +} +module_init(init_mce_inject); +module_exit(exit_mce_inject); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Borislav Petkov <bp@alien8.de>"); MODULE_AUTHOR("AMD Inc."); -MODULE_DESCRIPTION("MCE injection facility for testing MCE decoding"); +MODULE_DESCRIPTION("MCE injection facility for RAS testing"); diff --git a/drivers/edac/mv64x60_edac.c b/drivers/edac/mv64x60_edac.c index 542fad70e360..6366e880f978 100644 --- a/drivers/edac/mv64x60_edac.c +++ b/drivers/edac/mv64x60_edac.c @@ -178,7 +178,7 @@ static int mv64x60_pci_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_pci_isr, - IRQF_DISABLED, + 0, "[EDAC] PCI err", pci); if (res < 0) { @@ -345,7 +345,7 @@ static int mv64x60_sram_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_sram_isr, - IRQF_DISABLED, + 0, "[EDAC] SRAM err", edac_dev); if (res < 0) { @@ -540,7 +540,7 @@ static int mv64x60_cpu_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_cpu_isr, - IRQF_DISABLED, + 0, "[EDAC] CPU err", edac_dev); if (res < 0) { @@ -800,7 +800,7 @@ static int mv64x60_mc_err_probe(struct platform_device *pdev) res = devm_request_irq(&pdev->dev, pdata->irq, mv64x60_mc_isr, - IRQF_DISABLED, + 0, "[EDAC] MC err", mci); if (res < 0) { diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index 0f04d5ead521..41593539cec4 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1120,7 +1120,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op, status = request_irq(ded_irq, ppc4xx_edac_isr, - IRQF_DISABLED, + 0, "[EDAC] MC ECCDED", mci); @@ -1134,7 +1134,7 @@ static int ppc4xx_edac_register_irq(struct platform_device *op, status = request_irq(sec_irq, ppc4xx_edac_isr, - IRQF_DISABLED, + 0, "[EDAC] MC ECCSEC", mci); diff --git a/drivers/edac/x38_edac.c b/drivers/edac/x38_edac.c index e644b52c287c..7c5cdc62f31c 100644 --- 
a/drivers/edac/x38_edac.c +++ b/drivers/edac/x38_edac.c @@ -500,8 +500,7 @@ fail1: pci_unregister_driver(&x38_driver); fail0: - if (mci_pdev) - pci_dev_put(mci_pdev); + pci_dev_put(mci_pdev); return pci_rc; } |