diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 2 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 311 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 8 | ||||
-rw-r--r-- | drivers/edac/edac_mc.c | 2 | ||||
-rw-r--r-- | drivers/edac/i10nm_base.c | 9 | ||||
-rw-r--r-- | drivers/edac/mce_amd.c | 146 | ||||
-rw-r--r-- | drivers/edac/sb_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/sifive_edac.c | 2 | ||||
-rw-r--r-- | drivers/edac/synopsys_edac.c | 52 |
9 files changed, 504 insertions, 30 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 2fc4c3f91fd5..58ab63642e72 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -484,7 +484,7 @@ config EDAC_ARMADA_XP config EDAC_SYNOPSYS tristate "Synopsys DDR Memory Controller" - depends on ARCH_ZYNQ || ARCH_ZYNQMP + depends on ARCH_ZYNQ || ARCH_ZYNQMP || ARCH_INTEL_SOCFPGA help Support for error detection and correction on the Synopsys DDR memory controller. diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4fce75013674..fba609ada0e6 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -988,6 +988,281 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } +/* Protect the PCI config register pairs used for DF indirect access. */ +static DEFINE_MUTEX(df_indirect_mutex); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): Constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO + * and FICAD HI registers but so far we only need the LO register. + * + * Use Instance Id 0xFF to indicate a broadcast read. + */ +#define DF_BROADCAST 0xFF +static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + struct pci_dev *F4; + u32 ficaa; + int err = -ENODEV; + + if (node >= amd_nb_num()) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + ficaa = (instance_id == DF_BROADCAST) ? 
0 : 1; + ficaa |= reg & 0x3FC; + ficaa |= (func & 0x7) << 11; + ficaa |= instance_id << 16; + + mutex_lock(&df_indirect_mutex); + + err = pci_write_config_dword(F4, 0x5C, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, 0x98, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + +out_unlock: + mutex_unlock(&df_indirect_mutex); + +out: + return err; +} + +static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + return __df_indirect_read(node, func, reg, instance_id, lo); +} + +static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +{ + return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); +} + +struct addr_ctx { + u64 ret_addr; + u32 tmp; + u16 nid; + u8 inst_id; +}; + +static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + struct addr_ctx ctx; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = norm_addr; + + ctx.nid = nid; + ctx.inst_id = umc; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (ctx.tmp & BIT(0)) { + u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ctx.ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). 
*/ + if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(ctx.tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, ctx.tmp); + goto out_err; + } + + lgcy_mmio_hole_en = ctx.tmp & BIT(1); + intlv_num_chan = (ctx.tmp >> 4) & 0xF; + intlv_addr_sel = (ctx.tmp >> 8) & 0x7; + dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). */ + if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) + goto out_err; + + intlv_num_sockets = (ctx.tmp >> 8) & 0x1; + intlv_num_dies = (ctx.tmp >> 10) & 0x3; + dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. 
*/ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) + goto out_err; + + cs_fabric_id = (ctx.tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (intlv_num_dies || intlv_num_sockets) + if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (ctx.tmp >> 24) & 0xF; + die_id_mask = (ctx.tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (ctx.tmp >> 28) & 0xF; + socket_id_mask = (ctx.tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). 
+ */ + temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ctx.ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) + goto out_err; + + dram_hole_base = ctx.tmp & GENMASK(31, 24); + if (ctx.ret_addr >= dram_hole_base) + ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ctx.ret_addr >> 12) ^ + (ctx.ret_addr >> 18) ^ + (ctx.ret_addr >> 21) ^ + (ctx.ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) + ctx.ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is the calculated system address above the DRAM limit address? 
*/ + if (ctx.ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ctx.ret_addr; + return 0; + +out_err: + return -EINVAL; +} + static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* @@ -2650,6 +2925,26 @@ static struct amd64_family_type family_types[] = { .dbam_to_cs = f17_addr_mask_to_cs_size, } }, + [F19_M10H_CPUS] = { + .ctl_name = "F19h_M10h", + .f0_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_M10H_DF_F6, + .max_mcs = 12, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, + [F19_M50H_CPUS] = { + .ctl_name = "F19h_M50h", + .f0_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F0, + .f6_id = PCI_DEVICE_ID_AMD_19H_M50H_DF_F6, + .max_mcs = 2, + .ops = { + .early_channel_count = f17_early_channel_count, + .dbam_to_cs = f17_addr_mask_to_cs_size, + } + }, }; /* @@ -3687,11 +3982,25 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) break; case 0x19: - if (pvt->model >= 0x20 && pvt->model <= 0x2f) { + if (pvt->model >= 0x10 && pvt->model <= 0x1f) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + break; + } else if (pvt->model >= 0x20 && pvt->model <= 0x2f) { fam_type = &family_types[F17_M70H_CPUS]; pvt->ops = &family_types[F17_M70H_CPUS].ops; fam_type->ctl_name = "F19h_M20h"; break; + } else if (pvt->model >= 0x50 && pvt->model <= 0x5f) { + fam_type = &family_types[F19_M50H_CPUS]; + pvt->ops = &family_types[F19_M50H_CPUS].ops; + fam_type->ctl_name = "F19h_M50h"; + break; + } else if (pvt->model >= 0xa0 && pvt->model <= 0xaf) { + fam_type = &family_types[F19_M10H_CPUS]; + pvt->ops = &family_types[F19_M10H_CPUS].ops; + fam_type->ctl_name = "F19h_MA0h"; + break; } fam_type = &family_types[F19_CPUS]; pvt->ops = &family_types[F19_CPUS].ops; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 85aa820bc165..352bda9803f6 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ 
-96,7 +96,7 @@ /* Hardware limit on ChipSelect rows per MC and processors per system */ #define NUM_CHIPSELECTS 8 #define DRAM_RANGES 8 -#define NUM_CONTROLLERS 8 +#define NUM_CONTROLLERS 12 #define ON true #define OFF false @@ -126,6 +126,10 @@ #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F6 0x1446 #define PCI_DEVICE_ID_AMD_19H_DF_F0 0x1650 #define PCI_DEVICE_ID_AMD_19H_DF_F6 0x1656 +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F0 0x14ad +#define PCI_DEVICE_ID_AMD_19H_M10H_DF_F6 0x14b3 +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F0 0x166a +#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F6 0x1670 /* * Function 1 - Address Map @@ -298,6 +302,8 @@ enum amd_families { F17_M60H_CPUS, F17_M70H_CPUS, F19_CPUS, + F19_M10H_CPUS, + F19_M50H_CPUS, NUM_FAMILIES, }; diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 9f82ca295353..9d9aabdec96b 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -162,6 +162,8 @@ const char * const edac_mem_types[] = { [MEM_LPDDR4] = "Low-Power-DDR4-RAM", [MEM_LRDDR4] = "Load-Reduced-DDR4-RAM", [MEM_DDR5] = "Unbuffered-DDR5", + [MEM_RDDR5] = "Registered-DDR5", + [MEM_LRDDR5] = "Load-Reduced-DDR5-RAM", [MEM_NVDIMM] = "Non-volatile-RAM", [MEM_WIO2] = "Wide-IO-2", [MEM_HBM2] = "High-bandwidth-memory-Gen2", diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c index 83345bfac246..6cf50ee0b77c 100644 --- a/drivers/edac/i10nm_base.c +++ b/drivers/edac/i10nm_base.c @@ -358,6 +358,9 @@ static int i10nm_get_hbm_munits(void) mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE); if (!mbase) { + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n", base + off); return -ENOMEM; @@ -368,6 +371,12 @@ static int i10nm_get_hbm_munits(void) mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0); if (!I10NM_IS_HBM_IMC(mcmtr)) { + iounmap(d->imc[lmc].mbase); + d->imc[lmc].mbase = NULL; + d->imc[lmc].hbm_mc = false; + pci_dev_put(d->imc[lmc].mdev); + d->imc[lmc].mdev = NULL; + 
i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n"); return -ENODEV; } diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 67dbf4c31271..cc5c63feb26a 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -399,6 +399,63 @@ static const char * const smca_mp5_mce_desc[] = { "Instruction Tag Cache Bank B ECC or parity error", }; +static const char * const smca_mpdma_mce_desc[] = { + "Main SRAM [31:0] bank ECC or parity error", + "Main SRAM [63:32] bank ECC or parity error", + "Main SRAM [95:64] bank ECC or parity error", + "Main SRAM [127:96] bank ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "System Hub Read Buffer ECC or parity error", + "MPDMA TVF DVSEC Memory ECC or parity error", + "MPDMA TVF MMIO Mailbox0 ECC or parity error", + "MPDMA TVF MMIO Mailbox1 ECC or parity error", + "MPDMA TVF Doorbell Memory ECC or parity 
error", + "MPDMA TVF SDP Slave Memory 0 ECC or parity error", + "MPDMA TVF SDP Slave Memory 1 ECC or parity error", + "MPDMA TVF SDP Slave Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 0 ECC or parity error", + "MPDMA TVF SDP Master Memory 1 ECC or parity error", + "MPDMA TVF SDP Master Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 3 ECC or parity error", + "MPDMA TVF SDP Master Memory 4 ECC or parity error", + "MPDMA TVF SDP Master Memory 5 ECC or parity error", + "MPDMA TVF SDP Master Memory 6 ECC or parity error", + "MPDMA PTE Command FIFO ECC or parity error", + "MPDMA PTE Hub Data FIFO ECC or parity error", + "MPDMA PTE Internal Data FIFO ECC or parity error", + "MPDMA PTE Command Memory DMA ECC or parity error", + "MPDMA PTE Command Memory Internal ECC or parity error", + "MPDMA PTE DMA Completion FIFO ECC or parity error", + "MPDMA PTE Tablewalk Completion FIFO ECC or parity error", + "MPDMA PTE Descriptor Completion FIFO ECC or parity error", + "MPDMA PTE ReadOnly Completion FIFO ECC or parity error", + "MPDMA PTE DirectWrite Completion FIFO ECC or parity error", + "SDP Watchdog Timer expired", +}; + static const char * const smca_nbio_mce_desc[] = { "ECC or Parity error", "PCIE error", @@ -448,7 +505,7 @@ static const char * const smca_xgmipcs_mce_desc[] = { "Rx Replay Timeout Error", "LinkSub Tx Timeout Error", "LinkSub Rx Timeout Error", - "Rx CMD Pocket Error", + "Rx CMD Packet Error", }; static const char * const smca_xgmiphy_mce_desc[] = { @@ -458,11 +515,66 @@ static const char * const smca_xgmiphy_mce_desc[] = { "PHY APB error", }; -static const char * const smca_waflphy_mce_desc[] = { - "RAM ECC Error", - "ARC instruction buffer parity error", - "ARC data buffer parity error", - "PHY APB error", +static const char * const smca_nbif_mce_desc[] = { + "Timeout error from GMI", + "SRAM ECC error", + "NTB Error Event", + "SDP Parity error", +}; + +static const char * const smca_sata_mce_desc[] = { + "Parity error for port 
0", + "Parity error for port 1", + "Parity error for port 2", + "Parity error for port 3", + "Parity error for port 4", + "Parity error for port 5", + "Parity error for port 6", + "Parity error for port 7", +}; + +static const char * const smca_usb_mce_desc[] = { + "Parity error or ECC error for S0 RAM0", + "Parity error or ECC error for S0 RAM1", + "Parity error or ECC error for S0 RAM2", + "Parity error for PHY RAM0", + "Parity error for PHY RAM1", + "AXI Slave Response error", +}; + +static const char * const smca_gmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", + "Replay Parity Error", + "Rx Fifo Underflow Error", + "Rx Fifo Overflow Error", + "CRC Error", + "BER Exceeded Error", + "Tx Fifo Underflow Error", + "Replay Buffer Parity Error", + "Tx Overflow Error", + "Replay Fifo Overflow Error", + "Replay Fifo Underflow Error", + "Elastic Fifo Overflow Error", + "Deskew Error", + "Offline Error", + "Data Startup Limit Error", + "FC Init Timeout Error", + "Recovery Timeout Error", + "Ready Serial Timeout Error", + "Ready Serial Attempt Error", + "Recovery Attempt Error", + "Recovery Relock Attempt Error", + "Deskew Abort Error", + "Rx Buffer Error", + "Rx LFDS Fifo Overflow Error", + "Rx LFDS Fifo Underflow Error", + "LinkSub Tx Timeout Error", + "LinkSub Rx Timeout Error", + "Rx CMD Packet Error", + "LFDS Training Timeout Error", + "LFDS FC Init Timeout Error", + "Data Loss Error", }; struct smca_mce_desc { @@ -490,12 +602,21 @@ static struct smca_mce_desc smca_mce_descs[] = { [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) }, [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, + [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) }, [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, 
ARRAY_SIZE(smca_pcie2_mce_desc) }, [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, + /* NBIF and SHUB have the same error descriptions, for now. */ + [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, + [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, + [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, + /* All the PHY bank types have the same error descriptions, for now. */ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, - [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, + [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, }; static bool f12h_mc0_mce(u16 ec, u8 xec) @@ -1045,20 +1166,13 @@ static void decode_mc6_mce(struct mce *m) /* Decode errors according to Scalable MCA specification */ static void decode_smca_error(struct mce *m) { - struct smca_hwid *hwid; - enum smca_bank_types bank_type; + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); const char *ip_name; u8 xec = XEC(m->status, xec_mask); - if (m->bank >= ARRAY_SIZE(smca_banks)) + if (bank_type >= N_SMCA_BANK_TYPES) return; - hwid = smca_banks[m->bank].hwid; - if (!hwid) - return; - - bank_type = hwid->bank_type; - if (bank_type == SMCA_RESERVED) { pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank); return; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 1522d4aa2ca6..9678ab97c7ac 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3439,7 +3439,7 @@ MODULE_DEVICE_TABLE(x86cpu, sbridge_cpuids); static int sbridge_probe(const struct x86_cpu_id *id) { - int rc = -ENODEV; + int rc; u8 mc, num_mc = 0; struct sbridge_dev *sbridge_dev; struct 
pci_id_table *ptable = (struct pci_id_table *)id->driver_data; diff --git a/drivers/edac/sifive_edac.c b/drivers/edac/sifive_edac.c index 3a3dcb14ed99..ee800aec7d47 100644 --- a/drivers/edac/sifive_edac.c +++ b/drivers/edac/sifive_edac.c @@ -19,7 +19,7 @@ struct sifive_edac_priv { struct edac_device_ctl_info *dci; }; -/** +/* * EDAC error callback * * @event: non-zero if unrecoverable. diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 7d08627e738b..f05ff02c0656 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -101,6 +101,7 @@ /* DDR ECC Quirks */ #define DDR_ECC_INTR_SUPPORT BIT(0) #define DDR_ECC_DATA_POISON_SUPPORT BIT(1) +#define DDR_ECC_INTR_SELF_CLEAR BIT(2) /* ZynqMP Enhanced DDR memory controller registers that are relevant to ECC */ /* ECC Configuration Registers */ @@ -171,6 +172,10 @@ #define DDR_QOS_IRQ_EN_OFST 0x20208 #define DDR_QOS_IRQ_DB_OFST 0x2020C +/* DDR QOS Interrupt register definitions */ +#define DDR_UE_MASK BIT(9) +#define DDR_CE_MASK BIT(8) + /* ECC Corrected Error Register Mask and Shifts*/ #define ECC_CEADDR0_RW_MASK 0x3FFFF #define ECC_CEADDR0_RNK_MASK BIT(24) @@ -533,10 +538,16 @@ static irqreturn_t intr_handler(int irq, void *dev_id) priv = mci->pvt_info; p_data = priv->p_data; - regval = readl(priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); - regval &= (DDR_QOSCE_MASK | DDR_QOSUE_MASK); - if (!(regval & ECC_CE_UE_INTR_MASK)) - return IRQ_NONE; + /* + * v3.0 of the controller has the ce/ue bits cleared automatically, + * so this condition does not apply. 
+ */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) { + regval = readl(priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); + regval &= (DDR_QOSCE_MASK | DDR_QOSUE_MASK); + if (!(regval & ECC_CE_UE_INTR_MASK)) + return IRQ_NONE; + } status = p_data->get_error_info(priv); if (status) @@ -548,7 +559,9 @@ static irqreturn_t intr_handler(int irq, void *dev_id) edac_dbg(3, "Total error count CE %d UE %d\n", priv->ce_cnt, priv->ue_cnt); - writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); + /* v3.0 of the controller does not have this register */ + if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)) + writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST); return IRQ_HANDLED; } @@ -834,8 +847,13 @@ static void mc_init(struct mem_ctl_info *mci, struct platform_device *pdev) static void enable_intr(struct synps_edac_priv *priv) { /* Enable UE/CE Interrupts */ - writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, - priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR) + writel(DDR_UE_MASK | DDR_CE_MASK, + priv->baseaddr + ECC_CLR_OFST); + else + writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK, + priv->baseaddr + DDR_QOS_IRQ_EN_OFST); + } static void disable_intr(struct synps_edac_priv *priv) @@ -890,6 +908,19 @@ static const struct synps_platform_data zynqmp_edac_def = { ), }; +static const struct synps_platform_data synopsys_edac_def = { + .get_error_info = zynqmp_get_error_info, + .get_mtype = zynqmp_get_mtype, + .get_dtype = zynqmp_get_dtype, + .get_ecc_state = zynqmp_get_ecc_state, + .quirks = (DDR_ECC_INTR_SUPPORT | DDR_ECC_INTR_SELF_CLEAR +#ifdef CONFIG_EDAC_DEBUG + | DDR_ECC_DATA_POISON_SUPPORT +#endif + ), +}; + + static const struct of_device_id synps_edac_match[] = { { .compatible = "xlnx,zynq-ddrc-a05", @@ -900,6 +931,10 @@ static const struct of_device_id synps_edac_match[] = { .data = (void *)&zynqmp_edac_def }, { + .compatible = "snps,ddrc-3.80a", + .data = (void *)&synopsys_edac_def + }, + { /* end of table */ } }; @@ -1352,8 +1387,7 
@@ static int mc_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(pdev->dev.of_node, - "xlnx,zynqmp-ddrc-2.40a")) + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) setup_address_map(priv); #endif |