From 0b746e8c1e1e3fcc9e4036efe8d3ea3fd3e5d4c3 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Oct 2021 17:56:56 +0000 Subject: x86/MCE/AMD, EDAC/amd64: Move address translation to AMD64 EDAC The address translation code used for current AMD systems is non-architectural. So move it to EDAC. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211028175728.121452-2-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4fce75013674..d2ad9f06abb7 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -988,6 +988,205 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } +static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) +{ + u64 dram_base_addr, dram_limit_addr, dram_hole_base; + /* We start from the normalized address */ + u64 ret_addr = norm_addr; + + u32 tmp; + + u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; + u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; + u8 intlv_addr_sel, intlv_addr_bit; + u8 num_intlv_bits, hashed_bit; + u8 lgcy_mmio_hole_en, base = 0; + u8 cs_mask, cs_id = 0; + bool hash_enabled = false; + + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ + if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + goto out_err; + + /* Remove HiAddrOffset from normalized address, if enabled: */ + if (tmp & BIT(0)) { + u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + + if (norm_addr >= hi_addr_offset) { + ret_addr -= hi_addr_offset; + base = 1; + } + } + + /* Read D18F0x110 (DramBaseAddress). */ + if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + goto out_err; + + /* Check if address range is valid. */ + if (!(tmp & BIT(0))) { + pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", + __func__, tmp); + goto out_err; + } + + lgcy_mmio_hole_en = tmp & BIT(1); + intlv_num_chan = (tmp >> 4) & 0xF; + intlv_addr_sel = (tmp >> 8) & 0x7; + dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + + /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ + if (intlv_addr_sel > 3) { + pr_err("%s: Invalid interleave address select %d.\n", + __func__, intlv_addr_sel); + goto out_err; + } + + /* Read D18F0x114 (DramLimitAddress). */ + if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + goto out_err; + + intlv_num_sockets = (tmp >> 8) & 0x1; + intlv_num_dies = (tmp >> 10) & 0x3; + dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + + intlv_addr_bit = intlv_addr_sel + 8; + + /* Re-use intlv_num_chan by setting it equal to log2(#channels) */ + switch (intlv_num_chan) { + case 0: intlv_num_chan = 0; break; + case 1: intlv_num_chan = 1; break; + case 3: intlv_num_chan = 2; break; + case 5: intlv_num_chan = 3; break; + case 7: intlv_num_chan = 4; break; + + case 8: intlv_num_chan = 1; + hash_enabled = true; + break; + default: + pr_err("%s: Invalid number of interleaved channels %d.\n", + __func__, intlv_num_chan); + goto out_err; + } + + num_intlv_bits = intlv_num_chan; + + if (intlv_num_dies > 2) { + pr_err("%s: Invalid number of interleaved nodes/dies %d.\n", + __func__, intlv_num_dies); + goto out_err; + } + + num_intlv_bits += intlv_num_dies; + + /* Add a bit if sockets are interleaved. */ + num_intlv_bits += intlv_num_sockets; + + /* Assert num_intlv_bits <= 4 */ + if (num_intlv_bits > 4) { + pr_err("%s: Invalid interleave bits %d.\n", + __func__, num_intlv_bits); + goto out_err; + } + + if (num_intlv_bits > 0) { + u64 temp_addr_x, temp_addr_i, temp_addr_y; + u8 die_id_bit, sock_id_bit, cs_fabric_id; + + /* + * Read FabricBlockInstanceInformation3_CS[BlockFabricID]. + * This is the fabric id for this coherent slave. Use + * umc/channel# as instance id of the coherent slave + * for FICAA. + */ + if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + goto out_err; + + cs_fabric_id = (tmp >> 8) & 0xFF; + die_id_bit = 0; + + /* If interleaved over more than 1 channel: */ + if (intlv_num_chan) { + die_id_bit = intlv_num_chan; + cs_mask = (1 << die_id_bit) - 1; + cs_id = cs_fabric_id & cs_mask; + } + + sock_id_bit = die_id_bit; + + /* Read D18F1x208 (SystemFabricIdMask). */ + if (intlv_num_dies || intlv_num_sockets) + if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + goto out_err; + + /* If interleaved over more than 1 die. */ + if (intlv_num_dies) { + sock_id_bit = die_id_bit + intlv_num_dies; + die_id_shift = (tmp >> 24) & 0xF; + die_id_mask = (tmp >> 8) & 0xFF; + + cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; + } + + /* If interleaved over more than 1 socket. */ + if (intlv_num_sockets) { + socket_id_shift = (tmp >> 28) & 0xF; + socket_id_mask = (tmp >> 16) & 0xFF; + + cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; + } + + /* + * The pre-interleaved address consists of XXXXXXIIIYYYYY + * where III is the ID for this CS, and XXXXXXYYYYY are the + * address bits from the post-interleaved address. + * "num_intlv_bits" has been calculated to tell us how many "I" + * bits there are. "intlv_addr_bit" tells us how many "Y" bits + * there are (where "I" starts). + */ + temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_i = (cs_id << intlv_addr_bit); + temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + } + + /* Add dram base address */ + ret_addr += dram_base_addr; + + /* If legacy MMIO hole enabled */ + if (lgcy_mmio_hole_en) { + if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + goto out_err; + + dram_hole_base = tmp & GENMASK(31, 24); + if (ret_addr >= dram_hole_base) + ret_addr += (BIT_ULL(32) - dram_hole_base); + } + + if (hash_enabled) { + /* Save some parentheses and grab ls-bit at the end. */ + hashed_bit = (ret_addr >> 12) ^ + (ret_addr >> 18) ^ + (ret_addr >> 21) ^ + (ret_addr >> 30) ^ + cs_id; + + hashed_bit &= BIT(0); + + if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) + ret_addr ^= BIT(intlv_addr_bit); + } + + /* Is calculated system address is above DRAM limit address? */ + if (ret_addr > dram_limit_addr) + goto out_err; + + *sys_addr = ret_addr; + return 0; + +out_err: + return -EINVAL; +} + static int get_channel_from_ecc_syndrome(struct mem_ctl_info *, u16); /* -- cgit v1.2.3 From b3218ae47771f943b3e222f35fc46afacba39929 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Oct 2021 17:56:57 +0000 Subject: x86/amd_nb, EDAC/amd64: Move DF Indirect Read to AMD64 EDAC df_indirect_read() is used only for address translation. Move it to EDAC along with the translation code. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211028175728.121452-3-yazen.ghannam@amd.com --- arch/x86/include/asm/amd_nb.h | 1 - arch/x86/kernel/amd_nb.c | 49 +----------------------------------------- drivers/edac/amd64_edac.c | 50 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 49 deletions(-) (limited to 'drivers/edac') diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index 455066a06f60..00d1a400b7a1 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h @@ -24,7 +24,6 @@ extern int amd_set_subcaches(int, unsigned long); extern int amd_smn_read(u16 node, u32 address, u32 *value); extern int amd_smn_write(u16 node, u32 address, u32 value); -extern int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo); struct amd_l3_cache { unsigned indices; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c92c9c774c0e..f814d5fc333e 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -29,7 +29,7 @@ #define PCI_DEVICE_ID_AMD_19H_M40H_DF_F4 0x167d #define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e -/* Protect the PCI config register pairs used for SMN and DF indirect access. */ +/* Protect the PCI config register pairs used for SMN. */ static DEFINE_MUTEX(smn_mutex); static u32 *flush_words; @@ -182,53 +182,6 @@ int amd_smn_write(u16 node, u32 address, u32 value) } EXPORT_SYMBOL_GPL(amd_smn_write); -/* - * Data Fabric Indirect Access uses FICAA/FICAD. - * - * Fabric Indirect Configuration Access Address (FICAA): Constructed based - * on the device's Instance Id and the PCI function and register offset of - * the desired register. - * - * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO - * and FICAD HI registers but so far we only need the LO register. - */ -int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) -{ - struct pci_dev *F4; - u32 ficaa; - int err = -ENODEV; - - if (node >= amd_northbridges.num) - goto out; - - F4 = node_to_amd_nb(node)->link; - if (!F4) - goto out; - - ficaa = 1; - ficaa |= reg & 0x3FC; - ficaa |= (func & 0x7) << 11; - ficaa |= instance_id << 16; - - mutex_lock(&smn_mutex); - - err = pci_write_config_dword(F4, 0x5C, ficaa); - if (err) { - pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); - goto out_unlock; - } - - err = pci_read_config_dword(F4, 0x98, lo); - if (err) - pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); - -out_unlock: - mutex_unlock(&smn_mutex); - -out: - return err; -} -EXPORT_SYMBOL_GPL(amd_df_indirect_read); int amd_cache_northbridges(void) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d2ad9f06abb7..034d9863bcf9 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -988,6 +988,56 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr) return csrow; } +/* Protect the PCI config register pairs used for DF indirect access. */ +static DEFINE_MUTEX(df_indirect_mutex); + +/* + * Data Fabric Indirect Access uses FICAA/FICAD. + * + * Fabric Indirect Configuration Access Address (FICAA): Constructed based + * on the device's Instance Id and the PCI function and register offset of + * the desired register. + * + * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO + * and FICAD HI registers but so far we only need the LO register. + */ +static int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + struct pci_dev *F4; + u32 ficaa; + int err = -ENODEV; + + if (node >= amd_nb_num()) + goto out; + + F4 = node_to_amd_nb(node)->link; + if (!F4) + goto out; + + ficaa = 1; + ficaa |= reg & 0x3FC; + ficaa |= (func & 0x7) << 11; + ficaa |= instance_id << 16; + + mutex_lock(&df_indirect_mutex); + + err = pci_write_config_dword(F4, 0x5C, ficaa); + if (err) { + pr_warn("Error writing DF Indirect FICAA, FICAA=0x%x\n", ficaa); + goto out_unlock; + } + + err = pci_read_config_dword(F4, 0x98, lo); + if (err) + pr_warn("Error reading DF Indirect FICAD LO, FICAA=0x%x.\n", ficaa); + +out_unlock: + mutex_unlock(&df_indirect_mutex); + +out: + return err; +} + static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { u64 dram_base_addr, dram_limit_addr, dram_hole_base; -- cgit v1.2.3 From 448c3d6085b71aad58cd515469560ee76c982007 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Oct 2021 17:56:58 +0000 Subject: EDAC/amd64: Allow for DF Indirect Broadcast reads The DF Indirect Access method allows for "Broadcast" accesses in which case no specific instance is targeted. Add support using a reserved instance ID of 0xFF to indicate a broadcast access. Set the FICAA register appropriately. Define helpers functions for instance and broadcast reads and use them where appropriate. Drop the "amd_" prefix since these functions are all static. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211028175728.121452-4-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 034d9863bcf9..d41b9a02cc7d 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1000,8 +1000,11 @@ static DEFINE_MUTEX(df_indirect_mutex); * * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO * and FICAD HI registers but so far we only need the LO register. + * + * Use Instance Id 0xFF to indicate a broadcast read. */ -static int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +#define DF_BROADCAST 0xFF +static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) { struct pci_dev *F4; u32 ficaa; @@ -1014,7 +1017,7 @@ static int amd_df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 if (!F4) goto out; - ficaa = 1; + ficaa = (instance_id == DF_BROADCAST) ? 0 : 1; ficaa |= reg & 0x3FC; ficaa |= (func & 0x7) << 11; ficaa |= instance_id << 16; @@ -1038,6 +1041,16 @@ out: return err; } +static int df_indirect_read_instance(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) +{ + return __df_indirect_read(node, func, reg, instance_id, lo); +} + +static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) +{ + return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); +} + static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { u64 dram_base_addr, dram_limit_addr, dram_hole_base; @@ -1055,7 +1068,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr bool hash_enabled = false; /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &tmp)) goto out_err; /* Remove HiAddrOffset from normalized address, if enabled: */ @@ -1069,7 +1082,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr } /* Read D18F0x110 (DramBaseAddress). */ - if (amd_df_indirect_read(nid, 0, 0x110 + (8 * base), umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &tmp)) goto out_err; /* Check if address range is valid. */ @@ -1092,7 +1105,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr } /* Read D18F0x114 (DramLimitAddress). */ - if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &tmp)) goto out_err; intlv_num_sockets = (tmp >> 8) & 0x1; @@ -1148,7 +1161,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr * umc/channel# as instance id of the coherent slave * for FICAA. */ - if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x50, umc, &tmp)) goto out_err; cs_fabric_id = (tmp >> 8) & 0xFF; @@ -1165,7 +1178,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr /* Read D18F1x208 (SystemFabricIdMask). */ if (intlv_num_dies || intlv_num_sockets) - if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp)) + if (df_indirect_read_broadcast(nid, 1, 0x208, &tmp)) goto out_err; /* If interleaved over more than 1 die. */ @@ -1204,7 +1217,7 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr /* If legacy MMIO hole enabled */ if (lgcy_mmio_hole_en) { - if (amd_df_indirect_read(nid, 0, 0x104, umc, &tmp)) + if (df_indirect_read_broadcast(nid, 0, 0x104, &tmp)) goto out_err; dram_hole_base = tmp & GENMASK(31, 24); -- cgit v1.2.3 From 70aeb807cf8649dedbcd59b70dfc38fb89bdf1bd Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 28 Oct 2021 17:56:59 +0000 Subject: EDAC/amd64: Add context struct Define an address translation context struct. This will hold values that will be passed between multiple functions. Save return address, Node ID, and the Instance ID number to start. Currently, the UMC number is used as the Instance ID, but future DF versions may use another value. Also include a "tmp" field to use when reading registers. This is to avoid having to define a temporary variable in multiple functions. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20211028175728.121452-5-yazen.ghannam@amd.com --- drivers/edac/amd64_edac.c | 97 +++++++++++++++++++++++++++-------------------- 1 file changed, 55 insertions(+), 42 deletions(-) (limited to 'drivers/edac') diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index d41b9a02cc7d..ca0c67bc25c6 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1051,13 +1051,16 @@ static int df_indirect_read_broadcast(u16 node, u8 func, u16 reg, u32 *lo) return __df_indirect_read(node, func, reg, DF_BROADCAST, lo); } +struct addr_ctx { + u64 ret_addr; + u32 tmp; + u16 nid; + u8 inst_id; +}; + static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { u64 dram_base_addr, dram_limit_addr, dram_hole_base; - /* We start from the normalized address */ - u64 ret_addr = norm_addr; - - u32 tmp; u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask; u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets; @@ -1067,35 +1070,45 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr u8 cs_mask, cs_id = 0; bool hash_enabled = false; + struct addr_ctx ctx; + + memset(&ctx, 0, sizeof(ctx)); + + /* Start from the normalized address */ + ctx.ret_addr = norm_addr; + + ctx.nid = nid; + ctx.inst_id = umc; + /* Read D18F0x1B4 (DramOffset), check if base 1 is used. */ - if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x1B4, umc, &ctx.tmp)) goto out_err; /* Remove HiAddrOffset from normalized address, if enabled: */ - if (tmp & BIT(0)) { - u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8; + if (ctx.tmp & BIT(0)) { + u64 hi_addr_offset = (ctx.tmp & GENMASK_ULL(31, 20)) << 8; if (norm_addr >= hi_addr_offset) { - ret_addr -= hi_addr_offset; + ctx.ret_addr -= hi_addr_offset; base = 1; } } /* Read D18F0x110 (DramBaseAddress). */ - if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x110 + (8 * base), umc, &ctx.tmp)) goto out_err; /* Check if address range is valid. */ - if (!(tmp & BIT(0))) { + if (!(ctx.tmp & BIT(0))) { pr_err("%s: Invalid DramBaseAddress range: 0x%x.\n", - __func__, tmp); + __func__, ctx.tmp); goto out_err; } - lgcy_mmio_hole_en = tmp & BIT(1); - intlv_num_chan = (tmp >> 4) & 0xF; - intlv_addr_sel = (tmp >> 8) & 0x7; - dram_base_addr = (tmp & GENMASK_ULL(31, 12)) << 16; + lgcy_mmio_hole_en = ctx.tmp & BIT(1); + intlv_num_chan = (ctx.tmp >> 4) & 0xF; + intlv_addr_sel = (ctx.tmp >> 8) & 0x7; + dram_base_addr = (ctx.tmp & GENMASK_ULL(31, 12)) << 16; /* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */ if (intlv_addr_sel > 3) { @@ -1105,12 +1118,12 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr } /* Read D18F0x114 (DramLimitAddress). */ - if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x114 + (8 * base), umc, &ctx.tmp)) goto out_err; - intlv_num_sockets = (tmp >> 8) & 0x1; - intlv_num_dies = (tmp >> 10) & 0x3; - dram_limit_addr = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); + intlv_num_sockets = (ctx.tmp >> 8) & 0x1; + intlv_num_dies = (ctx.tmp >> 10) & 0x3; + dram_limit_addr = ((ctx.tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0); intlv_addr_bit = intlv_addr_sel + 8; @@ -1161,10 +1174,10 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr * umc/channel# as instance id of the coherent slave * for FICAA. */ - if (df_indirect_read_instance(nid, 0, 0x50, umc, &tmp)) + if (df_indirect_read_instance(nid, 0, 0x50, umc, &ctx.tmp)) goto out_err; - cs_fabric_id = (tmp >> 8) & 0xFF; + cs_fabric_id = (ctx.tmp >> 8) & 0xFF; die_id_bit = 0; /* If interleaved over more than 1 channel: */ @@ -1178,22 +1191,22 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr /* Read D18F1x208 (SystemFabricIdMask). */ if (intlv_num_dies || intlv_num_sockets) - if (df_indirect_read_broadcast(nid, 1, 0x208, &tmp)) + if (df_indirect_read_broadcast(nid, 1, 0x208, &ctx.tmp)) goto out_err; /* If interleaved over more than 1 die. */ if (intlv_num_dies) { sock_id_bit = die_id_bit + intlv_num_dies; - die_id_shift = (tmp >> 24) & 0xF; - die_id_mask = (tmp >> 8) & 0xFF; + die_id_shift = (ctx.tmp >> 24) & 0xF; + die_id_mask = (ctx.tmp >> 8) & 0xFF; cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit; } /* If interleaved over more than 1 socket. */ if (intlv_num_sockets) { - socket_id_shift = (tmp >> 28) & 0xF; - socket_id_mask = (tmp >> 16) & 0xFF; + socket_id_shift = (ctx.tmp >> 28) & 0xF; + socket_id_mask = (ctx.tmp >> 16) & 0xFF; cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit; } @@ -1206,44 +1219,44 @@ static int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr * bits there are. "intlv_addr_bit" tells us how many "Y" bits * there are (where "I" starts). */ - temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0); + temp_addr_y = ctx.ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0); temp_addr_i = (cs_id << intlv_addr_bit); - temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; - ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; + temp_addr_x = (ctx.ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits; + ctx.ret_addr = temp_addr_x | temp_addr_i | temp_addr_y; } /* Add dram base address */ - ret_addr += dram_base_addr; + ctx.ret_addr += dram_base_addr; /* If legacy MMIO hole enabled */ if (lgcy_mmio_hole_en) { - if (df_indirect_read_broadcast(nid, 0, 0x104, &tmp)) + if (df_indirect_read_broadcast(nid, 0, 0x104, &ctx.tmp)) goto out_err; - dram_hole_base = tmp & GENMASK(31, 24); - if (ret_addr >= dram_hole_base) - ret_addr += (BIT_ULL(32) - dram_hole_base); + dram_hole_base = ctx.tmp & GENMASK(31, 24); + if (ctx.ret_addr >= dram_hole_base) + ctx.ret_addr += (BIT_ULL(32) - dram_hole_base); } if (hash_enabled) { /* Save some parentheses and grab ls-bit at the end. */ - hashed_bit = (ret_addr >> 12) ^ - (ret_addr >> 18) ^ - (ret_addr >> 21) ^ - (ret_addr >> 30) ^ + hashed_bit = (ctx.ret_addr >> 12) ^ + (ctx.ret_addr >> 18) ^ + (ctx.ret_addr >> 21) ^ + (ctx.ret_addr >> 30) ^ cs_id; hashed_bit &= BIT(0); - if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0))) - ret_addr ^= BIT(intlv_addr_bit); + if (hashed_bit != ((ctx.ret_addr >> intlv_addr_bit) & BIT(0))) + ctx.ret_addr ^= BIT(intlv_addr_bit); } /* Is calculated system address is above DRAM limit address? */ - if (ret_addr > dram_limit_addr) + if (ctx.ret_addr > dram_limit_addr) goto out_err; - *sys_addr = ret_addr; + *sys_addr = ctx.ret_addr; return 0; out_err: -- cgit v1.2.3 From 5176a93ab27aef1b9f4496fc68e6c303a011d7cc Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 16 Dec 2021 16:29:04 +0000 Subject: x86/MCE/AMD, EDAC/mce_amd: Add new SMCA bank types Add HWID and McaType values for new SMCA bank types, and add their error descriptions to edac_mce_amd. The "PHY" bank types all have the same error descriptions, and the NBIF and SHUB bank types have the same error descriptions. So reuse the same arrays where appropriate. [ bp: Remove useless comments over hwid types. ] Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211216162905.4132657-2-yazen.ghannam@amd.com --- arch/x86/include/asm/mce.h | 7 +++ arch/x86/kernel/cpu/mce/amd.c | 21 +++++-- drivers/edac/mce_amd.c | 135 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 151 insertions(+), 12 deletions(-) (limited to 'drivers/edac') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index d58f4f2e006f..52d2b35cac2d 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -313,12 +313,19 @@ enum smca_bank_types { SMCA_SMU, /* System Management Unit */ SMCA_SMU_V2, SMCA_MP5, /* Microprocessor 5 Unit */ + SMCA_MPDMA, /* MPDMA Unit */ SMCA_NBIO, /* Northbridge IO Unit */ SMCA_PCIE, /* PCI Express Unit */ SMCA_PCIE_V2, SMCA_XGMI_PCS, /* xGMI PCS Unit */ + SMCA_NBIF, /* NBIF Unit */ + SMCA_SHUB, /* System HUB Unit */ + SMCA_SATA, /* SATA Unit */ + SMCA_USB, /* USB Unit */ + SMCA_GMI_PCS, /* GMI PCS Unit */ SMCA_XGMI_PHY, /* xGMI PHY Unit */ SMCA_WAFL_PHY, /* WAFL PHY Unit */ + SMCA_GMI_PHY, /* GMI PHY Unit */ N_SMCA_BANK_TYPES }; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 2f351838d5f7..9407cdc1e081 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -95,11 +95,18 @@ static struct smca_bank_name smca_names[] = { [SMCA_PSP ... SMCA_PSP_V2] = { "psp", "Platform Security Processor" }, [SMCA_SMU ... SMCA_SMU_V2] = { "smu", "System Management Unit" }, [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" }, + [SMCA_MPDMA] = { "mpdma", "MPDMA Unit" }, [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" }, [SMCA_PCIE ... SMCA_PCIE_V2] = { "pcie", "PCI Express Unit" }, [SMCA_XGMI_PCS] = { "xgmi_pcs", "Ext Global Memory Interconnect PCS Unit" }, + [SMCA_NBIF] = { "nbif", "NBIF Unit" }, + [SMCA_SHUB] = { "shub", "System Hub Unit" }, + [SMCA_SATA] = { "sata", "SATA Unit" }, + [SMCA_USB] = { "usb", "USB Unit" }, + [SMCA_GMI_PCS] = { "gmi_pcs", "Global Memory Interconnect PCS Unit" }, [SMCA_XGMI_PHY] = { "xgmi_phy", "Ext Global Memory Interconnect PHY Unit" }, [SMCA_WAFL_PHY] = { "wafl_phy", "WAFL PHY Unit" }, + [SMCA_GMI_PHY] = { "gmi_phy", "Global Memory Interconnect PHY Unit" }, }; static const char *smca_get_name(enum smca_bank_types t) @@ -174,6 +181,9 @@ static struct smca_hwid smca_hwid_mcatypes[] = { /* Microprocessor 5 Unit MCA type */ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2) }, + /* MPDMA MCA type */ + { SMCA_MPDMA, HWID_MCATYPE(0x01, 0x3) }, + /* Northbridge IO Unit MCA type */ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0) }, @@ -181,14 +191,15 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0) }, { SMCA_PCIE_V2, HWID_MCATYPE(0x46, 0x1) }, - /* xGMI PCS MCA type */ { SMCA_XGMI_PCS, HWID_MCATYPE(0x50, 0x0) }, - - /* xGMI PHY MCA type */ + { SMCA_NBIF, HWID_MCATYPE(0x6C, 0x0) }, + { SMCA_SHUB, HWID_MCATYPE(0x80, 0x0) }, + { SMCA_SATA, HWID_MCATYPE(0xA8, 0x0) }, + { SMCA_USB, HWID_MCATYPE(0xAA, 0x0) }, + { SMCA_GMI_PCS, HWID_MCATYPE(0x241, 0x0) }, { SMCA_XGMI_PHY, HWID_MCATYPE(0x259, 0x0) }, - - /* WAFL PHY MCA type */ { SMCA_WAFL_PHY, HWID_MCATYPE(0x267, 0x0) }, + { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) }, }; struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index 67dbf4c31271..cfd3f7ae9251 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -399,6 +399,63 @@ static const char * const smca_mp5_mce_desc[] = { "Instruction Tag Cache Bank B ECC or parity error", }; +static const char * const smca_mpdma_mce_desc[] = { + "Main SRAM [31:0] bank ECC or parity error", + "Main SRAM [63:32] bank ECC or parity error", + "Main SRAM [95:64] bank ECC or parity error", + "Main SRAM [127:96] bank ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "Data Cache Bank A ECC or parity error", + "Data Cache Bank B ECC or parity error", + "Data Tag Cache Bank A ECC or parity error", + "Data Tag Cache Bank B ECC or parity error", + "Instruction Cache Bank A ECC or parity error", + "Instruction Cache Bank B ECC or parity error", + "Instruction Tag Cache Bank A ECC or parity error", + "Instruction Tag Cache Bank B ECC or parity error", + "System Hub Read Buffer ECC or parity error", + "MPDMA TVF DVSEC Memory ECC or parity error", + "MPDMA TVF MMIO Mailbox0 ECC or parity error", + "MPDMA TVF MMIO Mailbox1 ECC or parity error", + "MPDMA TVF Doorbell Memory ECC or parity error", + "MPDMA TVF SDP Slave Memory 0 ECC or parity error", + "MPDMA TVF SDP Slave Memory 1 ECC or parity error", + "MPDMA TVF SDP Slave Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 0 ECC or parity error", + "MPDMA TVF SDP Master Memory 1 ECC or parity error", + "MPDMA TVF SDP Master Memory 2 ECC or parity error", + "MPDMA TVF SDP Master Memory 3 ECC or parity error", + "MPDMA TVF SDP Master Memory 4 ECC or parity error", + "MPDMA TVF SDP Master Memory 5 ECC or parity error", + "MPDMA TVF SDP Master Memory 6 ECC or parity error", + "MPDMA PTE Command FIFO ECC or parity error", + "MPDMA PTE Hub Data FIFO ECC or parity error", + "MPDMA PTE Internal Data FIFO ECC or parity error", + "MPDMA PTE Command Memory DMA ECC or parity error", + "MPDMA PTE Command Memory Internal ECC or parity error", + "MPDMA PTE DMA Completion FIFO ECC or parity error", + "MPDMA PTE Tablewalk Completion FIFO ECC or parity error", + "MPDMA PTE Descriptor Completion FIFO ECC or parity error", + "MPDMA PTE ReadOnly Completion FIFO ECC or parity error", + "MPDMA PTE DirectWrite Completion FIFO ECC or parity error", + "SDP Watchdog Timer expired", +}; + static const char * const smca_nbio_mce_desc[] = { "ECC or Parity error", "PCIE error", @@ -448,7 +505,7 @@ static const char * const smca_xgmipcs_mce_desc[] = { "Rx Replay Timeout Error", "LinkSub Tx Timeout Error", "LinkSub Rx Timeout Error", - "Rx CMD Pocket Error", + "Rx CMD Packet Error", }; static const char * const smca_xgmiphy_mce_desc[] = { @@ -458,11 +515,66 @@ static const char * const smca_xgmiphy_mce_desc[] = { "PHY APB error", }; -static const char * const smca_waflphy_mce_desc[] = { - "RAM ECC Error", - "ARC instruction buffer parity error", - "ARC data buffer parity error", - "PHY APB error", +static const char * const smca_nbif_mce_desc[] = { + "Timeout error from GMI", + "SRAM ECC error", + "NTB Error Event", + "SDP Parity error", +}; + +static const char * const smca_sata_mce_desc[] = { + "Parity error for port 0", + "Parity error for port 1", + "Parity error for port 2", + "Parity error for port 3", + "Parity error for port 4", + "Parity error for port 5", + "Parity error for port 6", + "Parity error for port 7", +}; + +static const char * const smca_usb_mce_desc[] = { + "Parity error or ECC error for S0 RAM0", + "Parity error or ECC error for S0 RAM1", + "Parity error or ECC error for S0 RAM2", + "Parity error for PHY RAM0", + "Parity error for PHY RAM1", + "AXI Slave Response error", +}; + +static const char * const smca_gmipcs_mce_desc[] = { + "Data Loss Error", + "Training Error", + "Replay Parity Error", + "Rx Fifo Underflow Error", + "Rx Fifo Overflow Error", + "CRC Error", + "BER Exceeded Error", + "Tx Fifo Underflow Error", + "Replay Buffer Parity Error", + "Tx Overflow Error", + "Replay Fifo Overflow Error", + "Replay Fifo Underflow Error", + "Elastic Fifo Overflow Error", + "Deskew Error", + "Offline Error", + "Data Startup Limit Error", + "FC Init Timeout Error", + "Recovery Timeout Error", + "Ready Serial Timeout Error", + "Ready Serial Attempt Error", + "Recovery Attempt Error", + "Recovery Relock Attempt Error", + "Deskew Abort Error", + "Rx Buffer Error", + "Rx LFDS Fifo Overflow Error", + "Rx LFDS Fifo Underflow Error", + "LinkSub Tx Timeout Error", + "LinkSub Rx Timeout Error", + "Rx CMD Packet Error", + "LFDS Training Timeout Error", + "LFDS FC Init Timeout Error", + "Data Loss Error", }; struct smca_mce_desc { @@ -490,12 +602,21 @@ static struct smca_mce_desc smca_mce_descs[] = { [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) }, [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) }, + [SMCA_MPDMA] = { smca_mpdma_mce_desc, ARRAY_SIZE(smca_mpdma_mce_desc) }, [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) }, [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) }, [SMCA_PCIE_V2] = { smca_pcie2_mce_desc, ARRAY_SIZE(smca_pcie2_mce_desc) }, [SMCA_XGMI_PCS] = { smca_xgmipcs_mce_desc, ARRAY_SIZE(smca_xgmipcs_mce_desc) }, + /* NBIF and SHUB have the same error descriptions, for now. */ + [SMCA_NBIF] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SHUB] = { smca_nbif_mce_desc, ARRAY_SIZE(smca_nbif_mce_desc) }, + [SMCA_SATA] = { smca_sata_mce_desc, ARRAY_SIZE(smca_sata_mce_desc) }, + [SMCA_USB] = { smca_usb_mce_desc, ARRAY_SIZE(smca_usb_mce_desc) }, + [SMCA_GMI_PCS] = { smca_gmipcs_mce_desc, ARRAY_SIZE(smca_gmipcs_mce_desc) }, + /* All the PHY bank types have the same error descriptions, for now. */ [SMCA_XGMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, - [SMCA_WAFL_PHY] = { smca_waflphy_mce_desc, ARRAY_SIZE(smca_waflphy_mce_desc) }, + [SMCA_WAFL_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, + [SMCA_GMI_PHY] = { smca_xgmiphy_mce_desc, ARRAY_SIZE(smca_xgmiphy_mce_desc) }, }; static bool f12h_mc0_mce(u16 ec, u8 xec) -- cgit v1.2.3 From 91f75eb481cfaee5c4ed8fb5214bf2fbfa04bd7b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 16 Dec 2021 16:29:05 +0000 Subject: x86/MCE/AMD, EDAC/mce_amd: Support non-uniform MCA bank type enumeration AMD systems currently lay out MCA bank types such that the type of bank number "i" is either the same across all CPUs or is Reserved/Read-as-Zero. For example: Bank # | CPUx | CPUy 0 LS LS 1 RAZ UMC 2 CS CS 3 SMU RAZ Future AMD systems will lay out MCA bank types such that the type of bank number "i" may be different across CPUs. For example: Bank # | CPUx | CPUy 0 LS LS 1 RAZ UMC 2 CS NBIO 3 SMU RAZ Change the structures that cache MCA bank types to be per-CPU and update smca_get_bank_type() to handle this change. Move some SMCA-specific structures to amd.c from mce.h, since they no longer need to be global. Break out the "count" for bank types from struct smca_hwid, since this should provide a per-CPU count rather than a system-wide count. Apply the "const" qualifier to the struct smca_hwid_mcatypes array. The values in this array should not change at runtime. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: https://lore.kernel.org/r/20211216162905.4132657-3-yazen.ghannam@amd.com --- arch/x86/include/asm/mce.h | 18 +--------- arch/x86/kernel/cpu/mce/amd.c | 59 +++++++++++++++++++-------------- drivers/edac/mce_amd.c | 11 ++---- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 2 +- 4 files changed, 39 insertions(+), 51 deletions(-) (limited to 'drivers/edac') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 52d2b35cac2d..cc73061e7255 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -329,22 +329,6 @@ enum smca_bank_types { N_SMCA_BANK_TYPES }; -#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) - -struct smca_hwid { - unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ - u32 hwid_mcatype; /* (hwid,mcatype) tuple */ - u8 count; /* Number of instances. */ -}; - -struct smca_bank { - struct smca_hwid *hwid; - u32 id; /* Value of MCA_IPID[InstanceId]. */ - u8 sysfs_id; /* Value used for sysfs name. */ -}; - -extern struct smca_bank smca_banks[MAX_NR_BANKS]; - extern const char *smca_get_long_name(enum smca_bank_types t); extern bool amd_mce_is_memory_error(struct mce *m); @@ -352,7 +336,7 @@ extern int mce_threshold_create_device(unsigned int cpu); extern int mce_threshold_remove_device(unsigned int cpu); void mce_amd_feature_init(struct cpuinfo_x86 *c); -enum smca_bank_types smca_get_bank_type(unsigned int bank); +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank); #else static inline int mce_threshold_create_device(unsigned int cpu) { return 0; }; diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9407cdc1e081..a1e2f41796dc 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -71,6 +71,22 @@ static const char * const smca_umc_block_names[] = { "misc_umc" }; +#define HWID_MCATYPE(hwid, mcatype) (((hwid) << 16) | (mcatype)) + +struct smca_hwid { + unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ + u32 hwid_mcatype; /* (hwid,mcatype) tuple */ +}; + +struct smca_bank { + const struct smca_hwid *hwid; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct smca_bank[MAX_NR_BANKS], smca_banks); +static DEFINE_PER_CPU_READ_MOSTLY(u8[N_SMCA_BANK_TYPES], smca_bank_counts); + struct smca_bank_name { const char *name; /* Short name for sysfs */ const char *long_name; /* Long name for pretty-printing */ @@ -126,14 +142,14 @@ const char *smca_get_long_name(enum smca_bank_types t) } EXPORT_SYMBOL_GPL(smca_get_long_name); -enum smca_bank_types smca_get_bank_type(unsigned int bank) +enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank) { struct smca_bank *b; if (bank >= MAX_NR_BANKS) return N_SMCA_BANK_TYPES; - b = &smca_banks[bank]; + b = &per_cpu(smca_banks, cpu)[bank]; if (!b->hwid) return N_SMCA_BANK_TYPES; @@ -141,7 +157,7 @@ enum smca_bank_types smca_get_bank_type(unsigned int bank) } EXPORT_SYMBOL_GPL(smca_get_bank_type); -static struct smca_hwid smca_hwid_mcatypes[] = { +static const struct smca_hwid smca_hwid_mcatypes[] = { /* { bank_type, hwid_mcatype } */ /* Reserved type */ @@ -202,9 +218,6 @@ static struct smca_hwid smca_hwid_mcatypes[] = { { SMCA_GMI_PHY, HWID_MCATYPE(0x269, 0x0) }, }; -struct smca_bank smca_banks[MAX_NR_BANKS]; -EXPORT_SYMBOL_GPL(smca_banks); - /* * In SMCA enabled processors, we can have multiple banks for a given IP type. * So to define a unique name for each bank, we use a temp c-string to append @@ -260,8 +273,9 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu) static void smca_configure(unsigned int bank, unsigned int cpu) { + u8 *bank_counts = this_cpu_ptr(smca_bank_counts); + const struct smca_hwid *s_hwid; unsigned int i, hwid_mcatype; - struct smca_hwid *s_hwid; u32 high, low; u32 smca_config = MSR_AMD64_SMCA_MCx_CONFIG(bank); @@ -297,10 +311,6 @@ static void smca_configure(unsigned int bank, unsigned int cpu) smca_set_misc_banks_map(bank, cpu); - /* Return early if this bank was already initialized. */ - if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0) - return; - if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) { pr_warn("Failed to read MCA_IPID for bank %d\n", bank); return; @@ -311,10 +321,11 @@ static void smca_configure(unsigned int bank, unsigned int cpu) for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) { s_hwid = &smca_hwid_mcatypes[i]; + if (hwid_mcatype == s_hwid->hwid_mcatype) { - smca_banks[bank].hwid = s_hwid; - smca_banks[bank].id = low; - smca_banks[bank].sysfs_id = s_hwid->count++; + this_cpu_ptr(smca_banks)[bank].hwid = s_hwid; + this_cpu_ptr(smca_banks)[bank].id = low; + this_cpu_ptr(smca_banks)[bank].sysfs_id = bank_counts[s_hwid->bank_type]++; break; } } @@ -600,7 +611,7 @@ out: bool amd_filter_mce(struct mce *m) { - enum smca_bank_types bank_type = smca_get_bank_type(m->bank); + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); struct cpuinfo_x86 *c = &boot_cpu_data; /* See Family 17h Models 10h-2Fh Erratum #1114. */ @@ -638,7 +649,7 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank) } else if (c->x86 == 0x17 && (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) { - if (smca_get_bank_type(bank) != SMCA_IF) + if (smca_get_bank_type(smp_processor_id(), bank) != SMCA_IF) return; msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank); @@ -706,7 +717,7 @@ bool amd_mce_is_memory_error(struct mce *m) u8 xec = (m->status >> 16) & 0x1f; if (mce_flags.smca) - return smca_get_bank_type(m->bank) == SMCA_UMC && xec == 0x0; + return smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC && xec == 0x0; return m->bank == 4 && xec == 0x8; } @@ -1022,7 +1033,7 @@ static struct kobj_type threshold_ktype = { .release = threshold_block_release, }; -static const char *get_name(unsigned int bank, struct threshold_block *b) +static const char *get_name(unsigned int cpu, unsigned int bank, struct threshold_block *b) { enum smca_bank_types bank_type; @@ -1033,7 +1044,7 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return th_names[bank]; } - bank_type = smca_get_bank_type(bank); + bank_type = smca_get_bank_type(cpu, bank); if (bank_type >= N_SMCA_BANK_TYPES) return NULL; @@ -1043,12 +1054,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return NULL; } - if (smca_banks[bank].hwid->count == 1) + if (per_cpu(smca_bank_counts, cpu)[bank_type] == 1) return smca_get_name(bank_type); snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, - "%s_%x", smca_get_name(bank_type), - smca_banks[bank].sysfs_id); + "%s_%u", smca_get_name(bank_type), + per_cpu(smca_banks, cpu)[bank].sysfs_id); return buf_mcatype; } @@ -1104,7 +1115,7 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb else tb->blocks = b; - err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(bank, b)); + err = kobject_init_and_add(&b->kobj, &threshold_ktype, tb->kobj, get_name(cpu, bank, b)); if (err) goto out_free; recurse: @@ -1159,7 +1170,7 @@ static int threshold_create_bank(struct threshold_bank **bp, unsigned int cpu, struct device *dev = this_cpu_read(mce_device); struct amd_northbridge *nb = NULL; struct threshold_bank *b = NULL; - const char *name = get_name(bank, NULL); + const char *name = get_name(cpu, bank, NULL); int err = 0; if (!dev) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index cfd3f7ae9251..cc5c63feb26a 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1166,20 +1166,13 @@ static void decode_mc6_mce(struct mce *m) /* Decode errors according to Scalable MCA specification */ static void decode_smca_error(struct mce *m) { - struct smca_hwid *hwid; - enum smca_bank_types bank_type; + enum smca_bank_types bank_type = smca_get_bank_type(m->extcpu, m->bank); const char *ip_name; u8 xec = XEC(m->status, xec_mask); - if (m->bank >= ARRAY_SIZE(smca_banks)) + if (bank_type >= N_SMCA_BANK_TYPES) return; - hwid = smca_banks[m->bank].hwid; - if (!hwid) - return; - - bank_type = hwid->bank_type; - if (bank_type == SMCA_RESERVED) { pr_emerg(HW_ERR "Bank %d is reserved.\n", m->bank); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 08133de21fdd..75dad0214dc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2647,7 +2647,7 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, * and error occurred in DramECC (Extended error code = 0) then only * process the error, else bail out. */ - if (!m || !((smca_get_bank_type(m->bank) == SMCA_UMC_V2) && + if (!m || !((smca_get_bank_type(m->extcpu, m->bank) == SMCA_UMC_V2) && (XEC(m->status, 0x3f) == 0x0))) return NOTIFY_DONE; -- cgit v1.2.3