diff options
Diffstat (limited to 'drivers/ras/amd')
-rw-r--r-- | drivers/ras/amd/atl/access.c | 27 | ||||
-rw-r--r-- | drivers/ras/amd/atl/dehash.c | 95 | ||||
-rw-r--r-- | drivers/ras/amd/atl/denormalize.c | 102 | ||||
-rw-r--r-- | drivers/ras/amd/atl/internal.h | 10 | ||||
-rw-r--r-- | drivers/ras/amd/atl/map.c | 17 | ||||
-rw-r--r-- | drivers/ras/amd/atl/reg_fields.h | 9 | ||||
-rw-r--r-- | drivers/ras/amd/atl/system.c | 3 | ||||
-rw-r--r-- | drivers/ras/amd/atl/umc.c | 51 |
8 files changed, 309 insertions, 5 deletions
diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c index f6dd87bb2c35..ee4661ed28ba 100644 --- a/drivers/ras/amd/atl/access.c +++ b/drivers/ras/amd/atl/access.c @@ -36,6 +36,32 @@ static DEFINE_MUTEX(df_indirect_mutex); #define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2) +static u16 get_accessible_node(u16 node) +{ + /* + * On heterogeneous systems, not all AMD Nodes are accessible + * through software-visible registers. The Node ID needs to be + * adjusted for register accesses. But its value should not be + * changed for the translation methods. + */ + if (df_cfg.flags.heterogeneous) { + /* Only Node 0 is accessible on DF3.5 systems. */ + if (df_cfg.rev == DF3p5) + node = 0; + + /* + * Only the first Node in each Socket is accessible on + * DF4.5 systems, and this is visible to software as one + * Fabric per Socket. The Socket ID can be derived from + * the Node ID and global shift values. + */ + if (df_cfg.rev == DF4p5) + node >>= df_cfg.socket_id_shift - df_cfg.node_id_shift; + } + + return node; +} + static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo) { u32 ficaa_addr = 0x8C, ficad_addr = 0xB8; @@ -43,6 +69,7 @@ static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *l int err = -ENODEV; u32 ficaa = 0; + node = get_accessible_node(node); if (node >= amd_nb_num()) goto out; diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c index 6f414926e6fe..4ea46262c4f5 100644 --- a/drivers/ras/amd/atl/dehash.c +++ b/drivers/ras/amd/atl/dehash.c @@ -253,7 +253,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); - hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); /* * Generate a unique address to determine which bits @@ -343,6 +343,94 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx) return 0; } +/* + * MI300 hash bits + * 4K 64K 2M 1G 1T 1T + * COH_ST_Select[0] = XOR of addr{8, 12, 15, 22, 29, 36, 43} + * COH_ST_Select[1] = XOR of addr{9, 13, 16, 23, 30, 37, 44} + * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45} + * COH_ST_Select[3] = XOR of addr{11, 18, 25, 32, 39, 46} + * COH_ST_Select[4] = XOR of addr{14, 19, 26, 33, 40, 47} aka Stack + * DieID[0] = XOR of addr{12, 20, 27, 34, 41 } + * DieID[1] = XOR of addr{13, 21, 28, 35, 42 } + */ +static int mi300_dehash_addr(struct addr_ctx *ctx) +{ + bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T; + bool hashed_bit, intlv_bit, test_bit; + u8 num_intlv_bits, base_bit, i; + + if (!map_bits_valid(ctx, 8, 8, 4, 1)) + return -EINVAL; + + hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl); + hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl); + hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl); + hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl); + hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl); + + /* Channel bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 8 + i; + + /* COH_ST_Select[4] jumps to a base bit of 14. */ + if (i == 4) + base_bit = 14; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + /* 4k hash bit only applies to the first 3 bits. */ + if (i <= 2) { + test_bit = BIT_ULL(12 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_4k; + } + + /* Use temporary 'test_bit' value to avoid Sparse warnings. */ + test_bit = BIT_ULL(15 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(22 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(29 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(36 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + test_bit = BIT_ULL(43 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + /* Die bits */ + num_intlv_bits = ilog2(ctx->map.num_intlv_dies); + + for (i = 0; i < num_intlv_bits; i++) { + base_bit = 12 + i; + + intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr; + + hashed_bit = intlv_bit; + + test_bit = BIT_ULL(20 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_64k; + test_bit = BIT_ULL(27 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_2M; + test_bit = BIT_ULL(34 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1G; + test_bit = BIT_ULL(41 + i) & ctx->ret_addr; + hashed_bit ^= test_bit & hash_ctl_1T; + + if (hashed_bit != intlv_bit) + ctx->ret_addr ^= BIT_ULL(base_bit); + } + + return 0; +} + int dehash_address(struct addr_ctx *ctx) { switch (ctx->map.intlv_mode) { @@ -400,6 +488,11 @@ int dehash_address(struct addr_ctx *ctx) case DF4p5_NPS1_16CHAN_2K_HASH: return df4p5_dehash_addr(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return mi300_dehash_addr(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return -EINVAL; diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c index 01f1d0fb6799..d5d0e1fda159 100644 --- a/drivers/ras/amd/atl/denormalize.c +++ b/drivers/ras/amd/atl/denormalize.c @@ -81,6 +81,40 @@ static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx) } /* + * Make space for CS ID at bits [14:8] as follows: + * + * 8 channels -> bits [10:8] + * 16 channels -> bits [11:8] + * 32 channels -> bits [14,11:8] + * + * 1 die -> N/A + * 2 dies -> bit [12] + * 4 dies -> bits [13:12] + */ +static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan); + u64 denorm_addr; + + if (ctx->map.intlv_bit_pos != 8) { + pr_debug("Invalid interleave bit: %u", ctx->map.intlv_bit_pos); + return ~0ULL; + } + + /* Channel bits. Covers up to 4 bits at [11:8]. */ + denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr); + + /* Die bits. Always starts at [12]. */ + denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr); + + /* Additional channel bit at [14]. */ + if (num_intlv_bits > 4) + denorm_addr = expand_bits(14, 1, denorm_addr); + + return denorm_addr; +} + +/* * Take the current calculated address and shift enough bits in the middle * to make a gap where the interleave bits will be inserted. */ @@ -107,6 +141,12 @@ static u64 make_space_for_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_8CHAN_2K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return make_space_for_coh_st_id_split_2_1(ctx); + + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return make_space_for_coh_st_id_mi300(ctx); + default: atl_debug_on_bad_intlv_mode(ctx); return ~0ULL; @@ -205,6 +245,32 @@ static u16 get_coh_st_id_df4(struct addr_ctx *ctx) } /* + * MI300 hash has: + * (C)hannel[3:0] = coh_st_id[3:0] + * (S)tack[0] = coh_st_id[4] + * (D)ie[1:0] = coh_st_id[6:5] + * + * Hashed coh_st_id is swizzled so that Stack bit is at the end. + * coh_st_id = SDDCCCC + */ +static u16 get_coh_st_id_mi300(struct addr_ctx *ctx) +{ + u8 channel_bits, die_bits, stack_bit; + u16 die_id; + + /* Subtract the "base" Destination Fabric ID. */ + ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx); + + die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift; + + channel_bits = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id); + stack_bit = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6; + die_bits = die_id << 4; + + return stack_bit | die_bits | channel_bits; +} + +/* * Derive the correct Coherent Station ID that represents the interleave bits * used within the system physical address. This accounts for the * interleave mode, number of interleaved channels/dies/sockets, and @@ -237,6 +303,11 @@ static u16 calculate_coh_st_id(struct addr_ctx *ctx) case DF4p5_NPS1_16CHAN_2K_HASH: return get_coh_st_id_df4(ctx); + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: + return get_coh_st_id_mi300(ctx); + /* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */ case DF4p5_NPS2_4CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_1K_HASH: @@ -287,6 +358,9 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id case NOHASH_8CHAN: case NOHASH_16CHAN: case NOHASH_32CHAN: + case MI3_HASH_8CHAN: + case MI3_HASH_16CHAN: + case MI3_HASH_32CHAN: case DF2_2CHAN_HASH: return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id); @@ -314,6 +388,31 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id } } +/* + * MI300 systems have a fixed, hardware-defined physical-to-logical + * Coherent Station mapping. The Remap registers are not used. + */ +static const u16 phy_to_log_coh_st_map_mi300[] = { + 12, 13, 14, 15, + 8, 9, 10, 11, + 4, 5, 6, 7, + 0, 1, 2, 3, + 28, 29, 30, 31, + 24, 25, 26, 27, + 20, 21, 22, 23, + 16, 17, 18, 19, +}; + +static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx) +{ + if (ctx->inst_id >= sizeof(phy_to_log_coh_st_map_mi300)) { + atl_debug(ctx, "Instance ID out of range"); + return ~0; + } + + return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift); +} + static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) { u16 component_id, log_fabric_id; @@ -321,6 +420,9 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx) /* Start with the physical COH_ST Fabric ID. */ u16 phys_fabric_id = ctx->coh_st_fabric_id; + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_logical_coh_st_fabric_id_mi300(ctx); + /* Skip logical ID lookup if remapping is disabled. */ if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) && ctx->map.intlv_mode != DF3_6CHAN) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index 13f1b6098c96..21d45755e5f2 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -27,8 +27,12 @@ /* PCI ID for Zen4 Server DF Function 0. */ #define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022 +/* PCI IDs for MI300 DF Function 0. */ +#define DF_FUNC0_ID_MI300 0x15281022 + /* Shift needed for adjusting register values to true values. */ #define DF_DRAM_BASE_LIMIT_LSB 28 +#define MI300_DRAM_LIMIT_LSB 20 enum df_revisions { UNKNOWN, @@ -59,6 +63,9 @@ enum intlv_modes { DF4_NPS1_12CHAN_HASH = 0x15, DF4_NPS2_5CHAN_HASH = 0x16, DF4_NPS1_10CHAN_HASH = 0x17, + MI3_HASH_8CHAN = 0x18, + MI3_HASH_16CHAN = 0x19, + MI3_HASH_32CHAN = 0x1A, DF2_2CHAN_HASH = 0x21, /* DF4.5 modes are all IntLvNumChan + 0x20 */ DF4p5_NPS1_16CHAN_1K_HASH = 0x2C, @@ -86,7 +93,8 @@ enum intlv_modes { struct df_flags { __u8 legacy_ficaa : 1, socket_id_shift_quirk : 1, - __reserved_0 : 6; + heterogeneous : 1, + __reserved_0 : 5; }; struct df_config { diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c index 33f549b6255a..8b908e8d7495 100644 --- a/drivers/ras/amd/atl/map.c +++ b/drivers/ras/amd/atl/map.c @@ -63,6 +63,10 @@ static int df4p5_get_intlv_mode(struct addr_ctx *ctx) if (ctx->map.intlv_mode <= NOHASH_32CHAN) return 0; + if (ctx->map.intlv_mode >= MI3_HASH_8CHAN && + ctx->map.intlv_mode <= MI3_HASH_32CHAN) + return 0; + /* * Modes matching the ranges above are returned as-is. * @@ -125,6 +129,9 @@ static u64 get_hi_addr_offset(u32 reg_dram_offset) atl_debug_on_bad_df_rev(); } + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + shift = MI300_DRAM_LIMIT_LSB; + return hi_addr_offset << shift; } @@ -369,6 +376,13 @@ static int get_coh_st_fabric_id(struct addr_ctx *ctx) { u32 reg; + /* + * On MI300 systems, the Coherent Station Fabric ID is derived + * later. And it does not depend on the register value. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return 0; + /* Read D18F0x50 (FabricBlockInstanceInformation3). */ if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, ®)) return -EINVAL; @@ -490,6 +504,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case NOHASH_8CHAN: case DF3_COD1_8CHAN_HASH: case DF4_NPS1_8CHAN_HASH: + case MI3_HASH_8CHAN: case DF4p5_NPS1_8CHAN_1K_HASH: case DF4p5_NPS1_8CHAN_2K_HASH: return 8; @@ -502,6 +517,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS1_12CHAN_2K_HASH: return 12; case NOHASH_16CHAN: + case MI3_HASH_16CHAN: case DF4p5_NPS1_16CHAN_1K_HASH: case DF4p5_NPS1_16CHAN_2K_HASH: return 16; @@ -509,6 +525,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx) case DF4p5_NPS0_24CHAN_2K_HASH: return 24; case NOHASH_32CHAN: + case MI3_HASH_32CHAN: return 32; default: atl_debug_on_bad_intlv_mode(ctx); diff --git a/drivers/ras/amd/atl/reg_fields.h b/drivers/ras/amd/atl/reg_fields.h index 6aaa5093f42c..9dcdf6e4a856 100644 --- a/drivers/ras/amd/atl/reg_fields.h +++ b/drivers/ras/amd/atl/reg_fields.h @@ -246,11 +246,11 @@ #define DF3_HASH_CTL_64K BIT(20) #define DF3_HASH_CTL_2M BIT(21) #define DF3_HASH_CTL_1G BIT(22) -#define DF4_HASH_CTL_4K BIT(7) #define DF4_HASH_CTL_64K BIT(8) #define DF4_HASH_CTL_2M BIT(9) #define DF4_HASH_CTL_1G BIT(10) -#define DF4_HASH_CTL_1T BIT(15) +#define DF4p5_HASH_CTL_4K BIT(7) +#define DF4p5_HASH_CTL_1T BIT(15) /* * High Address Offset @@ -268,10 +268,13 @@ * D18F7x140 [DRAM Offset] * DF4 HiAddrOffset [24:1] * DF4p5 HiAddrOffset [24:1] + * MI300 HiAddrOffset [31:1] */ #define DF2_HI_ADDR_OFFSET GENMASK(31, 20) #define DF3_HI_ADDR_OFFSET GENMASK(31, 12) -#define DF4_HI_ADDR_OFFSET GENMASK(24, 1) + +/* Follow reference code by including reserved bits for simplicity. */ +#define DF4_HI_ADDR_OFFSET GENMASK(31, 1) /* * High Address Offset Enable diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c index af61f2f1d6de..46493ed405d6 100644 --- a/drivers/ras/amd/atl/system.c +++ b/drivers/ras/amd/atl/system.c @@ -124,6 +124,9 @@ static int df4_determine_df_rev(u32 reg) if (reg == DF_FUNC0_ID_ZEN4_SERVER) df_cfg.flags.socket_id_shift_quirk = 1; + if (reg == DF_FUNC0_ID_MI300) + df_cfg.flags.heterogeneous = 1; + return df4_get_fabric_id_mask_registers(); } diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index 9d51e4954687..7bfa21a582f0 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -12,9 +12,57 @@ #include "internal.h" +/* + * MI300 has a fixed, model-specific mapping between a UMC instance and + * its related Data Fabric Coherent Station instance. + * + * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the + * UMC instance within a Node. Use this to find the appropriate Coherent + * Station ID. + * + * Redundant bits were removed from the map below. + */ +static const u16 umc_coh_st_map[32] = { + 0x393, 0x293, 0x193, 0x093, + 0x392, 0x292, 0x192, 0x092, + 0x391, 0x291, 0x191, 0x091, + 0x390, 0x290, 0x190, 0x090, + 0x793, 0x693, 0x593, 0x493, + 0x792, 0x692, 0x592, 0x492, + 0x791, 0x691, 0x591, 0x491, + 0x790, 0x690, 0x590, 0x490, +}; + +#define UMC_ID_MI300 GENMASK(23, 12) +static u8 get_coh_st_inst_id_mi300(struct atl_err *err) +{ + u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid); + u8 i; + + for (i = 0; i < ARRAY_SIZE(umc_coh_st_map); i++) { + if (umc_id == umc_coh_st_map[i]) + break; + } + + WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map)); + + return i; +} + +#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44) static u8 get_die_id(struct atl_err *err) { /* + * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this + * needs to be divided by 4 to get the internal Die ID. + */ + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) { + u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid); + + return node_id >> 2; + } + + /* * For CPUs, this is the AMD Node ID modulo the number * of AMD Nodes per socket. */ @@ -24,6 +72,9 @@ static u8 get_die_id(struct atl_err *err) #define UMC_CHANNEL_NUM GENMASK(31, 20) static u8 get_coh_st_inst_id(struct atl_err *err) { + if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) + return get_coh_st_inst_id_mi300(err); + return FIELD_GET(UMC_CHANNEL_NUM, err->ipid); } |