diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
48 files changed, 1076 insertions, 523 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2931c8ff4cc6..566303c9942f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -771,6 +771,8 @@ struct amd_powerplay { const struct amd_pm_funcs *pp_funcs; }; +struct ip_discovery_top; + /* polaris10 kickers */ #define ASICID_IS_P20(did, rid) (((did == 0x67DF) && \ ((rid == 0xE3) || \ @@ -1096,6 +1098,8 @@ struct amdgpu_device { bool ram_is_direct_mapped; struct list_head ras_list; + + struct ip_discovery_top *ip_top; }; static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1292,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); int amdgpu_device_pci_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 53895a41932e..81e3b528bbc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -715,7 +715,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * process whose pasid is provided as a parameter. The process could have ZERO * or more queues running and submitting waves to compute units. * - * @kgd: Handle of device from which to get number of waves in flight + * @adev: Handle of device from which to get number of waves in flight * @pasid: Identifies the process for which this query call is invoked * @pasid_wave_cnt: Output parameter updated with number of waves in flight that * belong to process with given pasid @@ -724,7 +724,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Note: It's possible that the device has too many queues (oversubscription) * in which case a VMID could be remapped to a different PASID. This could lead - * to an iaccurate wave count. Following is a high-level sequence: + * to an inaccurate wave count. Following is a high-level sequence: * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2 * In the sequence above wave count obtained from time T1 will be incorrectly diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 2e00c3fb4bd3..cd89d2e46852 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -121,7 +121,7 @@ static size_t amdgpu_amdkfd_acc_size(uint64_t size) } /** - * @amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size + * amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size * of buffer including any reserved for control structures * * @adev: Device to which allocated BO belongs to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 1c72f6095f08..f522b52725e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -613,7 +613,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, if (args->in.flags) return -EINVAL; r = amdgpu_ctx_stable_pstate(adev, fpriv, id, false, &stable_pstate); - args->out.pstate.flags = stable_pstate; + if (!r) + args->out.pstate.flags = stable_pstate; break; case AMDGPU_CTX_OP_SET_STABLE_PSTATE: if (args->in.flags & ~AMDGPU_CTX_STABLE_PSTATE_FLAGS_MASK) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 4b950de9bf66..db8a8710333e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -728,7 +728,7 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, return -ENOMEM; /* version, increment each time something is added */ - config[no_regs++] = 3; + config[no_regs++] = 4; config[no_regs++] = adev->gfx.config.max_shader_engines; config[no_regs++] = adev->gfx.config.max_tile_pipes; config[no_regs++] = adev->gfx.config.max_cu_per_sh; @@ -768,6 +768,9 @@ static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf, config[no_regs++] = adev->pdev->subsystem_device; config[no_regs++] = adev->pdev->subsystem_vendor; + /* rev==4 APU flag */ + config[no_regs++] = adev->flags & AMD_IS_APU ? 1 : 0; + while (size && (*pos < no_regs * 4)) { uint32_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0b98d65056e3..0cfccea722f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -88,6 +88,8 @@ MODULE_FIRMWARE("amdgpu/vangogh_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 +#define AMDGPU_MAX_RETRY_LIMIT 2 +#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) const char *amdgpu_asic_name[] = { "TAHITI", @@ -554,7 +556,11 @@ void amdgpu_device_wreg(struct amdgpu_device *adev, /** * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range * - * this function is invoked only the debugfs register access + * @adev: amdgpu_device pointer + * @reg: mmio/rlc register + * @v: value to write + * + * this function is invoked only for the debugfs register access */ void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev, uint32_t reg, uint32_t v) @@ -1312,6 +1318,31 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) return true; } +/** + * amdgpu_device_should_use_aspm - check if the device should program ASPM + * + * @adev: amdgpu_device pointer + * + * Confirm whether the module parameter and pcie bridge agree that ASPM should + * be set for this device. + * + * Returns true if it should be used or false if not. + */ +bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) +{ + switch (amdgpu_aspm) { + case -1: + break; + case 0: + return false; + case 1: + return true; + default: + return false; + } + return pcie_aspm_enabled(adev->pdev); +} + /* if we get transitioned to only one device, take VGA back */ /** * amdgpu_device_vga_set_decode - enable/disable vga decode @@ -1446,7 +1477,8 @@ static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) case CHIP_YELLOW_CARP: break; case CHIP_CYAN_SKILLFISH: - if (adev->pdev->device == 0x13FE) + if ((adev->pdev->device == 0x13FE) || + (adev->pdev->device == 0x143F)) adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; break; default: @@ -2622,6 +2654,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } + r = amdgpu_ras_late_init(adev); + if (r) { + DRM_ERROR("amdgpu_ras_late_init failed %d", r); + return r; + } + amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); @@ -4362,7 +4400,9 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, { int r; struct amdgpu_hive_info *hive = NULL; + int retry_limit = 0; +retry: amdgpu_amdkfd_pre_reset(adev); amdgpu_amdkfd_pre_reset(adev); @@ -4411,6 +4451,14 @@ error: } amdgpu_virt_release_full_gpu(adev, true); + if (AMDGPU_RETRY_SRIOV_RESET(r)) { + if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) { + retry_limit++; + goto retry; + } else + DRM_ERROR("GPU reset retry is beyond the retry limit\n"); + } + return r; } @@ -5202,6 +5250,9 @@ skip_hw_reset: drm_helper_resume_force_mode(adev_to_drm(tmp_adev)); } + if (tmp_adev->asic_reset_res) + r = tmp_adev->asic_reset_res; + tmp_adev->asic_reset_res = 0; if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index cd7e8522c130..3501ade93f44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -360,8 +360,11 @@ out: return r; } +static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev); + void amdgpu_discovery_fini(struct amdgpu_device *adev) { + amdgpu_discovery_sysfs_fini(adev); kfree(adev->mman.discovery_bin); adev->mman.discovery_bin = NULL; } @@ -382,6 +385,497 @@ static int amdgpu_discovery_validate_ip(const struct ip *ip) return 0; } +/* ================================================== */ + +struct ip_hw_instance { + struct kobject kobj; /* ip_discovery/die/#die/#hw_id/#instance/<attrs...> */ + + int hw_id; + u8 num_instance; + u8 major, minor, revision; + u8 harvest; + + int num_base_addresses; + u32 base_addr[]; +}; + +struct ip_hw_id { + struct kset hw_id_kset; /* ip_discovery/die/#die/#hw_id/, contains ip_hw_instance */ + int hw_id; +}; + +struct ip_die_entry { + struct kset ip_kset; /* ip_discovery/die/#die/, contains ip_hw_id */ + u16 num_ips; +}; + +/* -------------------------------------------------- */ + +struct ip_hw_instance_attr { + struct attribute attr; + ssize_t (*show)(struct ip_hw_instance *ip_hw_instance, char *buf); +}; + +static ssize_t hw_id_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->hw_id); +} + +static ssize_t num_instance_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->num_instance); +} + +static ssize_t major_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->major); +} + +static ssize_t minor_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->minor); +} + +static ssize_t revision_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->revision); +} + +static ssize_t harvest_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "0x%01X\n", ip_hw_instance->harvest); +} + +static ssize_t num_base_addresses_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_hw_instance->num_base_addresses); +} + +static ssize_t base_addr_show(struct ip_hw_instance *ip_hw_instance, char *buf) +{ + ssize_t res, at; + int ii; + + for (res = at = ii = 0; ii < ip_hw_instance->num_base_addresses; ii++) { + /* Here we satisfy the condition that, at + size <= PAGE_SIZE. + */ + if (at + 12 > PAGE_SIZE) + break; + res = sysfs_emit_at(buf, at, "0x%08X\n", + ip_hw_instance->base_addr[ii]); + if (res <= 0) + break; + at += res; + } + + return res < 0 ? res : at; +} + +static struct ip_hw_instance_attr ip_hw_attr[] = { + __ATTR_RO(hw_id), + __ATTR_RO(num_instance), + __ATTR_RO(major), + __ATTR_RO(minor), + __ATTR_RO(revision), + __ATTR_RO(harvest), + __ATTR_RO(num_base_addresses), + __ATTR_RO(base_addr), +}; + +static struct attribute *ip_hw_instance_attrs[ARRAY_SIZE(ip_hw_attr) + 1]; +ATTRIBUTE_GROUPS(ip_hw_instance); + +#define to_ip_hw_instance(x) container_of(x, struct ip_hw_instance, kobj) +#define to_ip_hw_instance_attr(x) container_of(x, struct ip_hw_instance_attr, attr) + +static ssize_t ip_hw_instance_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj); + struct ip_hw_instance_attr *ip_hw_attr = to_ip_hw_instance_attr(attr); + + if (!ip_hw_attr->show) + return -EIO; + + return ip_hw_attr->show(ip_hw_instance, buf); +} + +static const struct sysfs_ops ip_hw_instance_sysfs_ops = { + .show = ip_hw_instance_attr_show, +}; + +static void ip_hw_instance_release(struct kobject *kobj) +{ + struct ip_hw_instance *ip_hw_instance = to_ip_hw_instance(kobj); + + kfree(ip_hw_instance); +} + +static struct kobj_type ip_hw_instance_ktype = { + .release = ip_hw_instance_release, + .sysfs_ops = &ip_hw_instance_sysfs_ops, + .default_groups = ip_hw_instance_groups, +}; + +/* -------------------------------------------------- */ + +#define to_ip_hw_id(x) container_of(to_kset(x), struct ip_hw_id, hw_id_kset) + +static void ip_hw_id_release(struct kobject *kobj) +{ + struct ip_hw_id *ip_hw_id = to_ip_hw_id(kobj); + + if (!list_empty(&ip_hw_id->hw_id_kset.list)) + DRM_ERROR("ip_hw_id->hw_id_kset is not empty"); + kfree(ip_hw_id); +} + +static struct kobj_type ip_hw_id_ktype = { + .release = ip_hw_id_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +/* -------------------------------------------------- */ + +static void die_kobj_release(struct kobject *kobj); +static void ip_disc_release(struct kobject *kobj); + +struct ip_die_entry_attribute { + struct attribute attr; + ssize_t (*show)(struct ip_die_entry *ip_die_entry, char *buf); +}; + +#define to_ip_die_entry_attr(x) container_of(x, struct ip_die_entry_attribute, attr) + +static ssize_t num_ips_show(struct ip_die_entry *ip_die_entry, char *buf) +{ + return sysfs_emit(buf, "%d\n", ip_die_entry->num_ips); +} + +/* If there are more ip_die_entry attrs, other than the number of IPs, + * we can make this intro an array of attrs, and then initialize + * ip_die_entry_attrs in a loop. + */ +static struct ip_die_entry_attribute num_ips_attr = + __ATTR_RO(num_ips); + +static struct attribute *ip_die_entry_attrs[] = { + &num_ips_attr.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ip_die_entry); /* ip_die_entry_groups */ + +#define to_ip_die_entry(x) container_of(to_kset(x), struct ip_die_entry, ip_kset) + +static ssize_t ip_die_entry_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct ip_die_entry_attribute *ip_die_entry_attr = to_ip_die_entry_attr(attr); + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj); + + if (!ip_die_entry_attr->show) + return -EIO; + + return ip_die_entry_attr->show(ip_die_entry, buf); +} + +static void ip_die_entry_release(struct kobject *kobj) +{ + struct ip_die_entry *ip_die_entry = to_ip_die_entry(kobj); + + if (!list_empty(&ip_die_entry->ip_kset.list)) + DRM_ERROR("ip_die_entry->ip_kset is not empty"); + kfree(ip_die_entry); +} + +static const struct sysfs_ops ip_die_entry_sysfs_ops = { + .show = ip_die_entry_attr_show, +}; + +static struct kobj_type ip_die_entry_ktype = { + .release = ip_die_entry_release, + .sysfs_ops = &ip_die_entry_sysfs_ops, + .default_groups = ip_die_entry_groups, +}; + +static struct kobj_type die_kobj_ktype = { + .release = die_kobj_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +static struct kobj_type ip_discovery_ktype = { + .release = ip_disc_release, + .sysfs_ops = &kobj_sysfs_ops, +}; + +struct ip_discovery_top { + struct kobject kobj; /* ip_discovery/ */ + struct kset die_kset; /* ip_discovery/die/, contains ip_die_entry */ + struct amdgpu_device *adev; +}; + +static void die_kobj_release(struct kobject *kobj) +{ + struct ip_discovery_top *ip_top = container_of(to_kset(kobj), + struct ip_discovery_top, + die_kset); + if (!list_empty(&ip_top->die_kset.list)) + DRM_ERROR("ip_top->die_kset is not empty"); +} + +static void ip_disc_release(struct kobject *kobj) +{ + struct ip_discovery_top *ip_top = container_of(kobj, struct ip_discovery_top, + kobj); + struct amdgpu_device *adev = ip_top->adev; + + adev->ip_top = NULL; + kfree(ip_top); +} + +static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, + struct ip_die_entry *ip_die_entry, + const size_t _ip_offset, const int num_ips) +{ + int ii, jj, kk, res; + + DRM_DEBUG("num_ips:%d", num_ips); + + /* Find all IPs of a given HW ID, and add their instance to + * #die/#hw_id/#instance/<attributes> + */ + for (ii = 0; ii < HW_ID_MAX; ii++) { + struct ip_hw_id *ip_hw_id = NULL; + size_t ip_offset = _ip_offset; + + for (jj = 0; jj < num_ips; jj++) { + struct ip *ip; + struct ip_hw_instance *ip_hw_instance; + + ip = (struct ip *)(adev->mman.discovery_bin + ip_offset); + if (amdgpu_discovery_validate_ip(ip) || + le16_to_cpu(ip->hw_id) != ii) + goto next_ip; + + DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset); + + /* We have a hw_id match; register the hw + * block if not yet registered. + */ + if (!ip_hw_id) { + ip_hw_id = kzalloc(sizeof(*ip_hw_id), GFP_KERNEL); + if (!ip_hw_id) + return -ENOMEM; + ip_hw_id->hw_id = ii; + + kobject_set_name(&ip_hw_id->hw_id_kset.kobj, "%d", ii); + ip_hw_id->hw_id_kset.kobj.kset = &ip_die_entry->ip_kset; + ip_hw_id->hw_id_kset.kobj.ktype = &ip_hw_id_ktype; + res = kset_register(&ip_hw_id->hw_id_kset); + if (res) { + DRM_ERROR("Couldn't register ip_hw_id kset"); + kfree(ip_hw_id); + return res; + } + if (hw_id_names[ii]) { + res = sysfs_create_link(&ip_die_entry->ip_kset.kobj, + &ip_hw_id->hw_id_kset.kobj, + hw_id_names[ii]); + if (res) { + DRM_ERROR("Couldn't create IP link %s in IP Die:%s\n", + hw_id_names[ii], + kobject_name(&ip_die_entry->ip_kset.kobj)); + } + } + } + + /* Now register its instance. + */ + ip_hw_instance = kzalloc(struct_size(ip_hw_instance, + base_addr, + ip->num_base_address), + GFP_KERNEL); + if (!ip_hw_instance) { + DRM_ERROR("no memory for ip_hw_instance"); + return -ENOMEM; + } + ip_hw_instance->hw_id = le16_to_cpu(ip->hw_id); /* == ii */ + ip_hw_instance->num_instance = ip->number_instance; + ip_hw_instance->major = ip->major; + ip_hw_instance->minor = ip->minor; + ip_hw_instance->revision = ip->revision; + ip_hw_instance->harvest = ip->harvest; + ip_hw_instance->num_base_addresses = ip->num_base_address; + + for (kk = 0; kk < ip_hw_instance->num_base_addresses; kk++) + ip_hw_instance->base_addr[kk] = ip->base_address[kk]; + + kobject_init(&ip_hw_instance->kobj, &ip_hw_instance_ktype); + ip_hw_instance->kobj.kset = &ip_hw_id->hw_id_kset; + res = kobject_add(&ip_hw_instance->kobj, NULL, + "%d", ip_hw_instance->num_instance); +next_ip: + ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1); + } + } + + return 0; +} + +static int amdgpu_discovery_sysfs_recurse(struct amdgpu_device *adev) +{ + struct binary_header *bhdr; + struct ip_discovery_header *ihdr; + struct die_header *dhdr; + struct kset *die_kset = &adev->ip_top->die_kset; + u16 num_dies, die_offset, num_ips; + size_t ip_offset; + int ii, res; + + bhdr = (struct binary_header *)adev->mman.discovery_bin; + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + + le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset)); + num_dies = le16_to_cpu(ihdr->num_dies); + + DRM_DEBUG("number of dies: %d\n", num_dies); + + for (ii = 0; ii < num_dies; ii++) { + struct ip_die_entry *ip_die_entry; + + die_offset = le16_to_cpu(ihdr->die_info[ii].die_offset); + dhdr = (struct die_header *)(adev->mman.discovery_bin + die_offset); + num_ips = le16_to_cpu(dhdr->num_ips); + ip_offset = die_offset + sizeof(*dhdr); + + /* Add the die to the kset. + * + * dhdr->die_id == ii, which was checked in + * amdgpu_discovery_reg_base_init(). + */ + + ip_die_entry = kzalloc(sizeof(*ip_die_entry), GFP_KERNEL); + if (!ip_die_entry) + return -ENOMEM; + + ip_die_entry->num_ips = num_ips; + + kobject_set_name(&ip_die_entry->ip_kset.kobj, "%d", le16_to_cpu(dhdr->die_id)); + ip_die_entry->ip_kset.kobj.kset = die_kset; + ip_die_entry->ip_kset.kobj.ktype = &ip_die_entry_ktype; + res = kset_register(&ip_die_entry->ip_kset); + if (res) { + DRM_ERROR("Couldn't register ip_die_entry kset"); + kfree(ip_die_entry); + return res; + } + + amdgpu_discovery_sysfs_ips(adev, ip_die_entry, ip_offset, num_ips); + } + + return 0; +} + +static int amdgpu_discovery_sysfs_init(struct amdgpu_device *adev) +{ + struct kset *die_kset; + int res, ii; + + adev->ip_top = kzalloc(sizeof(*adev->ip_top), GFP_KERNEL); + if (!adev->ip_top) + return -ENOMEM; + + adev->ip_top->adev = adev; + + res = kobject_init_and_add(&adev->ip_top->kobj, &ip_discovery_ktype, + &adev->dev->kobj, "ip_discovery"); + if (res) { + DRM_ERROR("Couldn't init and add ip_discovery/"); + goto Err; + } + + die_kset = &adev->ip_top->die_kset; + kobject_set_name(&die_kset->kobj, "%s", "die"); + die_kset->kobj.parent = &adev->ip_top->kobj; + die_kset->kobj.ktype = &die_kobj_ktype; + res = kset_register(&adev->ip_top->die_kset); + if (res) { + DRM_ERROR("Couldn't register die_kset"); + goto Err; + } + + for (ii = 0; ii < ARRAY_SIZE(ip_hw_attr); ii++) + ip_hw_instance_attrs[ii] = &ip_hw_attr[ii].attr; + ip_hw_instance_attrs[ii] = NULL; + + res = amdgpu_discovery_sysfs_recurse(adev); + + return res; +Err: + kobject_put(&adev->ip_top->kobj); + return res; +} + +/* -------------------------------------------------- */ + +#define list_to_kobj(el) container_of(el, struct kobject, entry) + +static void amdgpu_discovery_sysfs_ip_hw_free(struct ip_hw_id *ip_hw_id) +{ + struct list_head *el, *tmp; + struct kset *hw_id_kset; + + hw_id_kset = &ip_hw_id->hw_id_kset; + spin_lock(&hw_id_kset->list_lock); + list_for_each_prev_safe(el, tmp, &hw_id_kset->list) { + list_del_init(el); + spin_unlock(&hw_id_kset->list_lock); + /* kobject is embedded in ip_hw_instance */ + kobject_put(list_to_kobj(el)); + spin_lock(&hw_id_kset->list_lock); + } + spin_unlock(&hw_id_kset->list_lock); + kobject_put(&ip_hw_id->hw_id_kset.kobj); +} + +static void amdgpu_discovery_sysfs_die_free(struct ip_die_entry *ip_die_entry) +{ + struct list_head *el, *tmp; + struct kset *ip_kset; + + ip_kset = &ip_die_entry->ip_kset; + spin_lock(&ip_kset->list_lock); + list_for_each_prev_safe(el, tmp, &ip_kset->list) { + list_del_init(el); + spin_unlock(&ip_kset->list_lock); + amdgpu_discovery_sysfs_ip_hw_free(to_ip_hw_id(list_to_kobj(el))); + spin_lock(&ip_kset->list_lock); + } + spin_unlock(&ip_kset->list_lock); + kobject_put(&ip_die_entry->ip_kset.kobj); +} + +static void amdgpu_discovery_sysfs_fini(struct amdgpu_device *adev) +{ + struct list_head *el, *tmp; + struct kset *die_kset; + + die_kset = &adev->ip_top->die_kset; + spin_lock(&die_kset->list_lock); + list_for_each_prev_safe(el, tmp, &die_kset->list) { + list_del_init(el); + spin_unlock(&die_kset->list_lock); + amdgpu_discovery_sysfs_die_free(to_ip_die_entry(list_to_kobj(el))); + spin_lock(&die_kset->list_lock); + } + spin_unlock(&die_kset->list_lock); + kobject_put(&adev->ip_top->die_kset.kobj); + kobject_put(&adev->ip_top->kobj); +} + +/* ================================================== */ + int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) { struct binary_header *bhdr; @@ -492,6 +986,8 @@ next_ip: } } + amdgpu_discovery_sysfs_init(adev); + return 0; } @@ -681,6 +1177,8 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &nv_common_ip_block); break; default: @@ -717,6 +1215,8 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gmc_v10_0_ip_block); break; default: @@ -792,6 +1292,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 8): amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block); break; default: @@ -833,6 +1334,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 8): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; default: @@ -869,6 +1371,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 0, 1): case IP_VERSION(3, 1, 2): case IP_VERSION(3, 1, 3): + case IP_VERSION(3, 1, 6): amdgpu_device_ip_block_add(adev, &dm_ip_block); break; default: @@ -918,7 +1421,9 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); break; default: @@ -955,6 +1460,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(5, 2, 5): case IP_VERSION(5, 2, 3): case IP_VERSION(5, 2, 1): + case IP_VERSION(5, 2, 7): amdgpu_device_ip_block_add(adev, &sdma_v5_2_ip_block); break; default: @@ -1054,6 +1560,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block); break; default: @@ -1260,6 +1767,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): adev->family = AMDGPU_FAMILY_YC; break; + case IP_VERSION(10, 3, 6): + adev->family = AMDGPU_FAMILY_GC_10_3_6; + break; + case IP_VERSION(10, 3, 7): + adev->family = AMDGPU_FAMILY_GC_10_3_7; + break; default: return -EINVAL; } @@ -1272,6 +1785,8 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(10, 1, 4): case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): adev->flags |= AMD_IS_APU; break; default: @@ -1306,6 +1821,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): case IP_VERSION(7, 5, 0): + case IP_VERSION(7, 5, 1): adev->nbio.funcs = &nbio_v7_2_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_2_hdp_flush_reg; break; @@ -1388,6 +1904,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 11): case IP_VERSION(11, 5, 0): case IP_VERSION(13, 0, 1): + case IP_VERSION(13, 0, 9): adev->smuio.funcs = &smuio_v11_0_6_funcs; break; case IP_VERSION(13, 0, 2): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 5cdafdcfec59..a0b5cf9a41cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1923,6 +1923,7 @@ static const struct pci_device_id pciidlist[] = { /* CYAN_SKILLFISH */ {0x1002, 0x13FE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, + {0x1002, 0x143F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYAN_SKILLFISH|AMD_IS_APU}, /* BEIGE_GOBY */ {0x1002, 0x7420, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BEIGE_GOBY}, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 43004822ec6f..52912b6bcb20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -622,65 +622,33 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) return r; } -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_fs_if fs_info = { - .sysfs_name = "gfx_err_count", - }; - struct ras_ih_if ih_info = { - .cb = amdgpu_gfx_process_ras_data_cb, - }; - - if (!adev->gfx.ras_if) { - adev->gfx.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gfx.ras_if) - return -ENOMEM; - adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX; - adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gfx.ras_if->sub_block_index = 0; - } - fs_info.head = ih_info.head = *adev->gfx.ras_if; - r = amdgpu_ras_late_init(adev, adev->gfx.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; - } else { - /* free gfx ras_if if ras is not supported */ - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->gfx.ras_if, &ih_info); -free: - kfree(adev->gfx.ras_if); - adev->gfx.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) { - struct ras_common_if *ras_if = adev->gfx.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_gfx_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->gfx.ras_if) + amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if); } int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f99eac544f6d..ccca0a85b982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -386,7 +386,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index ec1203d49799..a71688d1c1b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -441,6 +441,7 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.connected_to_cpu) { adev->gmc.xgmi.ras = &xgmi_ras; amdgpu_ras_register_ras_block(adev, &adev->gmc.xgmi.ras->ras_block); + adev->gmc.xgmi.ras_if = &adev->gmc.xgmi.ras->ras_block.ras_comm; } return 0; @@ -448,50 +449,6 @@ int amdgpu_gmc_ras_early_init(struct amdgpu_device *adev) int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { - int r; - - if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) { - r = adev->umc.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_late_init) { - r = adev->mmhub.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) { - r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_late_init) { - r = adev->hdp.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->mca.mp0.ras && adev->mca.mp0.ras->ras_block.ras_late_init) { - r = adev->mca.mp0.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->mca.mp1.ras && adev->mca.mp1.ras->ras_block.ras_late_init) { - r = adev->mca.mp1.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - - if (adev->mca.mpio.ras && adev->mca.mpio.ras->ras_block.ras_late_init) { - r = adev->mca.mpio.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index 518966a26130..b7fbc114a175 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -24,45 +24,9 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "hdp_err_count", - }; - - if (!adev->hdp.ras_if) { - adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->hdp.ras_if) - return -ENOMEM; - adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; - adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->hdp.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->hdp.ras_if; - r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) { - kfree(adev->hdp.ras_if); - adev->hdp.ras_if = NULL; - } - - return r; -} - void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && - adev->hdp.ras_if) { - struct ras_common_if *ras_if = adev->hdp.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->hdp.ras_if) + amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 4af2c2a322e7..aabd59aa5213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -43,6 +43,6 @@ struct amdgpu_hdp { struct amdgpu_hdp_ras *ras; }; -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index bfc47bea23db..4870e093213d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -37,6 +37,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) struct amdgpu_task_info ti; struct amdgpu_device *adev = ring->adev; int idx; + int r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", @@ -63,7 +64,10 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) ti.process_name, ti.tgid, ti.task_name, ti.pid); if (amdgpu_device_should_recover_gpu(ring->adev)) { - amdgpu_device_gpu_recover(ring->adev, job); + r = amdgpu_device_gpu_recover(ring->adev, job); + if (r) + DRM_ERROR("GPU Recovery Failed: %d\n", r); + } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 52a60c2316a2..e2607d9f5cf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -71,50 +71,8 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, amdgpu_mca_reset_error_count(adev, mc_status_addr); } -int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev) -{ - char sysfs_name[32] = {0}; - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info= { - .sysfs_name = sysfs_name, - }; - - snprintf(sysfs_name, sizeof(sysfs_name), "%s_err_count", mca_dev->ras->ras_block.name); - - if (!mca_dev->ras_if) { - mca_dev->ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!mca_dev->ras_if) - return -ENOMEM; - mca_dev->ras_if->block = mca_dev->ras->ras_block.block; - mca_dev->ras_if->sub_block_index = mca_dev->ras->ras_block.sub_block_index; - mca_dev->ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - } - ih_info.head = fs_info.head = *mca_dev->ras_if; - r = amdgpu_ras_late_init(adev, mca_dev->ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, mca_dev->ras_if->block)) { - kfree(mca_dev->ras_if); - mca_dev->ras_if = NULL; - } - - return r; -} - void amdgpu_mca_ras_fini(struct amdgpu_device *adev, struct amdgpu_mca_ras *mca_dev) { - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - if (!mca_dev->ras_if) - return; - - amdgpu_ras_late_fini(adev, mca_dev->ras_if, &ih_info); - kfree(mca_dev->ras_if); - mca_dev->ras_if = NULL; + amdgpu_ras_block_late_fini(adev, mca_dev->ras_if); }
\ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index be030c4031d2..15e1a1efeb4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -56,9 +56,6 @@ void amdgpu_mca_query_ras_error_count(struct amdgpu_device *adev, uint64_t mc_status_addr, void *ras_error_status); -int amdgpu_mca_ras_late_init(struct amdgpu_device *adev, - struct amdgpu_mca_ras *mca_dev); - void amdgpu_mca_ras_fini(struct amdgpu_device *adev, struct amdgpu_mca_ras *mca_dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index f9b5472a75d7..42413813765a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -24,45 +24,9 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info) -{ - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "mmhub_err_count", - }; - - if (!adev->mmhub.ras_if) { - adev->mmhub.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->mmhub.ras_if) - return -ENOMEM; - adev->mmhub.ras_if->block = AMDGPU_RAS_BLOCK__MMHUB; - adev->mmhub.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->mmhub.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->mmhub.ras_if; - r = amdgpu_ras_late_init(adev, adev->mmhub.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->mmhub.ras_if->block)) { - kfree(adev->mmhub.ras_if); - adev->mmhub.ras_if = NULL; - } - - return r; -} - void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && - adev->mmhub.ras_if) { - struct ras_common_if *ras_if = adev->mmhub.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->mmhub.ras_if) + amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 7deda9a3b81e..240b26d9a388 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -47,7 +47,6 @@ struct amdgpu_mmhub { struct amdgpu_mmhub_ras *ras; }; -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info); void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 6ace2e390e77..f09ad80f0772 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -22,61 +22,31 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "pcie_bif_err_count", - }; - - if (!adev->nbio.ras_if) { - adev->nbio.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->nbio.ras_if) - return -ENOMEM; - adev->nbio.ras_if->block = AMDGPU_RAS_BLOCK__PCIE_BIF; - adev->nbio.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->nbio.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->nbio.ras_if; - r = amdgpu_ras_late_init(adev, adev->nbio.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); if (r) goto late_fini; r = amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); if (r) goto late_fini; - } else { - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->nbio.ras_if, &ih_info); -free: - kfree(adev->nbio.ras_if); - adev->nbio.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && - adev->nbio.ras_if) { - struct ras_common_if *ras_if = adev->nbio.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->nbio.ras_if) + amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 4afb76d3cd97..f9546c7341b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -104,6 +104,6 @@ struct amdgpu_nbio { struct amdgpu_nbio_ras *ras; }; -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 9bc9155cbf06..ecbdbe3ea4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -133,12 +133,12 @@ static int psp_early_init(void *handle) break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 8): psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = true; break; case IP_VERSION(11, 0, 8): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2 || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4)) { + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) { psp_v11_0_8_set_psp_funcs(psp); psp->autoload_supported = false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a9c133a09be5..61f589b6fe5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -877,7 +877,7 @@ static int amdgpu_ras_block_match_default(struct amdgpu_ras_block_object *block_ if (!block_obj) return -EINVAL; - if (block_obj->block == block) + if (block_obj->ras_comm.block == block) return 0; return -EINVAL; @@ -1276,18 +1276,17 @@ static int amdgpu_ras_sysfs_remove_feature_node(struct amdgpu_device *adev) } int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head) + struct ras_common_if *head) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head->head); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); if (!obj || obj->attr_inuse) return -EINVAL; get_obj(obj); - memcpy(obj->fs_data.sysfs_name, - head->sysfs_name, - sizeof(obj->fs_data.sysfs_name)); + snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name), + "%s_err_count", head->name); obj->sysfs_attr = (struct device_attribute){ .attr = { @@ -1594,9 +1593,9 @@ int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, } int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, - struct ras_ih_if *info) + struct ras_common_if *head) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); struct ras_ih_data *data; if (!obj) @@ -1616,24 +1615,27 @@ int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, } int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, - struct ras_ih_if *info) + struct ras_common_if *head) { - struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); struct ras_ih_data *data; + struct amdgpu_ras_block_object *ras_obj; if (!obj) { /* in case we registe the IH before enable ras feature */ - obj = amdgpu_ras_create_obj(adev, &info->head); + obj = amdgpu_ras_create_obj(adev, head); if (!obj) return -EINVAL; } else get_obj(obj); + ras_obj = container_of(head, struct amdgpu_ras_block_object, ras_comm); + data = &obj->ih_data; /* add the callback.etc */ *data = (struct ras_ih_data) { .inuse = 0, - .cb = info->cb, + .cb = ras_obj->ras_cb, .element_size = sizeof(struct amdgpu_iv_entry), .rptr = 0, .wptr = 0, @@ -1662,10 +1664,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) struct ras_manager *obj, *tmp; list_for_each_entry_safe(obj, tmp, &con->head, node) { - struct ras_ih_if info = { - .head = obj->head, - }; - amdgpu_ras_interrupt_remove_handler(adev, &info); + amdgpu_ras_interrupt_remove_handler(adev, &obj->head); } return 0; @@ -2301,6 +2300,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) if (!adev->gmc.xgmi.connected_to_cpu) { adev->nbio.ras = &nbio_v7_4_ras; amdgpu_ras_register_ras_block(adev, &adev->nbio.ras->ras_block); + adev->nbio.ras_if = &adev->nbio.ras->ras_block.ras_comm; } break; default: @@ -2397,11 +2397,10 @@ bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev) } /* helper function to handle common stuff in ip late init phase */ -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info) +int amdgpu_ras_block_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block) { + struct amdgpu_ras_block_object *ras_obj; struct amdgpu_ras *con = amdgpu_ras_get_context(adev); unsigned long ue_count, ce_count; int r; @@ -2429,15 +2428,16 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, if (adev->in_suspend || amdgpu_in_reset(adev)) return 0; - if (ih_info->cb) { - r = amdgpu_ras_interrupt_add_handler(adev, ih_info); + ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); + if (ras_obj->ras_cb) { + r = amdgpu_ras_interrupt_add_handler(adev, ras_block); if (r) - goto interrupt; + goto cleanup; } - r = amdgpu_ras_sysfs_create(adev, fs_info); + r = amdgpu_ras_sysfs_create(adev, ras_block); if (r) - goto sysfs; + goto interrupt; /* Those are the cached values at init. */ @@ -2447,27 +2447,34 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev, } return 0; -cleanup: - amdgpu_ras_sysfs_remove(adev, ras_block); -sysfs: - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); + interrupt: + if (ras_obj->ras_cb) + amdgpu_ras_interrupt_remove_handler(adev, ras_block); +cleanup: amdgpu_ras_feature_enable(adev, ras_block, 0); return r; } +int amdgpu_ras_block_late_init_default(struct amdgpu_device *adev, + struct ras_common_if *ras_block) +{ + return amdgpu_ras_block_late_init(adev, ras_block); +} + /* helper function to remove ras fs node and interrupt handler */ -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info) +void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block) { - if (!ras_block || !ih_info) + struct amdgpu_ras_block_object *ras_obj; + if (!ras_block) return; amdgpu_ras_sysfs_remove(adev, ras_block); - if (ih_info->cb) - amdgpu_ras_interrupt_remove_handler(adev, ih_info); + + ras_obj = container_of(ras_block, struct amdgpu_ras_block_object, ras_comm); + if (ras_obj->ras_cb) + amdgpu_ras_interrupt_remove_handler(adev, ras_block); } /* do some init work after IP late init as dependence. @@ -2520,6 +2527,33 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) amdgpu_ras_disable_all_features(adev, 1); } +int amdgpu_ras_late_init(struct amdgpu_device *adev) +{ + struct amdgpu_ras_block_list *node, *tmp; + struct amdgpu_ras_block_object *obj; + int r; + + list_for_each_entry_safe(node, tmp, &adev->ras_list, node) { + if (!node->ras_obj) { + dev_warn(adev->dev, "Warning: abnormal ras list node.\n"); + continue; + } + + obj = node->ras_obj; + if (obj->ras_late_init) { + r = obj->ras_late_init(adev, &obj->ras_comm); + if (r) { + dev_err(adev->dev, "%s failed to execute ras_late_init! ret:%d\n", + obj->ras_comm.name, r); + return r; + } + } else + amdgpu_ras_block_late_init_default(adev, &obj->ras_comm); + } + + return 0; +} + /* do some fini work before IP fini as dependence */ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index a55743b12d57..143a83043d7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -486,17 +486,13 @@ struct ras_debug_if { }; struct amdgpu_ras_block_object { - /* block name */ - char name[32]; - - enum amdgpu_ras_block block; - - uint32_t sub_block_index; + struct ras_common_if ras_comm; int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); - int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); + int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); void (*ras_fini)(struct amdgpu_device *adev); + ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; }; @@ -599,15 +595,15 @@ amdgpu_ras_error_to_ta(enum amdgpu_ras_error_type error) { /* called in ip_init and ip_fini */ int amdgpu_ras_init(struct amdgpu_device *adev); +int amdgpu_ras_late_init(struct amdgpu_device *adev); int amdgpu_ras_fini(struct amdgpu_device *adev); int amdgpu_ras_pre_fini(struct amdgpu_device *adev); -int amdgpu_ras_late_init(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_fs_if *fs_info, - struct ras_ih_if *ih_info); -void amdgpu_ras_late_fini(struct amdgpu_device *adev, - struct ras_common_if *ras_block, - struct ras_ih_if *ih_info); + +int amdgpu_ras_block_late_init(struct amdgpu_device *adev, + struct ras_common_if *ras_block); + +void amdgpu_ras_block_late_fini(struct amdgpu_device *adev, + struct ras_common_if *ras_block); int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); @@ -616,7 +612,7 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, struct ras_common_if *head, bool enable); int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head); + struct ras_common_if *head); int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); @@ -633,10 +629,10 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, struct ras_inject_if *info); int amdgpu_ras_interrupt_add_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_remove_handler(struct amdgpu_device *adev, - struct ras_ih_if *info); + struct ras_common_if *head); int amdgpu_ras_interrupt_dispatch(struct amdgpu_device *adev, struct ras_dispatch_if *info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 65debb65a5df..3b5c43575aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -87,71 +87,35 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, } int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info) + struct ras_common_if *ras_block) { int r, i; - struct ras_ih_if *ih_info = (struct ras_ih_if *)ras_ih_info; - struct ras_fs_if fs_info = { - .sysfs_name = "sdma_err_count", - }; - - if (!ih_info) - return -EINVAL; - if (!adev->sdma.ras_if) { - adev->sdma.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->sdma.ras_if) - return -ENOMEM; - adev->sdma.ras_if->block = AMDGPU_RAS_BLOCK__SDMA; - adev->sdma.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->sdma.ras_if->sub_block_index = 0; - } - fs_info.head = ih_info->head = *adev->sdma.ras_if; - - r = amdgpu_ras_late_init(adev, adev->sdma.ras_if, - &fs_info, ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { for (i = 0; i < adev->sdma.num_instances; i++) { r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i); if (r) goto late_fini; } - } else { - r = 0; - goto free; } return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->sdma.ras_if, ih_info); -free: - kfree(adev->sdma.ras_if); - adev->sdma.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) { - struct ras_common_if *ras_if = adev->sdma.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - /* the cb member will not be used by - * amdgpu_ras_interrupt_remove_handler, init it only - * to cheat the check in ras_late_fini - */ - .cb = amdgpu_sdma_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->sdma.ras_if) + amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if); } int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index eaee12ab6518..8b226ffee32c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -117,7 +117,7 @@ amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info); + struct ras_common_if *ras_block); void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index ff7805beda38..9400260e3263 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -129,46 +129,25 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, return ret; } -static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); } -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - struct ras_fs_if fs_info = { - .sysfs_name = "umc_err_count", - }; - struct ras_ih_if ih_info = { - .cb = amdgpu_umc_process_ras_data_cb, - }; - if (!adev->umc.ras_if) { - adev->umc.ras_if = - kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->umc.ras_if) - return -ENOMEM; - adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->umc.ras_if; - - r = amdgpu_ras_late_init(adev, adev->umc.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) - goto free; + return r; - if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) goto late_fini; - } else { - r = 0; - goto free; } /* ras init of specific umc version */ @@ -179,26 +158,15 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); -free: - kfree(adev->umc.ras_if); - adev->umc.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, ras_block); return r; } void amdgpu_umc_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->umc.ras_if) { - struct ras_common_if *ras_if = adev->umc.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_umc_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->umc.ras_if) + amdgpu_ras_block_late_fini(adev, adev->umc.ras_if); } int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 4db0526d0be4..e4b3678a6685 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -72,7 +72,7 @@ struct amdgpu_umc { struct amdgpu_umc_ras *ras; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_umc_ras_fini(struct amdgpu_device *adev); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, @@ -85,4 +85,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t retired_page, uint32_t channel_index, uint32_t umc_inst); + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index e1288901beb6..5656bf7d9267 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -836,7 +836,7 @@ static bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev, /* only in new version, AMDGPU_REGS_NO_KIQ and * AMDGPU_REGS_RLC are enabled simultaneously */ } else if ((acc_flags & AMDGPU_REGS_RLC) && - !(acc_flags & AMDGPU_REGS_NO_KIQ)) { + !(acc_flags & AMDGPU_REGS_NO_KIQ) && write) { *rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY; ret = true; } @@ -902,7 +902,7 @@ static u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v for (i = 0; i < timeout; i++) { tmp = readl(scratch_reg1); - if (!(tmp & flag)) + if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK)) break; udelay(10); } @@ -940,7 +940,7 @@ void amdgpu_sriov_wreg(struct amdgpu_device *adev, u32 rlcg_flag; if (!amdgpu_sriov_runtime(adev) && - amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) { + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) { amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag); return; } @@ -957,7 +957,7 @@ u32 amdgpu_sriov_rreg(struct amdgpu_device *adev, u32 rlcg_flag; if (!amdgpu_sriov_runtime(adev) && - amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag)) + amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, false, &rlcg_flag)) return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag); if (acc_flags & AMDGPU_REGS_NO_KIQ) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 645093610aa0..239f232f9c02 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -43,6 +43,8 @@ #define AMDGPU_RLCG_WRONG_OPERATION_TYPE 0x2000000 #define AMDGPU_RLCG_REG_NOT_IN_RANGE 0x1000000 +#define AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK 0xFFFFF + /* all asic after AI use this offset */ #define mmRCC_IOV_FUNC_IDENTIFIER 0xDE5 /* tonga/fiji use this offset */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 5929d6f528c9..77b65434ccc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -732,53 +732,22 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return psp_xgmi_terminate(&adev->psp); } -static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info) +static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { - int r; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - struct ras_fs_if fs_info = { - .sysfs_name = "xgmi_wafl_err_count", - }; - if (!adev->gmc.xgmi.supported || adev->gmc.xgmi.num_physical_nodes == 0) return 0; adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); - if (!adev->gmc.xgmi.ras_if) { - adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->gmc.xgmi.ras_if) - return -ENOMEM; - adev->gmc.xgmi.ras_if->block = AMDGPU_RAS_BLOCK__XGMI_WAFL; - adev->gmc.xgmi.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->gmc.xgmi.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->gmc.xgmi.ras_if; - r = amdgpu_ras_late_init(adev, adev->gmc.xgmi.ras_if, - &fs_info, &ih_info); - if (r || !amdgpu_ras_is_supported(adev, adev->gmc.xgmi.ras_if->block)) { - kfree(adev->gmc.xgmi.ras_if); - adev->gmc.xgmi.ras_if = NULL; - } - - return r; + return amdgpu_ras_block_late_init(adev, ras_block); } static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && - adev->gmc.xgmi.ras_if) { - struct ras_common_if *ras_if = adev->gmc.xgmi.ras_if; - struct ras_ih_if ih_info = { - .cb = NULL, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->gmc.xgmi.ras_if) + amdgpu_ras_block_late_fini(adev, adev->gmc.xgmi.ras_if); } uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, @@ -981,8 +950,11 @@ struct amdgpu_ras_block_hw_ops xgmi_ras_hw_ops = { struct amdgpu_xgmi_ras xgmi_ras = { .ras_block = { - .name = "xgmi", - .block = AMDGPU_RAS_BLOCK__XGMI_WAFL, + .ras_comm = { + .name = "xgmi_wafl", + .block = AMDGPU_RAS_BLOCK__XGMI_WAFL, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, .hw_ops = &xgmi_ras_hw_ops, .ras_late_init = amdgpu_xgmi_ras_late_init, .ras_fini = amdgpu_xgmi_ras_fini, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index f10ce740a29c..de6d10390ab2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1719,7 +1719,7 @@ static void cik_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (pci_is_root_bus(adev->pdev->bus)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8fb4528c741f..90158289cd30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -106,6 +106,12 @@ #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh_BASE_IDX 1 + +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6 0x002d +#define mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6 0x002e +#define mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6_BASE_IDX 1 + #define mmSPI_CONFIG_CNTL_1_Vangogh 0x2441 #define mmSPI_CONFIG_CNTL_1_Vangogh_BASE_IDX 1 #define mmVGT_TF_MEMORY_BASE_HI_Vangogh 0x2261 @@ -258,6 +264,20 @@ MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_mec2.bin"); MODULE_FIRMWARE("amdgpu/cyan_skillfish2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_6_rlc.bin"); + +MODULE_FIRMWARE("amdgpu/gc_10_3_7_ce.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_mec2.bin"); +MODULE_FIRMWARE("amdgpu/gc_10_3_7_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_10_1[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_4, 0xffffffff, 0x00400014), @@ -3408,6 +3428,57 @@ static const struct soc15_reg_golden golden_settings_gc_10_0_cyan_skillfish[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x00800000, 0x00800000) }; +static const struct soc15_reg_golden golden_settings_gc_10_3_6[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000042), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x00000044), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xfffffff3), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf17fffff, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + +static const struct soc15_reg_golden golden_settings_gc_10_3_7[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCH_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c200), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0x0c1807ff, 0x00000041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL1_PIPE_STEER, 0x000000ff, 0x000000e4), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_0, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2_PIPE_STEER_1, 0x77777777, 0x32103210), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffff), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CM_CTRL1, 0xff8fff0f, 0x580f1008), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xf7ffffff, 0x00f80988), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x000001ff, 0x00000020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_CL_ENHANCE, 0xf000003f, 0x01200007), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0xffffffbf, 0x00000820), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQG_CONFIG, 0x000017ff, 0x00001000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmSX_DEBUG_1, 0xffffff7f, 0x00010020), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00100000) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3646,6 +3717,16 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_10_0_cyan_skillfish, (const u32)ARRAY_SIZE(golden_settings_gc_10_0_cyan_skillfish)); break; + case IP_VERSION(10, 3, 6): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_6, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_6)); + break; + case IP_VERSION(10, 3, 7): + soc15_program_register_sequence(adev, + golden_settings_gc_10_3_7, + (const u32)ARRAY_SIZE(golden_settings_gc_10_3_7)); + break; default: break; } @@ -3834,7 +3915,9 @@ static void gfx_v10_0_check_fw_write_wait(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.cp_fw_write_wait = true; break; default: @@ -3955,14 +4038,18 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 3): chip_name = "yellow_carp"; break; + case IP_VERSION(10, 3, 6): + chip_name = "gc_10_3_6"; + break; case IP_VERSION(10, 1, 3): + case IP_VERSION(10, 1, 4): if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) chip_name = "cyan_skillfish2"; else chip_name = "cyan_skillfish"; break; - case IP_VERSION(10, 1, 4): - chip_name = "cyan_skillfish2"; + case IP_VERSION(10, 3, 7): + chip_name = "gc_10_3_7"; break; default: BUG(); @@ -4559,7 +4646,9 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -4696,7 +4785,9 @@ static int gfx_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -4934,7 +5025,8 @@ static void gfx_v10_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff); @@ -6208,7 +6300,9 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER_Sienna_Cichlid, ring->doorbell_index); WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp); @@ -6345,7 +6439,9 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, 0); break; default: @@ -6359,7 +6455,9 @@ static void gfx_v10_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): WREG32_SOC15(GC, 0, mmCP_MEC_CNTL_Sienna_Cichlid, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); @@ -6457,6 +6555,7 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; @@ -7187,6 +7286,8 @@ static bool gfx_v10_0_check_grbm_cam_remapping(struct amdgpu_device *adev) break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): + case IP_VERSION(10, 3, 7): return true; default: data = RREG32_SOC15(GC, 0, mmVGT_ESGS_RING_SIZE); @@ -7221,7 +7322,9 @@ static void gfx_v10_0_setup_grbm_cam_remapping(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): /* mmVGT_TF_RING_SIZE_UMD -> mmVGT_TF_RING_SIZE */ data = (SOC15_REG_OFFSET(GC, 0, mmVGT_TF_RING_SIZE_UMD) << GRBM_CAM_DATA__CAM_ADDR__SHIFT) | @@ -7541,6 +7644,7 @@ static int gfx_v10_0_soft_reset(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY_Sienna_Cichlid)) grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, @@ -7608,6 +7712,21 @@ static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; + case IP_VERSION(10, 3, 6): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_GC_10_3_6); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_GC_10_3_6); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; default: preempt_disable(); clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER); @@ -7673,7 +7792,9 @@ static int gfx_v10_0_early_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.num_gfx_rings = GFX10_NUM_GFX_RINGS_Sienna_Cichlid; break; default: @@ -7734,6 +7855,7 @@ static void gfx_v10_0_set_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); @@ -7770,6 +7892,7 @@ static void gfx_v10_0_unset_safe_mode(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE_Sienna_Cichlid, data); break; @@ -8224,6 +8347,7 @@ static void gfx_v10_cntl_power_gating(struct amdgpu_device *adev, bool enable) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): data = 0x4E20 & RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK_Vangogh; WREG32_SOC15(GC, 0, mmRLC_PG_DELAY_3, data); break; @@ -8292,6 +8416,7 @@ static int gfx_v10_0_set_powergating_state(void *handle, break; case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 6): gfx_v10_cntl_pg(adev, enable); amdgpu_gfx_off_ctrl(adev, enable); break; @@ -8318,6 +8443,7 @@ static int gfx_v10_0_set_clockgating_state(void *handle, case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): gfx_v10_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -9431,7 +9557,9 @@ static void gfx_v10_0_set_rlc_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfx.rlc.funcs = &gfx_v10_0_rlc_funcs; break; case IP_VERSION(10, 1, 2): @@ -9524,7 +9652,9 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { bitmap = i * adev->gfx.config.max_sh_per_se + j; if (((adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 0)) || - (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3))) && + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 3)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6)) || + (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 7))) && ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1)) continue; mask = 1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 744253be5142..1997f129db9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2195,8 +2195,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) return err; } - strcpy(adev->gfx.ras->ras_block.name,"gfx"); - adev->gfx.ras->ras_block.block = AMDGPU_RAS_BLOCK__GFX; + strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx"); + adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX; + adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm; /* If not define special ras_late_init function, use gfx default ras_late_init */ if (!adev->gfx.ras->ras_block.ras_late_init) @@ -2205,6 +2207,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) /* If not define special ras_fini function, use gfx default ras_fini */ if (!adev->gfx.ras->ras_block.ras_fini) adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->gfx.ras->ras_block.ras_cb) + adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb; } adev->gfx.config.gb_addr_config = gb_addr_config; @@ -4785,12 +4791,6 @@ static int gfx_v9_0_ecc_late_init(void *handle) if (r) return r; - if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_late_init) { - r = adev->gfx.ras->ras_block.ras_late_init(adev, NULL); - if (r) - return r; - } - if (adev->gfx.ras && adev->gfx.ras->enable_watchdog_timer) adev->gfx.ras->enable_watchdog_timer(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index c64e3a391c99..e7add2020d48 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -672,8 +672,10 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) if (adev->umc.ras) { amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - strcpy(adev->umc.ras->ras_block.name, "umc"); - adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; /* If don't define special ras_late_init function, use default ras_late_init */ if (!adev->umc.ras->ras_block.ras_late_init) @@ -682,6 +684,10 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; } } @@ -691,6 +697,7 @@ static void gmc_v10_0_set_mmhub_funcs(struct amdgpu_device *adev) switch (adev->ip_versions[MMHUB_HWIP][0]) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): + case IP_VERSION(2, 4, 1): adev->mmhub.funcs = &mmhub_v2_3_funcs; break; default: @@ -707,7 +714,9 @@ static void gmc_v10_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->gfxhub.funcs = &gfxhub_v2_1_funcs; break; default: @@ -887,7 +896,9 @@ static int gmc_v10_0_sw_init(void *handle) case IP_VERSION(10, 3, 1): case IP_VERSION(10, 3, 4): case IP_VERSION(10, 3, 5): + case IP_VERSION(10, 3, 6): case IP_VERSION(10, 3, 3): + case IP_VERSION(10, 3, 7): adev->num_vmhubs = 2; /* * To fulfill 4-level page support, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4595027a8c63..412e44af1608 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1232,8 +1232,10 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) if (adev->umc.ras) { amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); - strcpy(adev->umc.ras->ras_block.name, "umc"); - adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; + strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); + adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; /* If don't define special ras_late_init function, use default ras_late_init */ if (!adev->umc.ras->ras_block.ras_late_init) @@ -1242,6 +1244,10 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; } } @@ -1280,12 +1286,10 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) if (adev->mmhub.ras) { amdgpu_ras_register_ras_block(adev, &adev->mmhub.ras->ras_block); - strcpy(adev->mmhub.ras->ras_block.name,"mmhub"); - adev->mmhub.ras->ras_block.block = AMDGPU_RAS_BLOCK__MMHUB; - - /* If don't define special ras_late_init function, use default ras_late_init */ - if (!adev->mmhub.ras->ras_block.ras_late_init) - adev->mmhub.ras->ras_block.ras_late_init = amdgpu_mmhub_ras_late_init; + strcpy(adev->mmhub.ras->ras_block.ras_comm.name, "mmhub"); + adev->mmhub.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__MMHUB; + adev->mmhub.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->mmhub.ras_if = &adev->mmhub.ras->ras_block.ras_comm; /* If don't define special ras_fini function, use default ras_fini */ if (!adev->mmhub.ras->ras_block.ras_fini) @@ -1302,6 +1306,7 @@ static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) { adev->hdp.ras = &hdp_v4_0_ras; amdgpu_ras_register_ras_block(adev, &adev->hdp.ras->ras_block); + adev->hdp.ras_if = &adev->hdp.ras->ras_block.ras_comm; } static void gmc_v9_0_set_mca_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 6b41fcbf4875..d7811e0327cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -157,10 +157,12 @@ struct amdgpu_ras_block_hw_ops hdp_v4_0_ras_hw_ops = { struct amdgpu_hdp_ras hdp_v4_0_ras = { .ras_block = { - .name = "hdp", - .block = AMDGPU_RAS_BLOCK__HDP, + .ras_comm = { + .name = "hdp", + .block = AMDGPU_RAS_BLOCK__HDP, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, .hw_ops = &hdp_v4_0_ras_hw_ops, - .ras_late_init = amdgpu_hdp_ras_late_init, .ras_fini = amdgpu_hdp_ras_fini, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index 68565262af9c..b4b36899f5c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -37,11 +37,6 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev, void *ras_info) -{ - return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); -} - static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) { amdgpu_mca_ras_fini(adev, &adev->mca.mp0); @@ -53,8 +48,8 @@ static int mca_v3_0_ras_block_match(struct amdgpu_ras_block_object *block_obj, if (!block_obj) return -EINVAL; - if ((block_obj->block == block) && - (block_obj->sub_block_index == sub_block_index)) { + if ((block_obj->ras_comm.block == block) && + (block_obj->ras_comm.sub_block_index == sub_block_index)) { return 0; } @@ -68,12 +63,14 @@ const struct amdgpu_ras_block_hw_ops mca_v3_0_mp0_hw_ops = { struct amdgpu_mca_ras_block mca_v3_0_mp0_ras = { .ras_block = { - .block = AMDGPU_RAS_BLOCK__MCA, - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0, - .name = "mp0", + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP0, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mp0", + }, .hw_ops = &mca_v3_0_mp0_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_late_init = mca_v3_0_mp0_ras_late_init, .ras_fini = mca_v3_0_mp0_ras_fini, }, }; @@ -86,11 +83,6 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev, void *ras_info) -{ - return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); -} - static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) { amdgpu_mca_ras_fini(adev, &adev->mca.mp1); @@ -103,12 +95,14 @@ const struct amdgpu_ras_block_hw_ops mca_v3_0_mp1_hw_ops = { struct amdgpu_mca_ras_block mca_v3_0_mp1_ras = { .ras_block = { - .block = AMDGPU_RAS_BLOCK__MCA, - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1, - .name = "mp1", + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MP1, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mp1", + }, .hw_ops = &mca_v3_0_mp1_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_late_init = mca_v3_0_mp1_ras_late_init, .ras_fini = mca_v3_0_mp1_ras_fini, }, }; @@ -121,11 +115,6 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev, void *ras_info) -{ - return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); -} - static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) { amdgpu_mca_ras_fini(adev, &adev->mca.mpio); @@ -138,12 +127,14 @@ const struct amdgpu_ras_block_hw_ops mca_v3_0_mpio_hw_ops = { struct amdgpu_mca_ras_block mca_v3_0_mpio_ras = { .ras_block = { - .block = AMDGPU_RAS_BLOCK__MCA, - .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO, - .name = "mpio", + .ras_comm = { + .block = AMDGPU_RAS_BLOCK__MCA, + .sub_block_index = AMDGPU_RAS_MCA_BLOCK__MPIO, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + .name = "mpio", + }, .hw_ops = &mca_v3_0_mpio_hw_ops, .ras_block_match = mca_v3_0_ras_block_match, - .ras_late_init = mca_v3_0_mpio_ras_late_init, .ras_fini = mca_v3_0_mpio_ras_fini, }, }; @@ -159,6 +150,9 @@ static void mca_v3_0_init(struct amdgpu_device *adev) amdgpu_ras_register_ras_block(adev, &mca->mp0.ras->ras_block); amdgpu_ras_register_ras_block(adev, &mca->mp1.ras->ras_block); amdgpu_ras_register_ras_block(adev, &mca->mpio.ras->ras_block); + mca->mp0.ras_if = &mca->mp0.ras->ras_block.ras_comm; + mca->mp1.ras_if = &mca->mp1.ras->ras_block.ras_comm; + mca->mpio.ras_if = &mca->mpio.ras->ras_block.ras_comm; } const struct amdgpu_mca_funcs mca_v3_0_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 9e16da28505a..1957fb098c4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -93,6 +93,7 @@ mmhub_v2_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (adev->ip_versions[MMHUB_HWIP][0]) { case IP_VERSION(2, 3, 0): case IP_VERSION(2, 4, 0): + case IP_VERSION(2, 4, 1): mmhub_cid = mmhub_client_ids_vangogh[cid][rw]; break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 39974b449341..14768570c298 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -664,8 +664,11 @@ const struct amdgpu_ras_block_hw_ops nbio_v7_4_ras_hw_ops = { struct amdgpu_nbio_ras nbio_v7_4_ras = { .ras_block = { - .name = "pcie_bif", - .block = AMDGPU_RAS_BLOCK__PCIE_BIF, + .ras_comm = { + .name = "pcie_bif", + .block = AMDGPU_RAS_BLOCK__PCIE_BIF, + .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE, + }, .hw_ops = &nbio_v7_4_ras_hw_ops, .ras_late_init = amdgpu_nbio_ras_late_init, .ras_fini = amdgpu_nbio_ras_fini, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 5e9ab31fee6b..681180c4bbe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -522,7 +522,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && @@ -637,7 +637,8 @@ static int nv_update_umd_stable_pstate(struct amdgpu_device *adev, adev->gfx.funcs->update_perfmon_mgcg(adev, !enter); if (!(adev->flags & AMD_IS_APU) && - (adev->nbio.funcs->enable_aspm)) + (adev->nbio.funcs->enable_aspm) && + amdgpu_device_should_use_aspm(adev)) adev->nbio.funcs->enable_aspm(adev, !enter); return 0; @@ -907,6 +908,34 @@ static int nv_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = adev->rev_id + 0x82; break; + case IP_VERSION(10, 3, 6): + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG; + adev->pg_flags = AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0x01; + break; + case IP_VERSION(10, 3, 7): + adev->cg_flags = 0; + adev->pg_flags = AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_JPEG; + adev->external_rev_id = adev->rev_id + 0x01; + break; default: /* FIXME: not supported yet */ return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 17655bc6d2f1..17160849a811 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -34,6 +34,9 @@ MODULE_FIRMWARE("amdgpu/aldebaran_ta.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_asd.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_toc.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_8_asd.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_8_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_8_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -55,6 +58,9 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 3): chip_name = "yellow_carp"; break; + case IP_VERSION(13, 0, 8): + chip_name = "psp_13_0_8"; + break; default: BUG(); } @@ -69,6 +75,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 8): err = psp_init_asd_microcode(psp, chip_name); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 02115d63b071..e26c39fcd336 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1885,9 +1885,6 @@ static int sdma_v4_0_process_ras_data_cb(struct amdgpu_device *adev, static int sdma_v4_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct ras_ih_if ih_info = { - .cb = sdma_v4_0_process_ras_data_cb, - }; sdma_v4_0_setup_ulv(adev); @@ -1897,10 +1894,7 @@ static int sdma_v4_0_late_init(void *handle) adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev); } - if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init) - return adev->sdma.ras->ras_block.ras_late_init(adev, &ih_info); - else - return 0; + return 0; } static int sdma_v4_0_sw_init(void *handle) @@ -2802,6 +2796,7 @@ const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = { static struct amdgpu_sdma_ras sdma_v4_0_ras = { .ras_block = { .hw_ops = &sdma_v4_0_ras_hw_ops, + .ras_cb = sdma_v4_0_process_ras_data_cb, }, }; @@ -2822,8 +2817,10 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) if (adev->sdma.ras) { amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block); - strcpy(adev->sdma.ras->ras_block.name, "sdma"); - adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA; + strcpy(adev->sdma.ras->ras_block.ras_comm.name, "sdma"); + adev->sdma.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__SDMA; + adev->sdma.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->sdma.ras_if = &adev->sdma.ras->ras_block.ras_comm; /* If don't define special ras_late_init function, use default ras_late_init */ if (!adev->sdma.ras->ras_block.ras_late_init) @@ -2832,6 +2829,10 @@ static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->sdma.ras->ras_block.ras_fini) adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->sdma.ras->ras_block.ras_cb) + adev->sdma.ras->ras_block.ras_cb = amdgpu_sdma_process_ras_data_cb; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 45e10d5028c5..81e033549dda 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -264,8 +264,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) chip_name = "navi12"; break; case IP_VERSION(5, 0, 1): - if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2 || - adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 1, 4)) + if (adev->apu_flags & AMD_APU_IS_CYAN_SKILLFISH2) chip_name = "cyan_skillfish2"; else chip_name = "cyan_skillfish"; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index d3d6d5b045b8..0ca365006399 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -51,6 +51,7 @@ MODULE_FIRMWARE("amdgpu/beige_goby_sdma.bin"); MODULE_FIRMWARE("amdgpu/vangogh_sdma.bin"); MODULE_FIRMWARE("amdgpu/yellow_carp_sdma.bin"); +MODULE_FIRMWARE("amdgpu/sdma_5_2_7.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA3_REG_OFFSET 0x400 @@ -138,28 +139,32 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) switch (adev->ip_versions[SDMA0_HWIP][0]) { case IP_VERSION(5, 2, 0): - chip_name = "sienna_cichlid"; + chip_name = "sienna_cichlid_sdma"; break; case IP_VERSION(5, 2, 2): - chip_name = "navy_flounder"; + chip_name = "navy_flounder_sdma"; break; case IP_VERSION(5, 2, 1): - chip_name = "vangogh"; + chip_name = "vangogh_sdma"; break; case IP_VERSION(5, 2, 4): - chip_name = "dimgrey_cavefish"; + chip_name = "dimgrey_cavefish_sdma"; break; case IP_VERSION(5, 2, 5): - chip_name = "beige_goby"; + chip_name = "beige_goby_sdma"; break; case IP_VERSION(5, 2, 3): - chip_name = "yellow_carp"; + chip_name = "yellow_carp_sdma"; break; + case IP_VERSION(5, 2, 7): + chip_name = "sdma_5_2_7"; + break; + default: BUG(); } - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma.bin", chip_name); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s.bin", chip_name); err = request_firmware(&adev->sdma.instance[0].fw, fw_name, adev->dev); if (err) diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index e6d2f74a7976..7f99e130acd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2453,7 +2453,7 @@ static void si_program_aspm(struct amdgpu_device *adev) bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false; bool disable_clkreq = false; - if (amdgpu_aspm == 0) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a0235f75dbcb..35d2ebd7a02c 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -670,7 +670,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (!(adev->flags & AMD_IS_APU) && @@ -1081,8 +1081,11 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_VCN_MGCG; + /* + * MMHUB PG needs to be disabled for Picasso for + * stability reasons. + */ adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_VCN; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | @@ -1186,15 +1189,11 @@ static int soc15_common_early_init(void *handle) static int soc15_common_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r = 0; if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_late_init) - r = adev->nbio.ras->ras_block.ras_late_init(adev, NULL); - - return r; + return 0; } static int soc15_common_sw_init(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 87e4ef18e151..c45d9c14ecbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -87,8 +87,14 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev { uint64_t mc_umc_status; uint32_t eccinfo_table_idx; + uint32_t umc_reg_offset; + uint32_t mc_umc_addr; + uint64_t reg_value; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + umc_reg_offset = get_umc_v6_7_reg_offset(adev, + umc_inst, ch_inst); + eccinfo_table_idx = umc_inst * adev->umc.channel_inst_num + ch_inst; /* check the MCUMC_STATUS */ mc_umc_status = ras->umc_ecc.ecc[eccinfo_table_idx].mca_umc_status; @@ -97,8 +103,36 @@ static void umc_v6_7_ecc_info_querry_uncorrectable_error_count(struct amdgpu_dev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + } } static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev, @@ -168,11 +202,13 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); /* shift R14 bit */ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); } @@ -251,6 +287,8 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev { uint64_t mc_umc_status; uint32_t mc_umc_status_addr; + uint32_t mc_umc_addr; + uint64_t reg_value; mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0); @@ -262,8 +300,36 @@ static void umc_v6_7_querry_uncorrectable_error_count(struct amdgpu_device *adev REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 || REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 || - REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) + REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1)) { *error_count += 1; + + if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1) + dev_info(adev->dev, "Deferred error, no user action is needed.\n"); + + if (mc_umc_status) + dev_info(adev->dev, "MCA STATUS 0x%llx, umc_reg_offset 0x%x\n", mc_umc_status, umc_reg_offset); + + /* print IPID registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_IPIDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA IPID 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print SYND registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_SYNDT0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA SYND 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + + /* print MISC0 registers value */ + mc_umc_addr = + SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_MISC0T0); + reg_value = RREG64_PCIE((mc_umc_addr + umc_reg_offset) * 4); + if (reg_value) + dev_info(adev->dev, "MCA MISC0 0x%llx, umc_reg_offset 0x%x\n", reg_value, umc_reg_offset); + } } static void umc_v6_7_reset_error_count_per_channel(struct amdgpu_device *adev, @@ -403,11 +469,13 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, /* loop for all possibilities of [C4 C3 C2] */ for (column = 0; column < UMC_V6_7_NA_MAP_PA_NUM; column++) { retired_page = soc_pa | (column << UMC_V6_7_PA_C2_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); /* shift R14 bit */ retired_page ^= (0x1ULL << UMC_V6_7_PA_R14_BIT); + dev_info(adev->dev, "Error Address(PA): 0x%llx\n", retired_page); amdgpu_umc_fill_error_record(err_data, err_addr, retired_page, channel_index, umc_inst); } diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 6645ebbd2696..039b90cdc3bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1140,7 +1140,7 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (!amdgpu_aspm) + if (!amdgpu_device_should_use_aspm(adev)) return; if (adev->flags & AMD_IS_APU || |