diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
115 files changed, 1768 insertions, 673 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6fde9a9d5e2b..6bf6cfaea3f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -167,6 +167,11 @@ amdgpu-y += \ athub_v2_0.o \ athub_v2_1.o +# add SMUIO block +amdgpu-y += \ + smuio_v9_0.o \ + smuio_v11_0.o + # add amdkfd interfaces amdgpu-y += amdgpu_amdkfd.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6eceef23d838..f9c81bc21ba4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -106,6 +106,7 @@ #include "amdgpu_mmhub.h" #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" +#include "amdgpu_smuio.h" #define MAX_GPU_INSTANCE 16 @@ -193,9 +194,9 @@ extern int sched_policy; extern bool debug_evictions; extern bool no_system_mem_limit; #else -static const int sched_policy = KFD_SCHED_POLICY_HWS; -static const bool debug_evictions; /* = false */ -static const bool no_system_mem_limit; +static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS; +static const bool __maybe_unused debug_evictions; /* = false */ +static const bool __maybe_unused no_system_mem_limit; #endif extern int amdgpu_tmz; @@ -920,6 +921,9 @@ struct amdgpu_device { /* nbio */ struct amdgpu_nbio nbio; + /* smuio */ + struct amdgpu_smuio smuio; + /* mmhub */ struct amdgpu_mmhub mmhub; @@ -1309,9 +1313,11 @@ int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, struct amdgpu_dm_backlight_caps *caps); +bool amdgpu_acpi_is_s0ix_supported(void); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } +static inline bool amdgpu_acpi_is_s0ix_supported(void) { return false; } #endif int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 1400957034a1..b8655ff73a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -176,7 +176,7 @@ static struct device *get_mfd_cell_dev(const char *device_name, int r) /** * acp_hw_init - start and test ACP block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * */ static int acp_hw_init(void *handle) @@ -405,7 +405,7 @@ failure: /** * acp_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * */ static int acp_hw_fini(void *handle) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 165b02e267b0..4f4fda53c08a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -27,6 +27,7 @@ #include <linux/power_supply.h> #include <linux/pm_runtime.h> #include <acpi/video.h> +#include <acpi/actbl.h> #include <drm/drm_crtc_helper.h> #include "amdgpu.h" @@ -894,3 +895,16 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) unregister_acpi_notifier(&adev->acpi_nb); kfree(adev->atif); } + +/** + * amdgpu_acpi_is_s0ix_supported + * + * returns true if supported, false if not. 
+ */ +bool amdgpu_acpi_is_s0ix_supported(void) +{ + if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) + return true; + + return false; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 7d6c0013af35..b43e68fc1378 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -37,6 +37,7 @@ #include "soc15.h" #include "soc15d.h" #include "gfx_v9_0.h" +#include "amdgpu_amdkfd_gfx_v9.h" enum hqd_dequeue_request_type { NO_ACTION = 0, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1755386470e6..7791d074bd32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2043,6 +2043,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) int ret = 0, i; struct list_head duplicate_save; struct amdgpu_sync sync_obj; + unsigned long failed_size = 0; + unsigned long total_size = 0; INIT_LIST_HEAD(&duplicate_save); INIT_LIST_HEAD(&ctx.list); @@ -2099,10 +2101,18 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) uint32_t domain = mem->domain; struct kfd_bo_va_list *bo_va_entry; + total_size += amdgpu_bo_size(bo); + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); if (ret) { - pr_debug("Memory eviction: Validate BOs failed. 
Try again\n"); - goto validate_map_fail; + pr_debug("Memory eviction: Validate BOs failed\n"); + failed_size += amdgpu_bo_size(bo); + ret = amdgpu_amdkfd_bo_validate(bo, + AMDGPU_GEM_DOMAIN_GTT, false); + if (ret) { + pr_debug("Memory eviction: Try again\n"); + goto validate_map_fail; + } } ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { @@ -2122,6 +2132,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) } } + if (failed_size) + pr_debug("0x%lx/0x%lx in system\n", failed_size, total_size); + /* Update page directories */ ret = process_update_pds(process_info, &sync_obj); if (ret) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8d2878e950da..594a0108e90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -676,6 +676,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number + * @backoff: indicator to backoff the reservation * * If error is set than unvalidate buffer, otherwise just free memory * used by parsing context. @@ -1644,6 +1645,7 @@ err_free_fences: * @parser: command submission parser context * @addr: VM address * @bo: resulting BO of the mapping found + * @map: Placeholder to return found BO mapping * * Search the buffer objects in the command submission context for a certain * virtual memory address. 
Returns allocation structure when found, NULL diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 08047bc4d588..da21e60bb827 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -37,10 +37,9 @@ uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, u32 domain, uint32_t size) { - int r; void *ptr; - r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, domain, bo, NULL, &ptr); if (!*bo) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index 5c1f3725c741..a6667a2ca0db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -300,7 +300,7 @@ end: return result; } -/** +/* * amdgpu_debugfs_regs_read - Callback for reading MMIO registers */ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, @@ -309,7 +309,7 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf, return amdgpu_debugfs_process_reg_op(true, f, buf, size, pos); } -/** +/* * amdgpu_debugfs_regs_write - Callback for writing MMIO registers */ static ssize_t amdgpu_debugfs_regs_write(struct file *f, const char __user *buf, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 7560b05e4ac1..79dd85f71fab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -585,6 +585,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) * @adev: amdgpu_device pointer * @pcie_index: mmio register offset * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ @@ -615,6 +616,7 @@ u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev, * 
@adev: amdgpu_device pointer * @pcie_index: mmio register offset * @pcie_data: mmio register offset + * @reg_addr: indirect register address to read from * * Returns the value of indirect register @reg_addr */ @@ -2648,8 +2650,10 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev)) { + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + } for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) @@ -3342,7 +3346,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ - vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); + if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vga_client_register(adev->pdev, adev, NULL, amdgpu_device_vga_set_decode); if (amdgpu_device_supports_boco(ddev)) boco = true; @@ -3601,7 +3606,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev) vga_switcheroo_unregister_client(adev->pdev); if (amdgpu_device_supports_boco(adev_to_drm(adev))) vga_switcheroo_fini_domain_pm_ops(adev->dev); - vga_client_register(adev->pdev, NULL, NULL, NULL); + if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + vga_client_register(adev->pdev, NULL, NULL, NULL); if (adev->rio_mem) pci_iounmap(adev->pdev, adev->rio_mem); adev->rio_mem = NULL; @@ -3704,8 +3710,10 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_device_ip_suspend_phase2(adev); - + if (!amdgpu_acpi_is_s0ix_supported() || amdgpu_in_reset(adev)) + r = amdgpu_device_ip_suspend_phase2(adev); + else + amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry); /* 
evict remaining vram memory * This second call to evict vram is to evict the gart page table * using the CPU. @@ -3736,6 +3744,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + if (amdgpu_acpi_is_s0ix_supported()) + amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry); + /* post card */ if (amdgpu_device_need_post(adev)) { r = amdgpu_device_asic_init(adev); @@ -4857,7 +4868,7 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && ras->supported) + if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); return amdgpu_dpm_baco_enter(adev); @@ -4876,7 +4887,7 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && ras->supported) + if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index bfb95143ba5e..b2dbcb4df020 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -128,6 +128,7 @@ static int hw_id_map[MAX_HWIP] = { [NBIF_HWIP] = NBIF_HWID, [THM_HWIP] = THM_HWID, [CLK_HWIP] = CLKA_HWID, + [UMC_HWIP] = UMC_HWID, }; static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 2e8a8b57639f..f764803c53a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -441,10 +441,6 @@ void amdgpu_display_print_display_setup(struct drm_device *dev) drm_connector_list_iter_end(&iter); } -/** - * amdgpu_display_ddc_probe - * - */ bool amdgpu_display_ddc_probe(struct 
amdgpu_connector *amdgpu_connector, bool use_aux) { @@ -509,7 +505,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, * to avoid hang caused by placement of scanout BO in GTT on certain * APUs. So force the BO placement to VRAM in case this architecture * will not allow USWC mappings. - * Also, don't allow GTT domain if the BO doens't have USWC falg set. + * Also, don't allow GTT domain if the BO doesn't have USWC flag set. */ if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) && amdgpu_bo_support_uswc(bo_flags) && @@ -526,6 +522,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, domain |= AMDGPU_GEM_DOMAIN_GTT; break; case CHIP_RENOIR: + case CHIP_VANGOGH: domain |= AMDGPU_GEM_DOMAIN_GTT; break; @@ -538,6 +535,146 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev, return domain; } +static const struct drm_format_info dcc_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, 
.block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 2, + .cpp = { 4, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 2, + .cpp = { 2, 0, }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info dcc_retile_formats[] = { + { .format = DRM_FORMAT_XRGB8888, .depth = 24, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR8888, .depth = 24, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_ABGR8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_BGRA8888, .depth = 32, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_XRGB2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_XBGR2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, + { .format = DRM_FORMAT_ARGB2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = 
DRM_FORMAT_ABGR2101010, .depth = 30, .num_planes = 3, + .cpp = { 4, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, + .has_alpha = true, }, + { .format = DRM_FORMAT_RGB565, .depth = 16, .num_planes = 3, + .cpp = { 2, 0, 0 }, .block_w = {1, 1, 1}, .block_h = {1, 1, 1}, .hsub = 1, .vsub = 1, }, +}; + +static const struct drm_format_info * +lookup_format_info(const struct drm_format_info formats[], + int num_formats, u32 format) +{ + int i; + + for (i = 0; i < num_formats; i++) { + if (formats[i].format == format) + return &formats[i]; + } + + return NULL; +} + +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier) +{ + if (!IS_AMD_FMT_MOD(modifier)) + return NULL; + + if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) + return lookup_format_info(dcc_retile_formats, + ARRAY_SIZE(dcc_retile_formats), + format); + + if (AMD_FMT_MOD_GET(DCC, modifier)) + return lookup_format_info(dcc_formats, ARRAY_SIZE(dcc_formats), + format); + + /* returning NULL will cause the default format structs to be used. */ + return NULL; +} + + +/* + * Tries to extract the renderable DCC offset from the opaque metadata attached + * to the buffer. + */ +static int +extract_render_dcc_offset(struct amdgpu_device *adev, + struct drm_gem_object *obj, + uint64_t *offset) +{ + struct amdgpu_bo *rbo; + int r = 0; + uint32_t metadata[10]; /* Something that fits a descriptor + header. */ + uint32_t size; + + rbo = gem_to_amdgpu_bo(obj); + r = amdgpu_bo_reserve(rbo, false); + + if (unlikely(r)) { + /* Don't show error message when returning -ERESTARTSYS */ + if (r != -ERESTARTSYS) + DRM_ERROR("Unable to reserve buffer: %d\n", r); + return r; + } + + r = amdgpu_bo_get_metadata(rbo, metadata, sizeof(metadata), &size, NULL); + amdgpu_bo_unreserve(rbo); + + if (r) + return r; + + /* + * The first word is the metadata version, and we need space for at least + * the version + pci vendor+device id + 8 words for a descriptor. 
+ */ + if (size < 40 || metadata[0] != 1) + return -EINVAL; + + if (adev->family >= AMDGPU_FAMILY_NV) { + /* resource word 6/7 META_DATA_ADDRESS{_LO} */ + *offset = ((u64)metadata[9] << 16u) | + ((metadata[8] & 0xFF000000u) >> 16); + } else { + /* resource word 5/7 META_DATA_ADDRESS */ + *offset = ((u64)metadata[9] << 8u) | + ((u64)(metadata[7] & 0x1FE0000u) << 23); + } + + return 0; +} + static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) { struct amdgpu_device *adev = drm_to_adev(afb->base.dev); @@ -553,6 +690,8 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) int pipe_xor_bits = 0; int bank_xor_bits = 0; int packers = 0; + int rb = 0; + int pipes = ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes); uint32_t dcc_offset = AMDGPU_TILING_GET(afb->tiling_flags, DCC_OFFSET_256B); switch (swizzle >> 2) { @@ -598,18 +737,17 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) if (has_xor) { switch (version) { case AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS: - pipe_xor_bits = min(block_size_bits - 8, - ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes)); + pipe_xor_bits = min(block_size_bits - 8, pipes); packers = min(block_size_bits - 8 - pipe_xor_bits, ilog2(adev->gfx.config.gb_addr_config_fields.num_pkrs)); break; case AMD_FMT_MOD_TILE_VER_GFX10: - pipe_xor_bits = min(block_size_bits - 8, - ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes)); + pipe_xor_bits = min(block_size_bits - 8, pipes); break; case AMD_FMT_MOD_TILE_VER_GFX9: - pipe_xor_bits = min(block_size_bits - 8, - ilog2(adev->gfx.config.gb_addr_config_fields.num_pipes) + + rb = ilog2(adev->gfx.config.gb_addr_config_fields.num_se) + + ilog2(adev->gfx.config.gb_addr_config_fields.num_rb_per_se); + pipe_xor_bits = min(block_size_bits - 8, pipes + ilog2(adev->gfx.config.gb_addr_config_fields.num_se)); bank_xor_bits = min(block_size_bits - 8 - pipe_xor_bits, ilog2(adev->gfx.config.gb_addr_config_fields.num_banks)); @@ -627,6 
+765,8 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) if (dcc_offset != 0) { bool dcc_i64b = AMDGPU_TILING_GET(afb->tiling_flags, DCC_INDEPENDENT_64B) != 0; bool dcc_i128b = version >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS; + const struct drm_format_info *format_info; + u64 render_dcc_offset; /* Enable constant encode on RAVEN2 and later. */ bool dcc_constant_encode = adev->asic_type > CHIP_RAVEN || @@ -644,7 +784,51 @@ static int convert_tiling_flags_to_modifier(struct amdgpu_framebuffer *afb) AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, max_cblock_size); afb->base.offsets[1] = dcc_offset * 256 + afb->base.offsets[0]; - afb->base.pitches[1] = AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1; + afb->base.pitches[1] = + AMDGPU_TILING_GET(afb->tiling_flags, DCC_PITCH_MAX) + 1; + + /* + * If the userspace driver uses retiling the tiling flags do not contain + * info on the renderable DCC buffer. Luckily the opaque metadata contains + * the info so we can try to extract it. The kernel does not use this info + * but we should convert it to a modifier plane for getfb2, so the + * userspace driver that gets it doesn't have to juggle around another DCC + * plane internally. 
+ */ + if (extract_render_dcc_offset(adev, afb->base.obj[0], + &render_dcc_offset) == 0 && + render_dcc_offset != 0 && + render_dcc_offset != afb->base.offsets[1] && + render_dcc_offset < UINT_MAX) { + uint32_t dcc_block_bits; /* of base surface data */ + + modifier |= AMD_FMT_MOD_SET(DCC_RETILE, 1); + afb->base.offsets[2] = render_dcc_offset; + + if (adev->family >= AMDGPU_FAMILY_NV) { + int extra_pipe = 0; + + if (adev->asic_type >= CHIP_SIENNA_CICHLID && + pipes == packers && pipes > 1) + extra_pipe = 1; + + dcc_block_bits = max(20, 16 + pipes + extra_pipe); + } else { + modifier |= AMD_FMT_MOD_SET(RB, rb) | + AMD_FMT_MOD_SET(PIPE, pipes); + dcc_block_bits = max(20, 18 + rb); + } + + dcc_block_bits -= ilog2(afb->base.format->cpp[0]); + afb->base.pitches[2] = ALIGN(afb->base.width, + 1u << ((dcc_block_bits + 1) / 2)); + } + format_info = amdgpu_lookup_format_info(afb->base.format->format, + modifier); + if (!format_info) + return -EINVAL; + + afb->base.format = format_info; } } @@ -691,13 +875,26 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { - int ret; + int ret, i; rfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &rfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs); if (ret) goto fail; + /* + * This needs to happen before modifier conversion as that might change + * the number of planes. + */ + for (i = 1; i < rfb->base.format->num_planes; ++i) { + if (mode_cmd->handles[i] != mode_cmd->handles[0]) { + drm_dbg_kms(dev, "Plane 0 and %d have different BOs: %u vs. 
%u\n", + i, mode_cmd->handles[0], mode_cmd->handles[i]); + ret = -EINVAL; + goto fail; + } + } + ret = amdgpu_display_get_fb_info(rfb, &rfb->tiling_flags, &rfb->tmz_surface); if (ret) goto fail; @@ -705,8 +902,16 @@ int amdgpu_display_framebuffer_init(struct drm_device *dev, if (dev->mode_config.allow_fb_modifiers && !(rfb->base.flags & DRM_MODE_FB_MODIFIERS)) { ret = convert_tiling_flags_to_modifier(rfb); - if (ret) + if (ret) { + drm_dbg_kms(dev, "Failed to convert tiling flags 0x%llX to a modifier", + rfb->tiling_flags); goto fail; + } + } + + for (i = 1; i < rfb->base.format->num_planes; ++i) { + rfb->base.obj[i] = rfb->base.obj[0]; + drm_gem_object_get(rfb->base.obj[i]); } return 0; @@ -727,14 +932,14 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev, obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (obj == NULL) { - dev_err(&dev->pdev->dev, "No GEM object associated to handle 0x%08X, " - "can't create framebuffer\n", mode_cmd->handles[0]); + drm_dbg_kms(dev, "No GEM object associated to handle 0x%08X, " + "can't create framebuffer\n", mode_cmd->handles[0]); return ERR_PTR(-ENOENT); } /* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */ if (obj->import_attach) { - DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n"); + drm_dbg_kms(dev, "Cannot create framebuffer from imported dma_buf\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 3620b24785e1..dc7b7d116549 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -44,5 +44,7 @@ struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd); +const struct drm_format_info * +amdgpu_lookup_format_info(u32 format, uint64_t modifier); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index e5919efca870..e42175e1acf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -424,6 +424,7 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_bo *bo; struct amdgpu_bo_param bp; + struct drm_gem_object *gobj; int ret; memset(&bp, 0, sizeof(bp)); @@ -434,17 +435,20 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) bp.type = ttm_bo_type_sg; bp.resv = resv; dma_resv_lock(resv, NULL); - ret = amdgpu_bo_create(adev, &bp, &bo); + ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_CPU, + 0, ttm_bo_type_sg, resv, &gobj); if (ret) goto error; + bo = gem_to_amdgpu_bo(gobj); bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; if (dma_buf->ops != &amdgpu_dmabuf_ops) bo->prime_shared_count = 1; dma_resv_unlock(resv); - return &bo->tbo.base; + return gobj; error: dma_resv_unlock(resv); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f02aeb7c0aae..31506a1678c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -305,7 +305,7 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); * Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down * the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality. 
*/ -MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); +MODULE_PARM_DESC(runpm, "PX runtime pm (2 = force enable with BAMACO, 1 = force enable with BACO, 0 = disable, -1 = PX only default)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); /** @@ -791,7 +791,7 @@ module_param_named(tmz, amdgpu_tmz, int, 0444); * DOC: reset_method (int) * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ -MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); module_param_named(reset_method, amdgpu_reset_method, int, 0444); /** @@ -1064,10 +1064,10 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, /* Arcturus */ - {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x738C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x7388, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x738E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, + {0x1002, 0x7390, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ARCTURUS}, /* Navi10 */ {0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, {0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10}, @@ -1101,6 +1101,18 @@ static const struct pci_device_id pciidlist[] = { /* Van Gogh */ {0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, + /* Navy_Flounder */ + {0x1002, 0x73C0, 
PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER}, + + /* DIMGREY_CAVEFISH */ + {0x1002, 0x73E0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH}, + {0, 0, 0} }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index fe2d495d08ab..d56f4023ebb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -130,6 +130,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring) * * @ring: ring the fence is associated with * @f: resulting fence object + * @flags: flags to pass into the subordinate .emit_fence() call * * Emits a fence command on the requested ring (all asics). * Returns 0 on success, -ENOMEM on failure. @@ -187,6 +188,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, * * @ring: ring the fence is associated with * @s: resulting sequence number + * @timeout: the timeout for waiting in usecs * * Emits a fence command on the requested ring (all asics). * Used For polling fence. @@ -294,7 +296,7 @@ bool amdgpu_fence_process(struct amdgpu_ring *ring) /** * amdgpu_fence_fallback - fallback for hardware interrupts * - * @work: delayed work item + * @t: timer context used to obtain the pointer to ring structure * * Checks for fence activity. 
*/ @@ -310,7 +312,6 @@ static void amdgpu_fence_fallback(struct timer_list *t) /** * amdgpu_fence_wait_empty - wait for all fences to signal * - * @adev: amdgpu device pointer * @ring: ring index the fence is associated with * * Wait for all fences on the requested ring to signal (all asics). @@ -639,7 +640,7 @@ static const char *amdgpu_fence_get_timeline_name(struct dma_fence *f) /** * amdgpu_fence_enable_signaling - enable signalling on fence - * @fence: fence + * @f: fence * * This function is called with fence_queue lock held, and adds a callback * to fence_queue that checks if this fence is signaled, and if so it @@ -675,7 +676,7 @@ static void amdgpu_fence_free(struct rcu_head *rcu) /** * amdgpu_fence_release - callback that fence can be freed * - * @fence: fence + * @f: fence * * This function is called when the reference count becomes zero. * It just RCU schedules freeing up the fence. @@ -740,7 +741,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) return 0; } -/** +/* * amdgpu_debugfs_gpu_recover - manually trigger a gpu reset & recover * * Manually trigger a gpu reset at the next fence wait. 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c index c6947d6c7ff5..7c6e02e35573 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fw_attestation.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include "amdgpu.h" +#include "amdgpu_fw_attestation.h" #include "amdgpu_psp.h" #include "amdgpu_ucode.h" #include "soc15_common.h" @@ -129,7 +130,6 @@ static int amdgpu_is_fw_attestation_supported(struct amdgpu_device *adev) void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) if (!amdgpu_is_fw_attestation_supported(adev)) return; @@ -138,5 +138,4 @@ void amdgpu_fw_attestation_debugfs_init(struct amdgpu_device *adev) adev_to_drm(adev)->primary->debugfs_root, adev, &amdgpu_fw_attestation_debugfs_ops); -#endif -}
\ No newline at end of file +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index e01e681d2a60..0db933026722 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -75,9 +75,9 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) if (adev->dummy_page_addr) return 0; - adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0, + adev->dummy_page_addr = dma_map_page(&adev->pdev->dev, dummy_page, 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); - if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) { + if (dma_mapping_error(&adev->pdev->dev, adev->dummy_page_addr)) { dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n"); adev->dummy_page_addr = 0; return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index c9f94fbeb018..d0a1fee1f5f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -70,26 +70,12 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, bp.type = type; bp.resv = resv; bp.preferred_domain = initial_domain; -retry: bp.flags = flags; bp.domain = initial_domain; r = amdgpu_bo_create(adev, &bp, &bo); - if (r) { - if (r != -ERESTARTSYS) { - if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { - flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - goto retry; - } - - if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { - initial_domain |= AMDGPU_GEM_DOMAIN_GTT; - goto retry; - } - DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n", - size, initial_domain, alignment, r); - } + if (r) return r; - } + *obj = &bo->tbo.base; (*obj)->funcs = &amdgpu_gem_object_funcs; @@ -239,7 +225,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, uint64_t size = args->in.bo_size; struct dma_resv *resv = NULL; struct drm_gem_object *gobj; - uint32_t handle; + uint32_t handle, initial_domain; int r; /* 
reject invalid gem flags */ @@ -283,9 +269,28 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, resv = vm->root.base.bo->tbo.base.resv; } +retry: + initial_domain = (u32)(0xffffffff & args->in.domains); r = amdgpu_gem_object_create(adev, size, args->in.alignment, - (u32)(0xffffffff & args->in.domains), + initial_domain, flags, ttm_bo_type_device, resv, &gobj); + if (r) { + if (r != -ERESTARTSYS) { + if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) { + flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + goto retry; + } + + if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) { + initial_domain |= AMDGPU_GEM_DOMAIN_GTT; + goto retry; + } + DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n", + size, initial_domain, args->in.alignment, r); + } + return r; + } + if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { if (!r) { struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index e584f48f3b54..cd2c676a2797 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -193,10 +193,14 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev) } bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue) + int pipe, int queue) { - /* Policy: make queue 0 of each pipe as high priority compute queue */ - return (queue == 0); + bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev); + int cond; + /* Policy: alternate between normal and high priority */ + cond = multipipe_policy ? 
pipe : queue; + + return ((cond % 2) != 0); } @@ -815,3 +819,23 @@ int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev) } return amdgpu_num_kcq; } + +/* amdgpu_gfx_state_change_set - Handle gfx power state change set + * @adev: amdgpu_device pointer + * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry) + * + */ + +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state) +{ + if (is_support_sw_smu(adev)) { + smu_gfx_state_change_set(&adev->smu, state); + } else { + mutex_lock(&adev->pm.mutex); + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->gfx_state_change_set) + ((adev)->powerplay.pp_funcs->gfx_state_change_set( + (adev)->powerplay.pp_handle, state)); + mutex_unlock(&adev->pm.mutex); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 786eb4aa7314..6b5a8f4642cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -47,6 +47,12 @@ enum gfx_pipe_priority { AMDGPU_GFX_PIPE_PRIO_MAX }; +/* Argument for PPSMC_MSG_GpuChangeState */ +enum gfx_change_state { + sGpuChangeState_D0Entry = 1, + sGpuChangeState_D3Entry, +}; + #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 @@ -374,7 +380,7 @@ void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit, bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, int pipe, int queue); bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev, - int queue); + int pipe, int queue); int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit, @@ -394,4 +400,5 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev, uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int 
amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev); +void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 056cb87d09ea..02af47ddddbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -61,9 +61,8 @@ void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, amdgpu_gmc_get_vm_pde(adev, level, addr, flags); } -/** +/* * amdgpu_gmc_pd_addr - return the address of the root directory - * */ uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) { @@ -112,7 +111,7 @@ int amdgpu_gmc_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr, /** * amdgpu_gmc_agp_addr - return the address in the AGP address space * - * @tbo: TTM BO which needs the address, must be in GTT domain + * @bo: TTM BO which needs the address, must be in GTT domain * * Tries to figure out how to access the BO through the AGP aperture. Returns * AMDGPU_BO_INVALID_OFFSET if that is not possible. @@ -422,12 +421,8 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) struct amdgpu_gmc *gmc = &adev->gmc; switch (adev->asic_type) { + case CHIP_VEGA10: case CHIP_VEGA20: - case CHIP_NAVI10: - case CHIP_NAVI14: - case CHIP_SIENNA_CICHLID: - case CHIP_NAVY_FLOUNDER: - case CHIP_DIMGREY_CAVEFISH: /* * noretry = 0 will cause kfd page fault tests fail * for some ASICs, so set default to 1 for these ASICs. 
@@ -506,6 +501,9 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev) else size = amdgpu_gmc_get_vbios_fb_size(adev); + if (adev->mman.keep_stolen_vga_memory) + size = max(size, (unsigned)AMDGPU_VBIOS_VGA_ALLOCATION); + /* set to 0 if the pre-OS buffer uses up most of vram */ if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) size = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 2f53fa0ae9a6..024d0a563a65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -52,8 +52,10 @@ /** * amdgpu_ib_get - request an IB (Indirect Buffer) * - * @ring: ring index the IB is associated with + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm pointer * @size: requested IB size + * @pool_type: IB pool type (delayed, immediate, direct) * @ib: IB object returned * * Request an IB (all asics). IBs are allocated using the @@ -101,9 +103,10 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, /** * amdgpu_ib_schedule - schedule an IB (Indirect Buffer) on the ring * - * @adev: amdgpu_device pointer + * @ring: ring index the IB is associated with * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule + * @job: job to schedule * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 6e9a9e5dbea0..94b069630db3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -208,7 +208,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) return amdgpu_sync_fence(sync, ring->vmid_wait); - fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); + fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); if (!fences) return -ENOMEM; @@ -259,6 +259,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, * @sync: sync object where we add dependencies * @fence: fence protecting ID from reuse * @job: job who wants to use the VMID + * @id: resulting VMID * * Try to assign a reserved VMID. */ @@ -514,6 +515,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, * amdgpu_vmid_reset - reset VMID to zero * * @adev: amdgpu device structure + * @vmhub: vmhub type * @vmid: vmid number to use * * Reset saved GDW, GWS and OA to force switch on next flush. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 111a301ce878..dcd9b4a8e20b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -132,6 +132,35 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) } /** + * amdgpu_ih_ring_write - write IV to the ring buffer + * + * @ih: ih ring to write to + * @iv: the iv to write + * @num_dw: size of the iv in dw + * + * Writes an IV to the ring buffer using the CPU and increment the wptr. + * Used for testing and delegating IVs to a software ring. 
+ */ +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, + unsigned int num_dw) +{ + uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2; + unsigned int i; + + for (i = 0; i < num_dw; ++i) + ih->ring[wptr++] = cpu_to_le32(iv[i]); + + wptr <<= 2; + wptr &= ih->ptr_mask; + + /* Only commit the new wptr if we don't overflow */ + if (wptr != READ_ONCE(ih->rptr)) { + wmb(); + WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr)); + } +} + +/** * amdgpu_ih_process - interrupt handler * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 4e0bb645176d..3c9cfe7eecff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -72,6 +72,8 @@ struct amdgpu_ih_funcs { int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, unsigned ring_size, bool use_bus_addr); void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, + unsigned int num_dw); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 300ac73b4738..bea57e8e793f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -207,6 +207,21 @@ static void amdgpu_irq_handle_ih2(struct work_struct *work) } /** + * amdgpu_irq_handle_ih_soft - kick of processing for ih_soft + * + * @work: work structure in struct amdgpu_irq + * + * Kick of processing IH soft ring. 
+ */ +static void amdgpu_irq_handle_ih_soft(struct work_struct *work) +{ + struct amdgpu_device *adev = container_of(work, struct amdgpu_device, + irq.ih_soft_work); + + amdgpu_ih_process(adev, &adev->irq.ih_soft); +} + +/** * amdgpu_msi_ok - check whether MSI functionality is enabled * * @adev: amdgpu device pointer (unused) @@ -281,6 +296,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev) INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1); INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2); + INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); adev->irq.installed = true; /* Use vector 0 for MSI-X */ @@ -413,6 +429,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, bool handled = false; int r; + entry.ih = ih; entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); @@ -451,6 +468,24 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } /** + * amdgpu_irq_delegate - delegate IV to soft IH ring + * + * @adev: amdgpu device pointer + * @entry: IV entry + * @num_dw: size of IV + * + * Delegate the IV to the soft IH ring and schedule processing of it. Used + * if the hardware delegation to IH1 or IH2 doesn't work for some reason. 
+ */ +void amdgpu_irq_delegate(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + unsigned int num_dw) +{ + amdgpu_ih_ring_write(&adev->irq.ih_soft, entry->iv_entry, num_dw); + schedule_work(&adev->irq.ih_soft_work); +} + +/** * amdgpu_irq_update - update hardware interrupt state * * @adev: amdgpu device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index c718e94a55c9..ac527e5deae6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -44,6 +44,7 @@ enum amdgpu_interrupt_state { }; struct amdgpu_iv_entry { + struct amdgpu_ih_ring *ih; unsigned client_id; unsigned src_id; unsigned ring_id; @@ -88,9 +89,9 @@ struct amdgpu_irq { bool msi_enabled; /* msi enabled */ /* interrupt rings */ - struct amdgpu_ih_ring ih, ih1, ih2; + struct amdgpu_ih_ring ih, ih1, ih2, ih_soft; const struct amdgpu_ih_funcs *ih_funcs; - struct work_struct ih1_work, ih2_work; + struct work_struct ih1_work, ih2_work, ih_soft_work; struct amdgpu_irq_src self_irq; /* gen irq stuff */ @@ -109,6 +110,9 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, struct amdgpu_irq_src *source); void amdgpu_irq_dispatch(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +void amdgpu_irq_delegate(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + unsigned int num_dw); int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type); int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4ad6d801bc25..fc12fc72366f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -162,10 +162,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) } else if (amdgpu_device_supports_baco(dev) && (amdgpu_runtime_pm != 0)) { switch (adev->asic_type) { -#ifdef 
CONFIG_DRM_AMDGPU_CIK - case CHIP_BONAIRE: - case CHIP_HAWAII: -#endif case CHIP_VEGA20: case CHIP_ARCTURUS: case CHIP_SIENNA_CICHLID: @@ -180,7 +176,7 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags) adev->runpm = true; break; default: - /* enable runpm on VI+ */ + /* enable runpm on CI+ */ adev->runpm = true; break; } @@ -474,7 +470,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, /** * amdgpu_info_ioctl - answer a device specific request. * - * @adev: amdgpu device pointer + * @dev: drm device pointer * @data: request object * @filp: drm filp * @@ -720,38 +716,42 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return n ? -EFAULT : 0; } case AMDGPU_INFO_DEV_INFO: { - struct drm_amdgpu_info_device dev_info; + struct drm_amdgpu_info_device *dev_info; uint64_t vm_size; + int ret; - memset(&dev_info, 0, sizeof(dev_info)); - dev_info.device_id = dev->pdev->device; - dev_info.chip_rev = adev->rev_id; - dev_info.external_rev = adev->external_rev_id; - dev_info.pci_rev = dev->pdev->revision; - dev_info.family = adev->family; - dev_info.num_shader_engines = adev->gfx.config.max_shader_engines; - dev_info.num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; + dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); + if (!dev_info) + return -ENOMEM; + + dev_info->device_id = dev->pdev->device; + dev_info->chip_rev = adev->rev_id; + dev_info->external_rev = adev->external_rev_id; + dev_info->pci_rev = dev->pdev->revision; + dev_info->family = adev->family; + dev_info->num_shader_engines = adev->gfx.config.max_shader_engines; + dev_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se; /* return all clocks in KHz */ - dev_info.gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; + dev_info->gpu_counter_freq = amdgpu_asic_get_xclk(adev) * 10; if (adev->pm.dpm_enabled) { - dev_info.max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; - dev_info.max_memory_clock = 
amdgpu_dpm_get_mclk(adev, false) * 10; + dev_info->max_engine_clock = amdgpu_dpm_get_sclk(adev, false) * 10; + dev_info->max_memory_clock = amdgpu_dpm_get_mclk(adev, false) * 10; } else { - dev_info.max_engine_clock = adev->clock.default_sclk * 10; - dev_info.max_memory_clock = adev->clock.default_mclk * 10; + dev_info->max_engine_clock = adev->clock.default_sclk * 10; + dev_info->max_memory_clock = adev->clock.default_mclk * 10; } - dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; - dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * + dev_info->enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; + dev_info->num_rb_pipes = adev->gfx.config.max_backends_per_se * adev->gfx.config.max_shader_engines; - dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; - dev_info._pad = 0; - dev_info.ids_flags = 0; + dev_info->num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; + dev_info->_pad = 0; + dev_info->ids_flags = 0; if (adev->flags & AMD_IS_APU) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION; if (amdgpu_is_tmz(adev)) - dev_info.ids_flags |= AMDGPU_IDS_FLAGS_TMZ; + dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_SIZE; @@ -761,45 +761,47 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); - dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; - dev_info.virtual_address_max = + dev_info->virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; + dev_info->virtual_address_max = min(vm_size, AMDGPU_GMC_HOLE_START); if (vm_size > AMDGPU_GMC_HOLE_START) { - dev_info.high_va_offset = AMDGPU_GMC_HOLE_END; - dev_info.high_va_max = 
AMDGPU_GMC_HOLE_END | vm_size; + dev_info->high_va_offset = AMDGPU_GMC_HOLE_END; + dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size; } - dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); - dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; - dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; - dev_info.cu_active_number = adev->gfx.cu_info.number; - dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; - dev_info.ce_ram_size = adev->gfx.ce_ram_size; - memcpy(&dev_info.cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], + dev_info->virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); + dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; + dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE; + dev_info->cu_active_number = adev->gfx.cu_info.number; + dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; + dev_info->ce_ram_size = adev->gfx.ce_ram_size; + memcpy(&dev_info->cu_ao_bitmap[0], &adev->gfx.cu_info.ao_cu_bitmap[0], sizeof(adev->gfx.cu_info.ao_cu_bitmap)); - memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], + memcpy(&dev_info->cu_bitmap[0], &adev->gfx.cu_info.bitmap[0], sizeof(adev->gfx.cu_info.bitmap)); - dev_info.vram_type = adev->gmc.vram_type; - dev_info.vram_bit_width = adev->gmc.vram_width; - dev_info.vce_harvest_config = adev->vce.harvest_config; - dev_info.gc_double_offchip_lds_buf = + dev_info->vram_type = adev->gmc.vram_type; + dev_info->vram_bit_width = adev->gmc.vram_width; + dev_info->vce_harvest_config = adev->vce.harvest_config; + dev_info->gc_double_offchip_lds_buf = adev->gfx.config.double_offchip_lds_buf; - dev_info.wave_front_size = adev->gfx.cu_info.wave_front_size; - dev_info.num_shader_visible_vgprs = adev->gfx.config.max_gprs; - dev_info.num_cu_per_sh = adev->gfx.config.max_cu_per_sh; - dev_info.num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; - dev_info.gs_vgt_table_depth = 
adev->gfx.config.gs_vgt_table_depth; - dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; - dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; + dev_info->wave_front_size = adev->gfx.cu_info.wave_front_size; + dev_info->num_shader_visible_vgprs = adev->gfx.config.max_gprs; + dev_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh; + dev_info->num_tcc_blocks = adev->gfx.config.max_texture_channel_caches; + dev_info->gs_vgt_table_depth = adev->gfx.config.gs_vgt_table_depth; + dev_info->gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth; + dev_info->max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads; if (adev->family >= AMDGPU_FAMILY_NV) - dev_info.pa_sc_tile_steering_override = + dev_info->pa_sc_tile_steering_override = adev->gfx.config.pa_sc_tile_steering_override; - dev_info.tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; + dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; - return copy_to_user(out, &dev_info, - min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0; + ret = copy_to_user(out, dev_info, + min((size_t)size, sizeof(*dev_info))) ? -EFAULT : 0; + kfree(dev_info); + return ret; } case AMDGPU_INFO_VCE_CLOCK_TABLE: { unsigned i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index c6c9723d3d8a..25ec4d57333f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -1075,23 +1075,6 @@ int amdgpu_bo_init(struct amdgpu_device *adev) } /** - * amdgpu_bo_late_init - late init - * @adev: amdgpu device object - * - * Calls amdgpu_ttm_late_init() to free resources used earlier during - * initialization. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_late_init(struct amdgpu_device *adev) -{ - amdgpu_ttm_late_init(adev); - - return 0; -} - -/** * amdgpu_bo_fini - tear down memory manager * @adev: amdgpu device object * @@ -1518,7 +1501,7 @@ uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, } while (0) /** - * amdgpu_debugfs_print_bo_info - print BO info in debugfs file + * amdgpu_bo_print_info - print BO info in debugfs file * * @id: Index or Id of the BO * @bo: Requested BO for printing info diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index ed47cbac4f75..79120ec41396 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -268,7 +268,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, void amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); -int amdgpu_bo_late_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c index 1f2305b7bd13..f2e20666c9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c @@ -102,11 +102,12 @@ static void amdgpu_pll_get_fb_ref_div(unsigned nom, unsigned den, unsigned post_ * amdgpu_pll_compute - compute PLL paramaters * * @pll: information about the PLL + * @freq: requested frequency * @dot_clock_p: resulting pixel clock - * fb_div_p: resulting feedback divider - * frac_fb_div_p: fractional part of the feedback divider - * ref_div_p: resulting reference divider - * post_div_p: resulting reference divider + * @fb_div_p: resulting feedback divider + * @frac_fb_div_p: fractional part of the feedback divider + * @ref_div_p: resulting reference divider + * @post_div_p: resulting reference divider * 
* Try to calculate the PLL parameters to generate the given frequency: * dot_clock = (ref_freq * feedback_div) / (ref_div * post_div) @@ -308,7 +309,6 @@ int amdgpu_pll_get_shared_dp_ppll(struct drm_crtc *crtc) * amdgpu_pll_get_shared_nondp_ppll - return the PPLL used by another non-DP crtc * * @crtc: drm crtc - * @encoder: drm encoder * * Returns the PPLL (Pixel PLL) used by another non-DP crtc/encoder which can * be shared (i.e., same clock). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 2b0a2b93994b..523d22db094b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1925,7 +1925,8 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; - if (amdgpu_in_reset(adev) && ras && ras->supported) { + if (amdgpu_in_reset(adev) && ras && ras->supported && + adev->asic_type == CHIP_ARCTURUS) { ret = amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_UNLOAD); if (ret) { DRM_WARN("Failed to set MP1 state prepare for reload\n"); @@ -2573,9 +2574,9 @@ out: return err; } -int parse_ta_bin_descriptor(struct psp_context *psp, - const struct ta_fw_bin_desc *desc, - const struct ta_firmware_header_v2_0 *ta_hdr) +static int parse_ta_bin_descriptor(struct psp_context *psp, + const struct ta_fw_bin_desc *desc, + const struct ta_firmware_header_v2_0 *ta_hdr) { uint8_t *ucode_start_addr = NULL; @@ -2631,7 +2632,7 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) { struct amdgpu_device *adev = psp->adev; - char fw_name[30]; + char fw_name[PSP_FW_NAME_LEN]; const struct ta_firmware_header_v2_0 *ta_hdr; int err = 0; int ta_index = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b96267068a72..c136bd449744 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -905,13 +905,6 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, return ret; } -int 
amdgpu_ras_error_cure(struct amdgpu_device *adev, - struct ras_cure_if *info) -{ - /* psp fw has no cure interface for now. */ - return 0; -} - /* get the total error counts on all IPs */ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, bool is_ce) @@ -1174,7 +1167,7 @@ static void amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) con->dir, &con->disable_ras_err_cnt_harvest); } -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, struct ras_fs_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1196,7 +1189,6 @@ void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; struct ras_fs_if fs_info; @@ -1205,7 +1197,7 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) * it won't be called in resume path, no need to check * suspend and gpu reset status */ - if (!con) + if (!IS_ENABLED(CONFIG_DEBUG_FS) || !con) return; amdgpu_ras_debugfs_create_ctrl_node(adev); @@ -1219,10 +1211,9 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) amdgpu_ras_debugfs_create(adev, &fs_info); } } -#endif } -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, +static void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, struct ras_common_if *head) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); @@ -1236,7 +1227,6 @@ void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) { -#if defined(CONFIG_DEBUG_FS) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; @@ -1245,7 +1235,6 @@ static void amdgpu_ras_debugfs_remove_all(struct amdgpu_device *adev) } con->dir = NULL; -#endif } /* debugfs end */ @@ -1293,7 +1282,8 @@ static int 
amdgpu_ras_fs_init(struct amdgpu_device *adev) static int amdgpu_ras_fs_fini(struct amdgpu_device *adev) { - amdgpu_ras_debugfs_remove_all(adev); + if (IS_ENABLED(CONFIG_DEBUG_FS)) + amdgpu_ras_debugfs_remove_all(adev); amdgpu_ras_sysfs_remove_all(adev); return 0; } @@ -1479,8 +1469,8 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) } /* Parse RdRspStatus and WrRspStatus */ -void amdgpu_ras_error_status_query(struct amdgpu_device *adev, - struct ras_query_if *info) +static void amdgpu_ras_error_status_query(struct amdgpu_device *adev, + struct ras_query_if *info) { /* * Only two block need to query read/write diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 4667cce38582..762f5e46c007 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -592,14 +592,8 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, int amdgpu_ras_sysfs_remove(struct amdgpu_device *adev, struct ras_common_if *head); -void amdgpu_ras_debugfs_create(struct amdgpu_device *adev, - struct ras_fs_if *head); - void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev); -void amdgpu_ras_debugfs_remove(struct amdgpu_device *adev, - struct ras_common_if *head); - int amdgpu_ras_error_query(struct amdgpu_device *adev, struct ras_query_if *info); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 2697b250dc32..1a612f51ecd9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -52,7 +52,6 @@ /** * amdgpu_ring_alloc - allocate space on the ring buffer * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * @ndw: number of dwords to allocate in the ring buffer * @@ -95,7 +94,8 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) amdgpu_ring_write(ring, ring->funcs->nop); } -/** amdgpu_ring_generic_pad_ib - pad IB with NOP 
packets +/** + * amdgpu_ring_generic_pad_ib - pad IB with NOP packets * * @ring: amdgpu_ring structure holding ring information * @ib: IB to add NOP packets to @@ -112,7 +112,6 @@ void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) * amdgpu_ring_commit - tell the GPU to execute the new * commands on the ring buffer * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Update the wptr (write pointer) to tell the GPU to @@ -155,8 +154,10 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information - * @max_ndw: maximum number of dw for ring alloc - * @nop: nop packet for this ring + * @max_dw: maximum number of dw for ring alloc + * @irq_src: interrupt source to use for this ring + * @irq_type: interrupt type to use for this ring + * @hw_prio: ring priority (NORMAL/HIGH) * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. @@ -276,7 +277,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /** * amdgpu_ring_fini - tear down the driver ring struct. * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Tear down the driver information for the selected ring (all asics). 
@@ -310,7 +310,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) /** * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper * - * @adev: amdgpu_device pointer + * @ring: ring to write to * @reg0: register to write * @reg1: register to wait on * @ref: reference value to write/wait on diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 0da0a0d98672..b7d861ed5284 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -29,7 +29,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" - +#include "amdgpu_sched.h" #include "amdgpu_vm.h" int amdgpu_to_sched_priority(int amdgpu_priority, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h new file mode 100644 index 000000000000..03009157aec8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -0,0 +1,37 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_SMUIO_H__ +#define __AMDGPU_SMUIO_H__ + +struct amdgpu_smuio_funcs { + u32 (*get_rom_index_offset)(struct amdgpu_device *adev); + u32 (*get_rom_data_offset)(struct amdgpu_device *adev); + void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); +}; + +struct amdgpu_smuio { + const struct amdgpu_smuio_funcs *funcs; +}; + +#endif /* __AMDGPU_SMUIO_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 8ea6c49529e7..4e558632a5d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -78,7 +78,7 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, /** * amdgpu_sync_get_owner - extract the owner of a fence * - * @fence: fence get the owner from + * @f: fence get the owner from * * Extract who originally created the fence. 
*/ @@ -172,7 +172,6 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) /** * amdgpu_sync_vm_fence - remember to sync to this VM fence * - * @adev: amdgpu device * @sync: sync object to add fence to * @fence: the VM fence to add * @@ -190,6 +189,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) /** * amdgpu_sync_resv - sync to a reservation object * + * @adev: amdgpu device * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence * @mode: how owner affects which fences we sync to diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 6042b3b81a4c..7b230bcbf2c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -42,16 +42,11 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) size = 1024 * 1024; /* Number of tests = - * (Total GTT - IB pool - writeback page - ring buffers) / test size + * (Total GTT - gart_pin_size - (2 transfer windows for buffer moves)) / test size */ - n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - if (adev->rings[i]) - n -= adev->rings[i]->ring_size; - if (adev->wb.wb_obj) - n -= AMDGPU_GPU_PAGE_SIZE; - if (adev->irq.ih.ring_obj) - n -= adev->irq.ih.ring_size; + n = adev->gmc.gart_size - atomic64_read(&adev->gart_pin_size); + n -= AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS * + AMDGPU_GPU_PAGE_SIZE; n /= size; gtt_obj = kcalloc(n, sizeof(*gtt_obj), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index ee9480d14cbc..324d5e3f3579 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -366,15 +366,15 @@ TRACE_EVENT(amdgpu_vm_update_ptes, TRACE_EVENT(amdgpu_vm_set_ptes, TP_PROTO(uint64_t pe, uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags, bool 
direct), - TP_ARGS(pe, addr, count, incr, flags, direct), + uint32_t incr, uint64_t flags, bool immediate), + TP_ARGS(pe, addr, count, incr, flags, immediate), TP_STRUCT__entry( __field(u64, pe) __field(u64, addr) __field(u32, count) __field(u32, incr) __field(u64, flags) - __field(bool, direct) + __field(bool, immediate) ), TP_fast_assign( @@ -383,32 +383,32 @@ TRACE_EVENT(amdgpu_vm_set_ptes, __entry->count = count; __entry->incr = incr; __entry->flags = flags; - __entry->direct = direct; + __entry->immediate = immediate; ), TP_printk("pe=%010Lx, addr=%010Lx, incr=%u, flags=%llx, count=%u, " - "direct=%d", __entry->pe, __entry->addr, __entry->incr, - __entry->flags, __entry->count, __entry->direct) + "immediate=%d", __entry->pe, __entry->addr, __entry->incr, + __entry->flags, __entry->count, __entry->immediate) ); TRACE_EVENT(amdgpu_vm_copy_ptes, - TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool direct), - TP_ARGS(pe, src, count, direct), + TP_PROTO(uint64_t pe, uint64_t src, unsigned count, bool immediate), + TP_ARGS(pe, src, count, immediate), TP_STRUCT__entry( __field(u64, pe) __field(u64, src) __field(u32, count) - __field(bool, direct) + __field(bool, immediate) ), TP_fast_assign( __entry->pe = pe; __entry->src = src; __entry->count = count; - __entry->direct = direct; + __entry->immediate = immediate; ), - TP_printk("pe=%010Lx, src=%010Lx, count=%u, direct=%d", + TP_printk("pe=%010Lx, src=%010Lx, count=%u, immediate=%d", __entry->pe, __entry->src, __entry->count, - __entry->direct) + __entry->immediate) ); TRACE_EVENT(amdgpu_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c438d290a6db..4d8f19ab1014 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -451,7 +451,7 @@ error: return r; } -/** +/* * amdgpu_move_blit - Copy an entire buffer to another buffer * * This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to @@ 
-512,7 +512,7 @@ error: return r; } -/** +/* * amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy * * Called by amdgpu_bo_move() @@ -536,7 +536,7 @@ static bool amdgpu_mem_visible(struct amdgpu_device *adev, <= adev->gmc.visible_vram_size; } -/** +/* * amdgpu_bo_move - Move a buffer object to a new memory location * * Called by ttm_bo_handle_move_mem() @@ -642,7 +642,7 @@ out: return 0; } -/** +/* * amdgpu_ttm_io_mem_reserve - Reserve a block of memory during a fault * * Called by ttm_mem_io_reserve() ultimately via ttm_bo_vm_fault() @@ -733,7 +733,7 @@ struct amdgpu_ttm_tt { }; #ifdef CONFIG_DRM_AMDGPU_USERPTR -/** +/* * amdgpu_ttm_tt_get_user_pages - get device accessible pages that back user * memory and start HMM tracking CPU page table update * @@ -838,7 +838,7 @@ out: return r; } -/** +/* * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change * Check if the pages backing this ttm range have been invalidated * @@ -874,7 +874,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) } #endif -/** +/* * amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary. * * Called by amdgpu_cs_list_validate(). This creates the page list @@ -889,7 +889,7 @@ void amdgpu_ttm_tt_set_user_pages(struct ttm_tt *ttm, struct page **pages) ttm->pages[i] = pages ? pages[i] : NULL; } -/** +/* * amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages * * Called by amdgpu_ttm_backend_bind() @@ -929,7 +929,7 @@ release_sg: return r; } -/** +/* * amdgpu_ttm_tt_unpin_userptr - Unpin and unmap userptr pages */ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_bo_device *bdev, @@ -1010,7 +1010,7 @@ gart_bind_fail: return r; } -/** +/* * amdgpu_ttm_backend_bind - Bind GTT memory * * Called by ttm_tt_bind() on behalf of ttm_bo_handle_move_mem(). 
@@ -1068,7 +1068,7 @@ static int amdgpu_ttm_backend_bind(struct ttm_bo_device *bdev, return r; } -/** +/* * amdgpu_ttm_alloc_gart - Make sure buffer object is accessible either * through AGP or GART aperture. * @@ -1129,7 +1129,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) return 0; } -/** +/* * amdgpu_ttm_recover_gart - Rebind GTT pages * * Called by amdgpu_gtt_mgr_recover() from amdgpu_device_reset() to @@ -1150,7 +1150,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) return r; } -/** +/* * amdgpu_ttm_backend_unbind - Unbind GTT mapped pages * * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and @@ -1199,6 +1199,7 @@ static void amdgpu_ttm_backend_destroy(struct ttm_bo_device *bdev, * amdgpu_ttm_tt_create - Create a ttm_tt object for a given BO * * @bo: The buffer object to create a GTT ttm_tt object around + * @page_flags: Page flags to be added to the ttm_tt object * * Called by ttm_tt_create(). */ @@ -1228,7 +1229,7 @@ static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo, return &gtt->ttm; } -/** +/* * amdgpu_ttm_tt_populate - Map GTT pages visible to the device * * Map the pages of a ttm_tt object to an address space visible @@ -1273,7 +1274,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_bo_device *bdev, return ttm_pool_alloc(&adev->mman.bdev.pool, ttm, ctx); } -/** +/* * amdgpu_ttm_tt_unpopulate - unmap GTT pages and unpopulate page arrays * * Unmaps pages of a ttm_tt object from the device address space and @@ -1343,7 +1344,7 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo, return 0; } -/** +/* * amdgpu_ttm_tt_get_usermm - Return memory manager for ttm_tt object */ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) @@ -1359,7 +1360,7 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) return gtt->usertask->mm; } -/** +/* * amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an * address range for the current task.
* @@ -1383,7 +1384,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, return true; } -/** +/* * amdgpu_ttm_tt_is_userptr - Have the pages backing by userptr? */ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) @@ -1396,7 +1397,7 @@ bool amdgpu_ttm_tt_is_userptr(struct ttm_tt *ttm) return true; } -/** +/* * amdgpu_ttm_tt_is_readonly - Is the ttm_tt object read only? */ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) @@ -1437,9 +1438,10 @@ uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_resource *mem) /** * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object * + * @adev: amdgpu_device pointer * @ttm: The ttm_tt object to compute the flags for * @mem: The memory registry backing this ttm_tt object - + * * Figure out the flags to use for a VM PTE (Page Table Entry). */ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, @@ -1456,7 +1458,7 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, return flags; } -/** +/* * amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer * object. * @@ -1767,7 +1769,7 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) return 0; } -/** +/* * amdgpu_ttm_init - Init the memory management (ttm) as well as various * gtt/vram related fields. 
* @@ -1901,18 +1903,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) return 0; } -/** - * amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm - */ -void amdgpu_ttm_late_init(struct amdgpu_device *adev) -{ - /* return the VGA stolen memory (if any) back to VRAM */ - if (!adev->mman.keep_stolen_vga_memory) - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); - amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); -} - -/** +/* * amdgpu_ttm_fini - De-initialize the TTM memory pools */ void amdgpu_ttm_fini(struct amdgpu_device *adev) @@ -1922,8 +1913,8 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) amdgpu_ttm_training_reserve_vram_fini(adev); /* return the stolen vga memory back to VRAM */ - if (adev->mman.keep_stolen_vga_memory) - amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL); + amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL); /* return the IP Discovery TMR memory back to VRAM */ amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL); amdgpu_ttm_fw_reserve_vram_fini(adev); @@ -2233,7 +2224,7 @@ static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { {"ttm_page_pool", amdgpu_ttm_pool_debugfs, 0, NULL}, }; -/** +/* * amdgpu_ttm_vram_read - Linear read access to VRAM * * Accesses VRAM via MMIO for debugging purposes. @@ -2268,7 +2259,7 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, return result; } -/** +/* * amdgpu_ttm_vram_write - Linear write access to VRAM * * Accesses VRAM via MMIO for debugging purposes. 
@@ -2321,7 +2312,7 @@ static const struct file_operations amdgpu_ttm_vram_fops = { #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS -/** +/* * amdgpu_ttm_gtt_read - Linear read access to GTT memory */ static ssize_t amdgpu_ttm_gtt_read(struct file *f, char __user *buf, @@ -2371,7 +2362,7 @@ static const struct file_operations amdgpu_ttm_gtt_fops = { #endif -/** +/* * amdgpu_iomem_read - Virtual read access to GPU mapped memory * * This function is used to read memory that has been mapped to the @@ -2427,7 +2418,7 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf, return result; } -/** +/* * amdgpu_iomem_write - Virtual write access to GPU mapped memory * * This function is used to write memory that has been mapped to the diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index bdca2970173e..d2987536d7cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -131,7 +131,6 @@ int amdgpu_vram_mgr_query_page_status(struct ttm_resource_manager *man, uint64_t start); int amdgpu_ttm_init(struct amdgpu_device *adev); -void amdgpu_ttm_late_init(struct amdgpu_device *adev); void amdgpu_ttm_fini(struct amdgpu_device *adev); void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 2b7c90b7a712..1beb08af347f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -68,23 +68,32 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr) { uint16_t version_major = le16_to_cpu(hdr->header_version_major); uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + const struct smc_firmware_header_v1_0 *v1_0_hdr; + const struct smc_firmware_header_v2_0 *v2_0_hdr; + const struct smc_firmware_header_v2_1 *v2_1_hdr; DRM_DEBUG("SMC\n"); amdgpu_ucode_print_common_hdr(hdr); if (version_major 
== 1) { - const struct smc_firmware_header_v1_0 *smc_hdr = - container_of(hdr, struct smc_firmware_header_v1_0, header); - - DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(smc_hdr->ucode_start_addr)); + v1_0_hdr = container_of(hdr, struct smc_firmware_header_v1_0, header); + DRM_DEBUG("ucode_start_addr: %u\n", le32_to_cpu(v1_0_hdr->ucode_start_addr)); } else if (version_major == 2) { - const struct smc_firmware_header_v1_0 *v1_hdr = - container_of(hdr, struct smc_firmware_header_v1_0, header); - const struct smc_firmware_header_v2_0 *v2_hdr = - container_of(v1_hdr, struct smc_firmware_header_v2_0, v1_0); + switch (version_minor) { + case 0: + v2_0_hdr = container_of(hdr, struct smc_firmware_header_v2_0, v1_0.header); + DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_offset_bytes)); + DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_0_hdr->ppt_size_bytes)); + break; + case 1: + v2_1_hdr = container_of(hdr, struct smc_firmware_header_v2_1, v1_0.header); + DRM_DEBUG("pptable_count: %u\n", le32_to_cpu(v2_1_hdr->pptable_count)); + DRM_DEBUG("pptable_entry_offset: %u\n", le32_to_cpu(v2_1_hdr->pptable_entry_offset)); + break; + default: + break; + } - DRM_DEBUG("ppt_offset_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_offset_bytes)); - DRM_DEBUG("ppt_size_bytes: %u\n", le32_to_cpu(v2_hdr->ppt_size_bytes)); } else { DRM_ERROR("Unknown SMC ucode version: %u.%u\n", version_major, version_minor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index f8bebf18ee36..7c5b60e53482 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -87,7 +87,7 @@ #define UVD_NO_OP 0x03ff #define UVD_BASE_SI 0x3800 -/** +/* * amdgpu_uvd_cs_ctx - Command submission parser context * * Used for emulating virtual memory support on UVD 4.2. 
@@ -545,8 +545,9 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx) /** * amdgpu_uvd_cs_msg_decode - handle UVD decode message * + * @adev: amdgpu_device pointer * @msg: pointer to message structure - * @buf_sizes: returned buffer sizes + * @buf_sizes: placeholder to put the different buffer lengths * * Peek into the decode message and calculate the necessary buffer sizes. */ @@ -1005,6 +1006,7 @@ static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, * amdgpu_uvd_ring_parse_cs - UVD command submission parser * * @parser: Command submission parser context + * @ib_idx: Which indirect buffer to use * * Parse the command stream, patch in addresses as necessary. */ @@ -1279,6 +1281,7 @@ void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring) * amdgpu_uvd_ring_test_ib - test ib execution * * @ring: amdgpu_ring pointer + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test if we can successfully execute an IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 5eb63288d157..edbb8194ee81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -67,6 +67,7 @@ struct amdgpu_uvd { unsigned harvest_config; /* store image width to adjust nb memory state */ unsigned decode_image_width; + uint32_t keyselect; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index ecaa2d7483b2..9791a4057e8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -90,6 +90,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, * amdgpu_vce_init - allocate memory, load vce firmware * * @adev: amdgpu_device pointer + * @size: size for the new BO * * First step to get VCE online, allocate memory and load the firmware */ @@ -428,9 +429,9 @@ void amdgpu_vce_free_handles(struct amdgpu_device 
*adev, struct drm_file *filp) /** * amdgpu_vce_get_create_msg - generate a VCE create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: VCE session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -509,9 +510,9 @@ err: /** * amdgpu_vce_get_destroy_msg - generate a VCE destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: VCE session handle to use + * @direct: direct or delayed pool * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -576,6 +577,7 @@ err: * amdgpu_vce_cs_validate_bo - make sure not to cross 4GB boundary * * @p: parser context + * @ib_idx: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size @@ -625,9 +627,11 @@ static int amdgpu_vce_validate_bo(struct amdgpu_cs_parser *p, uint32_t ib_idx, * amdgpu_vce_cs_reloc - command submission relocation * * @p: parser context + * @ib_idx: indirect buffer to use * @lo: address of lower dword * @hi: address of higher dword * @size: minimum size + * @index: bs/fb index * * Patch relocation inside command stream with real buffer address */ @@ -714,7 +718,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, * amdgpu_vce_cs_parse - parse and validate the command stream * * @p: parser context - * + * @ib_idx: indirect buffer to use */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { @@ -950,7 +954,7 @@ out: * amdgpu_vce_cs_parse_vm - parse the command stream in VM mode * * @p: parser context - * + * @ib_idx: indirect buffer to use */ int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx) { @@ -1040,7 +1044,9 @@ out: * amdgpu_vce_ring_emit_ib - execute indirect buffer * * @ring: engine to use + * @job: job to retrieve vmid from * @ib: the IB to execute + * @flags: unused * */ 
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, @@ -1058,7 +1064,9 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, * amdgpu_vce_ring_emit_fence - add a fence command to the ring * * @ring: engine to use - * @fence: the fence + * @addr: address + * @seq: sequence number + * @flags: fence related flags * */ void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, @@ -1116,6 +1124,7 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) * amdgpu_vce_ring_test_ib - test if VCE IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 7e19a6656715..1c97244e0d74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -456,6 +456,37 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) return r; } +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr; + unsigned int i; + int r; + + if (amdgpu_sriov_vf(adev)) + return 0; + + r = amdgpu_ring_alloc(ring, 16); + if (r) + return r; + + rptr = amdgpu_ring_get_rptr(ring); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + udelay(1); + } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + + return r; +} + static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, struct dma_fence **fence) @@ -510,16 +541,16 @@ err: } static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo **bo) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; + *bo = NULL; r = 
amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, (void **)&msg); + bo, NULL, (void **)&msg); if (r) return r; @@ -540,20 +571,20 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = 14; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, fence); + return 0; } static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct dma_fence **fence) + struct amdgpu_bo **bo) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_bo *bo = NULL; uint32_t *msg; int r, i; + *bo = NULL; r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &bo, NULL, (void **)&msg); + bo, NULL, (void **)&msg); if (r) return r; @@ -566,19 +597,117 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = 6; i < 1024; ++i) msg[i] = cpu_to_le32(0x0); - return amdgpu_vcn_dec_send_msg(ring, bo, fence); + return 0; } int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) { - struct dma_fence *fence; + struct dma_fence *fence = NULL; + struct amdgpu_bo *bo; long r; - r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_send_msg(ring, bo, NULL); + if (r) + goto error; + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_send_msg(ring, bo, &fence); + if (r) + goto error; + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) + r = -ETIMEDOUT; + else if (r > 0) + r = 0; + + dma_fence_put(fence); +error: + return r; +} + +static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, + struct amdgpu_bo *bo, + struct dma_fence **fence) +{ + struct amdgpu_vcn_decode_buffer *decode_buffer = NULL; + const unsigned int ib_size_dw = 64; + struct amdgpu_device *adev = ring->adev; + struct dma_fence *f = NULL; + struct amdgpu_job *job; + struct 
amdgpu_ib *ib; + uint64_t addr; + int i, r; + + r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, + AMDGPU_IB_POOL_DIRECT, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->length_dw = 0; + + ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8; + ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER); + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]); + ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4; + memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer)); + + decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER); + decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32); + decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr); + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_job_submit_direct(job, ring, &f); + if (r) + goto err_free; + + amdgpu_bo_fence(bo, f, false); + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; +} + +int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + struct amdgpu_bo *bo; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo); + if (r) + goto error; + + r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL); + if (r) + goto error; + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo); if (r) goto error; - r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); + r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 17691158f783..13aa417f6be7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -44,6 +44,17 @@ #define VCN_DEC_CMD_PACKET_START 
0x0000000a #define VCN_DEC_CMD_PACKET_END 0x0000000b +#define VCN_DEC_SW_CMD_NO_OP 0x00000000 +#define VCN_DEC_SW_CMD_END 0x00000001 +#define VCN_DEC_SW_CMD_IB 0x00000002 +#define VCN_DEC_SW_CMD_FENCE 0x00000003 +#define VCN_DEC_SW_CMD_TRAP 0x00000004 +#define VCN_DEC_SW_CMD_IB_AUTO 0x00000005 +#define VCN_DEC_SW_CMD_SEMAPHORE 0x00000006 +#define VCN_DEC_SW_CMD_PREEMPT_FENCE 0x00000009 +#define VCN_DEC_SW_CMD_REG_WRITE 0x0000000b +#define VCN_DEC_SW_CMD_REG_WAIT 0x0000000c + #define VCN_ENC_CMD_NO_OP 0x00000000 #define VCN_ENC_CMD_END 0x00000001 #define VCN_ENC_CMD_IB 0x00000002 @@ -145,6 +156,10 @@ } while (0) #define AMDGPU_VCN_MULTI_QUEUE_FLAG (1 << 8) +#define AMDGPU_VCN_SW_RING_FLAG (1 << 9) + +#define AMDGPU_VCN_IB_FLAG_DECODE_BUFFER 0x00000001 +#define AMDGPU_VCN_CMD_FLAG_MSG_BUFFER 0x00000001 enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, @@ -236,12 +251,25 @@ struct amdgpu_fw_shared_multi_queue { uint8_t padding[4]; }; +struct amdgpu_fw_shared_sw_ring { + uint8_t is_enabled; + uint8_t padding[3]; +}; + struct amdgpu_fw_shared { uint32_t present_flag_0; uint8_t pad[53]; struct amdgpu_fw_shared_multi_queue multi_queue; + struct amdgpu_fw_shared_sw_ring sw_ring; } __attribute__((__packed__)); +struct amdgpu_vcn_decode_buffer { + uint32_t valid_buf_flag; + uint32_t msg_buffer_address_hi; + uint32_t msg_buffer_address_lo; + uint32_t pad[30]; +}; + int amdgpu_vcn_sw_init(struct amdgpu_device *adev); int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); int amdgpu_vcn_suspend(struct amdgpu_device *adev); @@ -251,6 +279,8 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); +int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout); int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, 
long timeout); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 8aff6ef50f91..2d51b7694d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -106,7 +106,7 @@ failed_kiq: /** * amdgpu_virt_request_full_gpu() - request full gpu access - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * @init: is driver init time. * When start to init/fini driver, first need to request full gpu access. * Return: Zero if request success, otherwise will return error. @@ -129,7 +129,7 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init) /** * amdgpu_virt_release_full_gpu() - release full gpu access - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * @init: is driver init time. * When finishing driver init/fini, need to release full gpu access. * Return: Zero if release success, otherwise will returen error. @@ -151,7 +151,7 @@ int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init) /** * amdgpu_virt_reset_gpu() - reset gpu - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * Send reset command to GPU hypervisor to reset GPU that VM is using * Return: Zero if reset success, otherwise will return error. */ @@ -186,7 +186,7 @@ void amdgpu_virt_request_init_data(struct amdgpu_device *adev) /** * amdgpu_virt_wait_reset() - wait for reset gpu completed - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * Wait for GPU reset completed. * Return: Zero if reset success, otherwise will return error. */ @@ -202,7 +202,7 @@ int amdgpu_virt_wait_reset(struct amdgpu_device *adev) /** * amdgpu_virt_alloc_mm_table() - alloc memory for mm table - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * MM table is used by UVD and VCE for its initialization * Return: Zero if allocate success. 
*/ @@ -232,7 +232,7 @@ int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev) /** * amdgpu_virt_free_mm_table() - free mm table memory - * @amdgpu: amdgpu device. + * @adev: amdgpu device. * Free MM table memory */ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) @@ -282,8 +282,8 @@ static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev) if (!*data) return -ENOMEM; - bps = kmalloc(align_space * sizeof((*data)->bps), GFP_KERNEL); - bps_bo = kmalloc(align_space * sizeof((*data)->bps_bo), GFP_KERNEL); + bps = kmalloc_array(align_space, sizeof((*data)->bps), GFP_KERNEL); + bps_bo = kmalloc_array(align_space, sizeof((*data)->bps_bo), GFP_KERNEL); if (!bps || !bps_bo) { kfree(bps); @@ -557,7 +557,7 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) return 0; } -void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) +static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index ca2344beed81..976a12e5a8b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -107,7 +107,7 @@ struct amdgpu_bo_list_entry; #define AMDGPU_VM_FAULT_STOP_ALWAYS 2 /* Reserve 4MB VRAM for page tables */ -#define AMDGPU_VM_RESERVED_VRAM (4ULL << 20) +#define AMDGPU_VM_RESERVED_VRAM (8ULL << 20) /* max number of VMHUB */ #define AMDGPU_MAX_VMHUBS 3 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 0786e7555554..ac45d9c7a4e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -38,8 +38,8 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) * amdgpu_vm_cpu_prepare - prepare page table update with the CPU * * @p: see amdgpu_vm_update_params definition - * @owner: owner 
we need to sync to - * @exclusive: exclusive move fence we need to sync to + * @resv: reservation object with embedded fence + * @sync_mode: synchronization mode * * Returns: * Negativ errno, 0 for success. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index db790574dc2e..a83a646759c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -51,8 +51,8 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) * amdgpu_vm_sdma_prepare - prepare SDMA command submission * * @p: see amdgpu_vm_update_params definition - * @owner: owner we need to sync to - * @exclusive: exclusive move fence we need to sync to + * @resv: reservation object with embedded fence + * @sync_mode: synchronization mode * * Returns: * Negativ errno, 0 for success. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index c99c2180785f..d2de2a720a3d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -637,6 +637,8 @@ error_free: * amdgpu_vram_mgr_free_sgt - allocate and fill a sg table * * @adev: amdgpu device pointer + * @dev: device pointer + * @dir: data direction of resource to unmap * @sgt: sg table to free * * Free a previously allocate sg table. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 54c358e71f9a..541ef6be390f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -395,12 +395,17 @@ void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive) int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) { int ret = 0; - struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); - struct amdgpu_device *request_adev = hive->hi_req_gpu ? 
- hive->hi_req_gpu : adev; + struct amdgpu_hive_info *hive; + struct amdgpu_device *request_adev; bool is_hi_req = pstate == AMDGPU_XGMI_PSTATE_MAX_VEGA20; - bool init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; + bool init_low; + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + return 0; + request_adev = hive->hi_req_gpu ? hive->hi_req_gpu : adev; + init_low = hive->pstate == AMDGPU_XGMI_PSTATE_UNKNOWN; amdgpu_put_xgmi_hive(hive); /* fw bug so temporarily disable pstate switching */ return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index fa817ebff980..6134ed964027 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -171,7 +171,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode struct backlight_properties props; struct amdgpu_backlight_privdata *pdata; struct amdgpu_encoder_atom_dig *dig; - u8 backlight_level; char bl_name[16]; /* Mac laptops with multiple GPUs use the gmux driver for backlight @@ -207,8 +206,6 @@ void amdgpu_atombios_encoder_init_backlight(struct amdgpu_encoder *amdgpu_encode pdata->encoder = amdgpu_encoder; - backlight_level = amdgpu_atombios_encoder_get_backlight_level_from_reg(adev); - dig = amdgpu_encoder->enc_priv; dig->bl_dev = bd; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 5442df094102..13737b317f7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1336,10 +1336,6 @@ cik_asic_reset_method(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_BONAIRE: - /* disable baco reset until it works */ - /* smu7_asic_get_baco_capability(adev, &baco_reset); */ - baco_reset = false; - break; case CHIP_HAWAII: baco_reset = cik_asic_supports_baco(adev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index db953e95f3d2..d3745711d55f 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -177,6 +177,7 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev) * cik_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (CIK). Also check for @@ -266,6 +267,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, * cik_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 1a6494ea5091..43b978144b79 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -215,7 +215,9 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (CIK). */ @@ -267,7 +269,9 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring) * cik_sdma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -655,6 +659,7 @@ error_free_wb: * cik_sdma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (CIK). * Returns 0 on success, error on failure. 
@@ -801,6 +806,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * cik_sdma_vm_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -849,7 +855,8 @@ static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (CIK). @@ -1298,10 +1305,11 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) /** * cik_sdma_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation * * Copy GPU buffers using the DMA engine (CIK). * Used by the amdgpu ttm implementation to move pages if @@ -1325,7 +1333,7 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, /** * cik_sdma_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 1dca0cabc326..da37f8a900af 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -179,6 +179,7 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev) * cz_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). 
Also check for @@ -213,6 +214,8 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, * cz_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -245,6 +248,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, * cz_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index da240f8fafcf..7944781e1086 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -228,6 +228,7 @@ static void dce_v10_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index c62c56a69fda..1b6ff0470011 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -246,6 +246,7 @@ static void dce_v11_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 943976349346..83a88385b762 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -180,6 +180,7 @@ static void dce_v6_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Does the actual pageflip (evergreen+). * During vblank we take the crtc lock and wait for the update_pending @@ -1047,7 +1048,6 @@ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, /** - * * dce_v6_0_bandwidth_update - program display watermarks * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 7973183fa335..224b30214427 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -176,6 +176,7 @@ static void dce_v8_0_pageflip_interrupt_fini(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @crtc_id: crtc to cleanup pageflip on * @crtc_base: new address of the crtc (GPU MC address) + * @async: asynchronous flip * * Triggers the actual pageflip by updating the primary * surface base address. 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index b4d4b76538d2..ffcc64ec6473 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -139,9 +139,6 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); unsigned type; - if (amdgpu_sriov_vf(adev)) - return; - switch (mode) { case DRM_MODE_DPMS_ON: amdgpu_crtc->enabled = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8c3bad3dfc01..ba1086784525 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -155,6 +155,11 @@ #define mmCGTT_SPI_CS_CLK_CTRL 0x507c #define mmCGTT_SPI_CS_CLK_CTRL_BASE_IDX 1 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid 0x16f3 +#define mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid 0x15db +#define mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid_BASE_IDX 0 + MODULE_FIRMWARE("amdgpu/navi10_ce.bin"); MODULE_FIRMWARE("amdgpu/navi10_pfp.bin"); MODULE_FIRMWARE("amdgpu/navi10_me.bin"); @@ -1404,23 +1409,14 @@ static void gfx_v10_rlcg_wreg(struct amdgpu_device *adev, u32 offset, u32 v) { static void *scratch_reg0; static void *scratch_reg1; - static void *scratch_reg2; - static void *scratch_reg3; static void *spare_int; - static uint32_t grbm_cntl; - static uint32_t grbm_idx; uint32_t i = 0; uint32_t retries = 50000; scratch_reg0 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG0_BASE_IDX] + mmSCRATCH_REG0)*4; scratch_reg1 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG1)*4; - scratch_reg2 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG2)*4; - scratch_reg3 = adev->rmmio + (adev->reg_offset[GC_HWIP][0][mmSCRATCH_REG1_BASE_IDX] + mmSCRATCH_REG3)*4; spare_int = adev->rmmio + 
(adev->reg_offset[GC_HWIP][0][mmRLC_SPARE_INT_BASE_IDX] + mmRLC_SPARE_INT)*4; - grbm_cntl = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_CNTL_BASE_IDX] + mmGRBM_GFX_CNTL; - grbm_idx = adev->reg_offset[GC_HWIP][0][mmGRBM_GFX_INDEX_BASE_IDX] + mmGRBM_GFX_INDEX; - if (amdgpu_sriov_runtime(adev)) { pr_err("shouldn't call rlcg write register during runtime\n"); return; @@ -3143,7 +3139,11 @@ static const struct soc15_reg_golden golden_settings_gc_10_3[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0xffffffff, 0x00000280), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), + SOC15_REG_GOLDEN_VALUE(GC, 0 ,mmGCEA_SDP_TAG_RESERVE0, 0xffffffff, 0x10100100), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_SDP_TAG_RESERVE1, 0xffffffff, 0x17000088), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xff000000, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3182,6 +3182,7 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_sienna_cichlid[] = static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_PS_CLK_CTRL, 0xff7f0fff, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0xff7f0fff, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0xff7f0fff, 0x7e000100), @@ -3190,6 +3191,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_2[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG4, 0xffffffff, 0x00800000), 
SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_EXCEPTION_CONTROL, 0x7fff0f1f, 0x00b80000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCR_GENERAL_CNTL_Sienna_Cichlid, 0x1ff1ffff, 0x00000500), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCUTCL2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffffffff, 0xff008080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCVM_L2_CGTT_CLK_CTRL_Sienna_Cichlid, 0xffff8fff, 0xff008080), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGE_PC_CNTL, 0x003fffff, 0x00280400), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2A_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_ADDR_MATCH_MASK, 0xffffffff, 0xffffffcf), @@ -3250,10 +3253,14 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_vangogh[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfff7ffff, 0x01030000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0xffffffff, 0x00400000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), + + /* This is not in GDB yet. Don't remove it. It fixes a GPU hang on VanGogh. */ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020), }; static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_CS_CLK_CTRL, 0x78000000, 0x78000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA0_CLK_CTRL, 0x30000000, 0x30000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCGTT_SPI_RA1_CLK_CTRL, 0x7e000000, 0x7e000100), SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_GCR_CNTL, 0x0007ffff, 0x0000c000), @@ -3286,7 +3293,8 @@ static const struct soc15_reg_golden golden_settings_gc_10_3_4[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER8_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_PERFCOUNTER9_SELECT, 0xf0f001ff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x01030000, 0x01030000), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmUTCL1_CTRL, 0x03a00000, 0x00a00000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmLDS_CONFIG, 0x00000020, 0x00000020) }; #define 
DEFAULT_SH_MEM_CONFIG \ @@ -4472,7 +4480,8 @@ static int gfx_v10_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -4835,7 +4844,7 @@ static void gfx_v10_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -5005,7 +5014,7 @@ static int gfx_v10_0_init_csb(struct amdgpu_device *adev) return 0; } -void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v10_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -5986,17 +5995,19 @@ static void gfx_v10_0_cp_gfx_set_doorbell(struct amdgpu_device *adev, { u32 tmp; - tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); + if (!amdgpu_async_gfx_ring) { + tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL); + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } 
else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 0); + } + WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); } - WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp); switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -6340,6 +6351,11 @@ static int gfx_v10_0_gfx_mqd_init(struct amdgpu_ring *ring) DOORBELL_EN, 0); mqd->cp_rb_doorbell_control = tmp; + /*if there are 2 gfx rings, set the lower doorbell range of the first ring, + *otherwise the range of the second ring will override the first ring */ + if (ring->doorbell_index == adev->doorbell_index.gfx_ring0 << 1) + gfx_v10_0_cp_gfx_set_doorbell(adev, ring); + /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ ring->wptr = 0; mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, mmCP_GFX_HQD_RPTR); @@ -6507,7 +6523,8 @@ static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring *ring, struct struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -8647,6 +8664,7 @@ static int gfx_v10_0_set_priv_inst_fault_state(struct amdgpu_device *adev, WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 671c46ebeced..ca74638dec9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1894,6 +1894,7 @@ static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * gfx_v6_0_ring_test_ib - basic ring IB test * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Allocate an IB and execute it on the gfx ring (SI). * Provides a basic gfx ring test to verify that IBs are working. diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index cb07bc21dcbe..a368724c3dfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -1580,10 +1580,10 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @se_num: shader engine to address * @sh_num: sh block to address + * @instance: Certain registers are instanced per SE or SH. + * 0xffffffff means broadcast to all SEs or SHs (CIK). * - * Select which SE, SH combinations to address. Certain - * registers are instanced per SE or SH. 0xffffffff means - * broadcast to all SEs or SHs (CIK). + * Select which SE, SH combinations to address. */ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance) @@ -1779,8 +1779,6 @@ gfx_v7_0_write_harvested_raster_configs(struct amdgpu_device *adev, * gfx_v7_0_setup_rb - setup the RBs on the asic * * @adev: amdgpu_device pointer - * @se_num: number of SEs (shader engines) for the asic - * @sh_per_se: number of SH blocks per SE for the asic * * Configures per-SE/SH RB registers (CIK). 
*/ @@ -1841,6 +1839,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v7_0_init_compute_vmid - gart enable * @@ -1849,7 +1848,6 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v7_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -1898,7 +1896,7 @@ static void gfx_v7_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -2074,7 +2072,6 @@ static void gfx_v7_0_scratch_init(struct amdgpu_device *adev) /** * gfx_v7_0_ring_test_ring - basic gfx ring test * - * @adev: amdgpu_device pointer * @ring: amdgpu_ring structure holding ring information * * Allocate a scratch register and write to it using the gfx ring (CIK). @@ -2121,8 +2118,7 @@ error_free_scratch: /** * gfx_v7_0_ring_emit_hdp - emit an hdp flush on the cp * - * @adev: amdgpu_device pointer - * @ridx: amdgpu ring index + * @ring: amdgpu_ring structure holding ring information * * Emits an hdp flush on the cp. */ @@ -2171,8 +2167,10 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Emits a fence sequnce number on the gfx ring and flushes * GPU caches. 
@@ -2212,8 +2210,10 @@ static void gfx_v7_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, /** * gfx_v7_0_ring_emit_fence_compute - emit a fence on the compute ring * - * @adev: amdgpu_device pointer - * @fence: amdgpu fence object + * @ring: amdgpu_ring structure holding ring information + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Emits a fence sequnce number on the compute ring and flushes * GPU caches. @@ -2245,7 +2245,9 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * gfx_v7_0_ring_emit_ib - emit an IB (Indirect Buffer) on the ring * * @ring: amdgpu_ring structure holding ring information + * @job: job to retrieve vmid from * @ib: amdgpu indirect buffer object + * @flags: options (AMDGPU_HAVE_CTX_SWITCH) * * Emits an DE (drawing engine) or CE (constant engine) IB * on the gfx ring. IBs are usually generated by userspace @@ -2342,6 +2344,7 @@ static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) * gfx_v7_0_ring_test_ib - basic ring IB test * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Allocate an IB and execute it on the gfx ring (CIK). * Provides a basic gfx ring test to verify that IBs are working. @@ -3234,7 +3237,9 @@ static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) /** * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP * - * @adev: amdgpu_device pointer + * @ring: amdgpu_ring pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using the CP (CIK). 
@@ -5208,15 +5213,6 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->lds_size = 64; } -static const struct amdgpu_ip_block_version gfx_v7_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_GFX, - .major = 7, - .minor = 0, - .rev = 0, - .funcs = &gfx_v7_0_ip_funcs, -}; - const struct amdgpu_ip_block_version gfx_v7_1_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 6fb9c1524691..eedce7d007f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -24,7 +24,6 @@ #ifndef __GFX_V7_0_H__ #define __GFX_V7_0_H__ -extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c3fff49e6514..37639214cbbb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1923,7 +1923,8 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? 
AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -3686,6 +3687,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) mutex_unlock(&adev->grbm_idx_mutex); } +#define DEFAULT_SH_MEM_BASES (0x6000) /** * gfx_v8_0_init_compute_vmid - gart enable * @@ -3694,7 +3696,6 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) * Initialize compute vmid sh_mem registers * */ -#define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev) { int i; @@ -3748,7 +3749,7 @@ static void gfx_v8_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32(amdgpu_gds_reg_offset[vmid].mem_base, 0); WREG32(amdgpu_gds_reg_offset[vmid].mem_size, 0); WREG32(amdgpu_gds_reg_offset[vmid].gws, 0); @@ -4441,7 +4442,8 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3d6fb5a514c8..fc9bb94eaaf4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2228,7 +2228,8 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = 
amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, + ring->queue) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL; /* type-2 packets are deprecated on MEC, use type-3 instead */ return amdgpu_ring_init(adev, ring, 1024, @@ -2519,7 +2520,7 @@ static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev) * the driver can enable them for graphics. VMID0 should maintain * access so that HWS firmware can save/restore entries. */ - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0); WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0); @@ -2992,7 +2993,7 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) } } -void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) { WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_0_enable_gui_idle_interrupt(adev, false); @@ -3383,7 +3384,9 @@ static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, + ring->pipe, + ring->queue)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -5684,6 +5687,7 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev, WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 
1 : 0); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index d83577b77a66..6ddd53ba8b77 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -31,13 +31,14 @@ #include "soc15_common.h" -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) +static u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) { return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; } -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base) +static void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, + uint32_t vmid, + uint64_t page_table_base) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; @@ -275,7 +276,7 @@ static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) } } -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +static int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) { if (amdgpu_sriov_vf(adev) && adev->asic_type != CHIP_ARCTURUS) { /* @@ -305,7 +306,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) return 0; } -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) +static void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; u32 tmp; @@ -336,8 +337,8 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * @value: true redirects VM faults to the default page */ -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, - bool value) +static void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, + bool value) { u32 tmp; tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); @@ -374,7 +375,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } -void gfxhub_v1_0_init(struct amdgpu_device *adev) +static void 
gfxhub_v1_0_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB_0]; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 0c46672bbf49..3174bc5766fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -24,14 +24,6 @@ #ifndef __GFXHUB_V1_0_H__ #define __GFXHUB_V1_0_H__ -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); -void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); -void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, - bool value); -void gfxhub_v1_0_init(struct amdgpu_device *adev); -u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t page_table_base); - extern const struct amdgpu_gfxhub_funcs gfxhub_v1_0_funcs; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c index 456360bf58fa..2aecc6a243e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_0.c @@ -153,16 +153,16 @@ static void gfxhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; if (!amdgpu_sriov_vf(adev)) { - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 724bb29e9bb4..410fd3a1a388 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -152,16 +152,16 @@ static void gfxhub_v2_1_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, mmGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(GC, 0, mmGCMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index d9399324be47..5648c48be77f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -94,13 +94,39 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + bool retry_fault = !!(entry->src_data[1] & 0x80); struct amdgpu_vmhub *hub = &adev->vmhub[entry->vmid_src]; + struct amdgpu_task_info task_info; uint32_t status = 0; u64 addr; addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (retry_fault) { + /* Returning 1 here also prevents sending the IV to the KFD */ + + /* Process it only if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (in_interrupt()) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; + } + if (!amdgpu_sriov_vf(adev)) { /* * Issue a dummy read to wait for the status register to @@ -115,24 +141,25 @@ static int gmc_v10_0_process_interrupt(struct amdgpu_device *adev, WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); } - if (printk_ratelimit()) { - struct amdgpu_task_info task_info; - - memset(&task_info, 0, sizeof(struct amdgpu_task_info)); - amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - - dev_err(adev->dev, - "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " - "for process %s pid %d thread %s pid %d)\n", - entry->vmid_src ? 
"mmhub" : "gfxhub", - entry->src_id, entry->ring_id, entry->vmid, - entry->pasid, task_info.process_name, task_info.tgid, - task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", - addr, entry->client_id); - if (!amdgpu_sriov_vf(adev)) - hub->vmhub_funcs->print_l2_protection_fault_status(adev, status); - } + if (!printk_ratelimit()) + return 0; + + memset(&task_info, 0, sizeof(struct amdgpu_task_info)); + amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); + + dev_err(adev->dev, + "[%s] page fault (src_id:%u ring:%u vmid:%u pasid:%u, " + "for process %s pid %d thread %s pid %d)\n", + entry->vmid_src ? "mmhub" : "gfxhub", + entry->src_id, entry->ring_id, entry->vmid, + entry->pasid, task_info.process_name, task_info.tgid, + task_info.task_name, task_info.pid); + dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n", + addr, entry->client_id); + + if (!amdgpu_sriov_vf(adev)) + hub->vmhub_funcs->print_l2_protection_fault_status(adev, + status); return 0; } @@ -270,6 +297,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: vmhub type + * @flush_type: the flush type * * Flush the TLB for the requested page table. */ @@ -362,6 +391,8 @@ error_alloc: * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: Used with PACKET3_INVALIDATE_TLBS_ALL_HUB() * * Flush the TLB for the requested pasid. 
*/ @@ -401,7 +432,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, return 0; } - for (vmid = 1; vmid < 16; vmid++) { + for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) { ret = gmc_v10_0_get_atc_vmid_pasid_mapping_info(adev, vmid, &queried_pasid); @@ -686,8 +717,6 @@ static int gmc_v10_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - amdgpu_bo_late_init(adev); - r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; @@ -711,6 +740,7 @@ static void gmc_v10_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_vram_location(adev, &adev->gmc, base); amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->gfxhub.funcs->get_mc_fb_offset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 95a9117e9564..f5b69484c45a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -530,7 +530,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) * the VMs are determined by the application and setup and assigned * on the fly in the vm part of radeon_gart.c */ - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -791,8 +791,6 @@ static int gmc_v6_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 80c146df338a..dee2b34effb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -424,6 +424,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * @pasid: pasid to 
be flush + * @flush_type: type of flush + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. */ @@ -463,7 +465,9 @@ static int gmc_v7_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush - * + * @vmhub: which hub to flush + * @flush_type: type of flush + * * * Flush the TLB for the requested page table (CIK). */ static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, @@ -673,7 +677,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) /* set vm size, must be a multiple of 4 */ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1); - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -763,6 +767,7 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use * * Print human readable fault information (CIK). */ @@ -956,8 +961,6 @@ static int gmc_v7_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 0f32a8002c3d..2d832fc23119 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -609,6 +609,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: type of flush + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. 
*/ @@ -649,6 +651,8 @@ static int gmc_v8_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: which hub to flush + * @flush_type: type of flush * * Flush the TLB for the requested page table (VI). */ @@ -899,7 +903,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) /* set vm size, must be a multiple of 4 */ WREG32(mmVM_CONTEXT1_PAGE_TABLE_START_ADDR, 0); WREG32(mmVM_CONTEXT1_PAGE_TABLE_END_ADDR, adev->vm_manager.max_pfn - 1); - for (i = 1; i < 16; i++) { + for (i = 1; i < AMDGPU_NUM_VMID; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, table_addr >> 12); @@ -990,6 +994,7 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev) * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value + * @pasid: debug logging only - no functional use * * Print human readable fault information (VI). 
*/ @@ -1057,8 +1062,6 @@ static int gmc_v8_0_late_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_bo_late_init(adev); - if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS) return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0); else diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 0c3421d587e8..e1531d97f486 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -379,41 +379,6 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; -static const uint32_t ecc_umc_mcumc_status_addrs[] = { - (0x000143c2 + 0x00000000), - (0x000143c2 + 0x00000800), - (0x000143c2 + 0x00001000), - (0x000143c2 + 0x00001800), - (0x000543c2 + 0x00000000), - (0x000543c2 + 0x00000800), - (0x000543c2 + 0x00001000), - (0x000543c2 + 0x00001800), - (0x000943c2 + 0x00000000), - (0x000943c2 + 0x00000800), - (0x000943c2 + 0x00001000), - (0x000943c2 + 0x00001800), - (0x000d43c2 + 0x00000000), - (0x000d43c2 + 0x00000800), - (0x000d43c2 + 0x00001000), - (0x000d43c2 + 0x00001800), - (0x001143c2 + 0x00000000), - (0x001143c2 + 0x00000800), - (0x001143c2 + 0x00001000), - (0x001143c2 + 0x00001800), - (0x001543c2 + 0x00000000), - (0x001543c2 + 0x00000800), - (0x001543c2 + 0x00001000), - (0x001543c2 + 0x00001800), - (0x001943c2 + 0x00000000), - (0x001943c2 + 0x00000800), - (0x001943c2 + 0x00001000), - (0x001943c2 + 0x00001800), - (0x001d43c2 + 0x00000000), - (0x001d43c2 + 0x00000800), - (0x001d43c2 + 0x00001000), - (0x001d43c2 + 0x00001800), -}; - static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned type, @@ -502,6 +467,7 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, WREG32(reg, tmp); } } + break; default: break; } @@ -524,14 +490,29 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= 
((u64)entry->src_data[1] & 0xf) << 44; - if (retry_fault && amdgpu_gmc_filter_faults(adev, addr, entry->pasid, - entry->timestamp)) - return 1; /* This also prevents sending it to KFD */ + if (retry_fault) { + /* Returning 1 here also prevents sending the IV to the KFD */ - /* If it's the first fault for this address, process it normally */ - if (retry_fault && !in_interrupt() && - amdgpu_vm_handle_fault(adev, entry->pasid, addr)) - return 1; /* This also prevents sending it to KFD */ + /* Process it only if it's the first fault for this address */ + if (entry->ih != &adev->irq.ih_soft && + amdgpu_gmc_filter_faults(adev, addr, entry->pasid, + entry->timestamp)) + return 1; + + /* Delegate it to a different ring if the hardware hasn't + * already done it. + */ + if (in_interrupt()) { + amdgpu_irq_delegate(adev, entry, 8); + return 1; + } + + /* Try to handle the recoverable page faults by filling page + * tables + */ + if (amdgpu_vm_handle_fault(adev, entry->pasid, addr)) + return 1; + } if (!printk_ratelimit()) return 0; @@ -557,7 +538,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " in page starting at address 0x%016llx from client %d\n", + dev_err(adev->dev, " in page starting at address 0x%012llx from client %d\n", addr, entry->client_id); if (amdgpu_sriov_vf(adev)) @@ -710,6 +691,7 @@ static bool gmc_v9_0_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @vmhub: which hub to flush * @flush_type: the flush type * * Flush the TLB for the requested page table using certain type. 
@@ -826,6 +808,8 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, * * @adev: amdgpu_device pointer * @pasid: pasid to be flush + * @flush_type: the flush type + * @all_hub: flush all hubs * * Flush the TLB for the requested pasid. */ @@ -1193,8 +1177,6 @@ static int gmc_v9_0_late_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; - amdgpu_bo_late_init(adev); - r = amdgpu_gmc_allocate_vm_inv_eng(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index a13dd9a51149..37d8b6ca4dab 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -179,6 +179,7 @@ static void iceland_ih_irq_disable(struct amdgpu_device *adev) * iceland_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). Also check for @@ -213,6 +214,8 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, * iceland_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -245,6 +248,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, * iceland_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index c600b61b5f45..7332a320ede8 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -210,7 +210,9 @@ static void jpeg_v1_0_decode_ring_insert_end(struct amdgpu_ring *ring) * jpeg_v1_0_decode_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -282,7 +284,9 @@ static void jpeg_v1_0_decode_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, * jpeg_v1_0_decode_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer. */ @@ -511,6 +515,7 @@ void jpeg_v1_0_sw_fini(void *handle) * jpeg_v1_0_start - start JPEG block * * @adev: amdgpu_device pointer + * @mode: SPG or DPG mode * * Setup and start the JPEG block */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 6b80dcea80ec..3b22953aa62e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -489,7 +489,9 @@ void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * jpeg_v2_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -538,7 +540,9 @@ void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, * jpeg_v2_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index 0309d84c887d..d7b39c07de20 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -344,7 +344,7 @@ static void mmhub_v1_0_gart_disable(struct amdgpu_device *adev) u32 i; /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 4ac8ac0c56c8..092ff2c43658 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -195,17 +195,17 @@ static void mmhub_v2_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); if (!amdgpu_sriov_vf(adev)) { /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); } /* Set default page address. 
*/ @@ -421,7 +421,7 @@ static void mmhub_v2_0_gart_disable(struct amdgpu_device *adev) u32 i; /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 3a248c8cd0b9..b72c8e4ca36b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -152,14 +152,14 @@ static void mmhub_v2_3_init_system_aperture_regs(struct amdgpu_device *adev) /* Disable AGP. */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); WREG32_SOC15(MMHUB, 0, mmMMMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. 
*/ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + @@ -376,7 +376,7 @@ static void mmhub_v2_3_gart_disable(struct amdgpu_device *adev) u32 i; /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT0_CNTL, i * hub->ctx_distance, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 66748bb01b52..4a31737b6bb0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -405,7 +405,7 @@ static void mmhub_v9_4_gart_disable(struct amdgpu_device *adev) for (j = 0; j < MMHUB_NUM_INSTANCES; j++) { /* Disable all tables */ - for (i = 0; i < 16; i++) + for (i = 0; i < AMDGPU_NUM_VMID; i++) WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_CNTL, j * MMHUB_INSTANCE_REGISTER_OFFSET + diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 837769fcb35b..7ba229e43799 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -136,6 +136,9 @@ static void navi10_ih_enable_interrupts(struct amdgpu_device *adev) } adev->irq.ih2.enabled = true; } + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; } /** @@ -442,6 +445,7 @@ static void navi10_ih_irq_disable(struct amdgpu_device *adev) * navi10_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (NAVI10). Also check for @@ -502,6 +506,8 @@ out: * navi10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. 
@@ -545,6 +551,7 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev, * navi10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer + * @ih: IH ring to match * */ static void navi10_ih_irq_rearm(struct amdgpu_device *adev, @@ -578,6 +585,7 @@ static void navi10_ih_irq_rearm(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * + * @ih: IH ring buffer to set rptr * Set the IH ring buffer rptr. */ static void navi10_ih_set_rptr(struct amdgpu_device *adev, @@ -695,6 +703,10 @@ static int navi10_ih_sw_init(void *handle) (adev->doorbell_index.ih + 2) << 1; } + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + r = amdgpu_irq_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index e0048806afaa..b5c3db16c2b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -53,8 +53,17 @@ static void nbio_v2_3_remap_hdp_registers(struct amdgpu_device *adev) static u32 nbio_v2_3_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + u32 tmp; + /* + * guest vm gets 0xffffffff when reading RCC_DEV0_EPF0_STRAP0, + * therefore we force rev_id to 0 (which is the default value) + */ + if (amdgpu_sriov_vf(adev)) { + return 0; + } + + tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e33d8022cc32..ac02dd707c44 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -535,8 +535,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) - else if (amdgpu_device_has_dc_support(adev) && - 
!nv_is_headless_sku(adev->pdev)) + else if (amdgpu_device_has_dc_support(adev)) amdgpu_device_ip_block_add(adev, &dm_ip_block); #endif amdgpu_device_ip_block_add(adev, &gfx_v10_0_ip_block); @@ -579,7 +578,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && - is_support_sw_smu(adev) && !amdgpu_sriov_vf(adev)) + is_support_sw_smu(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 03e88dbf92be..bd4248c93c49 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -62,7 +62,7 @@ MODULE_FIRMWARE("amdgpu/navy_flounder_ta.bin"); MODULE_FIRMWARE("amdgpu/vangogh_asd.bin"); MODULE_FIRMWARE("amdgpu/vangogh_toc.bin"); MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_sos.bin"); -MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_asd.bin"); +MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 @@ -192,15 +192,11 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) break; case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: + case CHIP_DIMGREY_CAVEFISH: err = psp_init_sos_microcode(psp, chip_name); if (err) return err; - err = psp_init_ta_microcode(&adev->psp, chip_name); - if (err) - return err; - break; - case CHIP_DIMGREY_CAVEFISH: - err = psp_init_sos_microcode(psp, chip_name); + err = psp_init_ta_microcode(psp, chip_name); if (err) return err; break; @@ -224,7 +220,7 @@ out2: return err; } -int psp_v11_0_wait_for_bootloader(struct psp_context *psp) +static int psp_v11_0_wait_for_bootloader(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; diff --git 
a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index dff5c15b4858..c4828bd3264b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -40,6 +40,7 @@ MODULE_FIRMWARE("amdgpu/renoir_asd.bin"); MODULE_FIRMWARE("amdgpu/renoir_ta.bin"); MODULE_FIRMWARE("amdgpu/green_sardine_asd.bin"); +MODULE_FIRMWARE("amdgpu/green_sardine_ta.bin"); /* address block */ #define smnMP1_FIRMWARE_FLAGS 0x3010024 diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 5f304d61999e..eb5dc6c5b46e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -243,7 +243,9 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VI). */ @@ -299,7 +301,9 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -590,6 +594,7 @@ error_free_wb: * sdma_v2_4_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. 
@@ -740,6 +745,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -789,7 +795,8 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -1188,10 +1195,11 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: unused * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -1216,7 +1224,7 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index c59f6f6f4c09..ad308d8c6d30 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -417,7 +417,9 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VI). 
*/ @@ -473,7 +475,9 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -862,6 +866,7 @@ error_free_wb: * sdma_v3_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. @@ -1011,6 +1016,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, /** * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * */ @@ -1060,7 +1066,8 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -1626,10 +1633,11 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v3_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: unused * * Copy GPU buffers using the DMA engine (VI). 
* Used by the amdgpu ttm implementation to move pages if @@ -1654,7 +1662,7 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v3_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index dc74ca2aa892..ce56e93c6886 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -593,9 +593,6 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - if (amdgpu_sriov_vf(adev)) - return 0; - DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -837,7 +834,9 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (VEGA10). 
*/ @@ -912,7 +911,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -1110,7 +1111,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) } } -/** +/* * sdma_v4_0_rb_cntl - get parameters for rb_cntl */ static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) @@ -1573,6 +1574,7 @@ error_free_wb: * sdma_v4_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VEGA10). * Returns 0 on success, error on failure. @@ -1669,10 +1671,9 @@ static void sdma_v4_0_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA (VEGA10). 
*/ @@ -1727,8 +1728,8 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, /** * sdma_v4_0_ring_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding - * */ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { @@ -1772,7 +1773,8 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v4_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VEGA10). @@ -2491,10 +2493,11 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v4_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine (VEGA10/12). 
* Used by the amdgpu ttm implementation to move pages if @@ -2520,7 +2523,7 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v4_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 9c72b95b7463..b208b81005bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -203,7 +203,7 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev) const struct common_firmware_header *header = NULL; const struct sdma_firmware_header_v1_0 *hdr; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_NAVI12)) return 0; DRM_DEBUG("\n"); @@ -392,7 +392,9 @@ static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring (NAVI10). */ @@ -469,7 +471,9 @@ static void sdma_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v5_0_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -959,6 +963,7 @@ static int sdma_v5_0_ring_test_ring(struct amdgpu_ring *ring) * sdma_v5_0_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (NAVI10). * Returns 0 on success, error on failure. 
@@ -1061,10 +1066,9 @@ static void sdma_v5_0_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA (NAVI10). */ @@ -1118,6 +1122,7 @@ static void sdma_v5_0_vm_set_pte_pde(struct amdgpu_ib *ib, /** * sdma_v5_0_ring_pad_ib - pad the IB + * @ring: amdgpu_ring structure holding ring information * @ib: indirect buffer to fill with padding * * Pad the IB with NOPs to a boundary multiple of 8. @@ -1170,7 +1175,8 @@ static void sdma_v5_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v5_0_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (NAVI10). @@ -1686,10 +1692,11 @@ static void sdma_v5_0_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v5_0_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine (NAVI10). 
* Used by the amdgpu ttm implementation to move pages if @@ -1715,7 +1722,7 @@ static void sdma_v5_0_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v5_0_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index cb5a6f1437f8..39e17aae655f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -153,9 +153,6 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; - if (amdgpu_sriov_vf(adev)) - return 0; - DRM_DEBUG("\n"); switch (adev->asic_type) { @@ -197,7 +194,7 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) if (err) goto out; - err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[0]); + err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]); if (err) goto out; } @@ -356,7 +353,9 @@ static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine * * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from * @ib: IB object to schedule + * @flags: unused * * Schedule an IB in the DMA ring. 
*/ @@ -418,7 +417,9 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring) * sdma_v5_2_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -916,6 +917,7 @@ static int sdma_v5_2_ring_test_ring(struct amdgpu_ring *ring) * sdma_v5_2_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring. * Returns 0 on success, error on failure. @@ -1017,10 +1019,9 @@ static void sdma_v5_2_vm_copy_pte(struct amdgpu_ib *ib, * * @ib: indirect buffer to fill with commands * @pe: addr of the page entry - * @addr: dst addr to write into pe + * @value: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes - * @flags: access flags * * Update PTEs by writing them manually using sDMA. */ @@ -1076,6 +1077,7 @@ static void sdma_v5_2_vm_set_pte_pde(struct amdgpu_ib *ib, * sdma_v5_2_ring_pad_ib - pad the IB * * @ib: indirect buffer to fill with padding + * @ring: amdgpu_ring structure holding ring information * * Pad the IB with NOPs to a boundary multiple of 8. */ @@ -1127,7 +1129,8 @@ static void sdma_v5_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * sdma_v5_2_ring_emit_vm_flush - vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA. 
@@ -1700,10 +1703,11 @@ static void sdma_v5_2_set_irq_funcs(struct amdgpu_device *adev) /** * sdma_v5_2_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: if a secure copy should be used * * Copy GPU buffers using the DMA engine. * Used by the amdgpu ttm implementation to move pages if @@ -1729,7 +1733,7 @@ static void sdma_v5_2_emit_copy_buffer(struct amdgpu_ib *ib, /** * sdma_v5_2_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to fill * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 7d2bbcbe547b..488497ad5e0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -81,7 +81,9 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, * si_dma_ring_emit_fence - emit a fence on the DMA ring * * @ring: amdgpu ring pointer - * @fence: amdgpu fence object + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Add a DMA fence packet to the ring to write * the fence seq number and DMA trap packet to generate @@ -244,6 +246,7 @@ error_free_wb: * si_dma_ring_test_ib - test an IB on the DMA engine * * @ring: amdgpu_ring structure holding ring information + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Test a simple IB in the DMA ring (VI). * Returns 0 on success, error on failure. 
@@ -401,6 +404,7 @@ static void si_dma_vm_set_pte_pde(struct amdgpu_ib *ib, /** * si_dma_pad_ib - pad the IB to the required number of dw * + * @ring: amdgpu_ring pointer * @ib: indirect buffer to fill with padding * */ @@ -436,7 +440,8 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring) * si_dma_ring_emit_vm_flush - cik vm flush using sDMA * * @ring: amdgpu_ring pointer - * @vm: amdgpu_vm pointer + * @vmid: vmid number to use + * @pd_addr: address * * Update the page table base and flush the VM TLB * using sDMA (VI). @@ -764,10 +769,11 @@ static void si_dma_set_irq_funcs(struct amdgpu_device *adev) /** * si_dma_emit_copy_buffer - copy buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_offset: src GPU address * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer + * @tmz: is this a secure operation * * Copy GPU buffers using the DMA engine (VI). * Used by the amdgpu ttm implementation to move pages if @@ -790,7 +796,7 @@ static void si_dma_emit_copy_buffer(struct amdgpu_ib *ib, /** * si_dma_emit_fill_buffer - fill buffer using the sDMA engine * - * @ring: amdgpu_ring structure holding ring information + * @ib: indirect buffer to copy to * @src_data: value to write to buffer * @dst_offset: dst GPU address * @byte_count: number of bytes to xfer diff --git a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c index 7fb240c4990c..5c7d769aee3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/smu_v11_0_i2c.c @@ -212,6 +212,7 @@ static uint32_t smu_v11_0_i2c_poll_rx_status(struct i2c_adapter *control) /** * smu_v11_0_i2c_transmit - Send a block of data over the I2C bus to a slave device. * + * @control: I2C adapter reference * @address: The I2C address of the slave device. * @data: The data to transmit over the bus. * @numbytes: The amount of data to transmit. 
@@ -313,7 +314,9 @@ Err: /** * smu_v11_0_i2c_receive - Receive a block of data over the I2C bus from a slave device. * + * @control: I2C adapter reference * @address: The I2C address of the slave device. + * @data: Placeholder to store received data. * @numbytes: The amount of data to transmit. * @i2c_flag: Flags for transmission * diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c new file mode 100644 index 000000000000..e9c474c217ec --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "smuio_v11_0.h" +#include "smuio/smuio_11_0_0_offset.h" +#include "smuio/smuio_11_0_0_sh_mask.h" + +static u32 smuio_v11_0_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v11_0_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v11_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ + u32 def, data; + + /* enable/disable ROM CG is not supported on APU */ + if (adev->flags & AMD_IS_APU) + return; + + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else + data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + + if (def != data) + WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v11_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ + u32 data; + + /* CGTT_ROM_CLK_CTRL0 is not available for APU */ + if (adev->flags & AMD_IS_APU) + return; + + data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v11_0_funcs = { + .get_rom_index_offset = smuio_v11_0_get_rom_index_offset, + .get_rom_data_offset = smuio_v11_0_get_rom_data_offset, + .update_rom_clock_gating = smuio_v11_0_update_rom_clock_gating, + .get_clock_gating_state = smuio_v11_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h new file mode 100644 index 000000000000..43c4262f2b8b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v11_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __SMUIO_V11_0_H__ +#define __SMUIO_V11_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v11_0_funcs; + +#endif /* __SMUIO_V11_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c new file mode 100644 index 000000000000..8417890af227 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.c @@ -0,0 +1,77 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "smuio_v9_0.h" +#include "smuio/smuio_9_0_offset.h" +#include "smuio/smuio_9_0_sh_mask.h" + +static u32 smuio_v9_0_get_rom_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); +} + +static u32 smuio_v9_0_get_rom_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); +} + +static void smuio_v9_0_update_rom_clock_gating(struct amdgpu_device *adev, bool enable) +{ + u32 def, data; + + /* enable/disable ROM CG is not supported on APU */ + if (adev->flags & AMD_IS_APU) + return; + + def = data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) + data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); + else + data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | + CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; + + if (def != data) + WREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0, data); +} + +static void smuio_v9_0_get_clock_gating_state(struct amdgpu_device *adev, u32 *flags) +{ + u32 data; + + /* CGTT_ROM_CLK_CTRL0 is not available for APUs */ + if (adev->flags & AMD_IS_APU) + return; + + data = RREG32_SOC15(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0); + if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) + *flags |= AMD_CG_SUPPORT_ROM_MGCG; +} + +const struct amdgpu_smuio_funcs smuio_v9_0_funcs = { + .get_rom_index_offset = smuio_v9_0_get_rom_index_offset, + .get_rom_data_offset = smuio_v9_0_get_rom_data_offset, + .update_rom_clock_gating = smuio_v9_0_update_rom_clock_gating, + .get_clock_gating_state = smuio_v9_0_get_clock_gating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h new file mode 100644 index 000000000000..fc265ce9837d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v9_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __SMUIO_V9_0_H__ +#define __SMUIO_V9_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_smuio_funcs smuio_v9_0_funcs; + +#endif /* __SMUIO_V9_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 5b79ce9e0699..8a23636ecc27 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -42,8 +42,6 @@ #include "sdma1/sdma1_4_0_offset.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" -#include "smuio/smuio_9_0_offset.h" -#include "smuio/smuio_9_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" @@ -71,6 +69,8 @@ #include "jpeg_v2_0.h" #include "vcn_v2_5.h" #include "jpeg_v2_5.h" +#include "smuio_v9_0.h" +#include "smuio_v11_0.h" #include "dce_virtual.h" #include "mxgpu_ai.h" #include "amdgpu_smu.h" @@ -91,12 +91,6 @@ #define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L #define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 -/* for Vega20/arcturus regiter offset change */ -#define mmROM_INDEX_VG20 0x00e4 -#define mmROM_INDEX_VG20_BASE_IDX 0 -#define mmROM_DATA_VG20 0x00e5 -#define mmROM_DATA_VG20_BASE_IDX 0 - /* * Indirect registers accessor */ @@ -296,17 +290,10 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, dw_ptr = (u32 *)bios; length_dw = ALIGN(length_bytes, 4) / 4; - switch (adev->asic_type) { - case CHIP_VEGA20: - case CHIP_ARCTURUS: - rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX_VG20); - rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA_VG20); - break; - default: - rom_index_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_INDEX); - rom_data_offset = SOC15_REG_OFFSET(SMUIO, 0, mmROM_DATA); - break; - } + rom_index_offset = + adev->smuio.funcs->get_rom_index_offset(adev); + rom_data_offset = + adev->smuio.funcs->get_rom_data_offset(adev); /* set rom index to 0 */ WREG32(rom_index_offset, 0); @@ -718,6 +705,12 @@ int soc15_set_ip_blocks(struct 
amdgpu_device *adev) else adev->df.funcs = &df_v1_7_funcs; + if (adev->asic_type == CHIP_VEGA20 || + adev->asic_type == CHIP_ARCTURUS) + adev->smuio.funcs = &smuio_v11_0_funcs; + else + adev->smuio.funcs = &smuio_v9_0_funcs; + adev->rev_id = soc15_get_rev_id(adev); switch (adev->asic_type) { @@ -1176,7 +1169,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1194,7 +1186,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1218,7 +1209,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_DRM_MGCG | AMD_CG_SUPPORT_DRM_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_SDMA_MGCG | @@ -1271,7 +1261,6 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_BIF_LS | AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_ROM_MGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | AMD_CG_SUPPORT_IH_CG | @@ -1511,24 +1500,6 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); } -static void soc15_update_rom_medium_grain_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t def, data; - - def = data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_ROM_MGCG)) - data &= ~(CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | - CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK); - else - data |= CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK | - CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE1_MASK; - - if (def != data) - WREG32(SOC15_REG_OFFSET(SMUIO, 0, 
mmCGTT_ROM_CLK_CTRL0), data); -} - static int soc15_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -1551,7 +1522,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_rom_medium_grain_clock_gating(adev, + adev->smuio.funcs->update_rom_clock_gating(adev, state == AMD_CG_STATE_GATE); adev->df.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -1568,8 +1539,6 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); soc15_update_drm_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_rom_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE); break; case CHIP_ARCTURUS: soc15_update_hdp_light_sleep(adev, @@ -1607,9 +1576,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) *flags |= AMD_CG_SUPPORT_DRM_LS; /* AMD_CG_SUPPORT_ROM_MGCG */ - data = RREG32(SOC15_REG_OFFSET(SMUIO, 0, mmCGTT_ROM_CLK_CTRL0)); - if (!(data & CGTT_ROM_CLK_CTRL0__SOFT_OVERRIDE0_MASK)) - *flags |= AMD_CG_SUPPORT_ROM_MGCG; + adev->smuio.funcs->get_clock_gating_state(adev, flags); adev->df.funcs->get_clockgating_state(adev, flags); } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index e40140bf6699..ce3319993b4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -181,6 +181,7 @@ static void tonga_ih_irq_disable(struct amdgpu_device *adev) * tonga_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VI). 
Also check for @@ -215,6 +216,8 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, * tonga_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -247,6 +250,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, * tonga_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index 7cf4b11a65c5..10ecae257b18 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -80,7 +80,9 @@ static void uvd_v3_1_ring_set_wptr(struct amdgpu_ring *ring) * uvd_v3_1_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job associated with the indirect buffer * @ib: indirect buffer to execute + * @flags: flags associated with the indirect buffer * * Write ring commands to execute the indirect buffer */ @@ -99,7 +101,9 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v3_1_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. 
*/ @@ -277,15 +281,8 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) */ static int uvd_v3_1_fw_validate(struct amdgpu_device *adev) { - void *ptr; - uint32_t ucode_len, i; - uint32_t keysel; - - ptr = adev->uvd.inst[0].cpu_addr; - ptr += 192 + 16; - memcpy(&ucode_len, ptr, 4); - ptr += ucode_len; - memcpy(&keysel, ptr, 4); + int i; + uint32_t keysel = adev->uvd.keyselect; WREG32(mmUVD_FW_START, keysel); @@ -550,6 +547,8 @@ static int uvd_v3_1_sw_init(void *handle) struct amdgpu_ring *ring; struct amdgpu_device *adev = (struct amdgpu_device *)handle; int r; + void *ptr; + uint32_t ucode_len; /* UVD TRAP */ r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); @@ -571,6 +570,13 @@ static int uvd_v3_1_sw_init(void *handle) if (r) return r; + /* Retrieval firmware validate key */ + ptr = adev->uvd.inst[0].cpu_addr; + ptr += 192 + 16; + memcpy(&ucode_len, ptr, 4); + ptr += ucode_len; + memcpy(&adev->uvd.keyselect, ptr, 4); + r = amdgpu_uvd_entity_init(adev); return r; @@ -617,7 +623,7 @@ static void uvd_v3_1_enable_mgcg(struct amdgpu_device *adev, /** * uvd_v3_1_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -684,7 +690,7 @@ done: /** * uvd_v3_1_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index b0c0c438fc93..a70d2a0de316 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -149,7 +149,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev, /** * uvd_v4_2_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the 
hardware, boot up the VCPU and do some testing */ @@ -204,7 +204,7 @@ done: /** * uvd_v4_2_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -437,7 +437,9 @@ static void uvd_v4_2_stop(struct amdgpu_device *adev) * uvd_v4_2_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -502,7 +504,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * uvd_v4_2_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job associated with the indirect buffer * @ib: indirect buffer to execute + * @flags: flags associated with the indirect buffer * * Write ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 6e57001f6d0a..f3b0a927101b 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -145,7 +145,7 @@ static int uvd_v5_0_sw_fini(void *handle) /** * uvd_v5_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -202,7 +202,7 @@ done: /** * uvd_v5_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -454,7 +454,9 @@ static void uvd_v5_0_stop(struct amdgpu_device *adev) * uvd_v5_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. 
*/ @@ -518,7 +520,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v5_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 666bfa4a0b8e..760859880c1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -198,9 +198,9 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) /** * uvd_v6_0_enc_get_create_msg - generate a UVD ENC create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -261,9 +261,9 @@ err: /** * uvd_v6_0_enc_get_destroy_msg - generate a UVD ENC destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -326,6 +326,7 @@ err: * uvd_v6_0_enc_ring_test_ib - test if UVD ENC IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -464,7 +465,7 @@ static int uvd_v6_0_sw_fini(void *handle) /** * uvd_v6_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -533,7 +534,7 @@ done: /** * uvd_v6_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, 
mark ring as not ready any more */ @@ -891,7 +892,9 @@ static void uvd_v6_0_stop(struct amdgpu_device *adev) * uvd_v6_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -921,7 +924,9 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq * uvd_v6_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. */ @@ -986,7 +991,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * uvd_v6_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1012,7 +1019,9 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v6_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index b44c8677ce8d..312ecf6d24a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -206,9 +206,9 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) /** * uvd_v7_0_enc_get_create_msg - generate a UVD ENC create msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Open up a stream for HW test @@ -269,9 +269,9 @@ err: /** * 
uvd_v7_0_enc_get_destroy_msg - generate a UVD ENC destroy msg * - * @adev: amdgpu_device pointer * @ring: ring we should submit the msg to * @handle: session handle to use + * @bo: amdgpu object for which we query the offset * @fence: optional fence to return * * Close up a stream for HW test or if userspace failed to do so @@ -333,6 +333,7 @@ err: * uvd_v7_0_enc_ring_test_ib - test if UVD ENC IBs are working * * @ring: the engine to test on + * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * */ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) @@ -519,7 +520,7 @@ static int uvd_v7_0_sw_fini(void *handle) /** * uvd_v7_0_hw_init - start and test UVD block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Initialize the hardware, boot up the VCPU and do some testing */ @@ -597,7 +598,7 @@ done: /** * uvd_v7_0_hw_fini - stop the hardware block * - * @adev: amdgpu_device pointer + * @handle: handle used to pass amdgpu_device pointer * * Stop the UVD block, mark ring as not ready any more */ @@ -1147,7 +1148,9 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) * uvd_v7_0_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -1186,7 +1189,9 @@ static void uvd_v7_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq * uvd_v7_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
*/ @@ -1282,7 +1287,9 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, * uvd_v7_0_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1313,7 +1320,9 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * uvd_v7_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 86e1ef732ebe..c734e31a9e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -431,7 +431,6 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) * vcn_v1_0_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -558,7 +557,6 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) * vcn_v1_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Enable clock gating for VCN block */ @@ -1445,7 +1443,9 @@ static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring) * vcn_v1_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. 
*/ @@ -1484,7 +1484,9 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1619,7 +1621,9 @@ static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. */ @@ -1644,7 +1648,9 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e285f9c9d460..d63198c945bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -475,7 +475,6 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec * vcn_v2_0_disable_clock_gating - disable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Disable clock gating for VCN block */ @@ -636,7 +635,6 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, * vcn_v2_0_enable_clock_gating - enable VCN clock gating * * @adev: amdgpu_device pointer - * @sw: enable SW clock gating * * Enable clock gating for VCN block */ @@ -1397,6 +1395,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) * vcn_v2_0_dec_ring_insert_nop - insert a nop command * * @ring: amdgpu_ring pointer + * @count: the number of NOP packets to insert * * Write a nop command to the 
ring. */ @@ -1417,7 +1416,9 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * vcn_v2_0_dec_ring_emit_fence - emit an fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write a fence and a trap command to the ring. */ @@ -1454,7 +1455,9 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, * vcn_v2_0_dec_ring_emit_ib - execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrieve vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write ring commands to execute the indirect buffer */ @@ -1600,7 +1603,9 @@ static void vcn_v2_0_enc_ring_set_wptr(struct amdgpu_ring *ring) * vcn_v2_0_enc_ring_emit_fence - emit an enc fence & trap command * * @ring: amdgpu_ring pointer - * @fence: fence to emit + * @addr: address + * @seq: sequence number + * @flags: fence related flags * * Write enc a fence and a trap command to the ring. 
*/ @@ -1625,7 +1630,9 @@ void vcn_v2_0_enc_ring_insert_end(struct amdgpu_ring *ring) * vcn_v2_0_enc_ring_emit_ib - enc execute indirect buffer * * @ring: amdgpu_ring pointer + * @job: job to retrive vmid from * @ib: indirect buffer to execute + * @flags: unused * * Write enc ring commands to execute the indirect buffer */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 269002816109..def583916294 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -48,6 +48,7 @@ #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c #define VCN_INSTANCES_SIENNA_CICHLID 2 +#define DEC_SW_RING_ENABLED FALSE static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN, @@ -155,6 +156,13 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; + /* + * Note: doorbell assignment is fixed for SRIOV multiple VCN engines + * Formula: + * vcn_db_base = adev->doorbell_index.vcn.vcn_ring0_1 << 1; + * dec_ring_i = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + * enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j + */ if (amdgpu_sriov_vf(adev)) { vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1; /* get DWORD offset */ @@ -162,6 +170,7 @@ static int vcn_v3_0_sw_init(void *handle) } for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; if (adev->vcn.harvest_config & (1 << i)) continue; @@ -192,9 +201,7 @@ static int vcn_v3_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index; - /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ - vcn_doorbell_index++; + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1); } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } @@ -216,9 +223,7 @@ static int vcn_v3_0_sw_init(void *handle) ring = &adev->vcn.inst[i].ring_enc[j]; 
ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index; - /* NOTE: increment so next VCN engine use next DOORBELL DWORD */ - vcn_doorbell_index++; + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j; } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } @@ -230,6 +235,11 @@ static int vcn_v3_0_sw_init(void *handle) if (r) return r; } + + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) | + cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG); + fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED); } if (amdgpu_sriov_vf(adev)) { @@ -253,7 +263,17 @@ static int vcn_v3_0_sw_init(void *handle) static int vcn_v3_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_fw_shared *fw_shared; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sw_ring.is_enabled = false; + } if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); @@ -291,17 +311,19 @@ static int vcn_v3_0_hw_init(void *handle) continue; ring = &adev->vcn.inst[i].ring_dec; - ring->wptr = 0; - ring->wptr_old = 0; - vcn_v3_0_dec_ring_set_wptr(ring); - ring->sched.ready = true; + if (ring->sched.ready) { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_dec_ring_set_wptr(ring); + } for (j = 0; j < adev->vcn.num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; - ring->wptr = 0; - ring->wptr_old = 0; - vcn_v3_0_enc_ring_set_wptr(ring); - ring->sched.ready = true; + if (ring->sched.ready) { + ring->wptr = 0; + ring->wptr_old = 0; + vcn_v3_0_enc_ring_set_wptr(ring); + } } } } else { @@ -461,6 +483,15 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) 
upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0); WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared_gpu_addr)); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) @@ -543,13 +574,16 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /* non-cache window */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect); + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect); + VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( - VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect); + VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect); } static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) @@ -902,6 +936,7 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool 
indirect) { + volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1011,6 +1046,12 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + /* set the write pointer delay */ WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0); @@ -1033,11 +1074,17 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + return 0; } static int vcn_v3_0_start(struct amdgpu_device *adev) { + volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; int i, j, k, r; @@ -1180,6 +1227,9 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + /* programm the RB_BASE for ring buffer */ WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); @@ -1192,19 +1242,25 @@ static int vcn_v3_0_start(struct amdgpu_device *adev) ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); + 
fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[i].ring_enc[0]; WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[i].ring_enc[1]; WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); } return 0; @@ -1227,12 +1283,12 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) uint32_t table_size; uint32_t size, size_dw; + bool is_vcn_ready; + struct mmsch_v3_0_cmd_direct_write direct_wt = { {0} }; struct mmsch_v3_0_cmd_direct_read_modify_write direct_rd_mod_wt = { {0} }; - struct mmsch_v3_0_cmd_direct_polling - direct_poll = { {0} }; struct mmsch_v3_0_cmd_end end = { {0} }; struct mmsch_v3_0_init_header header; @@ -1240,8 +1296,6 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) MMSCH_COMMAND__DIRECT_REG_WRITE; direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE; - direct_poll.cmd_header.command_type = - MMSCH_COMMAND__DIRECT_REG_POLLING; end.cmd_header.command_type = MMSCH_COMMAND__END; @@ -1364,7 +1418,7 @@ static int vcn_v3_0_start_sriov(struct 
amdgpu_device *adev) MMSCH_V3_0_INSERT_END(); /* refine header */ - header.inst[i].init_status = 1; + header.inst[i].init_status = 0; header.inst[i].table_offset = header.total_size; header.inst[i].table_size = table_size; header.total_size += table_size; @@ -1422,6 +1476,30 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) } } + /* 6, check each VCN's init_status + * if it remains as 0, then this VCN is not assigned to current VF + * do not start ring for this VCN + */ + size = sizeof(struct mmsch_v3_0_init_header); + table_loc = (uint32_t *)table->cpu_addr; + memcpy(&header, (void *)table_loc, size); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + + is_vcn_ready = (header.inst[i].init_status == 1); + if (!is_vcn_ready) + DRM_INFO("VCN(%d) engine is disabled by hypervisor\n", i); + + ring = &adev->vcn.inst[i].ring_dec; + ring->sched.ready = is_vcn_ready; + for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + ring = &adev->vcn.inst[i].ring_enc[j]; + ring->sched.ready = is_vcn_ready; + } + } + return 0; } @@ -1531,6 +1609,7 @@ static int vcn_v3_0_stop(struct amdgpu_device *adev) static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, struct dpg_pause_state *new_state) { + volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; @@ -1556,23 +1635,36 @@ static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + /* Stall DPG before WPTR/RPTR reset */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK, + ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); + /* Restore */ + fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr; + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + ring->wptr = 0; 
WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); ring = &adev->vcn.inst[inst_idx].ring_enc[1]; + ring->wptr = 0; WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, - RREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF); + /* Unstall DPG */ + WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), + 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK); SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1630,10 +1722,6 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2, - lower_32_bits(ring->wptr) | 0x80000000); - if (ring->use_doorbell) { adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); @@ -1642,6 +1730,98 @@ static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) } } +static void vcn_v3_0_dec_sw_ring_emit_fence(struct amdgpu_ring *ring, 
u64 addr, + u64 seq, uint32_t flags) +{ + WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_FENCE); + amdgpu_ring_write(ring, addr); + amdgpu_ring_write(ring, upper_32_bits(addr)); + amdgpu_ring_write(ring, seq); + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_TRAP); +} + +static void vcn_v3_0_dec_sw_ring_insert_end(struct amdgpu_ring *ring) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END); +} + +static void vcn_v3_0_dec_sw_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + uint32_t flags) +{ + uint32_t vmid = AMDGPU_JOB_GET_VMID(job); + + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_IB); + amdgpu_ring_write(ring, vmid); + amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, ib->length_dw); +} + +static void vcn_v3_0_dec_sw_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WAIT); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, mask); + amdgpu_ring_write(ring, val); +} + +static void vcn_v3_0_dec_sw_ring_emit_vm_flush(struct amdgpu_ring *ring, + uint32_t vmid, uint64_t pd_addr) +{ + struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; + uint32_t data0, data1, mask; + + pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + + /* wait for register write */ + data0 = hub->ctx0_ptb_addr_lo32 + vmid * hub->ctx_addr_distance; + data1 = lower_32_bits(pd_addr); + mask = 0xffffffff; + vcn_v3_0_dec_sw_ring_emit_reg_wait(ring, data0, data1, mask); +} + +static void vcn_v3_0_dec_sw_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +{ + amdgpu_ring_write(ring, VCN_DEC_SW_CMD_REG_WRITE); + amdgpu_ring_write(ring, reg << 2); + amdgpu_ring_write(ring, val); +} + +static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_DEC, + .align_mask = 0x3f, + .nop 
= VCN_DEC_SW_CMD_NO_OP, + .vmhub = AMDGPU_MMHUB_0, + .get_rptr = vcn_v3_0_dec_ring_get_rptr, + .get_wptr = vcn_v3_0_dec_ring_get_wptr, + .set_wptr = vcn_v3_0_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v3_0_dec_sw_ring_emit_vm_flush */ + 5 + 5 + /* vcn_v3_0_dec_sw_ring_emit_fdec_swe x2 vm fdec_swe */ + 1, /* vcn_v3_0_dec_sw_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v3_0_dec_sw_ring_emit_ib */ + .emit_ib = vcn_v3_0_dec_sw_ring_emit_ib, + .emit_fence = vcn_v3_0_dec_sw_ring_emit_fence, + .emit_vm_flush = vcn_v3_0_dec_sw_ring_emit_vm_flush, + .test_ring = amdgpu_vcn_dec_sw_ring_test_ring, + .test_ib = NULL,//amdgpu_vcn_dec_sw_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v3_0_dec_sw_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v3_0_dec_sw_ring_emit_wreg, + .emit_reg_wait = vcn_v3_0_dec_sw_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_DEC, .align_mask = 0xf, @@ -1779,9 +1959,13 @@ static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs; + if (!DEC_SW_RING_ENABLED) + adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs; + else + adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs; adev->vcn.inst[i].ring_dec.me = i; - DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i); + DRM_INFO("VCN(%d) decode%s is enabled in VM mode\n", i, + DEC_SW_RING_ENABLED?"(Software Ring)":""); } } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 407c6093c2ec..e5ae31eb744e 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -91,6 +91,9 @@ static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) } adev->irq.ih2.enabled = true; } + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; } /** @@ -366,6 +369,7 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev) * vega10_ih_get_wptr - get the IH ring buffer wptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to fetch wptr * * Get the IH ring buffer wptr from either the register * or the writeback memory buffer (VEGA10). Also check for @@ -430,6 +434,8 @@ out: * vega10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to decode + * @entry: IV entry to place decoded information into * * Decodes the interrupt vector at the current rptr * position and also advance the position. @@ -473,6 +479,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, * vega10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer + * @ih: IH ring to match * */ static void vega10_ih_irq_rearm(struct amdgpu_device *adev, @@ -505,6 +512,7 @@ static void vega10_ih_irq_rearm(struct amdgpu_device *adev, * vega10_ih_set_rptr - set the IH ring buffer rptr * * @adev: amdgpu_device pointer + * @ih: IH ring buffer to set rptr * * Set the IH ring buffer rptr. 
*/ @@ -606,6 +614,10 @@ static int vega10_ih_sw_init(void *handle) adev->irq.ih2.use_doorbell = true; adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + r = amdgpu_irq_init(adev); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 9bcd0eebc6d7..d56b474b3a21 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1645,6 +1645,7 @@ static int vi_common_set_clockgating_state(void *handle, case CHIP_POLARIS12: case CHIP_VEGAM: vi_common_set_clockgating_state_by_smu(adev, state); + break; default: break; } |