author    Linus Torvalds <torvalds@linux-foundation.org> 2020-01-30 19:04:01 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-01-30 19:04:01 +0300
commit    9f68e3655aae6d49d6ba05dd263f99f33c2567af (patch)
tree      42c2c4579c4acbbb456695326af4f4ad8f402813 /drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
parent    4cadc60d6bcfee9c626d4b55e9dc1475d21ad3bb (diff)
parent    d47c7f06268082bc0082a15297a07c0da59b0fc4 (diff)
Merge tag 'drm-next-2020-01-30' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "This is the main pull request for graphics for 5.6. Usual selection
  of changes all over.

  I've got one outstanding vmwgfx pull that touches mm so kept it
  separate until after all of this lands. I'll try and get it to you
  soon after this, but it might be early next week (nothing wrong with
  the code, just my schedule is messy).

  This also hits a lot of fbdev drivers with some cleanups.

  Other notables:
   - vulkan timeline semaphore support added to syncobjs
   - nouveau turing secureboot/graphics support
   - DisplayPort MST display stream compression support

  Detailed summary:

  uapi:
   - dma-buf heaps added (and fixed)
   - command line add support for panel orientation
   - command line allow overriding penguin count

  drm:
   - mipi dsi definition updates
   - lockdep annotations for dma_resv
   - remove dma-buf kmap/kunmap support
   - constify fb_ops in all fbdev drivers
   - MST fix for daisy-chained hotplug
   - CTA-861-G modes with VIC >= 193 added
   - fix drm_panel_of_backlight export
   - LVDS decoder support
   - more device based logging support
   - scanline alignment for dumb buffers
   - MST DSC helpers

  scheduler:
   - documentation fixes
   - job distribution improvements

  panel:
   - Logic PD type 28 panel support
   - Jimax8729d MIPI-DSI
   - Ingenic JZ4770
   - generic DSI devicetree bindings
   - Sony acx424AKP panel
   - Leadtek LTK500HD1829
   - Xinpeng XPP055C272
   - AUO B116XAK01
   - GiantPlus GPM940B0
   - BOE NV140FHM-N49
   - Satoz SAT050AT40H12R2
   - Sharp LS020B1DD01D panels

  ttm:
   - use blocking WW lock

  i915:
   - hw/uapi state separation
   - Lock annotation improvements
   - selftest improvements
   - ICL/TGL DSI VDSC support
   - VBT parsing improvements
   - Display refactoring
   - DSI updates + fixes
   - HDCP 2.2 for CFL
   - CML PCI ID fixes
   - GLK+ fbc fix
   - PSR fixes
   - GEN/GT refactor improvements
   - DP MST fixes
   - switch context id alloc to xarray
   - workaround updates
   - LMEM debugfs support
   - tiled monitor fixes
   - ICL+ clock gating programming removed
   - DP MST disable sequence fixed
   - LMEM discontiguous object maps
   - prefaulting for discontiguous objects
   - use LMEM for dumb buffers if possible
   - add LMEM mmap support

  amdgpu:
   - enable sync object timelines for vulkan
   - MST atomic routines
   - enable MST DSC support
   - add DMCUB display microengine support
   - DC OEM i2c support
   - Renoir DC fixes
   - Initial HDCP 2.x support
   - BACO support for Arcturus
   - Use BACO for runtime PM power save
   - gfxoff on navi10
   - gfx10 golden updates and fixes
   - DCN support on POWER
   - GFXOFF for raven1 refresh
   - MM engine idle handlers cleanup
   - 10bpc EDP panel fixes
   - renoir watermark fixes
   - SR-IOV fixes
   - Arcturus VCN fixes
   - GDDR6 training fixes
   - freesync fixes
   - Pollock support

  amdkfd:
   - unify more codepath with amdgpu
   - use KIQ to setup HIQ rather than MMIO

  radeon:
   - fix vma fault handler race
   - PPC DMA fix
   - register check fixes for r100/r200

  nouveau:
   - mmap_sem vs dma_resv fix
   - rewrite the ACR secure boot code for Turing
   - TU10x graphics engine support (TU11x pending)
   - Page kind mapping for turing
   - 10-bit LUT support
   - GP10B Tegra fixes
   - HD audio regression fix

  hisilicon/hibmc:
   - use generic fbdev code and helpers

  rockchip:
   - dsi/px30 support

  virtio:
   - fb damage support
   - static some functions

  vc4:
   - use dma_resv lock wrappers

  msm:
   - use dma_resv lock wrappers
   - sc7180 display + DSI support
   - a618 support
   - UBWC support improvements

  vmwgfx:
   - updates + new logging uapi

  exynos:
   - enable/disable callback cleanups

  etnaviv:
   - use dma_resv lock wrappers

  atmel-hlcdc:
   - clock fixes

  mediatek:
   - cmdq support
   - non-smooth cursor fixes
   - ctm property support

  sun4i:
   - suspend support
   - A64 mipi dsi support

  rcar-du:
   - Color management module support
   - LVDS encoder dual-link support
   - R8A77980 support

  analogix:
   - add support for anx6345

  ast:
   - atomic modeset support
   - primary plane garbage fix

  arcpgu:
   - fixes for fourcc handling

  tegra:
   - minor fixes and improvements

  mcde:
   - vblank support

  meson:
   - OSD1 plane AFBC commit

  gma500:
   - add pageflip support
   - remove global drm_dev

  komeda:
   - tweak debugfs output
   - d32 support
   - runtime PM support

  udl:
   - use generic shmem helpers
   - cleanup and fixes"

* tag 'drm-next-2020-01-30' of git://anongit.freedesktop.org/drm/drm: (1998 commits)
  drm/nouveau/fb/gp102-: allow module to load even when scrubber binary is missing
  drm/nouveau/acr: return error when registering LSF if ACR not supported
  drm/nouveau/disp/gv100-: not all channel types support reporting error codes
  drm/nouveau/disp/nv50-: prevent oops when no channel method map provided
  drm/nouveau: support synchronous pushbuf submission
  drm/nouveau: signal pending fences when channel has been killed
  drm/nouveau: reject attempts to submit to dead channels
  drm/nouveau: zero vma pointer even if we only unreference it rather than free
  drm/nouveau: Add HD-audio component notifier support
  drm/nouveau: fix build error without CONFIG_IOMMU_API
  drm/nouveau/kms/nv04: remove set but not used variable 'width'
  drm/nouveau/kms/nv50: remove set but not used variable 'nv_connector'
  drm/nouveau/mmu: fix comptag memory leak
  drm/nouveau/gr/gp10b: Use gp100_grctx and gp100_gr_zbc
  drm/nouveau/pmu/gm20b,gp10b: Fix Falcon bootstrapping
  drm/exynos: Rename Exynos to lowercase
  drm/exynos: change callback names
  drm/mst: Don't do atomic checks over disabled managers
  drm/amdgpu: add the lost mutex_init back
  drm/amd/display: skip opp blank or unblank if test pattern enabled
  ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c | 883
 1 file changed, 634 insertions(+), 249 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 93edf9193a7b..c8b63d57a541 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -29,6 +29,7 @@
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
+#include "mmsch_v1_0.h"
#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
@@ -47,16 +48,16 @@
#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
-#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
-
-#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
+#define VCN25_MAX_HW_INSTANCES_ARCTURUS 2
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_5_set_powergating_state(void *handle,
enum amd_powergating_state state);
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state);
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
@@ -91,11 +92,16 @@ static int vcn_v2_5_early_init(void *handle)
} else
adev->vcn.num_vcn_inst = 1;
- adev->vcn.num_enc_rings = 2;
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.num_vcn_inst = 2;
+ adev->vcn.harvest_config = 0;
+ adev->vcn.num_enc_rings = 1;
+ } else {
+ adev->vcn.num_enc_rings = 2;
+ }
vcn_v2_5_set_dec_ring_funcs(adev);
vcn_v2_5_set_enc_ring_funcs(adev);
- vcn_v2_5_set_jpeg_ring_funcs(adev);
vcn_v2_5_set_irq_funcs(adev);
return 0;
@@ -130,12 +136,6 @@ static int vcn_v2_5_sw_init(void *handle)
if (r)
return r;
}
-
- /* VCN JPEG TRAP */
- r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
- VCN_2_0__SRCID__JPEG_DECODE, &adev->vcn.inst[j].irq);
- if (r)
- return r;
}
r = amdgpu_vcn_sw_init(adev);
@@ -184,12 +184,11 @@ static int vcn_v2_5_sw_init(void *handle)
adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(UVD, j, mmUVD_NO_OP);
- adev->vcn.internal.jpeg_pitch = mmUVD_JPEG_PITCH_INTERNAL_OFFSET;
- adev->vcn.inst[j].external.jpeg_pitch = SOC15_REG_OFFSET(UVD, j, mmUVD_JPEG_PITCH);
-
ring = &adev->vcn.inst[j].ring_dec;
ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8*j;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? 2*j : 8*j);
sprintf(ring->name, "vcn_dec_%d", j);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
if (r)
@@ -198,22 +197,26 @@ static int vcn_v2_5_sw_init(void *handle)
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
ring = &adev->vcn.inst[j].ring_enc[i];
ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + i + 8*j;
+
+ ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
+ (amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));
+
sprintf(ring->name, "vcn_enc_%d.%d", j, i);
r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
if (r)
return r;
}
+ }
- ring = &adev->vcn.inst[j].ring_jpeg;
- ring->use_doorbell = true;
- ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 1 + 8*j;
- sprintf(ring->name, "vcn_jpeg_%d", j);
- r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq, 0);
+ if (amdgpu_sriov_vf(adev)) {
+ r = amdgpu_virt_alloc_mm_table(adev);
if (r)
return r;
}
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;
+
return 0;
}
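The doorbell math in the hunk above is easy to sanity-check outside the driver: bare metal spaces VCN instances 8 doorbells apart (dec, two enc, the former jpeg ring, padding), while SR-IOV packs them 2 apart (dec plus one enc). A minimal standalone sketch in plain C; VCN_RING0_1_BASE is an invented placeholder, not the value the driver reads from adev->doorbell_index:

#include <stdio.h>
#include <stdbool.h>

/* Hypothetical base index; the real one comes from adev->doorbell_index. */
#define VCN_RING0_1_BASE 0x188

/* Mirrors vcn_v2_5_sw_init(): per-instance stride is 8 on bare metal, 2 under SR-IOV. */
static int dec_doorbell(bool sriov, int inst)
{
	return (VCN_RING0_1_BASE << 1) + (sriov ? 2 * inst : 8 * inst);
}

static int enc_doorbell(bool sriov, int inst, int ring)
{
	return (VCN_RING0_1_BASE << 1) +
	       (sriov ? (1 + ring + 2 * inst) : (2 + ring + 8 * inst));
}

int main(void)
{
	for (int inst = 0; inst < 2; inst++)
		printf("inst %d: bare dec=%d enc0=%d | sriov dec=%d enc0=%d\n",
		       inst,
		       dec_doorbell(false, inst), enc_doorbell(false, inst, 0),
		       dec_doorbell(true, inst), enc_doorbell(true, inst, 0));
	return 0;
}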
@@ -229,6 +232,9 @@ static int vcn_v2_5_sw_fini(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (amdgpu_sriov_vf(adev))
+ amdgpu_virt_free_mm_table(adev);
+
r = amdgpu_vcn_suspend(adev);
if (r)
return r;
@@ -249,35 +255,44 @@ static int vcn_v2_5_hw_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
- int i, j, r;
+ int i, j, r = 0;
+
+ if (amdgpu_sriov_vf(adev))
+ r = vcn_v2_5_sriov_start(adev);
for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
if (adev->vcn.harvest_config & (1 << j))
continue;
- ring = &adev->vcn.inst[j].ring_dec;
- adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
- ring->doorbell_index, j);
+ if (amdgpu_sriov_vf(adev)) {
+ adev->vcn.inst[j].ring_enc[0].sched.ready = true;
+ adev->vcn.inst[j].ring_enc[1].sched.ready = false;
+ adev->vcn.inst[j].ring_enc[2].sched.ready = false;
+ adev->vcn.inst[j].ring_dec.sched.ready = true;
+ } else {
- r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
+ ring = &adev->vcn.inst[j].ring_dec;
+
+ adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
+ ring->doorbell_index, j);
- for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- ring = &adev->vcn.inst[j].ring_enc[i];
r = amdgpu_ring_test_helper(ring);
if (r)
goto done;
- }
- ring = &adev->vcn.inst[j].ring_jpeg;
- r = amdgpu_ring_test_helper(ring);
- if (r)
- goto done;
+ for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+ ring = &adev->vcn.inst[j].ring_enc[i];
+ r = amdgpu_ring_test_helper(ring);
+ if (r)
+ goto done;
+ }
+ }
}
+
done:
if (!r)
- DRM_INFO("VCN decode and encode initialized successfully.\n");
+ DRM_INFO("VCN decode and encode initialized successfully (under %s).\n",
+ (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? "DPG Mode" : "SPG Mode");
return r;
}
@@ -300,7 +315,9 @@ static int vcn_v2_5_hw_fini(void *handle)
continue;
ring = &adev->vcn.inst[i].ring_dec;
- if (RREG32_SOC15(VCN, i, mmUVD_STATUS))
+ if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+ (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
+ RREG32_SOC15(VCN, i, mmUVD_STATUS)))
vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
ring->sched.ready = false;
@@ -309,9 +326,6 @@ static int vcn_v2_5_hw_fini(void *handle)
ring = &adev->vcn.inst[i].ring_enc[j];
ring->sched.ready = false;
}
-
- ring = &adev->vcn.inst[i].ring_jpeg;
- ring->sched.ready = false;
}
return 0;
@@ -378,9 +392,9 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
/* cache window 0: fw */
if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
- (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
offset = 0;
} else {
@@ -412,6 +426,99 @@ static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
}
}
+static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
+{
+ uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ uint32_t offset;
+
+ /* cache window 0: fw */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
+ }
+ offset = 0;
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
+ offset = size;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
+ }
+
+ if (!indirect)
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
+ else
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
+
+ /* cache window 1: stack */
+ if (!indirect) {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ } else {
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
+ }
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
+
+ /* cache window 2: context */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
+
+ /* non-cache window */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0), 0, 0, indirect);
+
+ /* VCN global tiling registers */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
+}
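The three cache windows programmed above partition one contiguous VCN buffer object: firmware image first (unless PSP loads it from TMR, in which case window 0 points elsewhere and the offset stays 0), then the stack window, then the context window. A hedged sketch of just the offset arithmetic, with the page size, stack/context sizes, and firmware size all assumed placeholder values rather than the driver's real constants:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define GPU_PAGE_SIZE 4096u
#define GPU_PAGE_ALIGN(x) (((x) + GPU_PAGE_SIZE - 1) & ~(GPU_PAGE_SIZE - 1))

/* Placeholder sizes; the real ones come from the fw binary and amdgpu_vcn.h. */
#define VCN_STACK_SIZE   (128u * 1024u)
#define VCN_CONTEXT_SIZE (512u * 1024u)

int main(void)
{
	uint64_t gpu_addr = 0x100000000ull;	/* assumed BO base address */
	uint32_t fw_size  = 300000;		/* assumed fw binary size */
	uint32_t size = GPU_PAGE_ALIGN(fw_size + 4);
	bool psp_load = false;

	/* With PSP load the fw lives in TMR, so the BO offset stays 0;
	 * otherwise the stack window starts right after the fw image. */
	uint32_t offset = psp_load ? 0 : size;

	printf("win0 fw:      base=%#llx size=%u\n",
	       (unsigned long long)gpu_addr, size);
	printf("win1 stack:   base=%#llx size=%u\n",
	       (unsigned long long)(gpu_addr + offset), VCN_STACK_SIZE);
	printf("win2 context: base=%#llx size=%u\n",
	       (unsigned long long)(gpu_addr + offset + VCN_STACK_SIZE),
	       VCN_CONTEXT_SIZE);
	return 0;
}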
+
/**
* vcn_v2_5_disable_clock_gating - disable VCN clock gating
*
@@ -530,6 +637,54 @@ static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
}
}
+static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
+ uint8_t sram_sel, int inst_idx, uint8_t indirect)
+{
+ uint32_t reg_data = 0;
+
+ /* enable sw clock gating control */
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
+ reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ else
+ reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+ reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+ reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+ reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
+ UVD_CGC_CTRL__SYS_MODE_MASK |
+ UVD_CGC_CTRL__UDEC_MODE_MASK |
+ UVD_CGC_CTRL__MPEG2_MODE_MASK |
+ UVD_CGC_CTRL__REGS_MODE_MASK |
+ UVD_CGC_CTRL__RBC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_MC_MODE_MASK |
+ UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
+ UVD_CGC_CTRL__IDCT_MODE_MASK |
+ UVD_CGC_CTRL__MPRD_MODE_MASK |
+ UVD_CGC_CTRL__MPC_MODE_MASK |
+ UVD_CGC_CTRL__LBSI_MODE_MASK |
+ UVD_CGC_CTRL__LRBBM_MODE_MASK |
+ UVD_CGC_CTRL__WCB_MODE_MASK |
+ UVD_CGC_CTRL__VCPU_MODE_MASK |
+ UVD_CGC_CTRL__MMSCH_MODE_MASK);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
+
+ /* turn off clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
+
+ /* turn on SUVD clock gating */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
+
+ /* turn on sw mode in UVD_SUVD_CGC_CTRL */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
+}
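The UVD_CGC_CTRL value built above follows a set-then-clear pattern: OR in the shifted mode and delay fields, then clear a whole block of per-engine MODE bits with one combined mask so those engines stay under hardware gating control. A minimal standalone illustration of that pattern; the shift and mask constants below are invented stand-ins, not the real register layout:

#include <stdio.h>
#include <stdint.h>

/* Invented stand-ins for a few UVD_CGC_CTRL fields. */
#define DYN_CLOCK_MODE__SHIFT     0
#define CLK_GATE_DLY_TIMER__SHIFT 2
#define CLK_OFF_DELAY__SHIFT      6
#define SYS_MODE_MASK   (1u << 16)
#define UDEC_MODE_MASK  (1u << 17)
#define VCPU_MODE_MASK  (1u << 18)

int main(void)
{
	int mgcg_enabled = 1;
	uint32_t reg;

	/* choose dynamic vs. static gating, then the delay fields */
	reg  = (mgcg_enabled ? 1u : 0u) << DYN_CLOCK_MODE__SHIFT;
	reg |= 1u << CLK_GATE_DLY_TIMER__SHIFT;
	reg |= 4u << CLK_OFF_DELAY__SHIFT;

	/* force every per-engine MODE field back to 0 in one AND-NOT */
	reg &= ~(SYS_MODE_MASK | UDEC_MODE_MASK | VCPU_MODE_MASK);

	printf("UVD_CGC_CTRL candidate value: %#x\n", reg);
	return 0;
}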
+
/**
* vcn_v2_5_enable_clock_gating - enable VCN clock gating
*
@@ -592,111 +747,134 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
}
}
-/**
- * jpeg_v2_5_start - start JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * Setup and start the JPEG block
- */
-static int jpeg_v2_5_start(struct amdgpu_device *adev)
+static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
struct amdgpu_ring *ring;
- uint32_t tmp;
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- ring = &adev->vcn.inst[i].ring_jpeg;
- /* disable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS), 0,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
-
- /* JPEG disable CGC */
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
- tmp |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
- tmp |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
- tmp |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
- WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
- tmp &= ~(JPEG_CGC_GATE__JPEG_DEC_MASK
- | JPEG_CGC_GATE__JPEG2_DEC_MASK
- | JPEG_CGC_GATE__JMCIF_MASK
- | JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL);
- tmp &= ~(JPEG_CGC_CTRL__JPEG_DEC_MODE_MASK
- | JPEG_CGC_CTRL__JPEG2_DEC_MODE_MASK
- | JPEG_CGC_CTRL__JMCIF_MODE_MASK
- | JPEG_CGC_CTRL__JRBBM_MODE_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_CTRL, tmp);
-
- /* MJPEG global tiling registers */
- WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX8_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
- WREG32_SOC15(UVD, i, mmJPEG_DEC_GFX10_ADDR_CONFIG,
- adev->gfx.config.gb_addr_config);
-
- /* enable JMI channel */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL), 0,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- /* enable System Interrupt for JRBC */
- WREG32_P(SOC15_REG_OFFSET(VCN, i, mmJPEG_SYS_INT_EN),
- JPEG_SYS_INT_EN__DJRBC_MASK,
- ~JPEG_SYS_INT_EN__DJRBC_MASK);
-
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_VMID, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L));
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW,
- lower_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH,
- upper_32_bits(ring->gpu_addr));
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_RPTR, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR, 0);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_CNTL, 0x00000002L);
- WREG32_SOC15(UVD, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4);
- ring->wptr = RREG32_SOC15(UVD, i, mmUVD_JRBC_RB_WPTR);
- }
-
- return 0;
-}
-
-/**
- * jpeg_v2_5_stop - stop JPEG block
- *
- * @adev: amdgpu_device pointer
- *
- * stop the JPEG block
- */
-static int jpeg_v2_5_stop(struct amdgpu_device *adev)
-{
- uint32_t tmp;
- int i;
+ uint32_t rb_bufsz, tmp;
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- /* reset JMI */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JMI_CNTL),
- UVD_JMI_CNTL__SOFT_RESET_MASK,
- ~UVD_JMI_CNTL__SOFT_RESET_MASK);
-
- tmp = RREG32_SOC15(VCN, i, mmJPEG_CGC_GATE);
- tmp |= (JPEG_CGC_GATE__JPEG_DEC_MASK
- |JPEG_CGC_GATE__JPEG2_DEC_MASK
- |JPEG_CGC_GATE__JMCIF_MASK
- |JPEG_CGC_GATE__JRBBM_MASK);
- WREG32_SOC15(VCN, i, mmJPEG_CGC_GATE, tmp);
-
- /* enable anti hang mechanism */
- WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_JPEG_POWER_STATUS),
- UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK,
- ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK);
- }
+ /* disable register anti-hang mechanism */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 1,
+ ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
+ /* enable dynamic power gating mode */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS);
+ tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
+ tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_POWER_STATUS, tmp);
+
+ if (indirect)
+ adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
+
+ /* enable clock gating */
+ vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);
+
+ /* enable VCPU clock */
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* disable master interrupt */
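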
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
+
+ /* setup mmUVD_LMI_CTRL */
+ tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+ UVD_LMI_CTRL__REQ_MODE_MASK |
+ UVD_LMI_CTRL__CRC_RESET_MASK |
+ UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
+ UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+ UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+ (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+ 0x00100000L);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MPC_CNTL),
+ 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MPC_SET_MUXA0),
+ ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MPC_SET_MUXB0),
+ ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
+ (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
+ (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MPC_SET_MUX),
+ ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
+ (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
+ (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
+
+ vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);
+
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
+
+ /* enable LMI MC and UMC channels */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
+
+ /* unblock VCPU register access */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
+
+ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
+ tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
+
+ /* enable master interrupt */
+ WREG32_SOC15_DPG_MODE_2_0(inst_idx, SOC15_DPG_MODE_OFFSET_2_0(
+ UVD, inst_idx, mmUVD_MASTINT_EN),
+ UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
+
+ if (indirect)
+ psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
+ (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
+ (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_dec;
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
+
+ /* set the write pointer delay */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+ /* set the wb address */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
+ (upper_32_bits(ring->gpu_addr) >> 2));
+
+ /* program the RB_BASE for ring buffer */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+ lower_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+ upper_32_bits(ring->gpu_addr));
+
+ /* Initialize the ring buffer's read and write pointers */
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR, 0);
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2, 0);
+
+ ring->wptr = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_RPTR);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ lower_32_bits(ring->wptr));
return 0;
}
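The RB_CNTL value composed at the end of vcn_v2_5_start_dpg_mode() uses REG_SET_FIELD to pack fields and order_base_2 to express the ring size as a power-of-two exponent. A hedged re-implementation of both helpers, enough to reproduce the arithmetic outside the kernel; the field offsets here are invented placeholders (the real ones are generated from the register headers):

#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for the kernel's order_base_2(): ceil(log2(n)). */
static uint32_t order_base_2(uint32_t n)
{
	uint32_t order = 0;
	while ((1u << order) < n)
		order++;
	return order;
}

/* Invented field layout for illustration only. */
#define RB_BUFSZ_SHIFT    0
#define RB_BUFSZ_MASK     0x1fu
#define RB_NO_FETCH_SHIFT 16
#define RB_NO_FETCH_MASK  (1u << 16)

/* Same shape as REG_SET_FIELD(): clear the field, insert the new value. */
static uint32_t set_field(uint32_t reg, uint32_t mask, uint32_t shift,
			  uint32_t val)
{
	return (reg & ~mask) | ((val << shift) & mask);
}

int main(void)
{
	uint32_t ring_size = 512 * 4;	/* assumed ring size in bytes */
	uint32_t tmp = 0;

	tmp = set_field(tmp, RB_BUFSZ_MASK, RB_BUFSZ_SHIFT,
			order_base_2(ring_size));
	tmp = set_field(tmp, RB_NO_FETCH_MASK, RB_NO_FETCH_SHIFT, 1);
	printf("RB_CNTL candidate: %#x (bufsz order %u)\n",
	       tmp, order_base_2(ring_size));
	return 0;
}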
@@ -713,6 +891,9 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ return vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
+
/* disable register anti-hang mechanism */
WREG32_P(SOC15_REG_OFFSET(UVD, i, mmUVD_POWER_STATUS), 0,
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
@@ -874,23 +1055,251 @@ static int vcn_v2_5_start(struct amdgpu_device *adev)
WREG32_SOC15(UVD, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
WREG32_SOC15(UVD, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
}
- r = jpeg_v2_5_start(adev);
- return r;
+ return 0;
}
-static int vcn_v2_5_stop(struct amdgpu_device *adev)
+static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
+ struct amdgpu_mm_table *table)
+{
+ uint32_t data = 0, loop = 0, size = 0;
+ uint64_t addr = table->gpu_addr;
+ struct mmsch_v1_1_init_header *header = NULL;
+
+ header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
+ size = header->total_size;
+
+ /*
+ * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
+ * memory descriptor location
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));
+
+ /* 2, update vmid of descriptor */
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID);
+ data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
+ /* use domain0 for MM scheduler */
+ data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_VMID, data);
+
+ /* 3, notify mmsch about the size of this descriptor */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_CTX_SIZE, size);
+
+ /* 4, set resp to zero */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
+
+ /*
+ * 5, kick off the initialization and wait until
+ * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
+ */
+ WREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);
+
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop = 10;
+ while ((data & 0x10000002) != 0x10000002) {
+ udelay(100);
+ data = RREG32_SOC15(UVD, 0, mmMMSCH_VF_MAILBOX_RESP);
+ loop--;
+ if (!loop)
+ break;
+ }
+
+ if (!loop) {
+ dev_err(adev->dev,
+ "failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
+ data);
+ return -EBUSY;
+ }
+
+ return 0;
+}
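The mailbox wait in vcn_v2_5_mmsch_start() above is a bounded poll loop: read the response register, check for the expected bits, back off, and give up with -EBUSY after a fixed number of retries. The same pattern in a self-contained sketch; the register read is faked here, while 0x10000002 and the ten 100-microsecond retries mirror the values used above:

#include <stdio.h>

#define MMSCH_DONE 0x10000002u

/* Fake register backing; in the driver this is RREG32_SOC15(). */
static unsigned int mailbox_resp;
static unsigned int read_resp(void) { return mailbox_resp; }
static void udelay(unsigned int us) { (void)us; /* stub for the sketch */ }

static int wait_mmsch_done(void)
{
	unsigned int data = read_resp();
	int loop = 10;

	while ((data & MMSCH_DONE) != MMSCH_DONE) {
		udelay(100);
		data = read_resp();
		if (--loop == 0)
			return -16;	/* -EBUSY */
	}
	return 0;
}

int main(void)
{
	mailbox_resp = MMSCH_DONE;	/* pretend the handshake succeeded */
	printf("wait_mmsch_done() -> %d\n", wait_mmsch_done());
	return 0;
}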
+
+static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring;
+ uint32_t offset, size, tmp, i, rb_bufsz;
+ uint32_t table_size = 0;
+ struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
+ struct mmsch_v1_0_cmd_direct_polling direct_poll = { { 0 } };
+ struct mmsch_v1_0_cmd_end end = { { 0 } };
+ uint32_t *init_table = adev->virt.mm_table.cpu_addr;
+ struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;
+
+ direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
+ direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
+ direct_poll.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_POLLING;
+ end.cmd_header.command_type = MMSCH_COMMAND__END;
+
+ header->version = MMSCH_VERSION;
+ header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
+ init_table += header->total_size;
+
+ for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
+ header->eng[i].table_offset = header->total_size;
+ header->eng[i].init_status = 0;
+ header->eng[i].table_size = 0;
+
+ table_size = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS),
+ ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
+
+ size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+ /* mc resume */
+ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
+ offset = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
+ } else {
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr));
+ offset = size;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET0),
+ AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+ }
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE0),
+ size);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET1),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE1),
+ AMDGPU_VCN_STACK_SIZE);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
+ lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
+ upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
+ AMDGPU_VCN_STACK_SIZE));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_OFFSET2),
+ 0);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2),
+ AMDGPU_VCN_CONTEXT_SIZE);
+
+ ring = &adev->vcn.inst[i].ring_enc[0];
+ ring->wptr = 0;
+
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_LO),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_BASE_HI),
+ upper_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RB_SIZE),
+ ring->ring_size / 4);
+
+ ring = &adev->vcn.inst[i].ring_dec;
+ ring->wptr = 0;
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
+ lower_32_bits(ring->gpu_addr));
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i,
+ mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
+ upper_32_bits(ring->gpu_addr));
+
+ /* force RBC into idle state */
+ rb_bufsz = order_base_2(ring->ring_size);
+ tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+ tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+ MMSCH_V1_0_INSERT_DIRECT_WT(
+ SOC15_REG_OFFSET(UVD, i, mmUVD_RBC_RB_CNTL), tmp);
+
+ /* add end packet */
+ memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
+ table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+ init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;
+
+ /* refine header */
+ header->eng[i].table_size = table_size;
+ header->total_size += table_size;
+ }
+
+ return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
+}
+
+static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
+ int ret_code = 0;
uint32_t tmp;
- int i, r;
- r = jpeg_v2_5_stop(adev);
- if (r)
- return r;
+ /* Wait for power status to be 1 */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* wait for read ptr to be equal to write ptr */
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2);
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+ tmp = RREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ /* disable dynamic power gating mode */
+ WREG32_P(SOC15_REG_OFFSET(UVD, inst_idx, mmUVD_POWER_STATUS), 0,
+ ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
+
+ return 0;
+}
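Stopping DPG mode above is essentially a drain: each ring's read pointer must catch up with its write pointer before the block is power-gated, which is the condition the SOC15_WAIT_ON_RREG() calls poll for. A minimal sketch of that invariant with simulated ring state; the 0x7FFFFFFF mask matches the decode ring above, where bit 31 of the write pointer is reserved as a DPG scratch flag:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

struct ring { uint32_t rptr, wptr; };

/* True once the hardware consumed everything that was submitted. */
static bool ring_drained(const struct ring *r)
{
	return r->rptr == (r->wptr & 0x7FFFFFFF);
}

int main(void)
{
	struct ring dec = { .rptr = 32, .wptr = 32 };
	struct ring enc = { .rptr = 8,  .wptr = 12 };

	printf("dec drained: %d, enc drained: %d\n",
	       ring_drained(&dec), ring_drained(&enc));
	return 0;
}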
+
+static int vcn_v2_5_stop(struct amdgpu_device *adev)
+{
+ uint32_t tmp;
+ int i, r = 0;
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
+
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
+ r = vcn_v2_5_stop_dpg_mode(adev, i);
+ goto power_off;
+ }
+
/* wait for vcn idle */
SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
if (r)
@@ -940,12 +1349,74 @@ static int vcn_v2_5_stop(struct amdgpu_device *adev)
~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
}
+power_off:
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
return 0;
}
+static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
+ int inst_idx, struct dpg_pause_state *new_state)
+{
+ struct amdgpu_ring *ring;
+ uint32_t reg_data = 0;
+ int ret_code;
+
+ /* pause/unpause if state is changed */
+ if (adev->vcn.pause_state.fw_based != new_state->fw_based) {
+ DRM_DEBUG("dpg pause state changed %d -> %d",
+ adev->vcn.pause_state.fw_based, new_state->fw_based);
+ reg_data = RREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE) &
+ (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
+
+ if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
+ ret_code = 0;
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS, 0x1,
+ UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+
+ if (!ret_code) {
+ /* pause DPG */
+ reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+
+ /* wait for ACK */
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_DPG_PAUSE,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
+ UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);
+
+ /* Restore */
+ ring = &adev->vcn.inst[inst_idx].ring_enc[0];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+
+ ring = &adev->vcn.inst[inst_idx].ring_enc[1];
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+
+ WREG32_SOC15(UVD, inst_idx, mmUVD_RBC_RB_WPTR,
+ RREG32_SOC15(UVD, inst_idx, mmUVD_SCRATCH2) & 0x7FFFFFFF);
+
+ SOC15_WAIT_ON_RREG(UVD, inst_idx, mmUVD_POWER_STATUS,
+ 0x0, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
+ }
+ } else {
+ /* unpause dpg, no need to wait */
+ reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
+ WREG32_SOC15(UVD, inst_idx, mmUVD_DPG_PAUSE, reg_data);
+ }
+ adev->vcn.pause_state.fw_based = new_state->fw_based;
+ }
+
+ return 0;
+}
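The pause handler above is a guarded state transition: it touches the hardware only when the requested fw_based state differs from the cached one, and records the new state afterwards, so repeated calls with the same target are no-ops. A compact sketch of that idempotence pattern; the enum values stand in for the driver's dpg_pause_state and the printed steps summarize the request/ACK/restore sequence above:

#include <stdio.h>

enum dpg_fw_state { VCN_DPG_STATE__UNPAUSE, VCN_DPG_STATE__PAUSE };

static enum dpg_fw_state cur_state = VCN_DPG_STATE__UNPAUSE;

static int pause_dpg_mode(enum dpg_fw_state new_state)
{
	if (cur_state == new_state)
		return 0;	/* unchanged state: keep the call idempotent */

	if (new_state == VCN_DPG_STATE__PAUSE)
		printf("pause: set request bit, wait for ACK, restore enc rings\n");
	else
		printf("unpause: clear request bit, no wait needed\n");

	cur_state = new_state;
	return 0;
}

int main(void)
{
	pause_dpg_mode(VCN_DPG_STATE__PAUSE);
	pause_dpg_mode(VCN_DPG_STATE__PAUSE);	/* no-op the second time */
	pause_dpg_mode(VCN_DPG_STATE__UNPAUSE);
	return 0;
}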
+
/**
* vcn_v2_5_dec_ring_get_rptr - get read pointer
*
@@ -988,6 +1459,10 @@ static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
+ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
+ WREG32_SOC15(UVD, ring->me, mmUVD_SCRATCH2,
+ lower_32_bits(ring->wptr) | 0x80000000);
+
if (ring->use_doorbell) {
adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
@@ -1125,86 +1600,6 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-/**
- * vcn_v2_5_jpeg_ring_get_rptr - get read pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware read pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_rptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_RPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_get_wptr - get write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Returns the current hardware write pointer
- */
-static uint64_t vcn_v2_5_jpeg_ring_get_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell)
- return adev->wb.wb[ring->wptr_offs];
- else
- return RREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR);
-}
-
-/**
- * vcn_v2_5_jpeg_ring_set_wptr - set write pointer
- *
- * @ring: amdgpu_ring pointer
- *
- * Commits the write pointer to the hardware
- */
-static void vcn_v2_5_jpeg_ring_set_wptr(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- if (ring->use_doorbell) {
- adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
- WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
- } else {
- WREG32_SOC15(UVD, ring->me, mmUVD_JRBC_RB_WPTR, lower_32_bits(ring->wptr));
- }
-}
-
-static const struct amdgpu_ring_funcs vcn_v2_5_jpeg_ring_vm_funcs = {
- .type = AMDGPU_RING_TYPE_VCN_JPEG,
- .align_mask = 0xf,
- .vmhub = AMDGPU_MMHUB_1,
- .get_rptr = vcn_v2_5_jpeg_ring_get_rptr,
- .get_wptr = vcn_v2_5_jpeg_ring_get_wptr,
- .set_wptr = vcn_v2_5_jpeg_ring_set_wptr,
- .emit_frame_size =
- SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
- SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
- 8 + /* vcn_v2_0_jpeg_ring_emit_vm_flush */
- 18 + 18 + /* vcn_v2_0_jpeg_ring_emit_fence x2 vm fence */
- 8 + 16,
- .emit_ib_size = 22, /* vcn_v2_0_jpeg_ring_emit_ib */
- .emit_ib = vcn_v2_0_jpeg_ring_emit_ib,
- .emit_fence = vcn_v2_0_jpeg_ring_emit_fence,
- .emit_vm_flush = vcn_v2_0_jpeg_ring_emit_vm_flush,
- .test_ring = amdgpu_vcn_jpeg_ring_test_ring,
- .test_ib = amdgpu_vcn_jpeg_ring_test_ib,
- .insert_nop = vcn_v2_0_jpeg_ring_nop,
- .insert_start = vcn_v2_0_jpeg_ring_insert_start,
- .insert_end = vcn_v2_0_jpeg_ring_insert_end,
- .pad_ib = amdgpu_ring_generic_pad_ib,
- .begin_use = amdgpu_vcn_ring_begin_use,
- .end_use = amdgpu_vcn_ring_end_use,
- .emit_wreg = vcn_v2_0_jpeg_ring_emit_wreg,
- .emit_reg_wait = vcn_v2_0_jpeg_ring_emit_reg_wait,
- .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
-};
-
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
{
int i;
@@ -1233,19 +1628,6 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
}
}
-static void vcn_v2_5_set_jpeg_ring_funcs(struct amdgpu_device *adev)
-{
- int i;
-
- for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
- if (adev->vcn.harvest_config & (1 << i))
- continue;
- adev->vcn.inst[i].ring_jpeg.funcs = &vcn_v2_5_jpeg_ring_vm_funcs;
- adev->vcn.inst[i].ring_jpeg.me = i;
- DRM_INFO("VCN(%d) jpeg decode is enabled in VM mode\n", i);
- }
-}
-
static bool vcn_v2_5_is_idle(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1283,6 +1665,9 @@ static int vcn_v2_5_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
if (enable) {
if (vcn_v2_5_is_idle(handle))
return -EBUSY;
@@ -1300,6 +1685,9 @@ static int vcn_v2_5_set_powergating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ if (amdgpu_sriov_vf(adev))
+ return 0;
+
if(state == adev->vcn.cur_state)
return 0;
@@ -1352,9 +1740,6 @@ static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
break;
- case VCN_2_0__SRCID__JPEG_DECODE:
- amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_jpeg);
- break;
default:
DRM_ERROR("Unhandled interrupt: %d %d\n",
entry->src_id, entry->src_data[0]);
@@ -1376,7 +1761,7 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
if (adev->vcn.harvest_config & (1 << i))
continue;
- adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 2;
+ adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
}
}