diff options
author | Amber Lin <Amber.Lin@amd.com> | 2022-05-17 18:41:01 +0300 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-06-09 16:40:40 +0300 |
commit | f544afac3f34124088b981c63843a3cc48f4ee3e (patch) | |
tree | 0ddb1c55d9b67cd3b0d3359e2a1c1841cff66623 /drivers/gpu | |
parent | 62e790879efbf09edb9f262d5eb7765aeaf89809 (diff) | |
download | linux-f544afac3f34124088b981c63843a3cc48f4ee3e.tar.xz |
drm/amdgpu: Add kgd2kfd for GC 9.4.3
New GC (v9.4.3) and ATHUB (v1.8.0) versions
are used. Add kgd_gfx_v9_4_3_*
functions if registers in use of kgd_gfx_v9_*
functions are changed or have different offset.
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Mukul Joshi <mukul.joshi@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c | 183 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 30 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 |
5 files changed, 206 insertions, 16 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 415a7fa395c4..7fb09000efc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -228,6 +228,7 @@ amdgpu-y += \ amdgpu_amdkfd_gfx_v9.o \ amdgpu_amdkfd_arcturus.o \ amdgpu_amdkfd_aldebaran.o \ + amdgpu_amdkfd_gc_9_4_3.o \ amdgpu_amdkfd_gfx_v10.o \ amdgpu_amdkfd_gfx_v10_3.o \ amdgpu_amdkfd_gfx_v11.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c new file mode 100644 index 000000000000..562e1a04160f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -0,0 +1,183 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_arcturus.h" +#include "amdgpu_amdkfd_gfx_v9.h" +#include "gc/gc_9_4_3_offset.h" +#include "gc/gc_9_4_3_sh_mask.h" +#include "athub/athub_1_8_0_offset.h" +#include "athub/athub_1_8_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "v9_structs.h" +#include "soc15.h" + +static int kgd_gfx_v9_4_3_set_pasid_vmid_mapping(struct amdgpu_device *adev, + u32 pasid, unsigned int vmid) +{ + unsigned long timeout; + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID0_PASID_MAPPING) + vmid, pasid_mapping); + + timeout = jiffies + msecs_to_jiffies(10); + while (!(RREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) { + if (time_after(jiffies, timeout)) { + pr_err("Fail to program VMID-PASID mapping\n"); + return -ETIME; + } + cpu_relax(); + } + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + regATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + + return 0; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static int kgd_gfx_v9_4_3_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) +{ + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, hqd_end, data; + + m = get_mqd(mqd); + + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); + + /* HQD registers extend to CP_HQD_AQL_DISPATCH_ID_HI */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, regCP_MQD_BASE_ADDR); + hqd_end = SOC15_REG_OFFSET(GC, 0, regCP_HQD_AQL_DISPATCH_ID_HI); + + for (reg = hqd_base; reg <= hqd_end; reg++) + WREG32_RLC(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uintptr_t)wptr)); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, + regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uintptr_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, regCP_PQ_WPTR_POLL_CNTL1), + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, + queue_id)); + } + + /* Start the EOP fetcher */ + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32_RLC(SOC15_REG_OFFSET(GC, 0, regCP_HQD_ACTIVE), data); + + kgd_gfx_v9_release_queue(adev); + + return 0; +} + +const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { + .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, + .init_interrupts = kgd_gfx_v9_init_interrupts, + .hqd_load = kgd_gfx_v9_4_3_hqd_load, + .hiq_mqd_load = kgd_gfx_v9_hiq_mqd_load, + .hqd_sdma_load = kgd_arcturus_hqd_sdma_load, + .hqd_dump = kgd_gfx_v9_hqd_dump, + .hqd_sdma_dump = kgd_arcturus_hqd_sdma_dump, + .hqd_is_occupied = kgd_gfx_v9_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_arcturus_hqd_sdma_is_occupied, + .hqd_destroy = kgd_gfx_v9_hqd_destroy, + .hqd_sdma_destroy = kgd_arcturus_hqd_sdma_destroy, + .wave_control_execute = kgd_gfx_v9_wave_control_execute, + .get_atc_vmid_pasid_mapping_info = + kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, + .set_vm_context_page_table_base = + kgd_gfx_v9_set_vm_context_page_table_base, + .program_trap_handler_settings = + kgd_gfx_v9_program_trap_handler_settings +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index ae06d1f2af93..d36219ecd3dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -59,7 +59,7 @@ static void unlock_srbm(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, +void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; @@ -68,7 +68,7 @@ static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, lock_srbm(adev, mec, pipe, queue_id, 0); } -static uint64_t get_queue_mask(struct amdgpu_device *adev, +uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { unsigned int bit = pipe_id * adev->gfx.mec.num_queue_per_pipe + @@ -77,7 +77,7 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct amdgpu_device *adev) +void kgd_gfx_v9_release_queue(struct amdgpu_device *adev) { unlock_srbm(adev); } @@ -228,7 +228,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, m = get_mqd(mqd); - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -280,7 +280,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, - (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); + (uint32_t)kgd_gfx_v9_get_queue_mask(adev, pipe_id, queue_id)); } /* Start the EOP fetcher */ @@ -291,7 +291,7 @@ int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); - release_queue(adev); + kgd_gfx_v9_release_queue(adev); return 0; } @@ -307,7 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, m = get_mqd(mqd); - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -343,7 +343,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq[0].ring_lock); - release_queue(adev); + kgd_gfx_v9_release_queue(adev); return r; } @@ -365,13 +365,13 @@ int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, if (*dump == NULL) return -ENOMEM; - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(adev); + kgd_gfx_v9_release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -487,7 +487,7 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, bool retval = false; uint32_t low, high; - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); @@ -497,7 +497,7 @@ bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(adev); + kgd_gfx_v9_release_queue(adev); return retval; } @@ -532,7 +532,7 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(adev, pipe_id, queue_id); + kgd_gfx_v9_acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -561,13 +561,13 @@ int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(adev); + kgd_gfx_v9_release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(adev); + kgd_gfx_v9_release_queue(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index c7ed3bc9053c..491273a02e30 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -58,3 +58,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu); void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr); +void kgd_gfx_v9_acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, + uint32_t queue_id); +uint64_t kgd_gfx_v9_get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id); +void kgd_gfx_v9_release_queue(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 00f528eb9812..1510041a6ee1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -51,6 +51,7 @@ extern const struct kfd2kgd_calls gfx_v8_kfd2kgd; extern const struct kfd2kgd_calls gfx_v9_kfd2kgd; extern const struct kfd2kgd_calls arcturus_kfd2kgd; extern const struct kfd2kgd_calls aldebaran_kfd2kgd; +extern const struct kfd2kgd_calls gc_9_4_3_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_kfd2kgd; extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd; extern const struct kfd2kgd_calls gfx_v11_kfd2kgd; @@ -328,7 +329,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf) break; case IP_VERSION(9, 4, 3): gfx_target_version = 90400; - f2g = &aldebaran_kfd2kgd; + f2g = &gc_9_4_3_kfd2kgd; break; /* Navi10 */ case IP_VERSION(10, 1, 10): |