diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 155 |
1 files changed, 61 insertions, 94 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index bcc1cbeb8799..1abf662a0e91 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -46,37 +46,26 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) -{ - return (struct amdgpu_device *)kgd; -} - -static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, +static void lock_srbm(struct amdgpu_device *adev, uint32_t mec, uint32_t pipe, uint32_t queue, uint32_t vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, mec, pipe, queue, vmid); } -static void unlock_srbm(struct kgd_dev *kgd) +static void unlock_srbm(struct amdgpu_device *adev) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, +static void acquire_queue(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, queue_id, 0); + lock_srbm(adev, mec, pipe, queue_id, 0); } static uint64_t get_queue_mask(struct amdgpu_device *adev, @@ -88,33 +77,29 @@ static uint64_t get_queue_mask(struct amdgpu_device *adev, return 1ull << bit; } -static void release_queue(struct kgd_dev *kgd) +static void release_queue(struct amdgpu_device *adev) { - unlock_srbm(kgd); + unlock_srbm(adev); } -void kgd_gfx_v9_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, +void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid, uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); /* APE1 no longer exists on GFX9 */ - unlock_srbm(kgd); + unlock_srbm(adev); } -int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, +int kgd_gfx_v9_set_pasid_vmid_mapping(struct amdgpu_device *adev, u32 pasid, unsigned int vmid) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - /* * We have to assume that there is no outstanding mapping. * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because @@ -171,22 +156,21 @@ int kgd_gfx_v9_set_pasid_vmid_mapping(struct kgd_dev *kgd, u32 pasid, * but still works */ -int kgd_gfx_v9_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +int kgd_gfx_v9_init_interrupts(struct amdgpu_device *adev, uint32_t pipe_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t mec; uint32_t pipe; mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); - lock_srbm(kgd, mec, pipe, 0, 0); + lock_srbm(adev, mec, pipe, 0, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); - unlock_srbm(kgd); + unlock_srbm(adev); return 0; } @@ -233,19 +217,18 @@ static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) return (struct v9_sdma_mqd *)mqd; } -int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, - uint32_t queue_id, uint32_t __user *wptr, - uint32_t wptr_shift, uint32_t wptr_mask, - struct mm_struct *mm) +int kgd_gfx_v9_hqd_load(struct amdgpu_device *adev, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t __user *wptr, uint32_t wptr_shift, + uint32_t wptr_mask, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_mqd *m; uint32_t *mqd_hqd; uint32_t reg, hqd_base, data; m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ mqd_hqd = &m->cp_mqd_base_addr_lo; @@ -296,7 +279,7 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, lower_32_bits((uintptr_t)wptr)); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), upper_32_bits((uintptr_t)wptr)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + WREG32_SOC15(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1, (uint32_t)get_queue_mask(adev, pipe_id, queue_id)); } @@ -308,16 +291,15 @@ int kgd_gfx_v9_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); - release_queue(kgd); + release_queue(adev); return 0; } -int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hiq_mqd_load(struct amdgpu_device *adev, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t doorbell_off) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; struct v9_mqd *m; uint32_t mec, pipe; @@ -325,7 +307,7 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, m = get_mqd(mqd); - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); @@ -361,16 +343,15 @@ int kgd_gfx_v9_hiq_mqd_load(struct kgd_dev *kgd, void *mqd, out_unlock: spin_unlock(&adev->gfx.kiq.ring_lock); - release_queue(kgd); + release_queue(adev); return r; } -int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, +int kgd_gfx_v9_hqd_dump(struct amdgpu_device *adev, uint32_t pipe_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t i = 0, reg; #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -384,13 +365,13 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, if (*dump == NULL) return -ENOMEM; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) DUMP_REG(reg); - release_queue(kgd); + release_queue(adev); WARN_ON_ONCE(i != HQD_N_REGS); *n_regs = i; @@ -398,10 +379,9 @@ int kgd_gfx_v9_hqd_dump(struct kgd_dev *kgd, return 0; } -static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_load(struct amdgpu_device *adev, void *mqd, uint32_t __user *wptr, struct mm_struct *mm) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; unsigned long end_jiffies; @@ -468,11 +448,10 @@ static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, return 0; } -static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, +static int kgd_hqd_sdma_dump(struct amdgpu_device *adev, uint32_t engine_id, uint32_t queue_id, uint32_t (**dump)[2], uint32_t *n_regs) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t sdma_rlc_reg_offset = get_sdma_rlc_reg_offset(adev, engine_id, queue_id); uint32_t i = 0, reg; @@ -500,31 +479,30 @@ static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, return 0; } -bool kgd_gfx_v9_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, - uint32_t pipe_id, uint32_t queue_id) +bool kgd_gfx_v9_hqd_is_occupied(struct amdgpu_device *adev, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t act; bool retval = false; uint32_t low, high; - acquire_queue(kgd, pipe_id, queue_id); - act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + acquire_queue(adev, pipe_id, queue_id); + act = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (act) { low = lower_32_bits(queue_address >> 8); high = upper_32_bits(queue_address >> 8); - if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && - high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + if (low == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE) && + high == RREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI)) retval = true; } - release_queue(kgd); + release_queue(adev); return retval; } -static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +static bool kgd_hqd_sdma_is_occupied(struct amdgpu_device *adev, void *mqd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t sdma_rlc_rb_cntl; @@ -541,12 +519,11 @@ static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) return false; } -int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, +int kgd_gfx_v9_hqd_destroy(struct amdgpu_device *adev, void *mqd, enum kfd_preempt_type reset_type, unsigned int utimeout, uint32_t pipe_id, uint32_t queue_id) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); enum hqd_dequeue_request_type type; unsigned long end_jiffies; uint32_t temp; @@ -555,7 +532,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, if (amdgpu_in_reset(adev)) return -EIO; - acquire_queue(kgd, pipe_id, queue_id); + acquire_queue(adev, pipe_id, queue_id); if (m->cp_hqd_vmid == 0) WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); @@ -579,25 +556,24 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, end_jiffies = (utimeout * HZ / 1000) + jiffies; while (true) { - temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + temp = RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE); if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) break; if (time_after(jiffies, end_jiffies)) { pr_err("cp queue preemption time out.\n"); - release_queue(kgd); + release_queue(adev); return -ETIME; } usleep_range(500, 1000); } - release_queue(kgd); + release_queue(adev); return 0; } -static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, +static int kgd_hqd_sdma_destroy(struct amdgpu_device *adev, void *mqd, unsigned int utimeout) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); struct v9_sdma_mqd *m; uint32_t sdma_rlc_reg_offset; uint32_t temp; @@ -634,11 +610,10 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } -bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, +bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct amdgpu_device *adev, uint8_t vmid, uint16_t *p_pasid) { uint32_t value; - struct amdgpu_device *adev = (struct amdgpu_device *) kgd; value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid); @@ -647,12 +622,12 @@ bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); } -int kgd_gfx_v9_address_watch_disable(struct kgd_dev *kgd) +int kgd_gfx_v9_address_watch_disable(struct amdgpu_device *adev) { return 0; } -int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_address_watch_execute(struct amdgpu_device *adev, unsigned int watch_point_id, uint32_t cntl_val, uint32_t addr_hi, @@ -661,17 +636,16 @@ int kgd_gfx_v9_address_watch_execute(struct kgd_dev *kgd, return 0; } -int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, +int kgd_gfx_v9_wave_control_execute(struct amdgpu_device *adev, uint32_t gfx_index_val, uint32_t sq_cmd) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); uint32_t data = 0; mutex_lock(&adev->grbm_idx_mutex); WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + WREG32_SOC15(GC, 0, mmSQ_CMD, sq_cmd); data = REG_SET_FIELD(data, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); @@ -686,18 +660,16 @@ int kgd_gfx_v9_wave_control_execute(struct kgd_dev *kgd, return 0; } -uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, +uint32_t kgd_gfx_v9_address_watch_get_offset(struct amdgpu_device *adev, unsigned int watch_point_id, unsigned int reg_offset) { return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev, uint32_t vmid, uint64_t page_table_base) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", vmid); @@ -750,7 +722,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); - reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + + reg_val = RREG32_SOC15_IP(GC, SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + queue_slot); *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; if (*wave_cnt != 0) @@ -804,7 +776,7 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, * * Reading registers referenced above involves programming GRBM appropriately */ -void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, +void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid, int *pasid_wave_cnt, int *max_waves_per_cu) { int qidx; @@ -818,10 +790,8 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, int pasid_tmp; int max_queue_cnt; int vmid_wave_cnt = 0; - struct amdgpu_device *adev; DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); - adev = get_amdgpu_device(kgd); lock_spi_csq_mutexes(adev); soc15_grbm_select(adev, 1, 0, 0, 0); @@ -839,8 +809,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); - queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, - mmSPI_CSQ_WF_ACTIVE_STATUS)); + queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS); /* * Assumption: queue map encodes following schema: four @@ -882,30 +851,28 @@ void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, adev->gfx.cu_info.max_waves_per_simd; } -void kgd_gfx_v9_program_trap_handler_settings(struct kgd_dev *kgd, +void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev, uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - lock_srbm(kgd, 0, 0, 0, vmid); + lock_srbm(adev, 0, 0, 0, vmid); /* * Program TBA registers */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_LO), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_LO, lower_32_bits(tba_addr >> 8)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TBA_HI), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TBA_HI, upper_32_bits(tba_addr >> 8)); /* * Program TMA registers */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_LO), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_LO, lower_32_bits(tma_addr >> 8)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_SHADER_TMA_HI), + WREG32_SOC15(GC, 0, mmSQ_SHADER_TMA_HI, upper_32_bits(tma_addr >> 8)); - unlock_srbm(kgd); + unlock_srbm(adev); } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { |