Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 656 |
1 file changed, 361 insertions, 295 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index ee2f2139e2eb..37b45e4403d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -21,12 +21,13 @@
  *
  */
 #include <linux/firmware.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
@@ -48,7 +49,7 @@
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS 1
-#define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE 2048
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
 }
 
 /**
- * gfx_v7_0_create_bitmask - create a bitmask
- *
- * @bit_width: length of the mask
- *
- * create a variable length bit mask (CIK).
- * Returns the bitmask.
- */
-static u32 gfx_v7_0_create_bitmask(u32 bit_width)
-{
-	return (u32)((1ULL << bit_width) - 1);
-}
-
-/**
  * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
  *
  * @adev: amdgpu_device pointer
 *
@@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
 	data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
-				       adev->gfx.config.max_sh_per_se);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
+					 adev->gfx.config.max_sh_per_se);
 
 	return (~data) & mask;
 }
@@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 /**
  * gmc_v7_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
@@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
-	/*
-	 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-	 * Nonetheless, we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* take ownership of the relevant compute queues */
+	amdgpu_gfx_compute_queue_acquire(adev);
+	/* allocate space for ALL pipes (even the ones we don't own) */
+	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+		* GFX7_MEC_HPD_SIZE * 2;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
@@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
@@ -2917,275 +2902,296 @@ struct hqd_registers
 	u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
+				       int mec, int pipe)
 {
-	u32 header;
-	u32 dispatch_initiator;
-	u32 dimensions[3];
-	u32 start_idx[3];
-	u32 num_threads[3];
-	u32 pipeline_stat_enable;
-	u32 perf_counter_enable;
-	u32 pgm[2];
-	u32 tba[2];
-	u32 tma[2];
-	u32 pgm_rsrc[2];
-	u32 vmid;
-	u32 resource_limits;
-	u32 static_thread_mgmt01[2];
-	u32 tmp_ring_size;
-	u32 static_thread_mgmt23[2];
-	u32 restart[3];
-	u32 thread_trace_enable;
-	u32 reserved1;
-	u32 user_data[16];
-	u32 vgtcs_invoke_count[2];
-	struct hqd_registers queue_state;
-	u32 dequeue_cntr;
-	u32 interrupt_queue[64];
-};
-
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct bonaire_mqd *mqd;
-	struct amdgpu_ring *ring;
-
-	/* fix up chicken bits */
-	tmp = RREG32(mmCP_CPF_DEBUG);
-	tmp |= (1 << 23);
-	WREG32(mmCP_CPF_DEBUG, tmp);
+	u32 tmp;
+	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
+			    * GFX7_MEC_HPD_SIZE * 2;
 
-	/* init the pipes */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+	cik_srbm_select(adev, mec + 1, pipe, 0, 0);
 
-		cik_srbm_select(adev, me, pipe, 0, 0);
+	/* write the EOP addr */
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-		/* write the EOP addr */
-		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+	/* set the VMID assigned */
+	WREG32(mmCP_HPD_EOP_VMID, 0);
 
-		/* set the VMID assigned */
-		WREG32(mmCP_HPD_EOP_VMID, 0);
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(MEC_HPD_SIZE / 8);
-		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+}
 
-	/* init the queues.  Just two for now. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
 
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct bonaire_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
-					     &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
 		}
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
 
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct bonaire_mqd));
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
 
-		mqd = (struct bonaire_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->static_thread_mgmt01[0] = 0xffffffff;
-		mqd->static_thread_mgmt01[1] = 0xffffffff;
-		mqd->static_thread_mgmt23[0] = 0xffffffff;
-		mqd->static_thread_mgmt23[1] = 0xffffffff;
+	return 0;
+}
 
-		mutex_lock(&adev->srbm_mutex);
-		cik_srbm_select(adev, ring->me,
-				ring->pipe,
-				ring->queue, 0);
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+			      struct cik_mqd *mqd,
+			      uint64_t mqd_gpu_addr,
+			      struct amdgpu_ring *ring)
+{
+	u64 hqd_gpu_addr;
+	u64 wb_gpu_addr;
 
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct cik_mqd));
 
-		/* enable doorbell? */
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
-			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell)
-			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		else
-			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* disable the queue if it's active */
-		mqd->queue_state.cp_hqd_dequeue_request = 0;
-		mqd->queue_state.cp_hqd_pq_rptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		}
+	mqd->header = 0xC0310800;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 
-		/* set the pointer to the MQD */
-		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-		/* set MQD vmid to 0 */
-		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
-		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
-					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
-		mqd->queue_state.cp_hqd_pq_control |=
-			order_base_2(ring->ring_size / 8);
-		mqd->queue_state.cp_hqd_pq_control |=
-			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+	/* enable doorbell? */
+	mqd->cp_hqd_pq_doorbell_control =
+		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+	if (ring->use_doorbell)
+		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+	else
+		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+	/* set the pointer to the MQD */
+	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = ring->gpu_addr >> 8;
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+	mqd->cp_hqd_pq_control |=
+		order_base_2(ring->ring_size / 8);
+	mqd->cp_hqd_pq_control |=
+		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
-		mqd->queue_state.cp_hqd_pq_control |=
-			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+	mqd->cp_hqd_pq_control |=
+		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+	mqd->cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-		mqd->queue_state.cp_hqd_pq_control |=
-			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
-			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
-		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			mqd->queue_state.cp_hqd_pq_doorbell_control =
-				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				(ring->doorbell_index <<
-				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
-				  CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+	mqd->cp_hqd_pq_control |=
+		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
-		} else {
-			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set the wb address wether it's enabled or not */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* enable the doorbell if requested */
+	if (ring->use_doorbell) {
+		mqd->cp_hqd_pq_doorbell_control =
+			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+		mqd->cp_hqd_pq_doorbell_control &=
+			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+		mqd->cp_hqd_pq_doorbell_control |=
+			(ring->doorbell_index <<
+			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+		mqd->cp_hqd_pq_doorbell_control |=
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control &=
+			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+			  CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+	} else {
+		mqd->cp_hqd_pq_doorbell_control = 0;
+	}
+
+	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	ring->wptr = 0;
+	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+	/* set the vmid for the queue */
+	mqd->cp_hqd_vmid = 0;
+
+	/* defaults */
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
+	/* activate the queue */
+	mqd->cp_hqd_active = 1;
+}
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
+{
+	uint32_t tmp;
+	uint32_t mqd_reg;
+	uint32_t *mqd_data;
+
+	/* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */
+	mqd_data = &mqd->cp_mqd_base_addr_lo;
+
+	/* disable wptr polling */
+	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+	/* program all HQD registers */
+	for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	/* activate the HQD */
+	for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++)
+		WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]);
+
+	return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+	int r;
+	u64 mqd_gpu_addr;
+	struct cik_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				     sizeof(struct cik_mqd),
+				     PAGE_SIZE, true,
+				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				     &ring->mqd_obj);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+			return r;
 		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
+	}
+
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
 
-		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			  &mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
 
-		/* set the vmid for the queue */
-		mqd->queue_state.cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-		/* activate the queue */
-		mqd->queue_state.cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+	gfx_v7_0_mqd_deactivate(adev);
+	gfx_v7_0_mqd_commit(adev, mqd);
 
-		cik_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return 0;
+}
 
-		ring->ready = true;
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
+
+	/* init all pipes (even the ones we don't own) */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
@@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		gfx_v7_0_update_rlc(adev, tmp);
 
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
 
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
 	} else {
 		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
@@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		RREG32(mmCB_CGTT_SCLK_CTRL);
 
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
-	}
-
-	if (orig != data)
-		WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+	}
 }
@@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return (~data) & mask;
 }
@@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
@@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, j, k, r, ring_id;
+
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
@@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v7_0_compute_ring_init(adev,
+							       ring_id,
+							       i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
 		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
@@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 	u32 mec_int_cntl, mec_int_cntl_reg;
 
 	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
+	 * amdgpu controls only the first MEC. That's why this function only
+	 * handles the setting of interrupts for this specific MEC. All other
 	 * pipes' interrupts are set by amdkfd.
 	 */
 
@@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 		case 0:
 			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
 			break;
+		case 1:
+			mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL;
+			break;
+		case 2:
+			mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL;
+			break;
+		case 3:
+			mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL;
+			break;
 		default:
 			DRM_DEBUG("invalid pipe %d\n", pipe);
 			return;
@@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
 	unsigned disable_masks[4 * 2];
+	u32 ao_cu_num;
+
+	if (adev->flags & AMD_IS_APU)
+		ao_cu_num = 2;
+	else
+		ao_cu_num = adev->gfx.config.max_cu_per_sh;
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
@@ -5354,16 +5418,18 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < 16; k ++) {
+			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (bitmap & mask) {
-					if (counter < 2)
+					if (counter < ao_cu_num)
 						ao_bitmap |= mask;
 					counter ++;
 				}
 				mask <<= 1;
 			}
 			active_cu_number += counter;
-			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			if (i < 2 && j < 2)
+				ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
+			cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
 		}
 	}
 	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);