author		Dave Airlie <airlied@redhat.com>	2017-06-16 02:54:02 +0300
committer	Dave Airlie <airlied@redhat.com>	2017-06-16 02:56:53 +0300
commit		04d4fb5fa63876d8e7cf67f2788aecfafc6a28a7 (patch)
tree		92aec67d7b5a1359baff1a508d381234f046743e /drivers/gpu/drm/amd/amdgpu
parent		bfda9aa15317838ddb259406027ef9911a1dffbc (diff)
parent		a1924005a2e9bfcc4e217b4acd0a4f2421969040 (diff)
download	linux-04d4fb5fa63876d8e7cf67f2788aecfafc6a28a7.tar.xz
Merge branch 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux into drm-next
New radeon and amdgpu features for 4.13:
- Lots of Vega10 bug fixes
- Preliminary Raven support
- KIQ support for compute rings
- MEC queue management rework from Andres
- Audio support for DCE6
- SR-IOV improvements
- Improved module parameters for controlling radeon vs amdgpu support
  for SI and CIK (see the usage example below)
- Bug fixes
- General code cleanups
[airlied: the drmP.h header dropped from one file was still needed and the build broke]
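A brief illustration of the SI/CIK handover options mentioned above; the
option names come from the Kconfig help text in this series, while the
modprobe.d path is only a conventional example:

    # on the kernel command line: hand SI parts to amdgpu instead of radeon
    radeon.si_support=0 amdgpu.si_support=1

    # or persistently, e.g. in /etc/modprobe.d/amdgpu.conf
    options amdgpu si_support=1 cik_support=1
    options radeon si_support=0 cik_support=0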
* 'drm-next-4.13' of git://people.freedesktop.org/~agd5f/linux: (362 commits)
drm/amdgpu: Fix compiler warnings
drm/amdgpu: vm_update_ptes remove code duplication
drm/amd/amdgpu: Port VCN over to new SOC15 macros
drm/amd/amdgpu: Port PSP v10.0 over to new SOC15 macros
drm/amd/amdgpu: Port PSP v3.1 over to new SOC15 macros
drm/amd/amdgpu: Port NBIO v7.0 driver over to new SOC15 macros
drm/amd/amdgpu: Port NBIO v6.1 driver over to new SOC15 macros
drm/amd/amdgpu: Port UVD 7.0 over to new SOC15 macros
drm/amd/amdgpu: Port MMHUB over to new SOC15 macros
drm/amd/amdgpu: Cleanup gfxhub read-modify-write patterns
drm/amd/amdgpu: Port GFXHUB over to new SOC15 macros
drm/amd/amdgpu: Add offset variant to SOC15 macros
drm/amd/powerplay: add avfs control for Vega10
drm/amdgpu: add virtual display support for raven
drm/amdgpu/gfx9: fix compute ring doorbell index
drm/amd/amdgpu: Rename KIQ ring to avoid spaces
drm/amd/amdgpu: gfx9 tidy ups (v2)
drm/amdgpu: add contiguous flag in ucode bo create
drm/amdgpu: fix missed gpu info firmware when cache firmware during S3
drm/amdgpu: export test ib debugfs interface
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
80 files changed, 9413 insertions(+), 4627 deletions(-)
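The largest structural change in this diff is the queue manager: the global
amdgpu_cs_get_ring() is removed and submissions resolve their hardware ring
through the per-context amdgpu_queue_mgr. A minimal sketch of the new lookup
path, using only the signatures added below (the wrapper name
lookup_user_ring is hypothetical, not part of the patch):

    /* Hypothetical helper showing the new lookup path: user-space ring
     * identifiers (ip type, instance, ring index) are resolved through the
     * context's queue manager instead of the removed amdgpu_cs_get_ring(). */
    static int lookup_user_ring(struct amdgpu_device *adev,
                                struct amdgpu_ctx *ctx,
                                u32 ip_type, u32 ip_instance, u32 ring_idx,
                                struct amdgpu_ring **out_ring)
    {
        /* Each amdgpu_queue_mapper caches the chosen ring per hw_ip in
         * queue_map[], so repeated submissions map consistently. */
        return amdgpu_queue_mgr_map(adev, &ctx->queue_mgr,
                                    ip_type, ip_instance, ring_idx, out_ring);
    }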
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 61360e27715f..26682454a446 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -5,15 +5,23 @@ config DRM_AMDGPU_SI Choose this option if you want to enable experimental support for SI asics. + SI is already supported in radeon. Experimental support for SI + in amdgpu will be disabled by default and is still provided by + radeon. Use module options to override this: + + radeon.si_support=0 amdgpu.si_support=1 + config DRM_AMDGPU_CIK bool "Enable amdgpu support for CIK parts" depends on DRM_AMDGPU help - Choose this option if you want to enable experimental support - for CIK asics. + Choose this option if you want to enable support for CIK asics. + + CIK is already supported in radeon. Support for CIK in amdgpu + will be disabled by default and is still provided by radeon. + Use module options to override this: - CIK is already supported in radeon. CIK support in amdgpu - is for experimentation and testing. + radeon.cik_support=0 amdgpu.cik_support=1 config DRM_AMDGPU_USERPTR bool "Always enable userptr write support" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 20bde726419e..faea6349228f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -24,7 +24,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ - amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o + amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ + amdgpu_queue_mgr.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -34,7 +35,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o amdgpu-y += \ - vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o + vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o # add GMC block amdgpu-y += \ @@ -54,7 +55,8 @@ amdgpu-y += \ # add PSP block amdgpu-y += \ amdgpu_psp.o \ - psp_v3_1.o + psp_v3_1.o \ + psp_v10_0.o # add SMC block amdgpu-y += \ @@ -92,6 +94,11 @@ amdgpu-y += \ vce_v3_0.o \ vce_v4_0.o +# add VCN block +amdgpu-y += \ + amdgpu_vcn.o \ + vcn_v1_0.o + # add amdkfd interfaces amdgpu-y += \ amdgpu_amdkfd.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 77ff68f9932b..e0adad590ecb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -46,6 +46,8 @@ #include <drm/drm_gem.h> #include <drm/amdgpu_drm.h> +#include <kgd_kfd_interface.h> + #include "amd_shared.h" #include "amdgpu_mode.h" #include "amdgpu_ih.h" @@ -62,6 +64,7 @@ #include "amdgpu_acp.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" +#include "amdgpu_vcn.h" #include "gpu_scheduler.h" #include "amdgpu_virt.h" @@ -92,6 +95,7 @@ extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; +extern int amdgpu_vm_update_mode; extern int amdgpu_sched_jobs; extern int amdgpu_sched_hw_submission; extern int amdgpu_no_evict; @@ -109,6 +113,15 @@ extern int amdgpu_prim_buf_per_se; extern int amdgpu_pos_buf_per_se; extern int amdgpu_cntl_sb_buf_per_se; extern int amdgpu_param_buf_per_se; +extern int 
amdgpu_job_hang_limit; +extern int amdgpu_lbpw; + +#ifdef CONFIG_DRM_AMDGPU_SI +extern int amdgpu_si_support; +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK +extern int amdgpu_cik_support; +#endif #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -305,8 +318,8 @@ struct amdgpu_gart_funcs { /* set pte flags based per asic */ uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev, uint32_t flags); - /* adjust mc addr in fb for APU case */ - u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr); + /* get the pde for a given mc addr */ + u64 (*get_vm_pde)(struct amdgpu_device *adev, u64 addr); uint32_t (*get_invalidate_req)(unsigned int vm_id); }; @@ -554,7 +567,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev); -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, @@ -602,6 +615,7 @@ struct amdgpu_mc { uint32_t srbm_soft_reset; struct amdgpu_mode_mc_save save; bool prt_warning; + uint64_t stolen_size; /* apertures */ u64 shared_aperture_start; u64 shared_aperture_end; @@ -772,6 +786,29 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, struct dma_fence **f); /* + * Queue manager + */ +struct amdgpu_queue_mapper { + int hw_ip; + struct mutex lock; + /* protected by lock */ + struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS]; +}; + +struct amdgpu_queue_mgr { + struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM]; +}; + +int amdgpu_queue_mgr_init(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr); +int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr); +int amdgpu_queue_mgr_map(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr, + int hw_ip, int instance, int ring, + struct amdgpu_ring **out_ring); + +/* * context related structures */ @@ -784,6 +821,7 @@ struct amdgpu_ctx_ring { struct amdgpu_ctx { struct kref refcount; struct amdgpu_device *adev; + struct amdgpu_queue_mgr queue_mgr; unsigned reset_counter; spinlock_t ring_lock; struct dma_fence **fences; @@ -822,6 +860,7 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; + u32 vram_lost_counter; }; /* @@ -893,20 +932,26 @@ struct amdgpu_rlc { u32 *register_restore; }; +#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; struct amdgpu_bo *mec_fw_obj; u64 mec_fw_gpu_addr; - u32 num_pipe; u32 num_mec; - u32 num_queue; + u32 num_pipe_per_mec; + u32 num_queue_per_pipe; void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; + + /* These are the resources for which amdgpu takes ownership */ + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; + struct mutex ring_mutex; struct amdgpu_ring ring; struct amdgpu_irq_src irq; }; @@ -983,7 +1028,10 @@ struct amdgpu_gfx_config { struct amdgpu_cu_info { uint32_t number; /* total active CU number */ uint32_t ao_cu_mask; + uint32_t max_waves_per_simd; uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; uint32_t bitmap[4][4]; }; @@ -1061,6 +1109,8 @@ struct amdgpu_gfx { uint32_t 
grbm_soft_reset; uint32_t srbm_soft_reset; bool in_reset; + /* s3/s4 mask */ + bool in_suspend; /* NGG */ struct amdgpu_ngg ngg; }; @@ -1114,7 +1164,6 @@ struct amdgpu_cs_parser { #define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ #define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ #define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ -#define AMDGPU_VM_DOMAIN (1 << 3) /* bit set means in virtual memory context */ struct amdgpu_job { struct amd_sched_job base; @@ -1122,6 +1171,8 @@ struct amdgpu_job { struct amdgpu_vm *vm; struct amdgpu_ring *ring; struct amdgpu_sync sync; + struct amdgpu_sync dep_sync; + struct amdgpu_sync sched_sync; struct amdgpu_ib *ibs; struct dma_fence *fence; /* the hw fence */ uint32_t preamble_status; @@ -1129,7 +1180,6 @@ struct amdgpu_job { void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; - bool need_pipeline_sync; unsigned vm_id; uint64_t vm_pd_addr; uint32_t gds_base, gds_size; @@ -1221,6 +1271,9 @@ struct amdgpu_firmware { const struct amdgpu_psp_funcs *funcs; struct amdgpu_bo *rbuf; struct mutex mutex; + + /* gpu info firmware data pointer */ + const struct firmware *gpu_info_fw; }; /* @@ -1296,7 +1349,6 @@ struct amdgpu_smumgr { */ struct amdgpu_allowed_register_entry { uint32_t reg_offset; - bool untouched; bool grbm_indexed; }; @@ -1424,6 +1476,7 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t); typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t); +#define AMDGPU_RESET_MAGIC_NUM 64 struct amdgpu_device { struct device *dev; struct drm_device *ddev; @@ -1523,7 +1576,9 @@ struct amdgpu_device { atomic64_t gtt_usage; atomic64_t num_bytes_moved; atomic64_t num_evictions; + atomic64_t num_vram_cpu_page_faults; atomic_t gpu_reset_counter; + atomic_t vram_lost_counter; /* data for buffer migration throttling */ struct { @@ -1570,11 +1625,18 @@ struct amdgpu_device { /* sdma */ struct amdgpu_sdma sdma; - /* uvd */ - struct amdgpu_uvd uvd; + union { + struct { + /* uvd */ + struct amdgpu_uvd uvd; + + /* vce */ + struct amdgpu_vce vce; + }; - /* vce */ - struct amdgpu_vce vce; + /* vcn */ + struct amdgpu_vcn vcn; + }; /* firmwares */ struct amdgpu_firmware firmware; @@ -1598,6 +1660,9 @@ struct amdgpu_device { /* amdkfd interface */ struct kfd_dev *kfd; + /* delayed work_func for deferring clockgating during resume */ + struct delayed_work late_init_work; + struct amdgpu_virt virt; /* link all shadow bo */ @@ -1606,9 +1671,13 @@ struct amdgpu_device { /* link all gtt */ spinlock_t gtt_list_lock; struct list_head gtt_list; + /* keep an lru list of rings by HW IP */ + struct list_head ring_lru_list; + spinlock_t ring_lru_list_lock; /* record hw reset is performed */ bool has_hw_reset; + u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; }; @@ -1617,7 +1686,6 @@ static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) return container_of(bdev, struct amdgpu_device, mman.bdev); } -bool amdgpu_device_is_px(struct drm_device *dev); int amdgpu_device_init(struct amdgpu_device *adev, struct drm_device *ddev, struct pci_dev *pdev, @@ -1733,30 +1801,31 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr unsigned occupied, chunk1, chunk2; void *dst; - if (ring->count_dw < count_dw) { + if 
(unlikely(ring->count_dw < count_dw)) { DRM_ERROR("amdgpu: writing more dwords to the ring than expected!\n"); - } else { - occupied = ring->wptr & ring->buf_mask; - dst = (void *)&ring->ring[occupied]; - chunk1 = ring->buf_mask + 1 - occupied; - chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; - chunk2 = count_dw - chunk1; - chunk1 <<= 2; - chunk2 <<= 2; - - if (chunk1) - memcpy(dst, src, chunk1); - - if (chunk2) { - src += chunk1; - dst = (void *)ring->ring; - memcpy(dst, src, chunk2); - } - - ring->wptr += count_dw; - ring->wptr &= ring->ptr_mask; - ring->count_dw -= count_dw; + return; + } + + occupied = ring->wptr & ring->buf_mask; + dst = (void *)&ring->ring[occupied]; + chunk1 = ring->buf_mask + 1 - occupied; + chunk1 = (chunk1 >= count_dw) ? count_dw: chunk1; + chunk2 = count_dw - chunk1; + chunk1 <<= 2; + chunk2 <<= 2; + + if (chunk1) + memcpy(dst, src, chunk1); + + if (chunk2) { + src += chunk1; + dst = (void *)ring->ring; + memcpy(dst, src, chunk2); } + + ring->wptr += count_dw; + ring->wptr &= ring->ptr_mask; + ring->count_dw -= count_dw; } static inline struct amdgpu_sdma_instance * @@ -1792,6 +1861,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gart_get_vm_pde(adev, addr) (adev)->gart.gart_funcs->get_vm_pde((adev), (addr)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -1813,6 +1883,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) +#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) @@ -1849,9 +1920,6 @@ bool amdgpu_need_post(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); -int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, - u32 ip_instance, u32 ring, - struct amdgpu_ring **out_ring); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes); void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); @@ -1900,6 +1968,8 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device 
*dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index dba8a5b25e66..5f8ada1d872b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -24,6 +24,7 @@ #include "amd_shared.h" #include <drm/drmP.h> #include "amdgpu.h" +#include "amdgpu_gfx.h" #include <linux/module.h> const struct kfd2kgd_calls *kfd2kgd; @@ -60,9 +61,9 @@ int amdgpu_amdkfd_init(void) return ret; } -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev) +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev) { - switch (rdev->asic_type) { + switch (adev->asic_type) { #ifdef CONFIG_DRM_AMDGPU_CIK case CHIP_KAVERI: kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions(); @@ -86,59 +87,83 @@ void amdgpu_amdkfd_fini(void) } } -void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) { if (kgd2kfd) - rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, - rdev->pdev, kfd2kgd); + adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); } -void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - if (rdev->kfd) { + int i; + int last_valid_bit; + if (adev->kfd) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = 0xFF00, - - .first_compute_pipe = 1, - .compute_pipe_count = 4 - 1, + .num_mec = adev->gfx.mec.num_mec, + .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, + .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe }; - amdgpu_doorbell_get_kfd_info(rdev, + /* this is going to have a few of the MSBs set that we need to + * clear */ + bitmap_complement(gpu_resources.queue_bitmap, + adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + + /* remove the KIQ bit as well */ + if (adev->gfx.kiq.ring.ready) + clear_bit(amdgpu_gfx_queue_to_bit(adev, + adev->gfx.kiq.ring.me - 1, + adev->gfx.kiq.ring.pipe, + adev->gfx.kiq.ring.queue), + gpu_resources.queue_bitmap); + + /* According to linux/bitmap.h we shouldn't use bitmap_clear if + * nbits is not compile time constant */ + last_valid_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) + clear_bit(i, gpu_resources.queue_bitmap); + + amdgpu_doorbell_get_kfd_info(adev, &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - kgd2kfd->device_init(rdev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd, &gpu_resources); } } -void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev) +void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (rdev->kfd) { - kgd2kfd->device_exit(rdev->kfd); - rdev->kfd = NULL; + if (adev->kfd) { + kgd2kfd->device_exit(adev->kfd); + adev->kfd = NULL; } } -void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, +void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (rdev->kfd) - kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); + if (adev->kfd) + kgd2kfd->interrupt(adev->kfd, ih_ring_entry); } -void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev) +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (rdev->kfd) - kgd2kfd->suspend(rdev->kfd); + if (adev->kfd) + kgd2kfd->suspend(adev->kfd); } -int amdgpu_amdkfd_resume(struct amdgpu_device *rdev) +int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; 
- if (rdev->kfd) - r = kgd2kfd->resume(rdev->kfd); + if (adev->kfd) + r = kgd2kfd->resume(adev->kfd); return r; } @@ -147,7 +172,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, void **cpu_ptr) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct kgd_mem **mem = (struct kgd_mem **) mem_obj; int r; @@ -159,10 +184,10 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, if ((*mem) == NULL) return -ENOMEM; - r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, + r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); return r; } @@ -170,21 +195,21 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, /* map the buffer */ r = amdgpu_bo_reserve((*mem)->bo, true); if (r) { - dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r); goto allocate_mem_reserve_bo_failed; } r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT, &(*mem)->gpu_addr); if (r) { - dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r); + dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r); goto allocate_mem_pin_bo_failed; } *gpu_addr = (*mem)->gpu_addr; r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr); if (r) { - dev_err(rdev->dev, + dev_err(adev->dev, "(%d) failed to map bo to kernel for amdkfd\n", r); goto allocate_mem_kmap_bo_failed; } @@ -220,27 +245,27 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) uint64_t get_vmem_size(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; BUG_ON(kgd == NULL); - return rdev->mc.real_vram_size; + return adev->mc.real_vram_size; } uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - if (rdev->gfx.funcs->get_gpu_clock_counter) - return rdev->gfx.funcs->get_gpu_clock_counter(rdev); + if (adev->gfx.funcs->get_gpu_clock_counter) + return adev->gfx.funcs->get_gpu_clock_counter(adev); return 0; } uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { - struct amdgpu_device *rdev = (struct amdgpu_device *)kgd; + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; /* The sclk is in quantas of 10kHz */ - return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; + return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index de530f68d4e3..73f83a10ae14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -39,15 +39,15 @@ struct kgd_mem { int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); -bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev); +bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev); -void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev); -int amdgpu_amdkfd_resume(struct amdgpu_device *rdev); -void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev, +void amdgpu_amdkfd_suspend(struct amdgpu_device *adev); +int amdgpu_amdkfd_resume(struct amdgpu_device *adev); +void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); -void amdgpu_amdkfd_device_probe(struct 
amdgpu_device *rdev); -void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev); -void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev); +void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); +void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 1a0a5f7cccbc..5254562fd0f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -29,6 +29,7 @@ #include "cikd.h" #include "cik_sdma.h" #include "amdgpu_ucode.h" +#include "gfx_v7_0.h" #include "gca/gfx_7_2_d.h" #include "gca/gfx_7_2_enum.h" #include "gca/gfx_7_2_sh_mask.h" @@ -38,8 +39,6 @@ #include "gmc/gmc_7_1_sh_mask.h" #include "cik_structs.h" -#define CIK_PIPE_PER_MEC (4) - enum { MAX_TRAPID = 8, /* 3 bits in the bitfield. */ MAX_WATCH_ADDRESSES = 4 @@ -185,8 +184,10 @@ static void unlock_srbm(struct kgd_dev *kgd) static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id) { - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); } @@ -243,18 +244,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { - struct amdgpu_device *adev = get_amdgpu_device(kgd); - - uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); - - lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8)); - WREG32(mmCP_HPD_EOP_VMID, 0); - WREG32(mmCP_HPD_EOP_CONTROL, hpd_size); - unlock_srbm(kgd); - + /* amdgpu owns the per-pipe state */ return 0; } @@ -264,8 +254,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (pipe_id / CIK_PIPE_PER_MEC) + 1; - pipe = (pipe_id % CIK_PIPE_PER_MEC); + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -309,55 +299,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); - - acquire_queue(kgd, pipe_id, queue_id); - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); - - WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); - WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); - - WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); - - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); - WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); - WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type); - - 
WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo); - WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi); - WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo); - WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi); - - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); - - WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); - - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi); - - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); - - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); - - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); - - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); - - WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); - if (is_wptr_shadow_valid) - WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow); + m->cp_hqd_pq_wptr = wptr_shadow; - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + acquire_queue(kgd, pipe_id, queue_id); + gfx_v7_0_mqd_commit(adev, m); release_queue(kgd); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 6697612239c2..133d06671e46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -28,6 +28,7 @@ #include "amdgpu.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ucode.h" +#include "gfx_v8_0.h" #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" @@ -38,8 +39,6 @@ #include "vi_structs.h" #include "vid.h" -#define VI_PIPE_PER_MEC (4) - struct cik_sdma_rlc_registers; /* @@ -146,8 +145,10 @@ static void unlock_srbm(struct kgd_dev *kgd) static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t queue_id) { - uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; - uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC); + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, queue_id, 0); } @@ -205,6 +206,7 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, uint32_t hpd_size, uint64_t hpd_gpu_addr) { + /* amdgpu owns the per-pipe state */ return 0; } @@ -214,8 +216,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) uint32_t mec; uint32_t pipe; - mec = (++pipe_id / VI_PIPE_PER_MEC) + 1; - pipe = (pipe_id % VI_PIPE_PER_MEC); + mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); lock_srbm(kgd, mec, pipe, 0, 0); @@ -251,53 +253,11 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, m = get_mqd(mqd); valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr)); - acquire_queue(kgd, pipe_id, queue_id); - - WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control); - WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); - - WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid); - WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state); - WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); - WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); - WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum); - 
WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - m->cp_hqd_pq_rptr_report_addr_hi); - - if (valid_wptr > 0) - WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr); - - WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control); - - WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi); - WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control); - WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr); - WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr); - WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events); - - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo); - WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi); - WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control); - WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset); - WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size); - WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset); - WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size); - - WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control); - - WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_ERROR, m->cp_hqd_error); - WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem); - WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones); - - WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active); + if (valid_wptr == 0) + m->cp_hqd_pq_wptr = shadow_wptr; + acquire_queue(kgd, pipe_id, queue_id); + gfx_v8_0_mqd_commit(adev, mqd); release_queue(kgd); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5b3e0f63a115..a37bdf4f8e9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -30,78 +30,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" -int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, - u32 ip_instance, u32 ring, - struct amdgpu_ring **out_ring) -{ - /* Right now all IPs have only one instance - multiple rings. 
*/ - if (ip_instance != 0) { - DRM_ERROR("invalid ip instance: %d\n", ip_instance); - return -EINVAL; - } - - switch (ip_type) { - default: - DRM_ERROR("unknown ip type: %d\n", ip_type); - return -EINVAL; - case AMDGPU_HW_IP_GFX: - if (ring < adev->gfx.num_gfx_rings) { - *out_ring = &adev->gfx.gfx_ring[ring]; - } else { - DRM_ERROR("only %d gfx rings are supported now\n", - adev->gfx.num_gfx_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_COMPUTE: - if (ring < adev->gfx.num_compute_rings) { - *out_ring = &adev->gfx.compute_ring[ring]; - } else { - DRM_ERROR("only %d compute rings are supported now\n", - adev->gfx.num_compute_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_DMA: - if (ring < adev->sdma.num_instances) { - *out_ring = &adev->sdma.instance[ring].ring; - } else { - DRM_ERROR("only %d SDMA rings are supported\n", - adev->sdma.num_instances); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_UVD: - *out_ring = &adev->uvd.ring; - break; - case AMDGPU_HW_IP_VCE: - if (ring < adev->vce.num_rings){ - *out_ring = &adev->vce.ring[ring]; - } else { - DRM_ERROR("only %d VCE rings are supported\n", adev->vce.num_rings); - return -EINVAL; - } - break; - case AMDGPU_HW_IP_UVD_ENC: - if (ring < adev->uvd.num_enc_rings){ - *out_ring = &adev->uvd.ring_enc[ring]; - } else { - DRM_ERROR("only %d UVD ENC rings are supported\n", - adev->uvd.num_enc_rings); - return -EINVAL; - } - break; - } - - if (!(*out_ring && (*out_ring)->adev)) { - DRM_ERROR("Ring %d is not initialized on IP %d\n", - ring, ip_type); - return -EINVAL; - } - - return 0; -} - static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, uint32_t *offset) @@ -597,7 +525,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_free_pages; } - /* Fill the page arrays for all useptrs. */ + /* Fill the page arrays for all userptrs. 
*/ list_for_each_entry(e, &need_pages, tv.head) { struct ttm_tt *ttm = e->robj->tbo.ttm; @@ -917,9 +845,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return -EINVAL; } - r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, - &ring); + r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, &ring); if (r) return r; @@ -1021,16 +948,19 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_ctx *ctx; struct dma_fence *fence; - r = amdgpu_cs_get_ring(adev, deps[j].ip_type, - deps[j].ip_instance, - deps[j].ring, &ring); - if (r) - return r; - ctx = amdgpu_ctx_get(fpriv, deps[j].ctx_id); if (ctx == NULL) return -EINVAL; + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, + deps[j].ip_type, + deps[j].ip_instance, + deps[j].ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + fence = amdgpu_ctx_get_fence(ctx, ring, deps[j].handle); if (IS_ERR(fence)) { @@ -1086,6 +1016,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1093,6 +1024,8 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; parser.adev = adev; parser.filp = filp; @@ -1154,21 +1087,28 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); - if (r) - return r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, + wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return r; + } + fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); if (IS_ERR(fence)) r = PTR_ERR(fence); @@ -1204,15 +1144,17 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, struct dma_fence *fence; int r; - r = amdgpu_cs_get_ring(adev, user->ip_type, user->ip_instance, - user->ring, &ring); - if (r) - return ERR_PTR(r); - ctx = amdgpu_ctx_get(filp->driver_priv, user->ctx_id); if (ctx == NULL) return ERR_PTR(-EINVAL); + r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, + user->ip_instance, user->ring, &ring); + if (r) { + amdgpu_ctx_put(ctx); + return ERR_PTR(r); + } + fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); amdgpu_ctx_put(ctx); @@ -1333,12 +1275,15 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; /* Get the fences 
from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 90d1ac8a80f8..a11e44340b23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -52,12 +52,20 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) struct amd_sched_rq *rq; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + + if (ring == &adev->gfx.kiq.ring) + continue; + r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, rq, amdgpu_sched_jobs); if (r) goto failed; } + r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr); + if (r) + goto failed; + return 0; failed: @@ -86,6 +94,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) for (i = 0; i < adev->num_rings; i++) amd_sched_entity_fini(&adev->rings[i]->sched, &ctx->rings[i].entity); + + amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 43ca16b6eee2..875cde414be7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -54,8 +54,14 @@ #include <linux/pci.h> #include <linux/firmware.h> +MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); + +#define AMDGPU_RESUME_MS 2000 + static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev); +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev); static const char *amdgpu_asic_name[] = { "TAHITI", @@ -77,6 +83,7 @@ static const char *amdgpu_asic_name[] = { "POLARIS11", "POLARIS12", "VEGA10", + "RAVEN", "LAST", }; @@ -478,9 +485,8 @@ void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, /* * amdgpu_wb_*() - * Writeback is the the method by which the the GPU updates special pages - * in memory with the status of certain GPU events (fences, ring pointers, - * etc.). + * Writeback is the method by which the GPU updates special pages in memory + * with the status of certain GPU events (fences, ring pointers,etc.). */ /** @@ -506,7 +512,7 @@ static void amdgpu_wb_fini(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * Disables Writeback and frees the Writeback memory (all asics). + * Initializes writeback and allocates writeback memory (all asics). * Used at driver startup. * Returns 0 on success or an -error on failure. */ @@ -614,7 +620,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) * @mc: memory controller structure holding memory informations * @base: base address at which to put VRAM * - * Function will place try to place VRAM at base address provided + * Function will try to place VRAM at base address provided * as parameter (which is so far either PCI aperture address or * for IGP TOM base address). * @@ -636,7 +642,7 @@ void amdgpu_wb_free_64bit(struct amdgpu_device *adev, u32 wb) * ones) * * Note: IGP TOM addr should be the same as the aperture addr, we don't - * explicitly check for that thought. + * explicitly check for that though. * * FIXME: when reducing VRAM size align new size on power of 2. 
*/ @@ -1342,6 +1348,9 @@ int amdgpu_ip_block_add(struct amdgpu_device *adev, if (!ip_block_version) return -EINVAL; + DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks, + ip_block_version->funcs->name); + adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; return 0; @@ -1392,6 +1401,104 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) } } +static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) +{ + const char *chip_name; + char fw_name[30]; + int err; + const struct gpu_info_firmware_header_v1_0 *hdr; + + adev->firmware.gpu_info_fw = NULL; + + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS11: + case CHIP_POLARIS10: + case CHIP_POLARIS12: + case CHIP_CARRIZO: + case CHIP_STONEY: +#ifdef CONFIG_DRM_AMDGPU_SI + case CHIP_VERDE: + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_OLAND: + case CHIP_HAINAN: +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KAVERI: + case CHIP_KABINI: + case CHIP_MULLINS: +#endif + default: + return 0; + case CHIP_VEGA10: + chip_name = "vega10"; + break; + case CHIP_RAVEN: + chip_name = "raven"; + break; + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name); + err = request_firmware(&adev->firmware.gpu_info_fw, fw_name, adev->dev); + if (err) { + dev_err(adev->dev, + "Failed to load gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + err = amdgpu_ucode_validate(adev->firmware.gpu_info_fw); + if (err) { + dev_err(adev->dev, + "Failed to validate gpu_info firmware \"%s\"\n", + fw_name); + goto out; + } + + hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; + amdgpu_ucode_print_gpu_info_hdr(&hdr->header); + + switch (hdr->version_major) { + case 1: + { + const struct gpu_info_firmware_v1_0 *gpu_info_fw = + (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + + adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = + le32_to_cpu(gpu_info_fw->gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = + le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = + le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = + le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); + break; + } + default: + dev_err(adev->dev, + "Unsupported gpu_info table %d\n", hdr->header.ucode_version); + err = -EINVAL; + goto out; + } +out: + return err; +} + static int amdgpu_early_init(struct amdgpu_device *adev) { int i, r; @@ -1444,8 +1551,12 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return r; break; #endif 
- case CHIP_VEGA10: - adev->family = AMDGPU_FAMILY_AI; + case CHIP_VEGA10: + case CHIP_RAVEN: + if (adev->asic_type == CHIP_RAVEN) + adev->family = AMDGPU_FAMILY_RV; + else + adev->family = AMDGPU_FAMILY_AI; r = soc15_set_ip_blocks(adev); if (r) @@ -1456,6 +1567,10 @@ static int amdgpu_early_init(struct amdgpu_device *adev) return -EINVAL; } + r = amdgpu_device_parse_gpu_info_fw(adev); + if (r) + return r; + if (amdgpu_sriov_vf(adev)) { r = amdgpu_virt_request_full_gpu(adev, true); if (r) @@ -1464,7 +1579,8 @@ static int amdgpu_early_init(struct amdgpu_device *adev) for (i = 0; i < adev->num_ip_blocks; i++) { if ((amdgpu_ip_block_mask & (1 << i)) == 0) { - DRM_ERROR("disabled ip block: %d\n", i); + DRM_ERROR("disabled ip block: %d <%s>\n", + i, adev->ip_blocks[i].version->funcs->name); adev->ip_blocks[i].status.valid = false; } else { if (adev->ip_blocks[i].version->funcs->early_init) { @@ -1552,22 +1668,24 @@ static int amdgpu_init(struct amdgpu_device *adev) return 0; } -static int amdgpu_late_init(struct amdgpu_device *adev) +static void amdgpu_fill_reset_magic(struct amdgpu_device *adev) +{ + memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); +} + +static bool amdgpu_check_vram_lost(struct amdgpu_device *adev) +{ + return !!memcmp(adev->gart.ptr, adev->reset_magic, + AMDGPU_RESET_MAGIC_NUM); +} + +static int amdgpu_late_set_cg_state(struct amdgpu_device *adev) { int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->funcs->late_init) { - r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); - if (r) { - DRM_ERROR("late_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.late_initialized = true; - } /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) { @@ -1581,6 +1699,31 @@ static int amdgpu_late_init(struct amdgpu_device *adev) } } } + return 0; +} + +static int amdgpu_late_init(struct amdgpu_device *adev) +{ + int i = 0, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.valid) + continue; + if (adev->ip_blocks[i].version->funcs->late_init) { + r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); + if (r) { + DRM_ERROR("late_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.late_initialized = true; + } + } + + mod_delayed_work(system_wq, &adev->late_init_work, + msecs_to_jiffies(AMDGPU_RESUME_MS)); + + amdgpu_fill_reset_magic(adev); return 0; } @@ -1672,6 +1815,13 @@ static int amdgpu_fini(struct amdgpu_device *adev) return 0; } +static void amdgpu_late_init_func_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, late_init_work.work); + amdgpu_late_set_cg_state(adev); +} + int amdgpu_suspend(struct amdgpu_device *adev) { int i, r; @@ -1717,19 +1867,25 @@ static int amdgpu_sriov_reinit_early(struct amdgpu_device *adev) { int i, r; - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) - continue; + static enum amd_ip_block_type ip_order[] = { + AMD_IP_BLOCK_TYPE_GMC, + AMD_IP_BLOCK_TYPE_COMMON, + AMD_IP_BLOCK_TYPE_IH, + }; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - 
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) - r = adev->ip_blocks[i].version->funcs->hw_init(adev); + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); } } @@ -1740,33 +1896,67 @@ static int amdgpu_sriov_reinit_late(struct amdgpu_device *adev) { int i, r; + static enum amd_ip_block_type ip_order[] = { + AMD_IP_BLOCK_TYPE_SMC, + AMD_IP_BLOCK_TYPE_DCE, + AMD_IP_BLOCK_TYPE_GFX, + AMD_IP_BLOCK_TYPE_SDMA, + AMD_IP_BLOCK_TYPE_VCE, + }; + + for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + int j; + struct amdgpu_ip_block *block; + + for (j = 0; j < adev->num_ip_blocks; j++) { + block = &adev->ip_blocks[j]; + + if (block->version->type != ip_order[i] || + !block->status.valid) + continue; + + r = block->version->funcs->hw_init(adev); + DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed"); + } + } + + return 0; +} + +static int amdgpu_resume_phase1(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) - continue; - - r = adev->ip_blocks[i].version->funcs->hw_init(adev); - if (r) { - DRM_ERROR("resume of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; + adev->ip_blocks[i].version->type == + AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } } } return 0; } -static int amdgpu_resume(struct amdgpu_device *adev) +static int amdgpu_resume_phase2(struct amdgpu_device *adev) { int i, r; for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ) + continue; r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { DRM_ERROR("resume of IP block <%s> failed %d\n", @@ -1778,6 +1968,18 @@ static int amdgpu_resume(struct amdgpu_device *adev) return 0; } +static int amdgpu_resume(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_resume_phase1(adev); + if (r) + return r; + r = amdgpu_resume_phase2(adev); + + return r; +} + static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) { if (adev->is_atom_fw) { @@ -1860,8 +2062,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, amdgpu_check_arguments(adev); - /* Registers mapping */ - /* TODO: block userspace mapping of io register */ spin_lock_init(&adev->mmio_idx_lock); spin_lock_init(&adev->smc_idx_lock); spin_lock_init(&adev->pcie_idx_lock); @@ -1877,6 +2077,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_LIST_HEAD(&adev->gtt_list); spin_lock_init(&adev->gtt_list_lock); + INIT_LIST_HEAD(&adev->ring_lru_list); + spin_lock_init(&adev->ring_lru_list_lock); + + 
INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_late_init_func_handler); + + /* Registers mapping */ + /* TODO: block userspace mapping of io register */ if (adev->asic_type >= CHIP_BONAIRE) { adev->rmmio_base = pci_resource_start(adev->pdev, 5); adev->rmmio_size = pci_resource_len(adev->pdev, 5); @@ -1989,6 +2196,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->accel_working = true; + amdgpu_vm_check_compute_bug(adev); + /* Initialize the buffer migration limit. */ if (amdgpu_moverate >= 0) max_MBps = amdgpu_moverate; @@ -2017,6 +2226,10 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("registering register debugfs failed (%d).\n", r); + r = amdgpu_debugfs_test_ib_ring_init(adev); + if (r) + DRM_ERROR("registering register test ib ring debugfs failed (%d).\n", r); + r = amdgpu_debugfs_firmware_init(adev); if (r) DRM_ERROR("registering firmware debugfs failed (%d).\n", r); @@ -2073,7 +2286,12 @@ void amdgpu_device_fini(struct amdgpu_device *adev) amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); + if (adev->firmware.gpu_info_fw) { + release_firmware(adev->firmware.gpu_info_fw); + adev->firmware.gpu_info_fw = NULL; + } adev->accel_working = false; + cancel_delayed_work_sync(&adev->late_init_work); /* free i2c buses */ amdgpu_i2c_fini(adev); amdgpu_atombios_fini(adev); @@ -2458,16 +2676,15 @@ err: * amdgpu_sriov_gpu_reset - reset the asic * * @adev: amdgpu device pointer - * @voluntary: if this reset is requested by guest. - * (true means by guest and false means by HYPERVISOR ) + * @job: which job trigger hang * * Attempt the reset the GPU if it has hung (all asics). * for SRIOV case. * Returns 0 for success or an error on failure. */ -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job) { - int i, r = 0; + int i, j, r = 0; int resched; struct amdgpu_bo *bo, *tmp; struct amdgpu_ring *ring; @@ -2480,22 +2697,39 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); - /* block scheduler */ - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ring = adev->rings[i]; + /* we start from the ring trigger GPU hang */ + j = job ? 
job->ring->idx : 0; + /* block scheduler */ + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; kthread_park(ring->sched.thread); + + if (job && j != i) + continue; + + /* here give the last chance to check if job removed from mirror-list + * since we already pay some time on kthread_park */ + if (job && list_empty(&job->base.node)) { + kthread_unpark(ring->sched.thread); + goto give_up_reset; + } + + if (amd_sched_invalidate_job(&job->base, amdgpu_job_hang_limit)) + amd_sched_job_kickout(&job->base); + + /* only do job_reset on the hang ring if @job not NULL */ amd_sched_hw_job_reset(&ring->sched); - } - /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ - amdgpu_fence_driver_force_completion(adev); + /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ + amdgpu_fence_driver_force_completion_ring(ring); + } /* request to take full control of GPU before re-initialization */ - if (voluntary) + if (job) amdgpu_virt_reset_gpu(adev); else amdgpu_virt_request_full_gpu(adev, true); @@ -2545,20 +2779,28 @@ int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary) } dma_fence_put(fence); - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; + for (i = j; i < j + AMDGPU_MAX_RINGS; ++i) { + ring = adev->rings[i % AMDGPU_MAX_RINGS]; if (!ring || !ring->sched.thread) continue; + if (job && j != i) { + kthread_unpark(ring->sched.thread); + continue; + } + amd_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); } drm_helper_resume_force_mode(adev->ddev); +give_up_reset: ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ dev_info(adev->dev, "GPU reset failed\n"); + } else { + dev_info(adev->dev, "GPU reset successed!\n"); } adev->gfx.in_reset = false; @@ -2578,10 +2820,7 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) { int i, r; int resched; - bool need_full_reset; - - if (amdgpu_sriov_vf(adev)) - return amdgpu_sriov_gpu_reset(adev, true); + bool need_full_reset, vram_lost = false; if (!amdgpu_check_soft_reset(adev)) { DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); @@ -2641,16 +2880,27 @@ retry: if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_resume(adev); + r = amdgpu_resume_phase1(adev); + if (r) + goto out; + vram_lost = amdgpu_check_vram_lost(adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&adev->vram_lost_counter); + } + r = amdgpu_ttm_recover_gart(adev); + if (r) + goto out; + r = amdgpu_resume_phase2(adev); + if (r) + goto out; + if (vram_lost) + amdgpu_fill_reset_magic(adev); } } +out: if (!r) { amdgpu_irq_gpu_reset_resume_helper(adev); - if (need_full_reset && amdgpu_need_backup(adev)) { - r = amdgpu_ttm_recover_gart(adev); - if (r) - DRM_ERROR("gart recovery failed!!!\n"); - } r = amdgpu_ib_ring_tests(adev); if (r) { dev_err(adev->dev, "ib ring test failed (%d).\n", r); @@ -2712,10 +2962,11 @@ retry: drm_helper_resume_force_mode(adev->ddev); ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); - if (r) { + if (r) /* bad news, how to tell it to userspace ? 
*/ dev_info(adev->dev, "GPU reset failed\n"); - } + else + dev_info(adev->dev, "GPU reset succeeded!\n"); return r; } @@ -3499,11 +3750,60 @@ static void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev) } } +static int amdgpu_debugfs_test_ib(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + int r = 0, i; + + /* park the scheduler threads */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_park(ring->sched.thread); + } + + seq_printf(m, "run ib test:\n"); + r = amdgpu_ib_ring_tests(adev); + if (r) + seq_printf(m, "ib ring tests failed (%d).\n", r); + else + seq_printf(m, "ib ring tests passed.\n"); + + /* unpark the scheduler threads */ + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + + if (!ring || !ring->sched.thread) + continue; + kthread_unpark(ring->sched.thread); + } + + return 0; +} + +static const struct drm_info_list amdgpu_debugfs_test_ib_ring_list[] = { + {"amdgpu_test_ib", &amdgpu_debugfs_test_ib} +}; + +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return amdgpu_debugfs_add_files(adev, + amdgpu_debugfs_test_ib_ring_list, 1); +} + int amdgpu_debugfs_init(struct drm_minor *minor) { return 0; } #else +static int amdgpu_debugfs_test_ib_ring_init(struct amdgpu_device *adev) +{ + return 0; +} static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 31eddd85eb40..8168f8ec711a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -65,9 +65,11 @@ * - 3.13.0 - Add PRT support * - 3.14.0 - Fix race in amdgpu_ctx_get_fence() and note new functionality * - 3.15.0 - Export more gpu info for gfx9 + * - 3.16.0 - Add reserved vmid support + * - 3.17.0 - Add AMDGPU_NUM_VRAM_CPU_PAGE_FAULTS. 
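(As an aside on the 3.17.0 bump: a userspace client could read the new counter through the existing INFO ioctl. The sketch below is illustrative only and not part of the patch; it assumes a libdrm-style build where drm/amdgpu_drm.h is available.)

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/amdgpu_drm.h>

/* Query how many CPU page faults VRAM buffers have taken so far. */
static int query_vram_cpu_page_faults(int drm_fd, uint64_t *count)
{
	struct drm_amdgpu_info request;

	memset(&request, 0, sizeof(request));
	request.return_pointer = (uintptr_t)count;
	request.return_size = sizeof(*count);
	request.query = AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS;

	return ioctl(drm_fd, DRM_IOCTL_AMDGPU_INFO, &request);
}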
*/ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 15 +#define KMS_DRIVER_MINOR 17 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; @@ -92,7 +94,8 @@ int amdgpu_vm_size = -1; int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; -int amdgpu_vram_page_split = 1024; +int amdgpu_vram_page_split = 512; +int amdgpu_vm_update_mode = -1; int amdgpu_exp_hw_support = 0; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; @@ -110,6 +113,8 @@ int amdgpu_prim_buf_per_se = 0; int amdgpu_pos_buf_per_se = 0; int amdgpu_cntl_sb_buf_per_se = 0; int amdgpu_param_buf_per_se = 0; +int amdgpu_job_hang_limit = 0; +int amdgpu_lbpw = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); @@ -177,6 +182,9 @@ module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444); MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)"); module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); +MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both)"); +module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); + MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)"); module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444); @@ -232,6 +240,24 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); +MODULE_PARM_DESC(job_hang_limit, "how many times to allow a job to hang before dropping it (default 0)"); +module_param_named(job_hang_limit, amdgpu_job_hang_limit, int, 0444); + +MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)"); +module_param_named(lbpw, amdgpu_lbpw, int, 0444); + +#ifdef CONFIG_DRM_AMDGPU_SI +int amdgpu_si_support = 0; +MODULE_PARM_DESC(si_support, "SI support (1 = enabled, 0 = disabled (default))"); +module_param_named(si_support, amdgpu_si_support, int, 0444); +#endif + +#ifdef CONFIG_DRM_AMDGPU_CIK +int amdgpu_cik_support = 0; +MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled, 0 = disabled (default))"); +module_param_named(cik_support, amdgpu_cik_support, int, 0444); +#endif + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI @@ -460,6 +486,9 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10|AMD_EXP_HW_SUPPORT}, + /* Raven */ + {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU|AMD_EXP_HW_SUPPORT}, + {0, 0, 0} }; @@ -491,6 +520,7 @@ static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev) static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { + struct drm_device *dev; unsigned long flags = ent->driver_data; int ret; @@ -513,7 +543,29 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, if (ret) return ret; - return drm_get_pci_dev(pdev, ent, &kms_driver); + dev = drm_dev_alloc(&kms_driver, &pdev->dev); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = pci_enable_device(pdev); + if (ret) 
+ goto err_free; + + dev->pdev = pdev; + + pci_set_drvdata(pdev, dev); + + ret = drm_dev_register(dev, ent->driver_data); + if (ret) + goto err_pci; + + return 0; + +err_pci: + pci_disable_device(pdev); +err_free: + drm_dev_unref(dev); + return ret; } static void @@ -521,7 +573,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - drm_put_dev(dev); + drm_dev_unregister(dev); + drm_dev_unref(dev); } static void @@ -817,7 +870,7 @@ static int __init amdgpu_init(void) driver->num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); /* let modprobe override vga console setting */ - return drm_pci_init(driver, pdriver); + return pci_register_driver(pdriver); error_sched: amdgpu_fence_slab_fini(); @@ -832,7 +885,7 @@ error_sync: static void __exit amdgpu_exit(void) { amdgpu_amdkfd_fini(); - drm_pci_exit(driver, pdriver); + pci_unregister_driver(pdriver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amd_sched_fence_slab_fini(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7b60fb79c3a6..333bad749067 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -541,6 +541,12 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) } } +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring) +{ + if (ring) + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); +} + /* * Common fence implementation */ @@ -660,11 +666,17 @@ static const struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; + +static const struct drm_info_list amdgpu_debugfs_fence_list_sriov[] = { + {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, +}; #endif int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) + if (amdgpu_sriov_vf(adev)) + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list_sriov, 1); return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); #else return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 902e6015abca..a57abc1a25fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). + * Returns 0 for success, -EINVAL for failure. 
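(A minimal illustrative sketch, not part of the patch: with amdgpu_gart_unbind() now returning an error code, a hypothetical caller inside the driver can propagate the failure instead of losing it. The helper name is invented for illustration.)

static int example_teardown_mapping(struct amdgpu_device *adev,
				    uint64_t offset, int pages)
{
	int r = amdgpu_gart_unbind(adev, offset, pages);

	if (r)
		/* e.g. the GART was never initialized */
		DRM_ERROR("unbinding %d pages at 0x%llx failed (%d)\n",
			  pages, offset, r);
	return r;
}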
*/ -void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, +int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, if (!adev->gart.ready) { WARN(1, "trying to unbind memory from uninitialized GART !\n"); - return; + return -EINVAL; } t = offset / AMDGPU_GPU_PAGE_SIZE; @@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_gart_flush_gpu_tlb(adev, 0); + return 0; } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 94cb91cf93eb..621f739103a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -219,16 +219,6 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, ttm_eu_backoff_reservation(&ticket, &list); } -static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r) -{ - if (r == -EDEADLK) { - r = amdgpu_gpu_reset(adev); - if (!r) - r = -EAGAIN; - } - return r; -} - /* * GEM ioctls. */ @@ -249,20 +239,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED| AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) + return -EINVAL; + /* reject invalid gem domains */ if (args->in.domains & ~(AMDGPU_GEM_DOMAIN_CPU | AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA)) { - r = -EINVAL; - goto error_unlock; - } + AMDGPU_GEM_DOMAIN_OA)) + return -EINVAL; /* create a gem object to contain this object in */ if (args->in.domains & (AMDGPU_GEM_DOMAIN_GDS | @@ -274,10 +261,8 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, size = size << AMDGPU_GWS_SHIFT; else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) size = size << AMDGPU_OA_SHIFT; - else { - r = -EINVAL; - goto error_unlock; - } + else + return -EINVAL; } size = roundup(size, PAGE_SIZE); @@ -286,21 +271,17 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, args->in.domain_flags, kernel, &gobj); if (r) - goto error_unlock; + return r; r = drm_gem_handle_create(filp, gobj, &handle); /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) - goto error_unlock; + return r; memset(args, 0, sizeof(*args)); args->out.handle = handle; return 0; - -error_unlock: - r = amdgpu_gem_handle_lockup(adev, r); - return r; } int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, @@ -334,7 +315,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_DOMAIN_CPU, 0, 0, &gobj); if (r) - goto handle_lockup; + return r; bo = gem_to_amdgpu_bo(gobj); bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; @@ -374,7 +355,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) - goto handle_lockup; + return r; args->handle = handle; return 0; @@ -388,9 +369,6 @@ unlock_mmap_sem: release_object: drm_gem_object_unreference_unlocked(gobj); -handle_lockup: - r = amdgpu_gem_handle_lockup(adev, r); - return r; } @@ -456,7 +434,6 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct amdgpu_device *adev = 
dev->dev_private; union drm_amdgpu_gem_wait_idle *args = data; struct drm_gem_object *gobj; struct amdgpu_bo *robj; @@ -484,7 +461,6 @@ int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; drm_gem_object_unreference_unlocked(gobj); - r = amdgpu_gem_handle_lockup(adev, r); return r; } @@ -593,9 +569,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, uint64_t va_flags; int r = 0; - if (!adev->vm_manager.enabled) - return -ENOTTY; - if (args->va_address < AMDGPU_VA_RESERVED_SIZE) { dev_err(&dev->pdev->dev, "va_address 0x%lX is in reserved area 0x%X\n", @@ -621,6 +594,11 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } + if ((args->operation == AMDGPU_VA_OP_MAP) || + (args->operation == AMDGPU_VA_OP_REPLACE)) { + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; + } INIT_LIST_HEAD(&list); if ((args->operation != AMDGPU_VA_OP_CLEAR) && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 19943356cca7..e26108aad3fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -108,3 +108,209 @@ void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_s p = next + 1; } } + +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) +{ + int i, queue, pipe, mec; + + /* policy for amdgpu compute queue ownership */ + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + queue = i % adev->gfx.mec.num_queue_per_pipe; + pipe = (i / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + mec = (i / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + + /* we've run out of HW */ + if (mec >= adev->gfx.mec.num_mec) + break; + + if (adev->gfx.mec.num_mec > 1) { + /* policy: amdgpu owns the first two queues of the first MEC */ + if (mec == 0 && queue < 2) + set_bit(i, adev->gfx.mec.queue_bitmap); + } else { + /* policy: amdgpu owns all queues in the first pipe */ + if (mec == 0 && pipe == 0) + set_bit(i, adev->gfx.mec.queue_bitmap); + } + } + + /* update the number of active compute rings */ + adev->gfx.num_compute_rings = + bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* If you hit this case and edited the policy, you probably just + * need to increase AMDGPU_MAX_COMPUTE_RINGS */ + if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS)) + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; +} + +static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + int queue_bit; + int mec, pipe, queue; + + queue_bit = adev->gfx.mec.num_mec + * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + + while (--queue_bit >= 0) { + if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap)) + continue; + + amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue); + + /* Using pipes 2/3 from MEC 2 seems to cause problems */ + if (mec == 1 && pipe > 1) + continue; + + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + return 0; + } + + dev_err(adev->dev, "Failed to find a queue for KIQ\n"); + return -EINVAL; +} + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + int r = 0; + + mutex_init(&kiq->ring_mutex); + + r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); + if (r) + return r; + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = true; + 
ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + + r = amdgpu_gfx_kiq_acquire(adev, ring); + if (r) + return r; + + ring->eop_gpu_addr = kiq->eop_gpu_addr; + sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue); + r = amdgpu_ring_init(adev, ring, 1024, + irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); + if (r) + dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); + + return r; +} + +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq) +{ + amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); + amdgpu_ring_fini(ring); +} + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); +} + +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size) +{ + int r; + u32 *hpd; + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + + r = amdgpu_bo_create_kernel(adev, hpd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, + &kiq->eop_gpu_addr, (void **)&hpd); + if (r) { + dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); + return r; + } + + memset(hpd, 0, hpd_size); + + r = amdgpu_bo_reserve(kiq->eop_obj, true); + if (unlikely(r != 0)) + dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); + amdgpu_bo_kunmap(kiq->eop_obj); + amdgpu_bo_unreserve(kiq->eop_obj); + + return 0; +} + +/* create MQD for each compute queue */ +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size) +{ + struct amdgpu_ring *ring = NULL; + int r, i; + + /* create MQD for KIQ */ + ring = &adev->gfx.kiq.ring; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + + /* create MQD for each KCQ */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (!ring->mqd_obj) { + r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, + &ring->mqd_gpu_addr, &ring->mqd_ptr); + if (r) { + dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r); + return r; + } + + /* prepare MQD backup */ + adev->gfx.mec.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL); + if (!adev->gfx.mec.mqd_backup[i]) + dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); + } + } + + return 0; +} + +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int i; + + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + kfree(adev->gfx.mec.mqd_backup[i]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); + } + + ring = &adev->gfx.kiq.ring; + kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); + amdgpu_bo_free_kernel(&ring->mqd_obj, + &ring->mqd_gpu_addr, + &ring->mqd_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index e02044086445..1f279050d334 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -30,4 +30,64 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); 
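(A standalone illustration, not part of the patch, of the ownership policy amdgpu_gfx_compute_queue_acquire() applies above; the MEC/pipe/queue counts are made-up example values.)

#include <stdio.h>

int main(void)
{
	/* assumed example topology: 2 MECs, 4 pipes per MEC, 8 queues per pipe */
	const int num_mec = 2, pipes_per_mec = 4, queues_per_pipe = 8;
	int i;

	for (i = 0; i < num_mec * pipes_per_mec * queues_per_pipe; ++i) {
		int queue = i % queues_per_pipe;
		int pipe = (i / queues_per_pipe) % pipes_per_mec;
		int mec = (i / queues_per_pipe) / pipes_per_mec;

		/* with more than one MEC, amdgpu claims only the first two
		 * queues of MEC 0; all other bits stay clear in the bitmap */
		if (num_mec > 1 && mec == 0 && queue < 2)
			printf("amdgpu owns bit %d: mec %d pipe %d queue %d\n",
			       i, mec, pipe, queue);
	}
	return 0;
}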
void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh); +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); + +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); + +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size); + +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); + +/** + * amdgpu_gfx_create_bitmask - create a bitmask + * + * @bit_width: length of the mask + * + * create a variable length bit mask. + * Returns the bitmask. + */ +static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + int bit = 0; + + bit += mec * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + bit += pipe * adev->gfx.mec.num_queue_per_pipe; + bit += queue; + + return bit; +} + +static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} +static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); +} + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 6e4ae0d983c2..f774b3f497d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,6 +121,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; + struct dma_fence *tmp = NULL; bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; @@ -160,8 +161,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } - if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync) + + if (ring->funcs->emit_pipeline_sync && job && + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || + amdgpu_vm_need_pipeline_sync(ring, job))) { amdgpu_ring_emit_pipeline_sync(ring); + dma_fence_put(tmp); + } + + if (ring->funcs->insert_start) + ring->funcs->insert_start(ring); if (vm) { r = amdgpu_vm_flush(ring, job); @@ -188,8 +197,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, status |= AMDGPU_HAVE_CTX_SWITCH; status |= job->preamble_status; - if (vm) - status |= AMDGPU_VM_DOMAIN; amdgpu_ring_emit_cntxcntl(ring, status); } @@ -208,6 +215,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = false; } + if (ring->funcs->emit_tmz) + amdgpu_ring_emit_tmz(ring, false); + if (ring->funcs->emit_hdp_invalidate #ifdef CONFIG_X86_64 && !(adev->flags & AMD_IS_APU) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index a3da1a122fc8..3de8e74e5b3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -62,8 +62,9 @@ enum amdgpu_ih_clientid AMDGPU_IH_CLIENTID_MP0 = 0x1e, AMDGPU_IH_CLIENTID_MP1 = 0x1f, - AMDGPU_IH_CLIENTID_MAX + AMDGPU_IH_CLIENTID_MAX, + AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD }; #define AMDGPU_IH_CLIENTID_LEGACY 0 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index a6b7e367a860..62da6c5c6095 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -83,7 +83,8 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) struct amdgpu_device *adev = container_of(work, struct amdgpu_device, reset_work); - amdgpu_gpu_reset(adev); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gpu_reset(adev); } /* Disable *all* interrupts */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 7570f2439a11..3d641e10e6b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -36,7 +36,11 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) job->base.sched->name, atomic_read(&job->ring->fence_drv.last_seq), job->ring->fence_drv.sync_seq); - amdgpu_gpu_reset(job->adev); + + if (amdgpu_sriov_vf(job->adev)) + amdgpu_sriov_gpu_reset(job->adev, job); + else + amdgpu_gpu_reset(job->adev); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -57,9 +61,10 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, (*job)->vm = vm; (*job)->ibs = (void *)&(*job)[1]; (*job)->num_ibs = num_ibs; - (*job)->need_pipeline_sync = false; amdgpu_sync_create(&(*job)->sync); + amdgpu_sync_create(&(*job)->dep_sync); + amdgpu_sync_create(&(*job)->sched_sync); return 0; } @@ -98,6 +103,8 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -107,6 +114,8 @@ void amdgpu_job_free(struct amdgpu_job *job) dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); + amdgpu_sync_free(&job->dep_sync); + amdgpu_sync_free(&job->sched_sync); kfree(job); } @@ -138,11 +147,18 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; - struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync); + struct dma_fence *fence = amdgpu_sync_get_fence(&job->dep_sync); + int r; + if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) { + r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence to sync (%d)\n", r); + } + if (!fence) + fence = amdgpu_sync_get_fence(&job->sync); while (fence == NULL && vm && !job->vm_id) { struct amdgpu_ring *ring = job->ring; - int r; r = amdgpu_vm_grab_id(vm, ring, &job->sync, &job->base.s_fence->finished, @@ -153,9 +169,6 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) fence = amdgpu_sync_get_fence(&job->sync); } - if (amd_sched_dependency_optimized(fence, sched_job->s_entity)) - job->need_pipeline_sync = true; - return fence; } @@ -163,6 +176,7 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; struct amdgpu_job *job; + struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -174,10 +188,16 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); 
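	/* Note (illustrative comment, not in the original patch): the hunk that
	 * follows makes the run hook bail out of IB submission when the
	 * submitting client's view of VRAM is stale, i.e. when
	 * amdgpu_kms_vram_lost() reports that a VRAM-losing reset happened
	 * after the client opened the device. */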
trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); - if (r) - DRM_ERROR("Error scheduling IBs (%d)\n", r); - + if (job->vm) + fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); + /* skip ib schedule when vram is lost */ + if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + DRM_ERROR("Skip scheduling IBs!\n"); + else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + if (r) + DRM_ERROR("Error scheduling IBs (%d)\n", r); + } /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index dca4be970d13..12497a40ef92 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -87,6 +87,41 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) struct amdgpu_device *adev; int r, acpi_status; +#ifdef CONFIG_DRM_AMDGPU_SI + if (!amdgpu_si_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + case CHIP_OLAND: + case CHIP_HAINAN: + dev_info(dev->dev, + "SI support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.si_support=0 amdgpu.si_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif +#ifdef CONFIG_DRM_AMDGPU_CIK + if (!amdgpu_cik_support) { + switch (flags & AMD_ASIC_MASK) { + case CHIP_KAVERI: + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + dev_info(dev->dev, + "CIK support provided by radeon.\n"); + dev_info(dev->dev, + "Use radeon.cik_support=0 amdgpu.cik_support=1 to override.\n" + ); + return -ENODEV; + } + } +#endif + adev = kzalloc(sizeof(struct amdgpu_device), GFP_KERNEL); if (adev == NULL) { return -ENOMEM; @@ -235,6 +270,7 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -247,6 +283,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; + if (amdgpu_kms_vram_lost(adev, fpriv)) + return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -319,6 +357,19 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; ib_size_alignment = 1; break; + case AMDGPU_HW_IP_VCN_DEC: + type = AMD_IP_BLOCK_TYPE_VCN; + ring_mask = adev->vcn.ring_dec.ready ? 1 : 0; + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 16; + break; + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + for (i = 0; i < adev->vcn.num_enc_rings; i++) + ring_mask |= ((adev->vcn.ring_enc[i].ready ? 
1 : 0) << i); + ib_start_alignment = AMDGPU_GPU_PAGE_SIZE; + ib_size_alignment = 1; + break; default: return -EINVAL; } @@ -361,6 +412,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_HW_IP_UVD_ENC: type = AMD_IP_BLOCK_TYPE_UVD; break; + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + break; default: return -EINVAL; } @@ -397,6 +452,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file case AMDGPU_INFO_NUM_EVICTIONS: ui64 = atomic64_read(&adev->num_evictions); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; + case AMDGPU_INFO_NUM_VRAM_CPU_PAGE_FAULTS: + ui64 = atomic64_read(&adev->num_vram_cpu_page_faults); + return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; case AMDGPU_INFO_VRAM_USAGE: ui64 = atomic64_read(&adev->vram_usage); return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0; @@ -730,6 +788,12 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } +bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, + struct amdgpu_fpriv *fpriv) +{ + return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); +} + /** * amdgpu_driver_open_kms - drm callback for open * @@ -757,7 +821,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) goto out_suspend; } - r = amdgpu_vm_init(adev, &fpriv->vm); + r = amdgpu_vm_init(adev, &fpriv->vm, + AMDGPU_VM_CONTEXT_GFX); if (r) { kfree(fpriv); goto out_suspend; @@ -782,6 +847,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); + fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: @@ -814,8 +880,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); - amdgpu_uvd_free_handles(adev, file_priv); - amdgpu_vce_free_handles(adev, file_priv); + if (adev->asic_type != CHIP_RAVEN) { + amdgpu_uvd_free_handles(adev, file_priv); + amdgpu_vce_free_handles(adev, file_priv); + } amdgpu_vm_bo_rmv(adev, fpriv->prt_va); @@ -948,6 +1016,7 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe) const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ DRM_IOCTL_DEF_DRV(AMDGPU_GEM_MMAP, amdgpu_gem_mmap_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 365883d7948d..8ee69652be8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -960,6 +960,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) return -EINVAL; /* hurrah the memory is not visible ! 
*/ + atomic64_inc(&adev->num_vram_cpu_page_faults); amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM); lpfn = adev->mc.visible_vram_size >> PAGE_SHIFT; for (i = 0; i < abo->placement.num_placement; i++) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index f5ae871aa11c..72c03c744594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -72,6 +72,7 @@ static int amdgpu_pp_early_init(void *handle) case CHIP_CARRIZO: case CHIP_STONEY: case CHIP_VEGA10: + case CHIP_RAVEN: adev->pp_enabled = true; if (amdgpu_create_pp_handle(adev)) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 596e3957bdd9..c224c5caba5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -30,6 +30,7 @@ #include "amdgpu_ucode.h" #include "soc15_common.h" #include "psp_v3_1.h" +#include "psp_v10_0.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -61,6 +62,12 @@ static int psp_sw_init(void *handle) psp->compare_sram_data = psp_v3_1_compare_sram_data; psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk; break; + case CHIP_RAVEN: + psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf; + psp->ring_init = psp_v10_0_ring_init; + psp->cmd_submit = psp_v10_0_cmd_submit; + psp->compare_sram_data = psp_v10_0_compare_sram_data; + break; default: return -EINVAL; } @@ -230,6 +237,13 @@ static int psp_asd_load(struct psp_context *psp) int ret; struct psp_gfx_cmd_resp *cmd; + /* If the PSP version doesn't match the ASD version, ASD loading will fail. + * Add a workaround to bypass it for SR-IOV for now. + * TODO: add a version check to make this common + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!cmd) return -ENOMEM; @@ -542,3 +556,12 @@ const struct amdgpu_ip_block_version psp_v3_1_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v10_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 10, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 0301e4e0b297..1a1c8b469f93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -138,4 +138,6 @@ extern const struct amdgpu_ip_block_version psp_v3_1_ip_block; extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, uint32_t field_val, uint32_t mask, bool check_changed); +extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c new file mode 100644 index 000000000000..befc09b68543 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c @@ -0,0 +1,299 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial 
portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez + */ + +#include "amdgpu.h" +#include "amdgpu_ring.h" + +static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper, + int hw_ip) +{ + if (!mapper) + return -EINVAL; + + if (hw_ip >= AMDGPU_MAX_IP_NUM) + return -EINVAL; + + mapper->hw_ip = hw_ip; + mutex_init(&mapper->lock); + + memset(mapper->queue_map, 0, sizeof(mapper->queue_map)); + + return 0; +} + +static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper, + int ring) +{ + return mapper->queue_map[ring]; +} + +static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, + int ring, struct amdgpu_ring *pring) +{ + if (WARN_ON(mapper->queue_map[ring])) { + DRM_ERROR("Unexpected ring remap\n"); + return -EINVAL; + } + + mapper->queue_map[ring] = pring; + + return 0; +} + +static int amdgpu_identity_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int ring, + struct amdgpu_ring **out_ring) +{ + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + *out_ring = &adev->gfx.gfx_ring[ring]; + break; + case AMDGPU_HW_IP_COMPUTE: + *out_ring = &adev->gfx.compute_ring[ring]; + break; + case AMDGPU_HW_IP_DMA: + *out_ring = &adev->sdma.instance[ring].ring; + break; + case AMDGPU_HW_IP_UVD: + *out_ring = &adev->uvd.ring; + break; + case AMDGPU_HW_IP_VCE: + *out_ring = &adev->vce.ring[ring]; + break; + case AMDGPU_HW_IP_UVD_ENC: + *out_ring = &adev->uvd.ring_enc[ring]; + break; + case AMDGPU_HW_IP_VCN_DEC: + *out_ring = &adev->vcn.ring_dec; + break; + case AMDGPU_HW_IP_VCN_ENC: + *out_ring = &adev->vcn.ring_enc[ring]; + break; + default: + *out_ring = NULL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + return -EINVAL; + } + + return amdgpu_update_cached_map(mapper, ring, *out_ring); +} + +static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) +{ + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + return AMDGPU_RING_TYPE_GFX; + case AMDGPU_HW_IP_COMPUTE: + return AMDGPU_RING_TYPE_COMPUTE; + case AMDGPU_HW_IP_DMA: + return AMDGPU_RING_TYPE_SDMA; + case AMDGPU_HW_IP_UVD: + return AMDGPU_RING_TYPE_UVD; + case AMDGPU_HW_IP_VCE: + return AMDGPU_RING_TYPE_VCE; + default: + DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); + return -1; + } +} + +static int amdgpu_lru_map(struct amdgpu_device *adev, + struct amdgpu_queue_mapper *mapper, + int user_ring, + struct amdgpu_ring **out_ring) +{ + int r, i, j; + int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); + int ring_blacklist[AMDGPU_MAX_RINGS]; + struct amdgpu_ring *ring; + + /* 0 is a valid ring index, so initialize to -1 */ + memset(ring_blacklist, 0xff, sizeof(ring_blacklist)); + + for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) { + ring = mapper->queue_map[i]; + if (ring) + ring_blacklist[j++] = ring->idx; + } + + r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, + j, out_ring); + if (r) + return r; + + return amdgpu_update_cached_map(mapper, user_ring, *out_ring); +} + +/** + * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct + * + * @adev: 
amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * Initialize the selected @mgr (all asics). + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_init(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + int i, r; + + if (!adev || !mgr) + return -EINVAL; + + memset(mgr, 0, sizeof(*mgr)); + + for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { + r = amdgpu_queue_mapper_init(&mgr->mapper[i], i); + if (r) + return r; + } + + return 0; +} + +/** + * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * + * De-initialize the selected @mgr (all asics). + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr) +{ + return 0; +} + +/** + * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring + * + * @adev: amdgpu_device pointer + * @mgr: amdgpu_queue_mgr structure holding queue information + * @hw_ip: HW IP enum + * @instance: HW instance + * @ring: user ring id + * @out_ring: pointer to mapped amdgpu_ring + * + * Map a userspace ring id to an appropriate kernel ring. Different + * policies are configurable at a HW IP level. + * + * Returns 0 on success, error on failure. + */ +int amdgpu_queue_mgr_map(struct amdgpu_device *adev, + struct amdgpu_queue_mgr *mgr, + int hw_ip, int instance, int ring, + struct amdgpu_ring **out_ring) +{ + int r, ip_num_rings; + struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip]; + + if (!adev || !mgr || !out_ring) + return -EINVAL; + + if (hw_ip >= AMDGPU_MAX_IP_NUM) + return -EINVAL; + + if (ring >= AMDGPU_MAX_RINGS) + return -EINVAL; + + /* Right now all IPs have only one instance, with multiple rings. 
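(A hypothetical caller sketch, not from the patch: this is how a command-submission path could use amdgpu_queue_mgr_map() to translate the ring id supplied by userspace; where the manager itself is stored is assumed, not specified here.)

static int example_resolve_user_ring(struct amdgpu_device *adev,
				     struct amdgpu_queue_mgr *mgr,
				     int hw_ip, int instance, int user_ring,
				     struct amdgpu_ring **out_ring)
{
	/* identity-mapped IPs hand back gfx/uvd/vce rings directly; compute
	 * and DMA go through the LRU policy below */
	return amdgpu_queue_mgr_map(adev, mgr, hw_ip, instance,
				    user_ring, out_ring);
}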
*/ + if (instance != 0) { + DRM_ERROR("invalid ip instance: %d\n", instance); + return -EINVAL; + } + + switch (hw_ip) { + case AMDGPU_HW_IP_GFX: + ip_num_rings = adev->gfx.num_gfx_rings; + break; + case AMDGPU_HW_IP_COMPUTE: + ip_num_rings = adev->gfx.num_compute_rings; + break; + case AMDGPU_HW_IP_DMA: + ip_num_rings = adev->sdma.num_instances; + break; + case AMDGPU_HW_IP_UVD: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCE: + ip_num_rings = adev->vce.num_rings; + break; + case AMDGPU_HW_IP_UVD_ENC: + ip_num_rings = adev->uvd.num_enc_rings; + break; + case AMDGPU_HW_IP_VCN_DEC: + ip_num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_ENC: + ip_num_rings = adev->vcn.num_enc_rings; + break; + default: + DRM_ERROR("unknown ip type: %d\n", hw_ip); + return -EINVAL; + } + + if (ring >= ip_num_rings) { + DRM_ERROR("Ring index:%d exceeds maximum:%d for ip:%d\n", + ring, ip_num_rings, hw_ip); + return -EINVAL; + } + + mutex_lock(&mapper->lock); + + *out_ring = amdgpu_get_cached_map(mapper, ring); + if (*out_ring) { + /* cache hit */ + r = 0; + goto out_unlock; + } + + switch (mapper->hw_ip) { + case AMDGPU_HW_IP_GFX: + case AMDGPU_HW_IP_UVD: + case AMDGPU_HW_IP_VCE: + case AMDGPU_HW_IP_UVD_ENC: + case AMDGPU_HW_IP_VCN_DEC: + case AMDGPU_HW_IP_VCN_ENC: + r = amdgpu_identity_map(adev, mapper, ring, out_ring); + break; + case AMDGPU_HW_IP_DMA: + case AMDGPU_HW_IP_COMPUTE: + r = amdgpu_lru_map(adev, mapper, ring, out_ring); + break; + default: + *out_ring = NULL; + r = -EINVAL; + DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); + } + +out_unlock: + mutex_unlock(&mapper->lock); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 6a85db0c0bc3..75165e07b1cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -135,6 +135,8 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); + + amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -253,10 +255,13 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + INIT_LIST_HEAD(&ring->lru_list); + amdgpu_ring_lru_touch(adev, ring); if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } + return 0; } @@ -294,6 +299,84 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } +static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + /* list_move_tail handles the case where ring isn't part of the list */ + list_move_tail(&ring->lru_list, &adev->ring_lru_list); +} + +static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, + int *blacklist, int num_blacklist) +{ + int i; + + for (i = 0; i < num_blacklist; i++) { + if (ring->idx == blacklist[i]) + return true; + } + + return false; +} + +/** + * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block + * + * @adev: amdgpu_device pointer + * @type: amdgpu_ring_type enum + * @blacklist: blacklisted ring ids array + * @num_blacklist: number of entries in @blacklist + * @ring: output ring + * + * Retrieve the amdgpu_ring structure for the least recently used ring of + * a specific IP block (all asics). + * Returns 0 on success, error on failure. 
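(Another illustrative-only fragment, under the same caveats: picking the least recently used compute ring while excluding ring ids this client already has mapped.)

static int example_pick_lru_compute_ring(struct amdgpu_device *adev,
					 int *already_mapped, int num_mapped,
					 struct amdgpu_ring **ring)
{
	/* the blacklist keeps one client from being handed the same ring twice */
	return amdgpu_ring_lru_get(adev, AMDGPU_RING_TYPE_COMPUTE,
				   already_mapped, num_mapped, ring);
}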
+ */ +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring **ring) +{ + struct amdgpu_ring *entry; + + /* List is sorted in LRU order, find first entry corresponding + * to the desired HW IP */ + *ring = NULL; + spin_lock(&adev->ring_lru_list_lock); + list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { + if (entry->funcs->type != type) + continue; + + if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) + continue; + + *ring = entry; + amdgpu_ring_lru_touch_locked(adev, *ring); + break; + } + spin_unlock(&adev->ring_lru_list_lock); + + if (!*ring) { + DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); + return -EINVAL; + } + + return 0; +} + +/** + * amdgpu_ring_lru_touch - mark a ring as recently being used + * + * @adev: amdgpu_device pointer + * @ring: ring to touch + * + * Move @ring to the tail of the lru list + */ +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + spin_lock(&adev->ring_lru_list_lock); + amdgpu_ring_lru_touch_locked(adev, ring); + spin_unlock(&adev->ring_lru_list_lock); +} + /* * Debugfs info */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 944443c5b90a..bc8dec992f73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -47,7 +47,9 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_UVD, AMDGPU_RING_TYPE_VCE, AMDGPU_RING_TYPE_KIQ, - AMDGPU_RING_TYPE_UVD_ENC + AMDGPU_RING_TYPE_UVD_ENC, + AMDGPU_RING_TYPE_VCN_DEC, + AMDGPU_RING_TYPE_VCN_ENC }; struct amdgpu_device; @@ -76,6 +78,7 @@ struct amdgpu_fence_driver { int amdgpu_fence_driver_init(struct amdgpu_device *adev); void amdgpu_fence_driver_fini(struct amdgpu_device *adev); void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev); +void amdgpu_fence_driver_force_completion_ring(struct amdgpu_ring *ring); int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring, unsigned num_hw_submission); @@ -130,6 +133,7 @@ struct amdgpu_ring_funcs { int (*test_ib)(struct amdgpu_ring *ring, long timeout); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + void (*insert_start)(struct amdgpu_ring *ring); void (*insert_end)(struct amdgpu_ring *ring); /* pad the indirect buffer to the necessary number of dw */ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); @@ -142,6 +146,7 @@ struct amdgpu_ring_funcs { void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); + void (*emit_tmz)(struct amdgpu_ring *ring, bool start); }; struct amdgpu_ring { @@ -149,6 +154,7 @@ struct amdgpu_ring { const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct amd_gpu_scheduler sched; + struct list_head lru_list; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; @@ -180,6 +186,7 @@ struct amdgpu_ring { u64 cond_exe_gpu_addr; volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; + bool has_compute_vm_bug; #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -194,6 +201,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); +int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, int *blacklist, + int num_blacklist, struct amdgpu_ring 
**ring); +void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { int i = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index ed814e6d0207..a6899180b265 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -298,6 +298,25 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) return NULL; } +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr) +{ + struct amdgpu_sync_entry *e; + struct hlist_node *tmp; + int i, r; + + hash_for_each_safe(sync->fences, i, tmp, e, node) { + r = dma_fence_wait(e->fence, intr); + if (r) + return r; + + hash_del(&e->node); + dma_fence_put(e->fence); + kmem_cache_free(amdgpu_sync_slab, e); + } + + return 0; +} + /** * amdgpu_sync_free - free the sync object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 605be266e07f..dc7687993317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -49,6 +49,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); +int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); int amdgpu_sync_init(void); void amdgpu_sync_fini(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b5fa003c1341..c9b131b13ef7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) return r; } + spin_lock(&gtt->adev->gtt_list_lock); flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, @@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) if (r) { DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", ttm->num_pages, gtt->offset); - return r; + goto error_gart_bind; } - spin_lock(&gtt->adev->gtt_list_lock); + list_add_tail(&gtt->list, &gtt->adev->gtt_list); +error_gart_bind: spin_unlock(&gtt->adev->gtt_list_lock); - return 0; + return r; } int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) @@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + int r; if (gtt->userptr) amdgpu_ttm_tt_unpin_userptr(ttm); @@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) return 0; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ - if (gtt->adev->gart.ready) - amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); - spin_lock(&gtt->adev->gtt_list_lock); + r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages); + if (r) { + DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", + gtt->ttm.ttm.num_pages, gtt->offset); + goto error_unbind; + } list_del_init(&gtt->list); +error_unbind: spin_unlock(&gtt->adev->gtt_list_lock); - - return 0; + return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) @@ -1115,7 +1121,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Change the size here instead of the init above so only lpfn is 
affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); - r = amdgpu_bo_create(adev, 256 * 1024, PAGE_SIZE, true, + r = amdgpu_bo_create(adev, adev->mc.stolen_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, @@ -1462,6 +1468,9 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf, if (size & 0x3 || *pos & 0x3) return -EINVAL; + if (*pos >= adev->mc.mc_vram_size) + return -ENXIO; + while (size) { unsigned long flags; uint32_t value; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index dfd1c98efa7c..4f50eeb65855 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -197,6 +197,27 @@ void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr) } } +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr) +{ + uint16_t version_major = le16_to_cpu(hdr->header_version_major); + uint16_t version_minor = le16_to_cpu(hdr->header_version_minor); + + DRM_DEBUG("GPU_INFO\n"); + amdgpu_ucode_print_common_hdr(hdr); + + if (version_major == 1) { + const struct gpu_info_firmware_header_v1_0 *gpu_info_hdr = + container_of(hdr, struct gpu_info_firmware_header_v1_0, header); + + DRM_DEBUG("version_major: %u\n", + le16_to_cpu(gpu_info_hdr->version_major)); + DRM_DEBUG("version_minor: %u\n", + le16_to_cpu(gpu_info_hdr->version_minor)); + } else { + DRM_ERROR("Unknown gpu_info ucode version: %u.%u\n", version_major, version_minor); + } +} + int amdgpu_ucode_validate(const struct firmware *fw) { const struct common_firmware_header *hdr = @@ -253,6 +274,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; + case CHIP_RAVEN: +#if 0 + if (!load_type) + return AMDGPU_FW_LOAD_DIRECT; + else + return AMDGPU_FW_LOAD_PSP; +#else + return AMDGPU_FW_LOAD_DIRECT; +#endif default: DRM_ERROR("Unknown firmware load type\n"); } @@ -349,7 +379,8 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, amdgpu_sriov_vf(adev) ? 
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - 0, NULL, NULL, bo); + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, bo); if (err) { dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); goto failed; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 758f03a1770d..30b5500dc152 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -113,6 +113,32 @@ struct sdma_firmware_header_v1_1 { uint32_t digest_size; }; +/* gpu info payload */ +struct gpu_info_firmware_v1_0 { + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; +}; + +/* version_major=1, version_minor=0 */ +struct gpu_info_firmware_header_v1_0 { + struct common_firmware_header header; + uint16_t version_major; /* version */ + uint16_t version_minor; /* version */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; @@ -124,6 +150,7 @@ union amdgpu_firmware_header { struct rlc_firmware_header_v2_0 rlc_v2_0; struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; + struct gpu_info_firmware_header_v1_0 gpu_info; uint8_t raw[0x100]; }; @@ -184,6 +211,7 @@ void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); +void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 735c38d7db0d..b692ad402252 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -165,35 +165,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) adev->vce.fw_version = ((version_major << 24) | (version_minor << 16) | (binary_id << 8)); - /* allocate firmware, stack and heap BO */ - - r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, &adev->vce.vcpu_bo); + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vce.vcpu_bo, + &adev->vce.gpu_addr, &adev->vce.cpu_addr); if (r) { dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r); return r; } - r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r); - return r; - } - - r = amdgpu_bo_pin(adev->vce.vcpu_bo, AMDGPU_GEM_DOMAIN_VRAM, - &adev->vce.gpu_addr); - amdgpu_bo_unreserve(adev->vce.vcpu_bo); - if (r) { - amdgpu_bo_unref(&adev->vce.vcpu_bo); - dev_err(adev->dev, "(%d) VCE bo pin failed\n", r); - return r; - } - - ring = &adev->vce.ring[0]; rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; r = 
amd_sched_entity_init(&ring->sched, &adev->vce.entity, @@ -230,7 +209,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); - amdgpu_bo_unref(&adev->vce.vcpu_bo); + amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr, + (void **)&adev->vce.cpu_addr); for (i = 0; i < adev->vce.num_rings; i++) amdgpu_ring_fini(&adev->vce.ring[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index 0a7f18c461e4..5ce54cde472d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -33,6 +33,8 @@ struct amdgpu_vce { struct amdgpu_bo *vcpu_bo; uint64_t gpu_addr; + void *cpu_addr; + void *saved_bo; unsigned fw_version; unsigned fb_version; atomic_t handles[AMDGPU_MAX_VCE_HANDLES]; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c new file mode 100644 index 000000000000..09190fadd228 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -0,0 +1,654 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + */ + +#include <linux/firmware.h> +#include <linux/module.h> +#include <drm/drmP.h> +#include <drm/drm.h> + +#include "amdgpu.h" +#include "amdgpu_pm.h" +#include "amdgpu_vcn.h" +#include "soc15d.h" +#include "soc15_common.h" + +#include "vega10/soc15ip.h" +#include "raven1/VCN/vcn_1_0_offset.h" + +/* 1 second timeout */ +#define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) + +/* Firmware Names */ +#define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" + +MODULE_FIRMWARE(FIRMWARE_RAVEN); + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work); + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; + unsigned long bo_size; + const char *fw_name; + const struct common_firmware_header *hdr; + unsigned version_major, version_minor, family_id; + int r; + + INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); + + switch (adev->asic_type) { + case CHIP_RAVEN: + fw_name = FIRMWARE_RAVEN; + break; + default: + return -EINVAL; + } + + r = request_firmware(&adev->vcn.fw, fw_name, adev->dev); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n", + fw_name); + return r; + } + + r = amdgpu_ucode_validate(adev->vcn.fw); + if (r) { + dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n", + fw_name); + release_firmware(adev->vcn.fw); + adev->vcn.fw = NULL; + return r; + } + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + family_id = le32_to_cpu(hdr->ucode_version) & 0xff; + version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; + version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; + DRM_INFO("Found VCN firmware Version: %hu.%hu Family ID: %hu\n", + version_major, version_minor, family_id); + + + bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8) + + AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE + + AMDGPU_VCN_SESSION_SIZE * 40; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, &adev->vcn.cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } + + ring = &adev->vcn.ring_dec; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_dec, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN dec run queue.\n"); + return r; + } + + ring = &adev->vcn.ring_enc[0]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vcn.entity_enc, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCN enc run queue.\n"); + return r; + } + + return 0; +} + +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +{ + int i; + + kfree(adev->vcn.saved_bo); + + amd_sched_entity_fini(&adev->vcn.ring_dec.sched, &adev->vcn.entity_dec); + + amd_sched_entity_fini(&adev->vcn.ring_enc[0].sched, &adev->vcn.entity_enc); + + amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo, + &adev->vcn.gpu_addr, + (void **)&adev->vcn.cpu_addr); + + amdgpu_ring_fini(&adev->vcn.ring_dec); + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) + amdgpu_ring_fini(&adev->vcn.ring_enc[i]); + + release_firmware(adev->vcn.fw); + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return 0; + + cancel_delayed_work_sync(&adev->vcn.idle_work); + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + adev->vcn.saved_bo = kmalloc(size, 
GFP_KERNEL); + if (!adev->vcn.saved_bo) + return -ENOMEM; + + memcpy_fromio(adev->vcn.saved_bo, ptr, size); + + return 0; +} + +int amdgpu_vcn_resume(struct amdgpu_device *adev) +{ + unsigned size; + void *ptr; + + if (adev->vcn.vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.vcpu_bo); + ptr = adev->vcn.cpu_addr; + + if (adev->vcn.saved_bo != NULL) { + memcpy_toio(ptr, adev->vcn.saved_bo, size); + kfree(adev->vcn.saved_bo); + adev->vcn.saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned offset; + + hdr = (const struct common_firmware_header *)adev->vcn.fw->data; + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); + memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); + memset_io(ptr, 0, size); + } + + return 0; +} + +static void amdgpu_vcn_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, vcn.idle_work.work); + unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); + + if (fences == 0) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, false); + } else { + amdgpu_asic_set_uvd_clocks(adev, 0, 0); + } + } else { + schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + } +} + +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + + if (set_clocks) { + if (adev->pm.dpm_enabled) { + amdgpu_dpm_enable_uvd(adev, true); + } else { + amdgpu_asic_set_uvd_clocks(adev, 53300, 40000); + } + } +} + +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) +{ + schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); +} + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t tmp = 0; + unsigned i; + int r; + + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + r = amdgpu_ring_alloc(ring, 3); + if (r) { + DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + amdgpu_ring_write(ring, 0xDEADBEEF); + amdgpu_ring_commit(ring); + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", + ring->idx, tmp); + r = -EINVAL; + } + return r; +} + +static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + bool direct, struct dma_fence **fence) +{ + struct ttm_validate_buffer tv; + struct ww_acquire_ctx ticket; + struct list_head head; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + struct amdgpu_device *adev = ring->adev; + uint64_t addr; + int i, r; + + memset(&tv, 0, sizeof(tv)); + tv.bo = &bo->tbo; + + INIT_LIST_HEAD(&head); + list_add(&tv.head, &head); + + r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL); + if (r) + return r; + + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + if (r) + goto err; + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) + goto err; + + ib = &job->ibs[0]; + addr = amdgpu_bo_gpu_offset(bo); + ib->ptr[0] = 
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0); + ib->ptr[1] = addr; + ib->ptr[2] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0); + ib->ptr[3] = addr >> 32; + ib->ptr[4] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0); + ib->ptr[5] = 0; + for (i = 6; i < 16; i += 2) { + ib->ptr[i] = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0); + ib->ptr[i+1] = 0; + } + ib->length_dw = 16; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err_free; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &adev->vcn.entity_dec, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; + } + + ttm_eu_fence_buffer_objects(&ticket, &head, f); + + if (fence) + *fence = dma_fence_get(f); + amdgpu_bo_unref(&bo); + dma_fence_put(f); + + return 0; + +err_free: + amdgpu_job_free(job); + +err: + ttm_eu_backoff_reservation(&ticket, &head); + return r; +} + +static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000038); + msg[2] = cpu_to_le32(0x00000001); + msg[3] = cpu_to_le32(0x00000000); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + msg[6] = cpu_to_le32(0x00000001); + msg[7] = cpu_to_le32(0x00000028); + msg[8] = cpu_to_le32(0x00000010); + msg[9] = cpu_to_le32(0x00000000); + msg[10] = cpu_to_le32(0x00000007); + msg[11] = cpu_to_le32(0x00000000); + msg[12] = cpu_to_le32(0x00000780); + msg[13] = cpu_to_le32(0x00000440); + for (i = 14; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, true, fence); +} + +static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + bool direct, struct dma_fence **fence) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_bo *bo; + uint32_t *msg; + int r, i; + + r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, + NULL, NULL, &bo); + if (r) + return r; + + r = amdgpu_bo_reserve(bo, false); + if (r) { + amdgpu_bo_unref(&bo); + return r; + } + + r = amdgpu_bo_kmap(bo, (void **)&msg); + if (r) { + amdgpu_bo_unreserve(bo); + amdgpu_bo_unref(&bo); + return r; + } + + msg[0] = cpu_to_le32(0x00000028); + msg[1] = cpu_to_le32(0x00000018); + msg[2] = cpu_to_le32(0x00000000); + msg[3] = cpu_to_le32(0x00000002); + msg[4] = cpu_to_le32(handle); + msg[5] = cpu_to_le32(0x00000000); + for (i = 6; i < 1024; ++i) + msg[i] = cpu_to_le32(0x0); + + amdgpu_bo_kunmap(bo); + amdgpu_bo_unreserve(bo); + + return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence); +} + +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence; + long r; + + r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto 
error; + } + + r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, true, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } + + dma_fence_put(fence); + +error: + return r; +} + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t rptr = amdgpu_ring_get_rptr(ring); + unsigned i; + int r; + + r = amdgpu_ring_alloc(ring, 16); + if (r) { + DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", + ring->idx, r); + return r; + } + amdgpu_ring_write(ring, VCN_ENC_CMD_END); + amdgpu_ring_commit(ring); + + for (i = 0; i < adev->usec_timeout; i++) { + if (amdgpu_ring_get_rptr(ring) != rptr) + break; + DRM_UDELAY(1); + } + + if (i < adev->usec_timeout) { + DRM_INFO("ring test on %d succeeded in %d usecs\n", + ring->idx, i); + } else { + DRM_ERROR("amdgpu: ring %d test failed\n", + ring->idx); + r = -ETIMEDOUT; + } + + return r; +} + +static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; /* session info */ + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; /* task info */ + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) +{ + const unsigned ib_size_dw = 16; + struct amdgpu_job *job; + struct amdgpu_ib *ib; + struct dma_fence *f = NULL; + uint64_t dummy; + int i, r; + + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) + return r; + + ib = &job->ibs[0]; + dummy = ib->gpu_addr + 1024; + + ib->length_dw = 0; + ib->ptr[ib->length_dw++] = 0x00000018; + ib->ptr[ib->length_dw++] = 0x00000001; + ib->ptr[ib->length_dw++] = handle; + ib->ptr[ib->length_dw++] = upper_32_bits(dummy); + ib->ptr[ib->length_dw++] = dummy; + ib->ptr[ib->length_dw++] = 0x0000000b; + + ib->ptr[ib->length_dw++] = 0x00000014; + ib->ptr[ib->length_dw++] = 0x00000002; + ib->ptr[ib->length_dw++] = 0x0000001c; + ib->ptr[ib->length_dw++] = 0x00000000; + ib->ptr[ib->length_dw++] = 0x00000000; + + ib->ptr[ib->length_dw++] = 0x00000008; + ib->ptr[ib->length_dw++] = 0x08000002; /* op close 
session */ + + for (i = ib->length_dw; i < ib_size_dw; ++i) + ib->ptr[i] = 0x0; + + r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f); + job->fence = dma_fence_get(f); + if (r) + goto err; + + amdgpu_job_free(job); + if (fence) + *fence = dma_fence_get(f); + dma_fence_put(f); + + return 0; + +err: + amdgpu_job_free(job); + return r; +} + +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) +{ + struct dma_fence *fence = NULL; + long r; + + r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); + if (r) { + DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + goto error; + } + + r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); + if (r) { + DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + goto error; + } + + r = dma_fence_wait_timeout(fence, false, timeout); + if (r == 0) { + DRM_ERROR("amdgpu: IB test timed out.\n"); + r = -ETIMEDOUT; + } else if (r < 0) { + DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); + } else { + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; + } +error: + dma_fence_put(fence); + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h new file mode 100644 index 000000000000..d50ba0657854 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -0,0 +1,77 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __AMDGPU_VCN_H__ +#define __AMDGPU_VCN_H__ + +#define AMDGPU_VCN_STACK_SIZE (200*1024) +#define AMDGPU_VCN_HEAP_SIZE (256*1024) +#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_FIRMWARE_OFFSET 256 +#define AMDGPU_VCN_MAX_ENC_RINGS 3 + +#define VCN_DEC_CMD_FENCE 0x00000000 +#define VCN_DEC_CMD_TRAP 0x00000001 +#define VCN_DEC_CMD_WRITE_REG 0x00000004 +#define VCN_DEC_CMD_REG_READ_COND_WAIT 0x00000006 +#define VCN_DEC_CMD_PACKET_START 0x0000000a +#define VCN_DEC_CMD_PACKET_END 0x0000000b + +#define VCN_ENC_CMD_NO_OP 0x00000000 +#define VCN_ENC_CMD_END 0x00000001 +#define VCN_ENC_CMD_IB 0x00000002 +#define VCN_ENC_CMD_FENCE 0x00000003 +#define VCN_ENC_CMD_TRAP 0x00000004 +#define VCN_ENC_CMD_REG_WRITE 0x0000000b +#define VCN_ENC_CMD_REG_WAIT 0x0000000c + +struct amdgpu_vcn { + struct amdgpu_bo *vcpu_bo; + void *cpu_addr; + uint64_t gpu_addr; + unsigned fw_version; + void *saved_bo; + struct delayed_work idle_work; + const struct firmware *fw; /* VCN firmware */ + struct amdgpu_ring ring_dec; + struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS]; + struct amdgpu_irq_src irq; + struct amd_sched_entity entity_dec; + struct amd_sched_entity entity_enc; + unsigned num_enc_rings; +}; + +int amdgpu_vcn_sw_init(struct amdgpu_device *adev); +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); +int amdgpu_vcn_suspend(struct amdgpu_device *adev); +int amdgpu_vcn_resume(struct amdgpu_device *adev); +void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); + +int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring); +int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 6bf5cea294f2..8a081e162d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" +#define MAX_KIQ_REG_WAIT 100000 int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -105,8 +106,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) /* enable virtual display */ adev->mode_info.num_crtc = 1; adev->enable_virtual_display = true; + adev->cg_flags = 0; + adev->pg_flags = 0; - mutex_init(&adev->virt.lock_kiq); mutex_init(&adev->virt.lock_reset); } @@ -120,17 +122,19 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); dma_fence_put(f); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return ~0; + } val = adev->wb.wb[adev->virt.reg_val_offs]; @@ -146,15 +150,15 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&adev->virt.lock_kiq); + mutex_lock(&kiq->ring_mutex); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); amdgpu_fence_emit(ring, &f); amdgpu_ring_commit(ring); - 
mutex_unlock(&adev->virt.lock_kiq); + mutex_unlock(&kiq->ring_mutex); - r = dma_fence_wait(f, false); - if (r) + r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + if (r < 1) DRM_ERROR("wait for kiq fence error: %ld.\n", r); dma_fence_put(f); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index a8ed162cc0bc..9e1062edb76e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -52,7 +52,6 @@ struct amdgpu_virt { uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; - struct mutex lock_kiq; struct mutex lock_reset; struct amdgpu_irq_src ack_irq; struct amdgpu_irq_src rcv_irq; @@ -97,7 +96,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); -int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, bool voluntary); +int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 83c172a6e938..5795f81369f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -79,6 +79,12 @@ struct amdgpu_pte_update_params { uint64_t flags); /* indicate update pt or its shadow */ bool shadow; + /* The next two are used during VM update by CPU: + * pages_addr - DMA addresses to use for mapping, + * kptr - kernel pointer of the PD/PT BO that needs to be updated + */ + dma_addr_t *pages_addr; + void *kptr; }; /* Helper to disable partial resident texture feature from a fence callback */ @@ -275,6 +281,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, adev->vm_manager.block_size; unsigned pt_idx, from, to; int r; + u64 flags; if (!parent->entries) { unsigned num_entries = amdgpu_vm_num_entries(adev, level); @@ -300,6 +307,14 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, saddr = saddr & ((1 << shift) - 1); eaddr = eaddr & ((1 << shift) - 1); + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + /* walk over the address space and allocate the page tables */ for (pt_idx = from; pt_idx <= to; ++pt_idx) { struct reservation_object *resv = vm->root.bo->tbo.resv; @@ -311,10 +326,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, amdgpu_vm_bo_size(adev, level), AMDGPU_GPU_PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, resv, &pt); if (r) return r; @@ -392,6 +404,71 @@ static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev, atomic_read(&adev->gpu_reset_counter); } +static bool amdgpu_vm_reserved_vmid_ready(struct amdgpu_vm *vm, unsigned vmhub) +{ + return !!vm->reserved_vmid[vmhub]; +} + +/* id_mgr->lock must be held */ +static int amdgpu_vm_grab_reserved_vmid_locked(struct amdgpu_vm *vm, + struct amdgpu_ring *ring, + struct amdgpu_sync *sync, + struct dma_fence *fence, + struct amdgpu_job *job) +{ + struct
amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + uint64_t fence_context = adev->fence_context + ring->idx; + struct amdgpu_vm_id *id = vm->reserved_vmid[vmhub]; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct dma_fence *updates = sync->last_vm_update; + int r = 0; + struct dma_fence *flushed, *tmp; + bool needs_flush = false; + + flushed = id->flushed_updates; + if ((amdgpu_vm_had_gpu_reset(adev, id)) || + (atomic64_read(&id->owner) != vm->client_id) || + (job->vm_pd_addr != id->pd_gpu_addr) || + (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) || + (!id->last_flush || (id->last_flush->context != fence_context && + !dma_fence_is_signaled(id->last_flush)))) { + needs_flush = true; + /* to prevent one context from being starved by another */ + id->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&id->active, ring); + if (tmp) { + r = amdgpu_sync_fence(adev, sync, tmp); + return r; + } + } + + /* Good, we can use this VMID. Remember this submission as + * user of the VMID. + */ + r = amdgpu_sync_fence(ring->adev, &id->active, fence); + if (r) + goto out; + + if (updates && (!flushed || updates->context != flushed->context || + dma_fence_is_later(updates, flushed))) { + dma_fence_put(id->flushed_updates); + id->flushed_updates = dma_fence_get(updates); + } + id->pd_gpu_addr = job->vm_pd_addr; + atomic64_set(&id->owner, vm->client_id); + job->vm_needs_flush = needs_flush; + if (needs_flush) { + dma_fence_put(id->last_flush); + id->last_flush = NULL; + } + job->vm_id = id - id_mgr->ids; + trace_amdgpu_vm_grab_id(vm, ring, job); +out: + return r; +} + /** * amdgpu_vm_grab_id - allocate the next free VMID * @@ -416,12 +493,17 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, unsigned i; int r = 0; + mutex_lock(&id_mgr->lock); + if (amdgpu_vm_reserved_vmid_ready(vm, vmhub)) { + r = amdgpu_vm_grab_reserved_vmid_locked(vm, ring, sync, fence, job); + mutex_unlock(&id_mgr->lock); + return r; + } fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); - if (!fences) + if (!fences) { + mutex_unlock(&id_mgr->lock); return -ENOMEM; - - mutex_lock(&id_mgr->lock); - + } /* Check if we have an idle VMID */ i = 0; list_for_each_entry(idle, &id_mgr->ids_lru, list) { @@ -522,7 +604,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id->pd_gpu_addr = job->vm_pd_addr; dma_fence_put(id->flushed_updates); id->flushed_updates = dma_fence_get(updates); - id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); atomic64_set(&id->owner, vm->client_id); needs_flush: @@ -541,40 +622,118 @@ error: return r; } -static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring) +static void amdgpu_vm_free_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) { + list_add(&vm->reserved_vmid[vmhub]->list, + &id_mgr->ids_lru); + vm->reserved_vmid[vmhub] = NULL; + atomic_dec(&id_mgr->reserved_vmid_num); + } + mutex_unlock(&id_mgr->lock); +} + +static int amdgpu_vm_alloc_reserved_vmid(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + unsigned vmhub) +{ + struct amdgpu_vm_id_manager *id_mgr; + struct amdgpu_vm_id *idle; + int r = 0; + + id_mgr = &adev->vm_manager.id_mgr[vmhub]; + mutex_lock(&id_mgr->lock); + if (vm->reserved_vmid[vmhub]) + goto unlock; + if
(atomic_inc_return(&id_mgr->reserved_vmid_num) > + AMDGPU_VM_MAX_RESERVED_VMID) { + DRM_ERROR("Over the limit of reserved VMIDs\n"); + atomic_dec(&id_mgr->reserved_vmid_num); + r = -EINVAL; + goto unlock; + } + /* Select the first entry VMID */ + idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vm_id, list); + list_del_init(&idle->list); + vm->reserved_vmid[vmhub] = idle; + mutex_unlock(&id_mgr->lock); + + return 0; +unlock: + mutex_unlock(&id_mgr->lock); + return r; +} + +/** + * amdgpu_vm_check_compute_bug - check whether asic has compute vm bug + * + * @adev: amdgpu_device pointer + */ +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev) { - struct amdgpu_device *adev = ring->adev; const struct amdgpu_ip_block *ip_block; + bool has_compute_vm_bug; + struct amdgpu_ring *ring; + int i; - if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) - /* only compute rings */ - return false; + has_compute_vm_bug = false; ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); - if (!ip_block) - return false; + if (ip_block) { + /* Compute has a VM bug for GFX version <= 7. Compute has a VM bug for GFX 8 MEC firmware version < 673. */ + if (ip_block->version->major <= 7) + has_compute_vm_bug = true; + else if (ip_block->version->major == 8) + if (adev->gfx.mec_fw_version < 673) + has_compute_vm_bug = true; + } - if (ip_block->version->major <= 7) { - /* gfx7 has no workaround */ - return true; - } else if (ip_block->version->major == 8) { - if (adev->gfx.mec_fw_version >= 673) - /* gfx8 is fixed in MEC firmware 673 */ - return false; + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) + /* only compute rings */ + ring->has_compute_vm_bug = has_compute_vm_bug; else - return true; + ring->has_compute_vm_bug = false; } - return false; } -static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job) { - u64 addr = mc_addr; + struct amdgpu_device *adev = ring->adev; + unsigned vmhub = ring->funcs->vmhub; + struct amdgpu_vm_id_manager *id_mgr = &adev->vm_manager.id_mgr[vmhub]; + struct amdgpu_vm_id *id; + bool gds_switch_needed; + bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug; + + if (job->vm_id == 0) + return false; + id = &id_mgr->ids[job->vm_id]; + gds_switch_needed = ring->funcs->emit_gds_switch && ( + id->gds_base != job->gds_base || + id->gds_size != job->gds_size || + id->gws_base != job->gws_base || + id->gws_size != job->gws_size || + id->oa_base != job->oa_base || + id->oa_size != job->oa_size); - if (adev->gart.gart_funcs->adjust_mc_addr) - addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr); + if (amdgpu_vm_had_gpu_reset(adev, id)) + return true; - return addr; + return vm_flush_needed || gds_switch_needed; +} + +static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev) +{ + return (adev->mc.real_vram_size == adev->mc.visible_vram_size); } /** @@ -599,8 +758,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->gws_size != job->gws_size || id->oa_base != job->oa_base || id->oa_size != job->oa_size); - bool vm_flush_needed = job->vm_needs_flush || - amdgpu_vm_ring_has_compute_vm_bug(ring); + bool vm_flush_needed = job->vm_needs_flush; unsigned patch_offset = 0; int r; @@ -615,15 +773,11 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) if (ring->funcs->init_cond_exec) patch_offset = amdgpu_ring_init_cond_exec(ring); - if
(ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync) - amdgpu_ring_emit_pipeline_sync(ring); - if (ring->funcs->emit_vm_flush && vm_flush_needed) { - u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); struct dma_fence *fence; - trace_amdgpu_vm_flush(ring, job->vm_id, pd_addr); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); + trace_amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr); r = amdgpu_fence_emit(ring, &fence); if (r) @@ -632,6 +786,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) mutex_lock(&id_mgr->lock); dma_fence_put(id->last_flush); id->last_flush = fence; + id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter); mutex_unlock(&id_mgr->lock); } @@ -806,6 +961,53 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } +/** + * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU + * + * @params: see amdgpu_pte_update_params definition + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + for (i = 0; i < count; i++) { + value = params->pages_addr ? + amdgpu_vm_map_gart(params->pages_addr, addr) : + addr; + amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } + + /* Flush HDP */ + mb(); + amdgpu_gart_flush_gpu_tlb(params->adev, 0); +} + +static int amdgpu_vm_bo_wait(struct amdgpu_device *adev, struct amdgpu_bo *bo) +{ + struct amdgpu_sync sync; + int r; + + amdgpu_sync_create(&sync); + amdgpu_sync_resv(adev, &sync, bo->tbo.resv, AMDGPU_FENCE_OWNER_VM); + r = amdgpu_sync_wait(&sync, true); + amdgpu_sync_free(&sync); + + return r; +} + /* * amdgpu_vm_update_level - update a single level in the hierarchy * @@ -822,11 +1024,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, unsigned level) { struct amdgpu_bo *shadow; - struct amdgpu_ring *ring; - uint64_t pd_addr, shadow_addr; + struct amdgpu_ring *ring = NULL; + uint64_t pd_addr, shadow_addr = 0; uint32_t incr = amdgpu_vm_bo_size(adev, level + 1); uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0; - unsigned count = 0, pt_idx, ndw; + unsigned count = 0, pt_idx, ndw = 0; struct amdgpu_job *job; struct amdgpu_pte_update_params params; struct dma_fence *fence = NULL; @@ -835,34 +1037,54 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, if (!parent->entries) return 0; - ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - /* padding, etc. 
*/ - ndw = 64; + memset(&params, 0, sizeof(params)); + params.adev = adev; + shadow = parent->bo->shadow; - /* assume the worst case */ - ndw += parent->last_entry_used * 6; + WARN_ON(vm->use_cpu_for_update && shadow); + if (vm->use_cpu_for_update && !shadow) { + r = amdgpu_bo_kmap(parent->bo, (void **)&pd_addr); + if (r) + return r; + r = amdgpu_vm_bo_wait(adev, parent->bo); + if (unlikely(r)) { + amdgpu_bo_kunmap(parent->bo); + return r; + } + params.func = amdgpu_vm_cpu_set_ptes; + } else { + if (shadow) { + r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + if (r) + return r; + } + ring = container_of(vm->entity.sched, struct amdgpu_ring, + sched); - pd_addr = amdgpu_bo_gpu_offset(parent->bo); + /* padding, etc. */ + ndw = 64; - shadow = parent->bo->shadow; - if (shadow) { - r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem); + /* assume the worst case */ + ndw += parent->last_entry_used * 6; + + pd_addr = amdgpu_bo_gpu_offset(parent->bo); + + if (shadow) { + shadow_addr = amdgpu_bo_gpu_offset(shadow); + ndw *= 2; + } else { + shadow_addr = 0; + } + + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); if (r) return r; - shadow_addr = amdgpu_bo_gpu_offset(shadow); - ndw *= 2; - } else { - shadow_addr = 0; - } - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); - if (r) - return r; + params.ib = &job->ibs[0]; + params.func = amdgpu_vm_do_set_ptes; - memset(&params, 0, sizeof(params)); - params.adev = adev; - params.ib = &job->ibs[0]; /* walk over the address space and update the directory */ for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { @@ -882,6 +1104,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } pt = amdgpu_bo_gpu_offset(bo); + pt = amdgpu_gart_get_vm_pde(adev, pt); if (parent->entries[pt_idx].addr == pt) continue; @@ -893,19 +1116,16 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, (count == AMDGPU_VM_MAX_UPDATE_SIZE)) { if (count) { - uint64_t pt_addr = - amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (shadow) - amdgpu_vm_do_set_ptes(&params, - last_shadow, - pt_addr, count, - incr, - AMDGPU_PTE_VALID); - - amdgpu_vm_do_set_ptes(&params, last_pde, - pt_addr, count, incr, - AMDGPU_PTE_VALID); + params.func(&params, + last_shadow, + last_pt, count, + incr, + AMDGPU_PTE_VALID); + + params.func(&params, last_pde, + last_pt, count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -918,17 +1138,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, } if (count) { - uint64_t pt_addr = amdgpu_vm_adjust_mc_addr(adev, last_pt); - if (vm->root.bo->shadow) - amdgpu_vm_do_set_ptes(&params, last_shadow, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_shadow, last_pt, + count, incr, AMDGPU_PTE_VALID); - amdgpu_vm_do_set_ptes(&params, last_pde, pt_addr, - count, incr, AMDGPU_PTE_VALID); + params.func(&params, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); } - if (params.ib->length_dw == 0) { + if (params.func == amdgpu_vm_cpu_set_ptes) + amdgpu_bo_kunmap(parent->bo); + else if (params.ib->length_dw == 0) { amdgpu_job_free(job); } else { amdgpu_ring_pad_ib(ring, params.ib); @@ -972,6 +1192,32 @@ error_free: } /* + * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * + * @parent: parent PD + * + * Mark all PD levels as invalid after an error. + */ +static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent) +{ + unsigned pt_idx; + + /* + * Recurse into the subdirectories. This recursion is harmless because + * we only have a maximum of 5 layers.
+ */ + for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) { + struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; + + if (!entry->bo) + continue; + + entry->addr = ~0ULL; + amdgpu_vm_invalidate_level(entry); + } +} + +/* * amdgpu_vm_update_directories - make sure that all directories are valid * * @adev: amdgpu_device pointer @@ -983,7 +1229,13 @@ error_free: int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - return amdgpu_vm_update_level(adev, vm, &vm->root, 0); + int r; + + r = amdgpu_vm_update_level(adev, vm, &vm->root, 0); + if (r) + amdgpu_vm_invalidate_level(&vm->root); + + return r; } /** @@ -1023,58 +1275,37 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p, * @flags: mapping flags * * Update the page tables in the range @start - @end. + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; - uint64_t cur_pe_start, cur_nptes, cur_dst; - uint64_t addr; /* next GPU address to be updated */ + uint64_t addr, pe_start; struct amdgpu_bo *pt; - unsigned nptes; /* next number of ptes to be updated */ - uint64_t next_pe_start; - - /* initialize the variables */ - addr = start; - pt = amdgpu_vm_get_pt(params, addr); - if (!pt) { - pr_err("PT not found, aborting update_ptes\n"); - return; - } - - if (params->shadow) { - if (!pt->shadow) - return; - pt = pt->shadow; - } - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - cur_pe_start = amdgpu_bo_gpu_offset(pt); - cur_pe_start += (addr & mask) * 8; - cur_nptes = nptes; - cur_dst = dst; + unsigned nptes; + int r; + bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes); - /* for next ptb*/ - addr += nptes; - dst += nptes * AMDGPU_GPU_PAGE_SIZE; /* walk over the address space and update the page tables */ - while (addr < end) { + for (addr = start; addr < end; addr += nptes) { pt = amdgpu_vm_get_pt(params, addr); if (!pt) { pr_err("PT not found, aborting update_ptes\n"); - return; + return -EINVAL; } if (params->shadow) { + if (WARN_ONCE(use_cpu_update, + "CPU VM update doesn't support shadow pages")) + return 0; + + if (!pt->shadow) - return; + return 0; pt = pt->shadow; } @@ -1083,32 +1314,25 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, else nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - next_pe_start = amdgpu_bo_gpu_offset(pt); - next_pe_start += (addr & mask) * 8; + if (use_cpu_update) { + r = amdgpu_bo_kmap(pt, (void *)&pe_start); + if (r) + return r; + } else + pe_start = amdgpu_bo_gpu_offset(pt); - if ((cur_pe_start + 8 * cur_nptes) == next_pe_start && - ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) { - /* The next ptb is consecutive to current ptb. - * Don't call the update function now. - * Will update two ptbs together in future.
- */ - cur_nptes += nptes; - } else { - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + pe_start += (addr & mask) * 8; - cur_pe_start = next_pe_start; - cur_nptes = nptes; - cur_dst = dst; - } + params->func(params, pe_start, dst, nptes, + AMDGPU_GPU_PAGE_SIZE, flags); - /* for next ptb*/ - addr += nptes; dst += nptes * AMDGPU_GPU_PAGE_SIZE; + + if (use_cpu_update) + amdgpu_bo_kunmap(pt); } - params->func(params, cur_pe_start, cur_dst, cur_nptes, - AMDGPU_GPU_PAGE_SIZE, flags); + return 0; } /* @@ -1120,11 +1344,14 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, * @end: last PTE to handle * @dst: addr those PTEs should point to * @flags: hw mapping flags + * Returns 0 for success, -EINVAL for failure. */ -static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { + int r; + /** * The MC L1 TLB supports variable sized pages, based on a fragment * field in the PTE. When this field is set to a non-zero value, page @@ -1153,28 +1380,30 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, /* system pages are non continuously */ if (params->src || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) { - - amdgpu_vm_update_ptes(params, start, end, dst, flags); - return; - } + (frag_start >= frag_end)) + return amdgpu_vm_update_ptes(params, start, end, dst, flags); /* handle the 4K area at the beginning */ if (start != frag_start) { - amdgpu_vm_update_ptes(params, start, frag_start, - dst, flags); + r = amdgpu_vm_update_ptes(params, start, frag_start, + dst, flags); + if (r) + return r; dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE; } /* handle the area in the middle */ - amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, - flags | frag_flags); + r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst, + flags | frag_flags); + if (r) + return r; /* handle the 4K area at the end */ if (frag_end != end) { dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE; - amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); + r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags); } + return r; } /** @@ -1216,6 +1445,25 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.vm = vm; params.src = src; + if (vm->use_cpu_for_update) { + /* params.src is used as flag to indicate system Memory */ + if (pages_addr) + params.src = ~0; + + /* Wait for PT BOs to be free. PTs share the same resv. 
object * as the root PD BO + */ + r = amdgpu_vm_bo_wait(adev, vm->root.bo); + if (unlikely(r)) + return r; + + params.func = amdgpu_vm_cpu_set_ptes; + params.pages_addr = pages_addr; + params.shadow = false; + return amdgpu_vm_frag_ptes(&params, start, last + 1, + addr, flags); + } + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); /* sync to everything on unmapping */ @@ -1295,9 +1543,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; params.shadow = true; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + if (r) + goto error_free; params.shadow = false; - amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + r = amdgpu_vm_frag_ptes(&params, start, last + 1, addr, flags); + if (r) + goto error_free; amdgpu_ring_pad_ib(ring, params.ib); WARN_ON(params.ib->length_dw > ndw); @@ -2138,20 +2390,25 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size) * * @adev: amdgpu_device pointer * @vm: requested vm + * @vm_context: Indicates if it is a GFX or Compute context * * Init @vm fields. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT(adev) * 8); unsigned ring_instance; struct amdgpu_ring *ring; struct amd_sched_rq *rq; - int r; + int r, i; + u64 flags; vm->va = RB_ROOT; vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + vm->reserved_vmid[i] = NULL; spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->invalidated); INIT_LIST_HEAD(&vm->cleared); @@ -2168,14 +2425,29 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) return r; + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_COMPUTE); + else + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); + DRM_DEBUG_DRIVER("VM update mode is %s\n", + vm->use_cpu_for_update ?
"CPU" : "SDMA"); + WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)), + "CPU update of VM recommended only for large BAR system\n"); vm->last_dir_update = NULL; + flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_VRAM_CLEARED; + if (vm->use_cpu_for_update) + flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else + flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS | + AMDGPU_GEM_CREATE_SHADOW); + r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true, AMDGPU_GEM_DOMAIN_VRAM, - AMDGPU_GEM_CREATE_NO_CPU_ACCESS | - AMDGPU_GEM_CREATE_SHADOW | - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED, + flags, NULL, NULL, &vm->root.bo); if (r) goto error_free_sched_entity; @@ -2236,6 +2508,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + int i; amd_sched_entity_fini(vm->entity.sched, &vm->entity); @@ -2259,6 +2532,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_levels(&vm->root); dma_fence_put(vm->last_dir_update); + for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) + amdgpu_vm_free_reserved_vmid(adev, vm, i); } /** @@ -2278,6 +2553,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) mutex_init(&id_mgr->lock); INIT_LIST_HEAD(&id_mgr->ids_lru); + atomic_set(&id_mgr->reserved_vmid_num, 0); /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { @@ -2296,6 +2572,23 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) atomic64_set(&adev->vm_manager.client_counter, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); + + /* If not overridden by the user, by default, only in large BAR systems + * Compute VM tables will be updated by CPU + */ +#ifdef CONFIG_X86_64 + if (amdgpu_vm_update_mode == -1) { + if (amdgpu_vm_is_large_bar(adev)) + adev->vm_manager.vm_update_mode = + AMDGPU_VM_USE_CPU_FOR_COMPUTE; + else + adev->vm_manager.vm_update_mode = 0; + } else + adev->vm_manager.vm_update_mode = amdgpu_vm_update_mode; +#else + adev->vm_manager.vm_update_mode = 0; +#endif + } /** @@ -2323,3 +2616,28 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev) } } } + +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + union drm_amdgpu_vm *args = data; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_fpriv *fpriv = filp->driver_priv; + int r; + + switch (args->in.op) { + case AMDGPU_VM_OP_RESERVE_VMID: + /* current, we only have requirement to reserve vmid from gfxhub */ + r = amdgpu_vm_alloc_reserved_vmid(adev, &fpriv->vm, + AMDGPU_GFXHUB); + if (r) + return r; + break; + case AMDGPU_VM_OP_UNRESERVE_VMID: + amdgpu_vm_free_reserved_vmid(adev, &fpriv->vm, AMDGPU_GFXHUB); + break; + default: + return -EINVAL; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index e1d951ece433..936f158bc5ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -84,6 +84,16 @@ struct amdgpu_bo_list_entry; /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) +/* max vmids dedicated for process */ +#define AMDGPU_VM_MAX_RESERVED_VMID 1 + +#define AMDGPU_VM_CONTEXT_GFX 0 +#define AMDGPU_VM_CONTEXT_COMPUTE 1 + +/* See vm_update_mode */ +#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) +#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) + struct amdgpu_vm_pt { struct 
amdgpu_bo *bo; @@ -123,8 +133,13 @@ struct amdgpu_vm { /* client id */ u64 client_id; + /* dedicated to vm */ + struct amdgpu_vm_id *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* each VM will map on CSA */ struct amdgpu_bo_va *csa_bo_va; + + /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ + bool use_cpu_for_update; }; struct amdgpu_vm_id { @@ -152,6 +167,7 @@ struct amdgpu_vm_id_manager { unsigned num_ids; struct list_head ids_lru; struct amdgpu_vm_id ids[AMDGPU_NUM_VM]; + atomic_t reserved_vmid_num; }; struct amdgpu_vm_manager { @@ -168,8 +184,6 @@ struct amdgpu_vm_manager { uint32_t block_size; /* vram base address for page table entry */ u64 vram_base_offset; - /* is vm enabled? */ - bool enabled; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; @@ -181,11 +195,18 @@ struct amdgpu_vm_manager { /* partial resident texture handling */ spinlock_t prt_lock; atomic_t num_prt_users; + + /* controls how VM page tables are updated for Graphics and Compute. + * BIT0[= 0] Graphics updated by SDMA [= 1] by CPU + * BIT1[= 0] Compute updated by SDMA [= 1] by CPU + */ + int vm_update_mode; }; void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int vm_context); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, @@ -239,5 +260,9 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev, void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size); +int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, + struct amdgpu_job *job); +void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 6b2034533f68..37a499ab30eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -964,62 +964,62 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev, } static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = { - {mmGRBM_STATUS, false}, - {mmGB_ADDR_CONFIG, false}, - {mmMC_ARB_RAMCFG, false}, - {mmGB_TILE_MODE0, false}, - {mmGB_TILE_MODE1, false}, - {mmGB_TILE_MODE2, false}, - {mmGB_TILE_MODE3, false}, - {mmGB_TILE_MODE4, false}, - {mmGB_TILE_MODE5, false}, - {mmGB_TILE_MODE6, false}, - {mmGB_TILE_MODE7, false}, - {mmGB_TILE_MODE8, false}, - {mmGB_TILE_MODE9, false}, - {mmGB_TILE_MODE10, false}, - {mmGB_TILE_MODE11, false}, - {mmGB_TILE_MODE12, false}, - {mmGB_TILE_MODE13, false}, - {mmGB_TILE_MODE14, false}, - {mmGB_TILE_MODE15, false}, - {mmGB_TILE_MODE16, false}, - {mmGB_TILE_MODE17, false}, - {mmGB_TILE_MODE18, false}, - {mmGB_TILE_MODE19, false}, - {mmGB_TILE_MODE20, false}, - {mmGB_TILE_MODE21, false}, - {mmGB_TILE_MODE22, false}, - {mmGB_TILE_MODE23, false}, - {mmGB_TILE_MODE24, false}, - {mmGB_TILE_MODE25, false}, - {mmGB_TILE_MODE26, false}, - {mmGB_TILE_MODE27, false}, - {mmGB_TILE_MODE28, false}, - {mmGB_TILE_MODE29, false}, - {mmGB_TILE_MODE30, false}, - {mmGB_TILE_MODE31, false}, - {mmGB_MACROTILE_MODE0, false}, - {mmGB_MACROTILE_MODE1, false}, - {mmGB_MACROTILE_MODE2, false}, - 
{mmGB_MACROTILE_MODE3, false}, - {mmGB_MACROTILE_MODE4, false}, - {mmGB_MACROTILE_MODE5, false}, - {mmGB_MACROTILE_MODE6, false}, - {mmGB_MACROTILE_MODE7, false}, - {mmGB_MACROTILE_MODE8, false}, - {mmGB_MACROTILE_MODE9, false}, - {mmGB_MACROTILE_MODE10, false}, - {mmGB_MACROTILE_MODE11, false}, - {mmGB_MACROTILE_MODE12, false}, - {mmGB_MACROTILE_MODE13, false}, - {mmGB_MACROTILE_MODE14, false}, - {mmGB_MACROTILE_MODE15, false}, - {mmCC_RB_BACKEND_DISABLE, false, true}, - {mmGC_USER_RB_BACKEND_DISABLE, false, true}, - {mmGB_BACKEND_MAP, false, false}, - {mmPA_SC_RASTER_CONFIG, false, true}, - {mmPA_SC_RASTER_CONFIG_1, false, true}, + {mmGRBM_STATUS}, + {mmGB_ADDR_CONFIG}, + {mmMC_ARB_RAMCFG}, + {mmGB_TILE_MODE0}, + {mmGB_TILE_MODE1}, + {mmGB_TILE_MODE2}, + {mmGB_TILE_MODE3}, + {mmGB_TILE_MODE4}, + {mmGB_TILE_MODE5}, + {mmGB_TILE_MODE6}, + {mmGB_TILE_MODE7}, + {mmGB_TILE_MODE8}, + {mmGB_TILE_MODE9}, + {mmGB_TILE_MODE10}, + {mmGB_TILE_MODE11}, + {mmGB_TILE_MODE12}, + {mmGB_TILE_MODE13}, + {mmGB_TILE_MODE14}, + {mmGB_TILE_MODE15}, + {mmGB_TILE_MODE16}, + {mmGB_TILE_MODE17}, + {mmGB_TILE_MODE18}, + {mmGB_TILE_MODE19}, + {mmGB_TILE_MODE20}, + {mmGB_TILE_MODE21}, + {mmGB_TILE_MODE22}, + {mmGB_TILE_MODE23}, + {mmGB_TILE_MODE24}, + {mmGB_TILE_MODE25}, + {mmGB_TILE_MODE26}, + {mmGB_TILE_MODE27}, + {mmGB_TILE_MODE28}, + {mmGB_TILE_MODE29}, + {mmGB_TILE_MODE30}, + {mmGB_TILE_MODE31}, + {mmGB_MACROTILE_MODE0}, + {mmGB_MACROTILE_MODE1}, + {mmGB_MACROTILE_MODE2}, + {mmGB_MACROTILE_MODE3}, + {mmGB_MACROTILE_MODE4}, + {mmGB_MACROTILE_MODE5}, + {mmGB_MACROTILE_MODE6}, + {mmGB_MACROTILE_MODE7}, + {mmGB_MACROTILE_MODE8}, + {mmGB_MACROTILE_MODE9}, + {mmGB_MACROTILE_MODE10}, + {mmGB_MACROTILE_MODE11}, + {mmGB_MACROTILE_MODE12}, + {mmGB_MACROTILE_MODE13}, + {mmGB_MACROTILE_MODE14}, + {mmGB_MACROTILE_MODE15}, + {mmCC_RB_BACKEND_DISABLE, true}, + {mmGC_USER_RB_BACKEND_DISABLE, true}, + {mmGB_BACKEND_MAP, false}, + {mmPA_SC_RASTER_CONFIG, true}, + {mmPA_SC_RASTER_CONFIG_1, true}, }; static uint32_t cik_read_indexed_register(struct amdgpu_device *adev, @@ -1050,11 +1050,10 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, if (reg_offset != cik_allowed_read_registers[i].reg_offset) continue; - if (!cik_allowed_read_registers[i].untouched) - *value = cik_allowed_read_registers[i].grbm_indexed ? - cik_read_indexed_register(adev, se_num, - sh_num, reg_offset) : - RREG32(reg_offset); + *value = cik_allowed_read_registers[i].grbm_indexed ? 
+ cik_read_indexed_register(adev, se_num, + sh_num, reg_offset) : + RREG32(reg_offset); return 0; } return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index 3f3a25493327..786b5d02f44e 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -118,14 +118,27 @@ static const struct { static u32 dce_v6_0_audio_endpt_rreg(struct amdgpu_device *adev, u32 block_offset, u32 reg) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_rreg ----no impl!!!!\n"); - return 0; + unsigned long flags; + u32 r; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, reg); + r = RREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); + + return r; } static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev, u32 block_offset, u32 reg, u32 v) { - DRM_INFO("xxxx: dce_v6_0_audio_endpt_wreg ----no impl!!!!\n"); + unsigned long flags; + + spin_lock_irqsave(&adev->audio_endpt_idx_lock, flags); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_INDEX + block_offset, + reg | AZALIA_F0_CODEC_ENDPOINT_INDEX__AZALIA_ENDPOINT_REG_WRITE_EN_MASK); + WREG32(mmAZALIA_F0_CODEC_ENDPOINT_DATA + block_offset, v); + spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags); } static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc) @@ -501,21 +514,16 @@ static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) { - int num_crtc = 0; - switch (adev->asic_type) { case CHIP_TAHITI: case CHIP_PITCAIRN: case CHIP_VERDE: - num_crtc = 6; - break; + return 6; case CHIP_OLAND: - num_crtc = 2; - break; + return 2; default: - num_crtc = 0; + return 0; } - return num_crtc; } void dce_v6_0_disable_dce(struct amdgpu_device *adev) @@ -1222,17 +1230,17 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev) dce_v6_0_program_watermarks(adev, adev->mode_info.crtcs[i+1], lb_size, num_heads); } } -/* + static void dce_v6_0_audio_get_connected_pins(struct amdgpu_device *adev) { int i; - u32 offset, tmp; + u32 tmp; for (i = 0; i < adev->mode_info.audio.num_pins; i++) { - offset = adev->mode_info.audio.pin[i].offset; - tmp = RREG32_AUDIO_ENDPT(offset, - AZ_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); - if (((tmp & PORT_CONNECTIVITY_MASK) >> PORT_CONNECTIVITY_SHIFT) == 1) + tmp = RREG32_AUDIO_ENDPT(adev->mode_info.audio.pin[i].offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT); + if (REG_GET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_CONFIGURATION_DEFAULT, + PORT_CONNECTIVITY)) adev->mode_info.audio.pin[i].connected = false; else adev->mode_info.audio.pin[i].connected = true; @@ -1254,45 +1262,206 @@ static struct amdgpu_audio_pin *dce_v6_0_audio_get_pin(struct amdgpu_device *ade return NULL; } -static void dce_v6_0_afmt_audio_select_pin(struct drm_encoder *encoder) +static void dce_v6_0_audio_select_pin(struct drm_encoder *encoder) { struct amdgpu_device *adev = encoder->dev->dev_private; struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; - u32 offset; if (!dig || !dig->afmt || !dig->afmt->pin) return; - offset = dig->afmt->offset; - - WREG32(AFMT_AUDIO_SRC_CONTROL + offset, - AFMT_AUDIO_SRC_SELECT(dig->afmt->pin->id)); - + WREG32(mmAFMT_AUDIO_SRC_CONTROL + dig->afmt->offset, + REG_SET_FIELD(0, AFMT_AUDIO_SRC_CONTROL, AFMT_AUDIO_SRC_SELECT, + 
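The new dce_v6_0_audio_endpt_rreg()/_wreg() bodies above replace the old stubs with the classic index/data indirect access: one register selects an endpoint-local offset, a second moves the data, and adev->audio_endpt_idx_lock is held across the pair because the two MMIO accesses are only meaningful together. A toy model of the pattern (register names and the backing store are invented for illustration):

#include <stdint.h>
#include <stdio.h>

static uint32_t regs[2];		/* stand-in for the MMIO aperture */
static uint32_t endpoint_store[256];	/* what the index/data pair reaches */

#define ENDPOINT_INDEX 0
#define ENDPOINT_DATA  1

static void wreg(unsigned int reg, uint32_t v)
{
	regs[reg] = v;
	if (reg == ENDPOINT_DATA)	/* data-port writes land at the indexed slot */
		endpoint_store[regs[ENDPOINT_INDEX] & 0xff] = v;
}

static uint32_t rreg(unsigned int reg)
{
	if (reg == ENDPOINT_DATA)	/* data-port reads come from the indexed slot */
		return endpoint_store[regs[ENDPOINT_INDEX] & 0xff];
	return regs[reg];
}

int main(void)
{
	wreg(ENDPOINT_INDEX, 0x37);	/* select the endpoint register */
	wreg(ENDPOINT_DATA, 0x1234);	/* write through the data port */

	wreg(ENDPOINT_INDEX, 0x37);	/* reselect, then read it back */
	printf("0x%04x\n", rreg(ENDPOINT_DATA));
	return 0;
}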
dig->afmt->pin->id)); } static void dce_v6_0_audio_write_latency_fields(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_audio_write_latency_fields---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int interlace = 0; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + interlace = 1; + + if (connector->latency_present[interlace]) { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, connector->video_latency[interlace]); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, connector->audio_latency[interlace]); + } else { + tmp = REG_SET_FIELD(0, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + VIDEO_LIPSYNC, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, + AUDIO_LIPSYNC, 0); + } + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_RESPONSE_LIPSYNC, tmp); } static void dce_v6_0_audio_write_speaker_allocation(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_speaker_allocation---no imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + u8 *sadb = NULL; + int sad_count; + u32 tmp; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_speaker_allocation(amdgpu_connector_edid(connector), &sadb); + if (sad_count < 0) { + DRM_ERROR("Couldn't read Speaker Allocation Data Block: %d\n", sad_count); + sad_count = 0; + } + + /* program the speaker allocation */ + tmp = RREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 0); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 0); + + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + DP_CONNECTION, 1); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + HDMI_CONNECTION, 1); + + if (sad_count) + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, sadb[0]); + else + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, + SPEAKER_ALLOCATION, 5); /* stereo */ + + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, + ixAZALIA_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER, tmp); + + kfree(sadb); } static void dce_v6_0_audio_write_sad_regs(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_audio_write_sad_regs---no 
imp!!!!!\n"); + struct amdgpu_device *adev = encoder->dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + struct cea_sad *sads; + int i, sad_count; + + static const u16 eld_reg_to_type[][2] = { + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, HDMI_AUDIO_CODING_TYPE_PCM }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1, HDMI_AUDIO_CODING_TYPE_AC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2, HDMI_AUDIO_CODING_TYPE_MPEG1 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR3, HDMI_AUDIO_CODING_TYPE_MP3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR4, HDMI_AUDIO_CODING_TYPE_MPEG2 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR5, HDMI_AUDIO_CODING_TYPE_AAC_LC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR6, HDMI_AUDIO_CODING_TYPE_DTS }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR7, HDMI_AUDIO_CODING_TYPE_ATRAC }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR9, HDMI_AUDIO_CODING_TYPE_EAC3 }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR10, HDMI_AUDIO_CODING_TYPE_DTS_HD }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR11, HDMI_AUDIO_CODING_TYPE_MLP }, + { ixAZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR13, HDMI_AUDIO_CODING_TYPE_WMA_PRO }, + }; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + sad_count = drm_edid_to_sad(amdgpu_connector_edid(connector), &sads); + if (sad_count <= 0) { + DRM_ERROR("Couldn't read SADs: %d\n", sad_count); + return; + } + + for (i = 0; i < ARRAY_SIZE(eld_reg_to_type); i++) { + u32 tmp = 0; + u8 stereo_freqs = 0; + int max_channels = -1; + int j; + + for (j = 0; j < sad_count; j++) { + struct cea_sad *sad = &sads[j]; + + if (sad->format == eld_reg_to_type[i][1]) { + if (sad->channels > max_channels) { + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + MAX_CHANNELS, sad->channels); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + DESCRIPTOR_BYTE_2, sad->byte2); + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES, sad->freq); + max_channels = sad->channels; + } + + if (sad->format == HDMI_AUDIO_CODING_TYPE_PCM) + stereo_freqs |= sad->freq; + else + break; + } + } + + tmp = REG_SET_FIELD(tmp, AZALIA_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0, + SUPPORTED_FREQUENCIES_STEREO, stereo_freqs); + WREG32_AUDIO_ENDPT(dig->afmt->pin->offset, eld_reg_to_type[i][0], tmp); + } + + kfree(sads); } -*/ + static void dce_v6_0_audio_enable(struct amdgpu_device *adev, struct amdgpu_audio_pin *pin, bool enable) { - DRM_INFO("xxxx: dce_v6_0_audio_enable---no imp!!!!!\n"); + if (!pin) + return; + + WREG32_AUDIO_ENDPT(pin->offset, ixAZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL, + enable ? 
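dce_v6_0_audio_write_sad_regs() above reduces the EDID's Short Audio Descriptors to one register word per coding type: among SADs of the same format it keeps the variant with the most channels, while PCM descriptors additionally accumulate their sample-rate bits into SUPPORTED_FREQUENCIES_STEREO. A simplified, self-contained version of that reduction (the struct and constant are look-alikes of struct cea_sad, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct sad {
	int format;	/* CEA-861 audio coding type; 1 == PCM */
	int channels;
	uint8_t freq;	/* supported sample-rate bitmask */
};

#define CODING_PCM 1

int main(void)
{
	/* Two PCM descriptors: advertise the larger channel count, but
	 * let the stereo frequency bits accumulate across both. */
	struct sad sads[] = {
		{ CODING_PCM, 2, 0x7f },
		{ CODING_PCM, 8, 0x06 },
	};
	int i, n = (int)(sizeof(sads) / sizeof(sads[0]));
	int max_channels = -1;
	uint8_t freq = 0, stereo_freqs = 0;

	for (i = 0; i < n; i++) {
		if (sads[i].format != CODING_PCM)
			continue;
		if (sads[i].channels > max_channels) {
			max_channels = sads[i].channels;
			freq = sads[i].freq;
		}
		stereo_freqs |= sads[i].freq;
	}
	printf("channels=%d freq=0x%02x stereo=0x%02x\n",
	       max_channels, freq, stereo_freqs);
	return 0;
}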
AZALIA_F0_CODEC_PIN_CONTROL_HOT_PLUG_CONTROL__AUDIO_ENABLED_MASK : 0); } static const u32 pin_offsets[7] = @@ -1308,42 +1477,372 @@ static const u32 pin_offsets[7] = static int dce_v6_0_audio_init(struct amdgpu_device *adev) { + int i; + + if (!amdgpu_audio) + return 0; + + adev->mode_info.audio.enabled = true; + + switch (adev->asic_type) { + case CHIP_TAHITI: + case CHIP_PITCAIRN: + case CHIP_VERDE: + default: + adev->mode_info.audio.num_pins = 6; + break; + case CHIP_OLAND: + adev->mode_info.audio.num_pins = 2; + break; + } + + for (i = 0; i < adev->mode_info.audio.num_pins; i++) { + adev->mode_info.audio.pin[i].channels = -1; + adev->mode_info.audio.pin[i].rate = -1; + adev->mode_info.audio.pin[i].bits_per_sample = -1; + adev->mode_info.audio.pin[i].status_bits = 0; + adev->mode_info.audio.pin[i].category_code = 0; + adev->mode_info.audio.pin[i].connected = false; + adev->mode_info.audio.pin[i].offset = pin_offsets[i]; + adev->mode_info.audio.pin[i].id = i; + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + } + return 0; } static void dce_v6_0_audio_fini(struct amdgpu_device *adev) { + int i; + if (!amdgpu_audio) + return; + + if (!adev->mode_info.audio.enabled) + return; + + for (i = 0; i < adev->mode_info.audio.num_pins; i++) + dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); + + adev->mode_info.audio.enabled = false; } -/* -static void dce_v6_0_afmt_update_ACR(struct drm_encoder *encoder, uint32_t clock) +static void dce_v6_0_audio_set_vbi_packet(struct drm_encoder *encoder) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_ACR---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_NULL_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_VBI_PACKET_CONTROL, HDMI_GC_CONT, 1); + WREG32(mmHDMI_VBI_PACKET_CONTROL + dig->afmt->offset, tmp); } -*/ -/* - * build a HDMI Video Info Frame - */ -/* -static void dce_v6_0_afmt_update_avi_infoframe(struct drm_encoder *encoder, - void *buffer, size_t size) + +static void dce_v6_0_audio_set_acr(struct drm_encoder *encoder, + uint32_t clock, int bpc) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_afmt_acr acr = amdgpu_afmt_acr(clock); + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_AUTO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_PACKET_CONTROL, HDMI_ACR_SOURCE, + bpc > 8 ? 
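The HDMI_ACR_* writes in dce_v6_0_audio_set_acr() carry the Audio Clock Regeneration pairs that let the sink rebuild the audio clock from the TMDS clock: per the HDMI spec, 128 * fs = f_TMDS * N / CTS. The driver takes its N/CTS values from amdgpu_afmt_acr(); the arithmetic itself is easy to check in isolation (a sketch, using the spec-recommended N for 48 kHz):

#include <stdint.h>
#include <stdio.h>

/* CTS = f_tmds * N / (128 * fs), the inverse of 128*fs = f_tmds*N/CTS.
 * 64-bit intermediate to avoid overflow at high pixel clocks. */
static uint32_t acr_cts(uint32_t tmds_khz, uint32_t n, uint32_t fs_hz)
{
	return (uint32_t)(((uint64_t)tmds_khz * 1000 * n) / (128ULL * fs_hz));
}

int main(void)
{
	/* 1080p60: 148.5 MHz TMDS clock, 48 kHz audio, N = 6144 */
	printf("CTS = %u\n", acr_cts(148500, 6144, 48000));	/* prints 148500 */
	return 0;
}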
0 : 1); + WREG32(mmHDMI_ACR_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_32_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_0, HDMI_ACR_CTS_32, acr.cts_32khz); + WREG32(mmHDMI_ACR_32_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_32_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_32_1, HDMI_ACR_N_32, acr.n_32khz); + WREG32(mmHDMI_ACR_32_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_44_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_0, HDMI_ACR_CTS_44, acr.cts_44_1khz); + WREG32(mmHDMI_ACR_44_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_44_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_44_1, HDMI_ACR_N_44, acr.n_44_1khz); + WREG32(mmHDMI_ACR_44_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_ACR_48_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_0, HDMI_ACR_CTS_48, acr.cts_48khz); + WREG32(mmHDMI_ACR_48_0 + dig->afmt->offset, tmp); + tmp = RREG32(mmHDMI_ACR_48_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_ACR_48_1, HDMI_ACR_N_48, acr.n_48khz); + WREG32(mmHDMI_ACR_48_1 + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder, + struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_afmt_update_avi_infoframe---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct hdmi_avi_infoframe frame; + u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE]; + uint8_t *payload = buffer + 3; + uint8_t *header = buffer; + ssize_t err; + u32 tmp; + + err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode); + if (err < 0) { + DRM_ERROR("failed to setup AVI infoframe: %zd\n", err); + return; + } + + err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer)); + if (err < 0) { + DRM_ERROR("failed to pack AVI infoframe: %zd\n", err); + return; + } + + WREG32(mmAFMT_AVI_INFO0 + dig->afmt->offset, + payload[0x0] | (payload[0x1] << 8) | (payload[0x2] << 16) | (payload[0x3] << 24)); + WREG32(mmAFMT_AVI_INFO1 + dig->afmt->offset, + payload[0x4] | (payload[0x5] << 8) | (payload[0x6] << 16) | (payload[0x7] << 24)); + WREG32(mmAFMT_AVI_INFO2 + dig->afmt->offset, + payload[0x8] | (payload[0x9] << 8) | (payload[0xA] << 16) | (payload[0xB] << 24)); + WREG32(mmAFMT_AVI_INFO3 + dig->afmt->offset, + payload[0xC] | (payload[0xD] << 8) | (header[1] << 24)); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + /* anything other than 0 */ + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, + HDMI_AUDIO_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); } static void dce_v6_0_audio_set_dto(struct drm_encoder *encoder, u32 clock) { - DRM_INFO("xxxx: dce_v6_0_audio_set_dto---no imp!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + u32 tmp; + + /* + * Two dtos: generally use dto0 for hdmi, dto1 for dp. + * Express [24MHz / target pixel clock] as an exact rational + * number (a ratio of two integers): 
DCCG_AUDIO_DTOx_PHASE + * is the numerator, DCCG_AUDIO_DTOx_MODULE is the denominator + */ + tmp = RREG32(mmDCCG_AUDIO_DTO_SOURCE); + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO0_SOURCE_SEL, amdgpu_crtc->crtc_id); + if (em == ATOM_ENCODER_MODE_HDMI) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 0); + } else if (ENCODER_MODE_IS_DP(em)) { + tmp = REG_SET_FIELD(tmp, DCCG_AUDIO_DTO_SOURCE, + DCCG_AUDIO_DTO_SEL, 1); + } + WREG32(mmDCCG_AUDIO_DTO_SOURCE, tmp); + if (em == ATOM_ENCODER_MODE_HDMI) { + WREG32(mmDCCG_AUDIO_DTO0_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO0_MODULE, clock); + } else if (ENCODER_MODE_IS_DP(em)) { + WREG32(mmDCCG_AUDIO_DTO1_PHASE, 24000); + WREG32(mmDCCG_AUDIO_DTO1_MODULE, clock); + } } -*/ -/* - * update the info frames with the data from the current display mode - */ + +static void dce_v6_0_audio_set_packet(struct drm_encoder *encoder) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_INFOFRAME_CONTROL0, AFMT_AUDIO_INFO_UPDATE, 1); + WREG32(mmAFMT_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_0, AFMT_60958_CS_CHANNEL_NUMBER_L, 1); + WREG32(mmAFMT_60958_0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_1, AFMT_60958_CS_CHANNEL_NUMBER_R, 2); + WREG32(mmAFMT_60958_1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_60958_2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_2, 3); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_3, 4); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_4, 5); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_5, 6); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_6, 7); + tmp = REG_SET_FIELD(tmp, AFMT_60958_2, AFMT_60958_CS_CHANNEL_NUMBER_7, 8); + WREG32(mmAFMT_60958_2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL2, AFMT_AUDIO_CHANNEL_ENABLE, 0xff); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL2 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_DELAY_EN, 1); + tmp = REG_SET_FIELD(tmp, HDMI_AUDIO_PACKET_CONTROL, HDMI_AUDIO_PACKETS_PER_LINE, 3); + WREG32(mmHDMI_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_RESET_FIFO_WHEN_AUDIO_DIS, 1); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_60958_CS_UPDATE, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_set_mute(struct drm_encoder *encoder, bool mute) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + tmp = RREG32(mmHDMI_GC + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_GC, HDMI_GC_AVMUTE, mute ? 
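The DTO programming above follows directly from that comment: the generated clock is f_src * PHASE / MODULE, so loading PHASE with 24000 and MODULE with the pixel clock (both in kHz) pins the audio reference at 24 MHz for any mode. A one-function sketch of the arithmetic (assuming that output relation, which is what the comment describes):

#include <stdint.h>
#include <stdio.h>

/* f_out = f_src * PHASE / MODULE; with PHASE = 24000 and MODULE equal
 * to the pixel clock in kHz, f_out is 24 MHz regardless of the mode. */
static uint32_t dto_out_khz(uint32_t src_khz, uint32_t phase, uint32_t module)
{
	return (uint32_t)(((uint64_t)src_khz * phase) / module);
}

int main(void)
{
	uint32_t clock = 148500;	/* 1080p60 pixel clock, kHz */

	printf("%u kHz\n", dto_out_khz(clock, 24000, clock));	/* 24000 */
	return 0;
}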
1 : 0); + WREG32(mmHDMI_GC + dig->afmt->offset, tmp); +} + +static void dce_v6_0_audio_hdmi_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 1); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 1); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL1, HDMI_AVI_INFO_LINE, 2); + WREG32(mmHDMI_INFOFRAME_CONTROL1 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } else { + tmp = RREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AVI_INFO_CONT, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_SEND, 0); + tmp = REG_SET_FIELD(tmp, HDMI_INFOFRAME_CONTROL0, HDMI_AUDIO_INFO_CONT, 0); + WREG32(mmHDMI_INFOFRAME_CONTROL0 + dig->afmt->offset, tmp); + + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 0); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + } +} + +static void dce_v6_0_audio_dp_enable(struct drm_encoder *encoder, bool enable) +{ + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + u32 tmp; + + if (enable) { + tmp = RREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, AFMT_AUDIO_PACKET_CONTROL, AFMT_AUDIO_SAMPLE_SEND, 1); + WREG32(mmAFMT_AUDIO_PACKET_CONTROL + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_TIMESTAMP, DP_SEC_TIMESTAMP_MODE, 1); + WREG32(mmDP_SEC_TIMESTAMP + dig->afmt->offset, tmp); + + tmp = RREG32(mmDP_SEC_CNTL + dig->afmt->offset); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ASP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_ATP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_AIP_ENABLE, 1); + tmp = REG_SET_FIELD(tmp, DP_SEC_CNTL, DP_SEC_STREAM_ENABLE, 1); + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, tmp); + } else { + WREG32(mmDP_SEC_CNTL + dig->afmt->offset, 0); + } +} + static void dce_v6_0_afmt_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode) { - DRM_INFO("xxxx: dce_v6_0_afmt_setmode ----no impl !!!!!!!!\n"); + struct drm_device *dev = encoder->dev; + struct amdgpu_device *adev = dev->dev_private; + struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv; + struct drm_connector *connector; + struct amdgpu_connector *amdgpu_connector = NULL; + int 
em = amdgpu_atombios_encoder_get_encoder_mode(encoder); + int bpc = 8; + + if (!dig || !dig->afmt) + return; + + list_for_each_entry(connector, &encoder->dev->mode_config.connector_list, head) { + if (connector->encoder == encoder) { + amdgpu_connector = to_amdgpu_connector(connector); + break; + } + } + + if (!amdgpu_connector) { + DRM_ERROR("Couldn't find encoder's connector\n"); + return; + } + + if (!dig->afmt->enabled) + return; + + dig->afmt->pin = dce_v6_0_audio_get_pin(adev); + if (!dig->afmt->pin) + return; + + if (encoder->crtc) { + struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(encoder->crtc); + bpc = amdgpu_crtc->bpc; + } + + /* disable audio before setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, false); + + dce_v6_0_audio_set_mute(encoder, true); + dce_v6_0_audio_write_speaker_allocation(encoder); + dce_v6_0_audio_write_sad_regs(encoder); + dce_v6_0_audio_write_latency_fields(encoder, mode); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_set_dto(encoder, mode->clock); + dce_v6_0_audio_set_vbi_packet(encoder); + dce_v6_0_audio_set_acr(encoder, mode->clock, bpc); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_set_dto(encoder, adev->clock.default_dispclk * 10); + } + dce_v6_0_audio_set_packet(encoder); + dce_v6_0_audio_select_pin(encoder); + dce_v6_0_audio_set_avi_infoframe(encoder, mode); + dce_v6_0_audio_set_mute(encoder, false); + if (em == ATOM_ENCODER_MODE_HDMI) { + dce_v6_0_audio_hdmi_enable(encoder, 1); + } else if (ENCODER_MODE_IS_DP(em)) { + dce_v6_0_audio_dp_enable(encoder, 1); + } + + /* enable audio after setting up hw */ + dce_v6_0_audio_enable(adev, dig->afmt->pin, true); } static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) @@ -1359,6 +1858,7 @@ static void dce_v6_0_afmt_enable(struct drm_encoder *encoder, bool enable) /* Silent, r600_hdmi_enable will raise WARN for us */ if (enable && dig->afmt->enabled) return; + if (!enable && !dig->afmt->enabled) return; @@ -2753,6 +3253,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, { struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_encoder->pixel_clock = adjusted_mode->clock; @@ -2762,7 +3263,7 @@ dce_v6_0_encoder_mode_set(struct drm_encoder *encoder, /* set scaler clears this on some chips */ dce_v6_0_set_interleave(encoder->crtc, mode); - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) { + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) { dce_v6_0_afmt_enable(encoder, true); dce_v6_0_afmt_setmode(encoder, adjusted_mode); } @@ -2824,11 +3325,12 @@ static void dce_v6_0_encoder_disable(struct drm_encoder *encoder) struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); struct amdgpu_encoder_atom_dig *dig; + int em = amdgpu_atombios_encoder_get_encoder_mode(encoder); amdgpu_atombios_encoder_dpms(encoder, DRM_MODE_DPMS_OFF); if (amdgpu_atombios_encoder_is_digital(encoder)) { - if (amdgpu_atombios_encoder_get_encoder_mode(encoder) == ATOM_ENCODER_MODE_HDMI) + if (em == ATOM_ENCODER_MODE_HDMI || ENCODER_MODE_IS_DP(em)) dce_v6_0_afmt_enable(encoder, false); dig = amdgpu_encoder->enc_priv; dig->dig_encoder = -1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index a125f9d44577..7b0b3cf16334 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -393,8 +393,11 @@ out: static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device 
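Note the ordering dce_v6_0_afmt_setmode() above enforces: audio is disabled and AV-muted before any SAD, ACR, packet, or DTO register is touched, and only unmuted and re-enabled once everything is consistent, so the sink never samples a half-programmed stream. Schematically (placeholder functions, not driver entry points):

#include <stdbool.h>
#include <stdio.h>

static void audio_enable(bool on) { printf("audio %s\n", on ? "on" : "off"); }
static void av_mute(bool on)      { printf("mute %s\n", on ? "on" : "off"); }
static void program_stream(void)  { printf("program SADs/ACR/AVI/DTO\n"); }

static void set_mode(void)
{
	audio_enable(false);	/* quiesce before touching the hardware */
	av_mute(true);
	program_stream();	/* safe: no samples are flowing */
	av_mute(false);
	audio_enable(true);	/* resume only with consistent state */
}

int main(void)
{
	set_mode();
	return 0;
}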
*adev) { - const u32 num_tile_mode_states = 32; - u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array); + u32 reg_offset, split_equal_to_row_size, *tilemode; + + memset(adev->gfx.config.tile_mode_array, 0, sizeof(adev->gfx.config.tile_mode_array)); + tilemode = adev->gfx.config.tile_mode_array; switch (adev->gfx.config.mem_row_size_in_kb) { case 1: @@ -410,887 +413,680 @@ static void gfx_v6_0_tiling_mode_table_init(struct amdgpu_device *adev) } if (adev->asic_type == CHIP_VERDE) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - 
PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - 
NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else if (adev->asic_type == CHIP_OLAND || - adev->asic_type == CHIP_HAINAN) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + 
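Each tilemode[] word assembled in this block is an OR of field-encoding macros, one per bitfield of the GB_TILE_MODEn register; the rewrite keeps the values but trades the old per-index switch for straight-line array assignments followed by a single WREG32 loop. A toy version of that composition, with invented shift positions (the real field layouts live in the SI register headers):

#include <stdint.h>
#include <stdio.h>

/* Invented field encoders in the style of ARRAY_MODE()/PIPE_CONFIG();
 * only the OR-composition pattern matches the driver. */
#define F_MICRO_TILE_MODE(x)	((x) << 0)
#define F_ARRAY_MODE(x)		((x) << 2)
#define F_PIPE_CONFIG(x)	((x) << 6)
#define F_NUM_BANKS(x)		((x) << 20)

int main(void)
{
	uint32_t tilemode0 = F_MICRO_TILE_MODE(1) |	/* depth micro tiling */
			     F_ARRAY_MODE(4) |		/* 2D tiled thin1 */
			     F_PIPE_CONFIG(13) |	/* P4_8x16 */
			     F_NUM_BANKS(3);		/* 16 banks */

	printf("GB_TILE_MODE0 <- 0x%08x\n", tilemode0);
	return 0;
}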
PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + 
TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else if (adev->asic_type == CHIP_OLAND || adev->asic_type == CHIP_HAINAN) { + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + 
PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + 
NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); } else if ((adev->asic_type == CHIP_TAHITI) || (adev->asic_type == CHIP_PITCAIRN)) { - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 4: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 5: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 6: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - 
BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 7: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED)); - break; - case 9: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 10: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 14: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 15: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 16: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 17: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = 
(MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16)); - break; - case 19: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 20: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 21: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 22: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 23: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 24: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 25: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 26: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 27: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 28: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) 
| - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 29: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - case 30: - gb_tile_moden = (MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | - ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_8x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | - BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_2_BANK)); - break; - default: - continue; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - } else{ - + tilemode[0] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[1] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[2] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[3] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[4] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[5] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[6] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[7] = MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[8] = ARRAY_MODE(ARRAY_LINEAR_ALIGNED); + tilemode[9] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[10] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[11] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[12] = MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[13] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[14] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[15] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[16] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK); + tilemode[17] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[18] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16); + tilemode[19] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[20] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + 
MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK) | + TILE_SPLIT(split_equal_to_row_size); + tilemode[21] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[22] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK); + tilemode[23] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[24] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[25] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[26] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[27] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[28] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[29] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + tilemode[30] = MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) | + ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_8x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) | + BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_2_BANK); + for (reg_offset = 0; reg_offset < 
num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tilemode[reg_offset]); + } else { DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type); } - } static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, @@ -1318,11 +1114,6 @@ static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v6_0_create_bitmask(u32 bit_width) -{ - return (u32)(((u64)1 << bit_width) - 1); -} - static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -1332,8 +1123,8 @@ static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/ - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se/ + adev->gfx.config.max_sh_per_se); return ~data & mask; } @@ -1399,11 +1190,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) { raster_config_se &= ~PA_SC_RASTER_CONFIG__SE_MAP_MASK; - if (!se_mask[idx]) { + if (!se_mask[idx]) raster_config_se |= RASTER_CONFIG_SE_MAP_3 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_SE_MAP_0 << PA_SC_RASTER_CONFIG__SE_MAP__SHIFT; - } } pkr0_mask &= rb_mask; @@ -1411,11 +1201,10 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) { raster_config_se &= ~PA_SC_RASTER_CONFIG__PKR_MAP_MASK; - if (!pkr0_mask) { + if (!pkr0_mask) raster_config_se |= RASTER_CONFIG_PKR_MAP_3 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_PKR_MAP_0 << PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT; - } } if (rb_per_se >= 2) { @@ -1427,13 +1216,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR0_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR0__SHIFT; - } } if (rb_per_se > 2) { @@ -1444,13 +1232,12 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev, if (!rb0_mask || !rb1_mask) { raster_config_se &= ~PA_SC_RASTER_CONFIG__RB_MAP_PKR1_MASK; - if (!rb0_mask) { + if (!rb0_mask) raster_config_se |= RASTER_CONFIG_RB_MAP_3 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; - } else { + else raster_config_se |= RASTER_CONFIG_RB_MAP_0 << PA_SC_RASTER_CONFIG__RB_MAP_PKR1__SHIFT; - } } } } @@ -1479,8 +1266,9 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff); data = gfx_v6_0_get_rb_active_bitmap(adev); - active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * - rb_bitmap_width_per_sh); + active_rbs |= data << + ((i * adev->gfx.config.max_sh_per_se + j) * + rb_bitmap_width_per_sh); } } gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); @@ -1494,13 +1282,12 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_raster_config(adev, &raster_config); if (!adev->gfx.config.backend_enable_mask || - adev->gfx.config.num_rbs >= num_rb_pipes) { + adev->gfx.config.num_rbs >= num_rb_pipes) 
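The hunks above convert gfx_v6_0's tile-mode programming from a 31-case switch into a tilemode[] array that is filled per ASIC family and committed with a single register loop. Each entry is one 32-bit GB_TILE_MODEn value built by ORing field-encoding macros. A minimal standalone sketch of that pattern follows; the field shifts, the wreg32() stub and the MM_GB_TILE_MODE0 value are illustrative stand-ins, not the real SI definitions:

#include <stdint.h>

/* Illustrative field encoders: each macro shifts a small enum value
 * into its own bit range of the 32-bit tile-mode word.  The real
 * macros (MICRO_TILE_MODE, ARRAY_MODE, PIPE_CONFIG, ...) live in the
 * SI headers and use different widths and offsets. */
#define ARRAY_MODE(x)   ((uint32_t)(x) << 2)
#define PIPE_CONFIG(x)  ((uint32_t)(x) << 6)
#define TILE_SPLIT(x)   ((uint32_t)(x) << 11)
#define NUM_BANKS(x)    ((uint32_t)(x) << 20)

extern void wreg32(uint32_t reg, uint32_t val);  /* stand-in for WREG32 */

#define MM_GB_TILE_MODE0 0x2644  /* placeholder register offset */

static uint32_t tilemode[32];

static void commit_tile_modes(unsigned int num_tile_mode_states)
{
	unsigned int reg_offset;

	/* one plain assignment per mode replaces one switch case per mode */
	tilemode[0] = ARRAY_MODE(4) | PIPE_CONFIG(0) | TILE_SPLIT(1) |
		      NUM_BANKS(3);
	/* ... entries 1..30 are filled the same way ... */

	/* a single commit loop replaces the per-case WREG32 calls */
	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
		wreg32(MM_GB_TILE_MODE0 + reg_offset, tilemode[reg_offset]);
}

The payoff, visible in the diffstat, is that each per-register case collapses to one assignment and the register writes happen in one place.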
WREG32(mmPA_SC_RASTER_CONFIG, raster_config); - } else { + else gfx_v6_0_write_harvested_raster_configs(adev, raster_config, adev->gfx.config.backend_enable_mask, num_rb_pipes); - } /* cache the values for userspace */ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { @@ -1517,11 +1304,6 @@ static void gfx_v6_0_setup_rb(struct amdgpu_device *adev) gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); } -/* -static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev) -{ -} -*/ static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev, u32 bitmap) @@ -1544,7 +1326,7 @@ static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -1688,7 +1470,8 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); - mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; @@ -3719,6 +3502,12 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -3737,9 +3526,9 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v6_0_get_cu_enabled(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index f7414cabd4ff..ec754288f146 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -27,6 +27,7 @@ #include "amdgpu_gfx.h" #include "cikd.h" #include "cik.h" +#include "cik_structs.h" #include "atom.h" #include "amdgpu_ucode.h" #include "clearstate_ci.h" @@ -48,7 +49,7 @@ #include "oss/oss_2_0_sh_mask.h" #define GFX7_NUM_GFX_RINGS 1 -#define GFX7_NUM_COMPUTE_RINGS 8 +#define GFX7_MEC_HPD_SIZE 2048 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev); @@ -1607,19 +1608,6 @@ static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, } /** - * gfx_v7_0_create_bitmask - create a bitmask - * - * @bit_width: length of the mask - * - * create a variable length bit mask (CIK). - * Returns the bitmask. 
- */ -static u32 gfx_v7_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - -/** * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs * * @adev: amdgpu_device pointer @@ -1637,8 +1625,8 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -1837,7 +1825,7 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) /** * gmc_v7_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -2821,26 +2809,23 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev) } } -#define MEC_HPD_SIZE 2048 - static int gfx_v7_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total - * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total - * Nonetheless, we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + /* allocate space for ALL pipes (even the ones we don't own) */ + mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec + * GFX7_MEC_HPD_SIZE * 2; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -2870,7 +2855,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) } /* clear memory. Not sure if this is required or not */ - memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -2917,275 +2902,296 @@ struct hqd_registers u32 cp_mqd_control; }; -struct bonaire_mqd +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, + int mec, int pipe) { - u32 header; - u32 dispatch_initiator; - u32 dimensions[3]; - u32 start_idx[3]; - u32 num_threads[3]; - u32 pipeline_stat_enable; - u32 perf_counter_enable; - u32 pgm[2]; - u32 tba[2]; - u32 tma[2]; - u32 pgm_rsrc[2]; - u32 vmid; - u32 resource_limits; - u32 static_thread_mgmt01[2]; - u32 tmp_ring_size; - u32 static_thread_mgmt23[2]; - u32 restart[3]; - u32 thread_trace_enable; - u32 reserved1; - u32 user_data[16]; - u32 vgtcs_invoke_count[2]; - struct hqd_registers queue_state; - u32 dequeue_cntr; - u32 interrupt_queue[64]; -}; - -/** - * gfx_v7_0_cp_compute_resume - setup the compute queue registers - * - * @adev: amdgpu_device pointer - * - * Program the compute queues and test them to make sure they - * are working. - * Returns 0 for success, error for failure. 
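gfx_v7_0_create_bitmask(), removed above, and its gfx_v6_0 twin computed the same low-bits mask, which is why callers were switched to a shared amdgpu_gfx_create_bitmask(). A sketch of the arithmetic taken from the removed bodies, together with the RB-harvesting use visible in the get_rb_active_bitmap() hunks; the standalone function names here are placeholders:

#include <stdint.h>

/* Same arithmetic as the removed per-IP helpers: the 64-bit shift
 * avoids undefined behaviour when bit_width is 32. */
static uint32_t create_bitmask(uint32_t bit_width)
{
	return (uint32_t)((1ULL << bit_width) - 1);
}

/* Usage mirroring gfx_v6_0_get_rb_active_bitmap(): disable_field holds
 * the harvested-backend bits, so the active RBs are the complement
 * under a mask sized to the backends of one shader array. */
static uint32_t rb_active_bitmap(uint32_t disable_field,
				 uint32_t max_backends_per_se,
				 uint32_t max_sh_per_se)
{
	uint32_t mask = create_bitmask(max_backends_per_se / max_sh_per_se);

	return ~disable_field & mask;
}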
- */ -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct bonaire_mqd *mqd; - struct amdgpu_ring *ring; - - /* fix up chicken bits */ - tmp = RREG32(mmCP_CPF_DEBUG); - tmp |= (1 << 23); - WREG32(mmCP_CPF_DEBUG, tmp); + u32 tmp; + size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe) + * GFX7_MEC_HPD_SIZE * 2; - /* init the pipes */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); + eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset; - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); + cik_srbm_select(adev, mec + 1, pipe, 0, 0); - cik_srbm_select(adev, me, pipe, 0, 0); + /* write the EOP addr */ + WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); + WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); - /* write the EOP addr */ - WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); - WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); + /* set the VMID assigned */ + WREG32(mmCP_HPD_EOP_VMID, 0); - /* set the VMID assigned */ - WREG32(mmCP_HPD_EOP_VMID, 0); + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(mmCP_HPD_EOP_CONTROL); + tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; + tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8); + WREG32(mmCP_HPD_EOP_CONTROL, tmp); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HPD_EOP_CONTROL); - tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK; - tmp |= order_base_2(MEC_HPD_SIZE / 8); - WREG32(mmCP_HPD_EOP_CONTROL, tmp); - } cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); +} - /* init the queues. Just two for now. 
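In the mec_init hunk above, gfx_v7_0 now claims amdgpu's share of the compute queues via a queue bitmap and amdgpu_gfx_compute_queue_acquire(), but still sizes the EOP buffer for every pipe, owned or not: mec_hpd_size = num_mec * num_pipe_per_mec * GFX7_MEC_HPD_SIZE * 2. A small sketch of that sizing arithmetic for a KAVERI-style topology (2 MECs with 4 pipes each, per the sw_init hunk further down):

#include <stdio.h>

#define GFX7_MEC_HPD_SIZE 2048	/* per-pipe EOP space, from the hunk above */

int main(void)
{
	/* KAVERI values per the sw_init hunk later in this patch;
	 * the other CIK parts use num_mec = 1 */
	unsigned int num_mec = 2, num_pipe_per_mec = 4;

	/* sized for ALL pipes, including those amdkfd will drive */
	unsigned long mec_hpd_size =
		(unsigned long)num_mec * num_pipe_per_mec *
		GFX7_MEC_HPD_SIZE * 2;

	printf("EOP buffer: %lu bytes\n", mec_hpd_size);	/* 32768 */
	return 0;
}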
*/ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev) +{ + int i; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct bonaire_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, - &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } + /* disable the queue if it's active */ + if (RREG32(mmCP_HQD_ACTIVE) & 1) { + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); } - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v7_0_cp_compute_fini(adev); - return r; - } + if (i == adev->usec_timeout) + return -ETIMEDOUT; - /* init the mqd struct */ - memset(buf, 0, sizeof(struct bonaire_mqd)); + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + } - mqd = (struct bonaire_mqd *)buf; - mqd->header = 0xC0310800; - mqd->static_thread_mgmt01[0] = 0xffffffff; - mqd->static_thread_mgmt01[1] = 0xffffffff; - mqd->static_thread_mgmt23[0] = 0xffffffff; - mqd->static_thread_mgmt23[1] = 0xffffffff; + return 0; +} - mutex_lock(&adev->srbm_mutex); - cik_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev, + struct cik_mqd *mqd, + uint64_t mqd_gpu_addr, + struct amdgpu_ring *ring) +{ + u64 hqd_gpu_addr; + u64 wb_gpu_addr; - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK; - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct cik_mqd)); - /* enable doorbell? 
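The new gfx_v7_0_mqd_deactivate() above pulls the drain-the-HQD logic out of the old monolithic resume path: request a dequeue, poll CP_HQD_ACTIVE once per microsecond, and give up with -ETIMEDOUT if the queue never idles. A generic sketch of that poll-with-timeout shape, with MMIO access stubbed out behind assumed helpers and placeholder register numbers:

#include <errno.h>

extern unsigned int reg_read(unsigned int reg);
extern void reg_write(unsigned int reg, unsigned int val);
extern void delay_us(unsigned int us);

#define REG_HQD_ACTIVE		0x01
#define REG_HQD_DEQUEUE_REQUEST	0x02
#define REG_HQD_PQ_RPTR		0x03
#define REG_HQD_PQ_WPTR		0x04

static int hqd_deactivate(unsigned int usec_timeout)
{
	unsigned int i;

	if (!(reg_read(REG_HQD_ACTIVE) & 1))
		return 0;	/* nothing to drain */

	reg_write(REG_HQD_DEQUEUE_REQUEST, 1);
	for (i = 0; i < usec_timeout; i++) {
		if (!(reg_read(REG_HQD_ACTIVE) & 1))
			break;
		delay_us(1);
	}
	if (i == usec_timeout)
		return -ETIMEDOUT;	/* caller decides how to recover */

	/* queue is idle: clear the request and reset both ring pointers */
	reg_write(REG_HQD_DEQUEUE_REQUEST, 0);
	reg_write(REG_HQD_PQ_RPTR, 0);
	reg_write(REG_HQD_PQ_WPTR, 0);
	return 0;
}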
*/ - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - else - mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); - - /* disable the queue if it's active */ - mqd->queue_state.cp_hqd_dequeue_request = 0; - mqd->queue_state.cp_hqd_pq_rptr = 0; - mqd->queue_state.cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - } + mqd->header = 0xC0310800; + mqd->compute_static_thread_mgmt_se0 = 0xffffffff; + mqd->compute_static_thread_mgmt_se1 = 0xffffffff; + mqd->compute_static_thread_mgmt_se2 = 0xffffffff; + mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - /* set the pointer to the MQD */ - mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi); - /* set MQD vmid to 0 */ - mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL); - mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; - WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control); - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr; - mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | - CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); - - mqd->queue_state.cp_hqd_pq_control |= - order_base_2(ring->ring_size / 8); - mqd->queue_state.cp_hqd_pq_control |= - (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); + /* enable doorbell? 
*/ + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + if (ring->use_doorbell) + mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + else + mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + + /* set the pointer to the MQD */ + mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; + mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); + + /* set MQD vmid to 0 */ + mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL); + mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK; + + /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ + hqd_gpu_addr = ring->gpu_addr >> 8; + mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; + mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); + + /* set up the HQD, this is similar to CP_RB0_CNTL */ + mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL); + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK | + CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK); + + mqd->cp_hqd_pq_control |= + order_base_2(ring->ring_size / 8); + mqd->cp_hqd_pq_control |= + (order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8); #ifdef __BIG_ENDIAN - mqd->queue_state.cp_hqd_pq_control |= - 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; + mqd->cp_hqd_pq_control |= + 2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT; #endif - mqd->queue_state.cp_hqd_pq_control &= - ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | + mqd->cp_hqd_pq_control &= + ~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK | CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK | CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK); - mqd->queue_state.cp_hqd_pq_control |= - CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | - CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi); - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc; - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->queue_state.cp_hqd_pq_rptr_report_addr); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - mqd->queue_state.cp_hqd_pq_doorbell_control = - RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control |= - (ring->doorbell_index << - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); - mqd->queue_state.cp_hqd_pq_doorbell_control |= - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; - mqd->queue_state.cp_hqd_pq_doorbell_control &= - ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | - CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + mqd->cp_hqd_pq_control |= + CP_HQD_PQ_CONTROL__PRIV_STATE_MASK | + CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */ - } else { - mqd->queue_state.cp_hqd_pq_doorbell_control = 0; + /* only used 
if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; + + /* set the wb address whether it's enabled or not */ + wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); + mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; + mqd->cp_hqd_pq_rptr_report_addr_hi = + upper_32_bits(wb_gpu_addr) & 0xffff; + + /* enable the doorbell if requested */ + if (ring->use_doorbell) { + mqd->cp_hqd_pq_doorbell_control = + RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); + mqd->cp_hqd_pq_doorbell_control &= + ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK; + mqd->cp_hqd_pq_doorbell_control |= + (ring->doorbell_index << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT); + mqd->cp_hqd_pq_doorbell_control |= + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK; + mqd->cp_hqd_pq_doorbell_control &= + ~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK | + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK); + + } else { + mqd->cp_hqd_pq_doorbell_control = 0; + } + + /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ + ring->wptr = 0; + mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + + /* set the vmid for the queue */ + mqd->cp_hqd_vmid = 0; + + /* defaults */ + mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL); + mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR); + mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI); + mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR); + mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE); + mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD); + mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE); + mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO); + mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI); + mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO); + mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI); + mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR); + + /* activate the queue */ + mqd->cp_hqd_active = 1; +} + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd) +{ + uint32_t tmp; + uint32_t mqd_reg; + uint32_t *mqd_data; + + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_MQD_CONTROL */ + mqd_data = &mqd->cp_mqd_base_addr_lo; + + /* disable wptr polling */ + tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); + tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); + WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); + + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_MQD_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); + + return 0; +} + +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id) +{ + int r; + u64 mqd_gpu_addr; + struct cik_mqd *mqd; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + if (ring->mqd_obj == NULL) { + r = amdgpu_bo_create(adev, + sizeof(struct cik_mqd), + PAGE_SIZE, true, + AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, +
&ring->mqd_obj); + if (r) { + dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); + return r; } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->queue_state.cp_hqd_pq_doorbell_control); + } + + r = amdgpu_bo_reserve(ring->mqd_obj, false); + if (unlikely(r != 0)) + goto out; - /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr); - mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); + r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, + &mqd_gpu_addr); + if (r) { + dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); + goto out_unreserve; + } + r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd); + if (r) { + dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); + goto out_unreserve; + } - /* set the vmid for the queue */ - mqd->queue_state.cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid); + mutex_lock(&adev->srbm_mutex); + cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - /* activate the queue */ - mqd->queue_state.cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active); + gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); + gfx_v7_0_mqd_deactivate(adev); + gfx_v7_0_mqd_commit(adev, mqd); - cik_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); + cik_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); + amdgpu_bo_kunmap(ring->mqd_obj); +out_unreserve: + amdgpu_bo_unreserve(ring->mqd_obj); +out: + return 0; +} - ring->ready = true; +/** + * gfx_v7_0_cp_compute_resume - setup the compute queue registers + * + * @adev: amdgpu_device pointer + * + * Program the compute queues and test them to make sure they + * are working. + * Returns 0 for success, error for failure. 
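gfx_v7_0_mqd_commit() above leans on struct cik_mqd keeping its cp_* members in the same order as the CP_MQD_BASE_ADDR..CP_HQD_ACTIVE register block, so the whole descriptor can be programmed by indexing the struct as a u32 array with (reg - mmCP_MQD_BASE_ADDR). A reduced sketch of the trick with a made-up three-register window (the real function disables wptr polling first and runs two separate loops):

#include <stdint.h>

/* hypothetical MMIO window with consecutive register offsets */
enum { REG_BASE = 0x100, REG_VMID = 0x101, REG_ACTIVE = 0x102 };

/* Host-side shadow whose u32 fields are laid out in register order,
 * like the cp_mqd_base_addr_lo..cp_hqd_active run of struct cik_mqd. */
struct mqd_shadow {
	uint32_t base;		/* mirrors REG_BASE */
	uint32_t vmid;		/* mirrors REG_VMID */
	uint32_t active;	/* mirrors REG_ACTIVE, written last */
};

extern void reg_write(uint32_t reg, uint32_t val);

static void mqd_commit(struct mqd_shadow *mqd)
{
	uint32_t *data = &mqd->base;	/* struct doubles as a u32 array */
	uint32_t reg;

	/* program everything below ACTIVE first ... */
	for (reg = REG_BASE; reg < REG_ACTIVE; reg++)
		reg_write(reg, data[reg - REG_BASE]);

	/* ... so that flipping ACTIVE last brings the queue up armed */
	reg_write(REG_ACTIVE, data[REG_ACTIVE - REG_BASE]);
}

The design choice this enables is visible in the hunk: the MQD is fully described in memory first, then replayed to the hardware mechanically, instead of interleaving field setup with register writes as the removed code did.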
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
+
+	/* init all pipes (even the ones we don't own) */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
@@ -3797,6 +3803,9 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		gfx_v7_0_update_rlc(adev, tmp);
 
 		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
 	} else {
 		gfx_v7_0_enable_gui_idle_interrupt(adev, false);
 
@@ -3806,11 +3815,11 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		RREG32(mmCB_CGTT_SCLK_CTRL);
 
 		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
-	}
-
-	if (orig != data)
-		WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 
+		if (orig != data)
+			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
+
+		gfx_v7_0_enable_gui_idle_interrupt(adev, true);
+	}
 }
 
 static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
@@ -4089,7 +4098,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
 
-	mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
 
 	return (~data) & mask;
 }
@@ -4470,7 +4479,7 @@ static int gfx_v7_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
@@ -4662,11 +4671,57 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+				      int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			     &adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, j, k, r, ring_id;
+
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+
adev->gfx.mec.num_mec = 2; + break; + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_KABINI: + case CHIP_MULLINS: + default: + adev->gfx.mec.num_mec = 1; + break; + } + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); @@ -4716,29 +4771,23 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v7_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); - if (r) - return r; } /* reserve GDS, GWS and OA resource for gfx */ @@ -4969,8 +5018,8 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, u32 mec_int_cntl, mec_int_cntl_reg; /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. 
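+ * Because amdgpu now owns the whole first MEC, the switch below
+ * selects the matching CP_ME1_PIPEn_INT_CNTL register for each of
+ * pipes 0-3 rather than handling pipe 0 only.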
*/ @@ -4979,6 +5028,15 @@ static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, case 0: mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; + break; default: DRM_DEBUG("invalid pipe %d\n", pipe); return; @@ -5336,6 +5394,12 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; + + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; memset(cu_info, 0, sizeof(*cu_info)); @@ -5354,9 +5418,9 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v7_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h index 2f5164cc0e53..6fb9c1524691 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h @@ -29,4 +29,9 @@ extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block; extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block; +struct amdgpu_device; +struct cik_mqd; + +int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 404d12785853..142924212b43 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -52,7 +52,7 @@ #include "smu/smu_7_1_3_d.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_NUM_COMPUTE_RINGS 8 +#define GFX8_MEC_HPD_SIZE 2048 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -657,10 +657,8 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev); static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev); static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev); -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t addr); -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev); -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev); +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring); +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev) { @@ -859,7 +857,8 @@ err1: } -static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) { +static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) +{ release_firmware(adev->gfx.pfp_fw); adev->gfx.pfp_fw = NULL; release_firmware(adev->gfx.me_fw); @@ -941,12 +940,6 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - /* chain ib ucode isn't formal released, just disable it by far - * TODO: when ucod ready we should use ucode version to judge 
if - * chain-ib support or not. - */ - adev->virt.chained_ib_support = false; - adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); @@ -960,6 +953,17 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + /* + * Support for MCBP/Virtualization in combination with chained IBs was + * formally released in feature version #46 + */ + if (adev->gfx.ce_feature_version >= 46 && + adev->gfx.pfp_feature_version >= 46) { + adev->virt.chained_ib_support = true; + DRM_INFO("Chained IB support enabled!\n"); + } else + adev->virt.chained_ib_support = false; + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) @@ -1373,64 +1377,22 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev) } } -static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; - - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); - - return r; -} -static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); -} - -#define MEC_HPD_SIZE 2048 - static int gfx_v8_0_mec_init(struct amdgpu_device *adev) { int r; u32 *hpd; + size_t mec_hpd_size; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + + mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -1459,7 +1421,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return r; } - memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE); + memset(hpd, 0, mec_hpd_size); amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj); amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj); @@ -1467,38 +1429,6 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); -} - -static int gfx_v8_0_kiq_init(struct amdgpu_device *adev) -{ - int r; - u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT,
&kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); - return r; - } - - memset(hpd, 0, MEC_HPD_SIZE); - - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); - - return 0; -} - static const u32 vgpr_init_compute_shader[] = { 0x7e000209, 0x7e020208, @@ -1907,46 +1837,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 2; - - switch (adev->pdev->revision) { - case 0xc4: - case 0x84: - case 0xc8: - case 0xcc: - case 0xe1: - case 0xe3: - /* B10 */ - adev->gfx.config.max_cu_per_sh = 8; - break; - case 0xc5: - case 0x81: - case 0x85: - case 0xc9: - case 0xcd: - case 0xe2: - case 0xe4: - /* B8 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc6: - case 0xca: - case 0xce: - case 0x88: - case 0xe6: - /* B6 */ - adev->gfx.config.max_cu_per_sh = 6; - break; - case 0xc7: - case 0x87: - case 0xcb: - case 0xe5: - case 0x89: - default: - /* B4 */ - adev->gfx.config.max_cu_per_sh = 4; - break; - } - + adev->gfx.config.max_cu_per_sh = 8; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 32; @@ -1963,35 +1854,7 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.max_tile_pipes = 2; adev->gfx.config.max_sh_per_se = 1; adev->gfx.config.max_backends_per_se = 1; - - switch (adev->pdev->revision) { - case 0x80: - case 0x81: - case 0xc0: - case 0xc1: - case 0xc2: - case 0xc4: - case 0xc8: - case 0xc9: - case 0xd6: - case 0xda: - case 0xe9: - case 0xea: - adev->gfx.config.max_cu_per_sh = 3; - break; - case 0x83: - case 0xd0: - case 0xd1: - case 0xd2: - case 0xd4: - case 0xdb: - case 0xe1: - case 0xe2: - default: - adev->gfx.config.max_cu_per_sh = 2; - break; - } - + adev->gfx.config.max_cu_per_sh = 3; adev->gfx.config.max_texture_channel_caches = 2; adev->gfx.config.max_gprs = 256; adev->gfx.config.max_gs_threads = 16; @@ -2083,13 +1946,67 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) return 0; } +static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX8_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v8_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_FIJI: + case CHIP_TONGA: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_POLARIS10: + case 
CHIP_CARRIZO: + adev->gfx.mec.num_mec = 2; + break; + case CHIP_TOPAZ: + case CHIP_STONEY: + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq); if (r) @@ -2151,49 +2068,41 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v8_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, - irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v8_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX8_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v8_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as well as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -2237,11 +2146,9 @@ static int gfx_v8_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - gfx_v8_0_compute_mqd_sw_fini(adev); - gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v8_0_kiq_fini(adev); - } + amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); gfx_v8_0_rlc_fini(adev); @@ -3594,11 +3501,6 @@ static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v8_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -3608,8 +3510,8 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE); - mask = 
gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -3823,7 +3725,7 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) /** * gfx_v8_0_init_compute_vmid - gart enable * - * @rdev: amdgpu_device pointer + * @adev: amdgpu_device pointer * * Initialize compute vmid sh_mem registers * @@ -4481,6 +4383,39 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) return 0; } +static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + u32 tmp; + /* no gfx doorbells on iceland */ + if (adev->asic_type == CHIP_TOPAZ) + return; + + tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); + + if (ring->use_doorbell) { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_OFFSET, ring->doorbell_index); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_HIT, 0); + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, + DOORBELL_EN, 1); + } else { + tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0); + } + + WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); + + if (adev->flags & AMD_IS_APU) + return; + + tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, + DOORBELL_RANGE_LOWER, + AMDGPU_DOORBELL_GFX_RING0); + WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); + + WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, + CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); +} static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) { @@ -4528,34 +4463,7 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) WREG32(mmCP_RB0_BASE, rb_addr); WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr)); - /* no gfx doorbells on iceland */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmCP_RB_DOORBELL_CONTROL); - if (ring->use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_HIT, 0); - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, - DOORBELL_EN, 0); - } - WREG32(mmCP_RB_DOORBELL_CONTROL, tmp); - - if (adev->asic_type == CHIP_TONGA) { - tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, - DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); - WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); - - WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, - CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK); - } - - } - + gfx_v8_0_set_cpg_door_bell(adev, ring); /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); @@ -4628,29 +4536,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - ring->mqd_ptr = NULL; - ring->mqd_gpu_addr = 0; - } - } -} - /* KIQ functions */ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) { @@ -4666,45 +4551,161 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) WREG32(mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v8_0_kiq_enable(struct 
amdgpu_ring *ring) +static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + /* map queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); + amdgpu_ring_write(kiq_ring, + PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) | + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 
0 : 1)); /* doorbell */ + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); + amdgpu_ring_commit(kiq_ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } -static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) +static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + int r, i; - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, 0x21010000); - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) | - (ring->queue << 26) | - (ring->pipe << 29) | - ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */ - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + r = amdgpu_ring_alloc(kiq_ring, 6 + 3); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + /* unmap queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */ + PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */ + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); amdgpu_ring_commit(kiq_ring); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; +} + +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req) +{ + int i, r = 0; + + if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { + WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req); + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + udelay(1); + } + if (i == adev->usec_timeout) + r = -ETIMEDOUT; + } + WREG32(mmCP_HQD_DEQUEUE_REQUEST, 
0); + WREG32(mmCP_HQD_PQ_RPTR, 0); + WREG32(mmCP_HQD_PQ_WPTR, 0); + + return r; } static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) @@ -4714,6 +4715,9 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; uint32_t tmp; + /* init the mqd struct */ + memset(mqd, 0, sizeof(struct vi_mqd)); + mqd->header = 0xC0310800; mqd->compute_pipelinestat_enable = 0x00000001; mqd->compute_static_thread_mgmt_se0 = 0xffffffff; @@ -4729,7 +4733,7 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32(mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -4741,11 +4745,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) mqd->cp_hqd_pq_doorbell_control = tmp; - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr = 0; - /* set the pointer to the MQD */ mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc; mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr); @@ -4815,149 +4814,170 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); mqd->cp_hqd_persistent_state = tmp; + /* set MTYPE */ + tmp = RREG32(mmCP_HQD_IB_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); + tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3); + mqd->cp_hqd_ib_control = tmp; + + tmp = RREG32(mmCP_HQD_IQ_TIMER); + tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3); + mqd->cp_hqd_iq_timer = tmp; + + tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL); + tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3); + mqd->cp_hqd_ctx_save_control = tmp; + + /* defaults */ + mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR); + mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR); + mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY); + mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY); + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO); + mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI); + mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET); + mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE); + mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET); + mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE); + mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS); + mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR); + mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM); + mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES); + /* activate the queue */ mqd->cp_hqd_active = 1; return 0; } -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring) +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, + struct vi_mqd *mqd) { - struct amdgpu_device *adev = ring->adev; - struct vi_mqd *mqd = ring->mqd_ptr; - int j; + uint32_t mqd_reg; + uint32_t *mqd_data; + + /* HQD registers extend from mmCP_MQD_BASE_ADDR to mmCP_HQD_ERROR */ + mqd_data = &mqd->cp_mqd_base_addr_lo; /* disable wptr polling */ WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ 
- WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control); - - /* enable doorbell? */ - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control); + /* program all HQD registers */ + for (mqd_reg = mmCP_HQD_VMID; mqd_reg <= mmCP_HQD_EOP_CONTROL; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* disable the queue if it's active */ - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); + /* Tonga errata: EOP RPTR/WPTR should be left unmodified. + * This is safe since EOP RPTR==WPTR for any inactive HQD + * on ASICs that do not support context-save. + * EOP writes/reads can start anywhere in the ring. + */ + if (adev->asic_type != CHIP_TONGA) { + WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr); + WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr); + WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem); } - /* set the pointer to the MQD */ - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); + for (mqd_reg = mmCP_HQD_EOP_EVENTS; mqd_reg <= mmCP_HQD_ERROR; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* set MQD vmid to 0 */ - WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control); + /* activate the HQD */ + for (mqd_reg = mmCP_MQD_BASE_ADDR; mqd_reg <= mmCP_HQD_ACTIVE; mqd_reg++) + WREG32(mqd_reg, mqd_data[mqd_reg - mmCP_MQD_BASE_ADDR]); - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); + return 0; +} - /* set up the HQD, this is similar to CP_RB0_CNTL */ - WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control); +static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +{ + int r = 0; + struct amdgpu_device *adev = ring->adev; + struct vi_mqd *mqd = ring->mqd_ptr; + int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - /* set the wb address whether it's enabled or not */ - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); + gfx_v8_0_kiq_setting(ring); - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi); + if (adev->gfx.in_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - /* enable the doorbell if requested */ - if (ring->use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - AMDGPU_DOORBELL_MEC_RING7 << 2); + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + r = gfx_v8_0_deactivate_hqd(adev, 1); + if (r) { + dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name); + goto out_unlock; } - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 
mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - - /* set the vmid for the queue */ - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state); + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else { + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_mqd_init(ring); + r = gfx_v8_0_deactivate_hqd(adev, 1); + if (r) { + dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name); + goto out_unlock; + } + gfx_v8_0_mqd_commit(adev, mqd); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); - /* activate the queue */ - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } - if (ring->use_doorbell) - WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); + return r; - return 0; +out_unlock: + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + return r; } -static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) +static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct vi_mqd *mqd = ring->mqd_ptr; - bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - - if (is_kiq) { - gfx_v8_0_kiq_setting(&kiq->ring); - } else { - mqd_idx = ring - &adev->gfx.compute_ring[0]; - } + int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->gfx.in_reset) { - memset((void *)mqd, 0, sizeof(*mqd)); + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { mutex_lock(&adev->srbm_mutex); vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v8_0_mqd_init(ring); - if (is_kiq) - gfx_v8_0_kiq_init_register(ring); vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); - } else { /* for GPU_RESET case */ + } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); - /* reset ring buffer */ ring->wptr = 0; amdgpu_ring_clear_ring(ring); - - if (is_kiq) { - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v8_0_kiq_init_register(ring); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + } else { + amdgpu_ring_clear_ring(ring); } - - if (is_kiq) - gfx_v8_0_kiq_enable(ring); - else - gfx_v8_0_map_queue_enable(&kiq->ring, ring); - return 0; } +static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) +{ + if (adev->asic_type > CHIP_TONGA) { + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + } + /* enable doorbells */ + WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); +} + static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { struct amdgpu_ring *ring = NULL; @@ -4981,13 +5001,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) if (r) goto done; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; 
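The split between gfx_v8_0_kiq_init_queue and gfx_v8_0_kcq_init_queue above boils down to one pattern: program the MQD once under SRBM selection and snapshot it to adev->gfx.mec.mqd_backup[], then on a GPU reset restore the snapshot and clear the ring instead of rebuilding the state from scratch. A minimal sketch of the KCQ side of that flow, with srbm_select()/srbm_select_none()/mqd_init() as illustrative stand-ins for the real vi_srbm_select()/gfx_v8_0_mqd_init() calls:

/* Sketch only: the MQD snapshot/restore pattern used by the KCQ path.
 * srbm_select(), srbm_select_none() and mqd_init() are stand-ins, not
 * the patch's API.
 */
static int kcq_init_or_restore(struct amdgpu_ring *ring, void *backup,
			       size_t len, bool in_reset)
{
	if (!in_reset) {
		srbm_select(ring);	/* route register access to this queue */
		mqd_init(ring);		/* fill ring->mqd_ptr with queue state */
		srbm_select_none(ring);
		if (backup)
			memcpy(backup, ring->mqd_ptr, len);	/* snapshot */
	} else {
		if (backup)
			memcpy(ring->mqd_ptr, backup, len);	/* restore */
		ring->wptr = 0;				/* ring restarts empty */
		amdgpu_ring_clear_ring(ring);		/* wipe stale commands */
	}
	return 0;
}

On the real path the restored queue is then handed to the KIQ, which maps it with a MAP_QUEUES packet (see gfx_v8_0_kiq_kcq_enable above) rather than the driver writing the HQD registers directly.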
@@ -4996,272 +5009,41 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); if (!r) { - r = gfx_v8_0_kiq_init_queue(ring); + r = gfx_v8_0_kcq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } amdgpu_bo_unreserve(ring->mqd_obj); if (r) goto done; - - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; } -done: - return r; -} + gfx_v8_0_set_mec_doorbell_range(adev); -static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int r, i, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; - u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct vi_mqd *mqd; - - /* init the queues. */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct vi_mqd), - PAGE_SIZE, true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v8_0_cp_compute_fini(adev); - return r; - } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct vi_mqd)); - - mqd = (struct vi_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - /* write the EOP addr */ - WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr); - WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - - /* set the VMID assigned */ - WREG32(mmCP_HQD_VMID, 0); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32(mmCP_HQD_EOP_CONTROL, tmp); - - /* disable wptr polling */ - tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL); - tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0); - WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp); - - mqd->cp_hqd_eop_base_addr_lo = - RREG32(mmCP_HQD_EOP_BASE_ADDR); - mqd->cp_hqd_eop_base_addr_hi = - RREG32(mmCP_HQD_EOP_BASE_ADDR_HI); - - /* enable doorbell? 
*/ - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - } else { - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr= 0; - if (RREG32(mmCP_HQD_ACTIVE) & 1) { - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - } - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32(mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32(mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32(mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32(mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - if ((adev->asic_type == CHIP_CARRIZO) || - (adev->asic_type == CHIP_FIJI) || - (adev->asic_type == CHIP_STONEY) || - (adev->asic_type == CHIP_POLARIS11) || - (adev->asic_type == CHIP_POLARIS10) || - (adev->asic_type == CHIP_POLARIS12)) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, - 
AMDGPU_DOORBELL_MEC_RING7 << 2); - } - tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - ring->wptr = 0; - mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr); - WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr); - mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32(mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32(mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - if (adev->asic_type == CHIP_STONEY || - adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12) { - tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1); - WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp); - } - - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - } + r = gfx_v8_0_kiq_kcq_enable(adev); + if (r) + goto done; - if (use_doorbell) { - tmp = RREG32(mmCP_PQ_STATUS); - tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - WREG32(mmCP_PQ_STATUS, tmp); + /* Test KIQ */ + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) { + ring->ready = false; + goto done; } - gfx_v8_0_cp_compute_enable(adev, true); - + /* Test KCQs */ for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - + ring = &adev->gfx.compute_ring[i]; ring->ready = true; r = amdgpu_ring_test_ring(ring); if (r) ring->ready = false; } - return 0; +done: + return r; } static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) @@ -5314,10 +5096,7 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev)) - r = gfx_v8_0_kiq_resume(adev); - else - r = gfx_v8_0_cp_compute_resume(adev); + r = gfx_v8_0_kiq_resume(adev); if (r) return r; @@ -5359,9 +5138,9 @@ static int gfx_v8_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } + gfx_v8_0_kiq_kcq_disable(adev); gfx_v8_0_cp_enable(adev, false); gfx_v8_0_rlc_stop(adev); - gfx_v8_0_cp_compute_fini(adev); amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE); @@ -5372,15 +5151,18 @@ static int gfx_v8_0_hw_fini(void *handle) static int gfx_v8_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - + adev->gfx.in_suspend = true; return gfx_v8_0_hw_fini(adev); } static int gfx_v8_0_resume(void *handle) { + int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return gfx_v8_0_hw_init(adev); + r = gfx_v8_0_hw_init(adev); + adev->gfx.in_suspend = false; + return r; } static bool gfx_v8_0_is_idle(void *handle) @@ -5469,25 +5251,6 @@ static 
bool gfx_v8_0_check_soft_reset(void *handle) } } -static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - int i; - - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) { - WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK)) - break; - udelay(1); - } - } - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5517,7 +5280,11 @@ static int gfx_v8_0_pre_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_inactive_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } /* Disable MEC parsing/prefetching */ gfx_v8_0_cp_compute_enable(adev, false); @@ -5588,18 +5355,6 @@ static int gfx_v8_0_soft_reset(void *handle) return 0; } -static void gfx_v8_0_init_hqd(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - mutex_lock(&adev->srbm_mutex); - vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0); - WREG32(mmCP_HQD_PQ_RPTR, 0); - WREG32(mmCP_HQD_PQ_WPTR, 0); - vi_srbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); -} - static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -5625,9 +5380,13 @@ static int gfx_v8_0_post_soft_reset(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - gfx_v8_0_init_hqd(adev, ring); + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v8_0_deactivate_hqd(adev, 2); + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } - gfx_v8_0_cp_compute_resume(adev); + gfx_v8_0_kiq_resume(adev); } gfx_v8_0_rlc_start(adev); @@ -5773,7 +5532,7 @@ static int gfx_v8_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v8_0_gfx_funcs; gfx_v8_0_set_ring_funcs(adev); gfx_v8_0_set_irq_funcs(adev); @@ -6265,6 +6024,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); if (temp != data) WREG32(mmRLC_CGCG_CGLS_CTRL, data); + /* enable interrupts again for PG */ + gfx_v8_0_enable_gui_idle_interrupt(adev, true); } gfx_v8_0_wait_for_rlc_serdes(adev); @@ -6568,9 +6329,13 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, control |= ib->length_dw | (vm_id << 24); - if (amdgpu_sriov_vf(ring->adev) && ib->flags & AMDGPU_IB_FLAG_PREEMPT) + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { control |= INDIRECT_BUFFER_PRE_ENB(1); + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v8_0_ring_emit_de_meta(ring); + } + amdgpu_ring_write(ring, header); amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN @@ -6753,8 +6518,7 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) uint32_t 
dw2 = 0; if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_ce_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); + gfx_v8_0_ring_emit_ce_meta(ring); dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -6780,10 +6544,6 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); - - if (amdgpu_sriov_vf(ring->adev)) - gfx_v8_0_ring_emit_de_meta_init(ring, - (flags & AMDGPU_VM_DOMAIN) ? AMDGPU_CSA_VADDR : ring->adev->virt.csa_vmid0_addr); } static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) @@ -6813,7 +6573,6 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; } - static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -6851,15 +6610,27 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, int me, int pipe, enum amdgpu_interrupt_state state) { + u32 mec_int_cntl, mec_int_cntl_reg; + /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. */ if (me == 1) { switch (pipe) { case 0: + mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL; + break; + case 1: + mec_int_cntl_reg = mmCP_ME1_PIPE1_INT_CNTL; + break; + case 2: + mec_int_cntl_reg = mmCP_ME1_PIPE2_INT_CNTL; + break; + case 3: + mec_int_cntl_reg = mmCP_ME1_PIPE3_INT_CNTL; break; default: DRM_DEBUG("invalid pipe %d\n", pipe); @@ -6870,8 +6641,20 @@ static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, return; } - WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 
0 : 1); + switch (state) { + case AMDGPU_IRQ_STATE_DISABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + case AMDGPU_IRQ_STATE_ENABLE: + mec_int_cntl = RREG32(mec_int_cntl_reg); + mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK; + WREG32(mec_int_cntl_reg, mec_int_cntl); + break; + default: + break; + } } static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev, @@ -6992,8 +6775,6 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, { struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - switch (type) { case AMDGPU_CP_KIQ_IRQ_DRIVER0: WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, @@ -7023,8 +6804,6 @@ static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, u8 me_id, pipe_id, queue_id; struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ); - me_id = (entry->ring_id & 0x0c) >> 2; pipe_id = (entry->ring_id & 0x03) >> 0; queue_id = (entry->ring_id & 0x70) >> 4; @@ -7257,7 +7036,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) | RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask; } @@ -7268,9 +7047,15 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; unsigned disable_masks[4 * 2]; + u32 ao_cu_num; memset(cu_info, 0, sizeof(*cu_info)); + if (adev->flags & AMD_IS_APU) + ao_cu_num = 2; + else + ao_cu_num = adev->gfx.config.max_cu_per_sh; + amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2); mutex_lock(&adev->grbm_idx_mutex); @@ -7286,9 +7071,9 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) bitmap = gfx_v8_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < ao_cu_num) ao_bitmap |= mask; counter ++; } @@ -7323,7 +7108,7 @@ const struct amdgpu_ip_block_version gfx_v8_1_ip_block = .funcs = &gfx_v8_0_ip_funcs, }; -static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring) { uint64_t ce_payload_addr; int cnt_ce; @@ -7333,10 +7118,12 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c } ce_payload = {}; if (ring->adev->virt.chained_ib_support) { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload); cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2; } else { - ce_payload_addr = csa_addr + offsetof(struct vi_gfx_meta_data, ce_payload); + ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 + + offsetof(struct vi_gfx_meta_data, ce_payload); cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2; } @@ -7350,15 +7137,16 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write_multiple(ring, (void *)&ce_payload, cnt_ce - 2); } -static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring 
*ring, uint64_t csa_addr) +static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring) { - uint64_t de_payload_addr, gds_addr; + uint64_t de_payload_addr, gds_addr, csa_addr; int cnt_de; static union { struct vi_de_ib_state regular; struct vi_de_ib_state_chained_ib chained; } de_payload = {}; + csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096; gds_addr = csa_addr + 4096; if (ring->adev->virt.chained_ib_support) { de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7381,68 +7169,3 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c amdgpu_ring_write(ring, upper_32_bits(de_payload_addr)); amdgpu_ring_write_multiple(ring, (void *)&de_payload, cnt_de - 2); } - -/* create MQD for each compute queue */ -static int gfx_v8_0_compute_mqd_sw_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - - /* create MQD for each KCQ */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, &ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* prepare MQD backup */ - adev->gfx.mec.mqd_backup[i] = kmalloc(sizeof(struct vi_mqd), GFP_KERNEL); - if (!adev->gfx.mec.mqd_backup[i]) - dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name); - } - } - - return 0; -} - -static void gfx_v8_0_compute_mqd_sw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int i; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - kfree(adev->gfx.mec.mqd_backup[i]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); - } - - ring = &adev->gfx.kiq.ring; - kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]); - amdgpu_bo_free_kernel(&ring->mqd_obj, - &ring->mqd_gpu_addr, - &ring->mqd_ptr); -} diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h index 788cc3ab584b..ec3f11fa986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h @@ -27,4 +27,9 @@ extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block; extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block; +struct amdgpu_device; +struct vi_mqd; + +int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 125b11950071..ba228f613027 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -38,8 +38,17 @@ #include "v9_structs.h" #define GFX9_NUM_GFX_RINGS 1 -#define GFX9_NUM_COMPUTE_RINGS 8 -#define RLCG_UCODE_LOADING_START_ADDRESS 0x2000 +#define GFX9_MEC_HPD_SIZE 2048 +#define 
RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L +#define GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH 34 + +#define mmPWR_MISC_CNTL_STATUS 0x0183 +#define mmPWR_MISC_CNTL_STATUS_BASE_IDX 0 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT 0x0 +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT 0x1 +#define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK 0x00000001L +#define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK 0x00000006L MODULE_FIRMWARE("amdgpu/vega10_ce.bin"); MODULE_FIRMWARE("amdgpu/vega10_pfp.bin"); @@ -48,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin"); MODULE_FIRMWARE("amdgpu/vega10_mec2.bin"); MODULE_FIRMWARE("amdgpu/vega10_rlc.bin"); +MODULE_FIRMWARE("amdgpu/raven_ce.bin"); +MODULE_FIRMWARE("amdgpu/raven_pfp.bin"); +MODULE_FIRMWARE("amdgpu/raven_me.bin"); +MODULE_FIRMWARE("amdgpu/raven_mec.bin"); +MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); +MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); + static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = { {SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), @@ -86,14 +102,27 @@ static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] = static const u32 golden_settings_gc_9_0[] = { - SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00ffeff, 0x00000400, + SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, + SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1), 0x0000000f, 0x01000107, SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x4a2c0e68, SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0xb5d3f197, - SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff + SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000003ff, + SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 }; static const u32 golden_settings_gc_9_0_vg10[] = @@ -104,11 +133,47 @@ static const u32 golden_settings_gc_9_0_vg10[] = SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x2a114042, SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0x00008000, 0x00048000, SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, - SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800, - SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1),0x0000000f, 0x00000007 + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x00001800, 0x00000800 +}; + +static const u32 golden_settings_gc_9_1[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL), 0xfffdf3cf, 0x00014104, + 
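/* each golden-settings entry is a (register, and_mask, or_value) triplet for amdgpu_program_register_sequence(): the and_mask bits are cleared and or_value is ORed in, while an and_mask of 0xffffffff writes or_value verbatim */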
SOC15_REG_OFFSET(GC, 0, mmCPC_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPF_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmCPG_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2), 0xf00fffff, 0x00000420, + SOC15_REG_OFFSET(GC, 0, mmGB_GPU_ID), 0x0000000f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmIA_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3), 0x00000003, 0x82400024, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE), 0x3fffffff, 0x00000001, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE), 0x0000ff0f, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_0), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_1), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UTCL1_CNTL_2), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_UTCL1_CNTL), 0x08000000, 0x08000080, + SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX), 0xfffffeef, 0x010b0000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_HI), 0xffffffff, 0x00000000, + SOC15_REG_OFFSET(GC, 0, mmTCP_CHAN_STEER_LO), 0xffffffff, 0x00003120, + SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION), 0x3fff3af3, 0x19200000, + SOC15_REG_OFFSET(GC, 0, mmVGT_GS_MAX_WAVE_ID), 0x00000fff, 0x000000ff, + SOC15_REG_OFFSET(GC, 0, mmWD_UTCL1_CNTL), 0x08000000, 0x08000080 +}; + +static const u32 golden_settings_gc_9_1_rv1[] = +{ + SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL_3), 0x30000000, 0x10000000, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), 0xffff77ff, 0x24000042, + SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG_READ), 0xffff77ff, 0x24000042, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE_1), 0xffffffff, 0x04048000, + SOC15_REG_OFFSET(GC, 0, mmPA_SC_MODE_CNTL_1), 0x06000000, 0x06000000, + SOC15_REG_OFFSET(GC, 0, mmRMI_UTCL1_CNTL2), 0x00030000, 0x00020000, + SOC15_REG_OFFSET(GC, 0, mmTD_CNTL), 0x01bd9f33, 0x00000800 }; #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 +#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); @@ -118,6 +183,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) { @@ -130,6 +196,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_gc_9_0_vg10, (const u32)ARRAY_SIZE(golden_settings_gc_9_0_vg10)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_1, + (const u32)ARRAY_SIZE(golden_settings_gc_9_1)); + amdgpu_program_register_sequence(adev, + golden_settings_gc_9_1_rv1, + (const u32)ARRAY_SIZE(golden_settings_gc_9_1_rv1)); + break; default: break; } @@ -284,6 +358,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr; + const struct rlc_firmware_header_v2_0 *rlc_hdr; + unsigned int *tmp = NULL; + unsigned int i = 0; DRM_DEBUG("\n"); @@ -291,6 +368,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA10: chip_name 
= "vega10"; break; + case CHIP_RAVEN: + chip_name = "raven"; + break; default: BUG(); } @@ -333,9 +413,46 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; - adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); + rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version); + adev->gfx.rlc.save_and_restore_offset = + le32_to_cpu(rlc_hdr->save_and_restore_offset); + adev->gfx.rlc.clear_state_descriptor_offset = + le32_to_cpu(rlc_hdr->clear_state_descriptor_offset); + adev->gfx.rlc.avail_scratch_ram_locations = + le32_to_cpu(rlc_hdr->avail_scratch_ram_locations); + adev->gfx.rlc.reg_restore_list_size = + le32_to_cpu(rlc_hdr->reg_restore_list_size); + adev->gfx.rlc.reg_list_format_start = + le32_to_cpu(rlc_hdr->reg_list_format_start); + adev->gfx.rlc.reg_list_format_separate_start = + le32_to_cpu(rlc_hdr->reg_list_format_separate_start); + adev->gfx.rlc.starting_offsets_start = + le32_to_cpu(rlc_hdr->starting_offsets_start); + adev->gfx.rlc.reg_list_format_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_format_size_bytes); + adev->gfx.rlc.reg_list_size_bytes = + le32_to_cpu(rlc_hdr->reg_list_size_bytes); + adev->gfx.rlc.register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes + + adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL); + if (!adev->gfx.rlc.register_list_format) { + err = -ENOMEM; + goto out; + } + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); + + adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; + + tmp = (unsigned int *)((uintptr_t)rlc_hdr + + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); + for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -447,6 +564,261 @@ out: return err; } +static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) +{ + u32 count = 0; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + /* begin clear state */ + count += 2; + /* context control state */ + count += 3; + + for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) + count += 2 + ext->reg_count; + else + return 0; + } + } + + /* end clear state */ + count += 2; + /* clear state */ + count += 2; + + return count; +} + +static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, + volatile u32 *buffer) +{ + u32 count = 0, i; + const struct cs_section_def *sect = NULL; + const struct cs_extent_def *ext = NULL; + + if (adev->gfx.rlc.cs_data == NULL) + return; + if (buffer == NULL) + return; + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + buffer[count++] = 
cpu_to_le32(0x80000000); + buffer[count++] = cpu_to_le32(0x80000000); + + for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { + for (ext = sect->section; ext->extent != NULL; ++ext) { + if (sect->id == SECT_CONTEXT) { + buffer[count++] = + cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); + buffer[count++] = cpu_to_le32(ext->reg_index - + PACKET3_SET_CONTEXT_REG_START); + for (i = 0; i < ext->reg_count; i++) + buffer[count++] = cpu_to_le32(ext->extent[i]); + } else { + return; + } + } + } + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); + buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); + + buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); + buffer[count++] = cpu_to_le32(0); +} + +static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) +{ + uint32_t data; + + /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16)); + + /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); + + /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500); + + mutex_lock(&adev->grbm_idx_mutex); + /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); + + /* set mmRLC_LB_PARAMS = 0x003F_1006 */ + data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); + WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); + + /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ + data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); + data &= 0x0000FFFF; + data |= 0x00C00000; + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); + + /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ + WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); + + /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, + * but used for RLC_LB_CNTL configuration */ + data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; + data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); + data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); + WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); + mutex_unlock(&adev->grbm_idx_mutex); +} + +static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) +{ + WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 
1 : 0); +} + +static void rv_init_cp_jump_table(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me = 5; + u32 bo_offset = 0; + u32 table_offset, table_size; + + /* write the cp table buffer */ + dst_ptr = adev->gfx.rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 1) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 2) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 3) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 4) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec2_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } + + for (i = 0; i < table_size; i ++) { + dst_ptr[bo_offset + i] = + cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); + } + + bo_offset += table_size; + } +} + +static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) +{ + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} + +static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + const struct cs_section_def *cs_data; + int r; + + adev->gfx.rlc.cs_data = gfx9_cs_data; + + cs_data = adev->gfx.rlc.cs_data; + + if (cs_data) { + /* clear state block */ + adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); + if (adev->gfx.rlc.clear_state_obj == NULL) { + r = amdgpu_bo_create_kernel(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_err(adev->dev, + "(%d) failed to create rlc csb bo\n", r); + gfx_v9_0_rlc_fini(adev); + return r; + } + } + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + gfx_v9_0_get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + } + + if (adev->asic_type == CHIP_RAVEN) { + /* TODO: double check the cp_table_size for RV */ + 
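/* presumably 96 dwords of jump table for each of the five microengines copied by rv_init_cp_jump_table() (CE, PFP, ME, MEC, MEC2), rounded up to 2 KiB, plus a 64 KiB GDS save area; the TODO above suggests the exact size is still unverified */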
adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ + if (adev->gfx.rlc.cp_table_obj == NULL) { + r = amdgpu_bo_create_kernel(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); + if (r) { + dev_err(adev->dev, + "(%d) failed to create cp table bo\n", r); + gfx_v9_0_rlc_fini(adev); + return r; + } + } + + rv_init_cp_jump_table(adev); + amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + + gfx_v9_0_init_lbpw(adev); + } + + return 0; +} + static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) { int r; @@ -473,8 +845,6 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev) } } -#define MEC_HPD_SIZE 2048 - static int gfx_v9_0_mec_init(struct amdgpu_device *adev) { int r; @@ -482,20 +852,19 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) const __le32 *fw_data; unsigned fw_size; u32 *fw; + size_t mec_hpd_size; const struct gfx_firmware_header_v1_0 *mec_hdr; - /* - * we assign only 1 pipe because all other pipes will - * be handled by KFD - */ - adev->gfx.mec.num_mec = 1; - adev->gfx.mec.num_pipe = 1; - adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8; + bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + + /* take ownership of the relevant compute queues */ + amdgpu_gfx_compute_queue_acquire(adev); + mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; if (adev->gfx.mec.hpd_eop_obj == NULL) { r = amdgpu_bo_create(adev, - adev->gfx.mec.num_queue * MEC_HPD_SIZE, + mec_hpd_size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL, &adev->gfx.mec.hpd_eop_obj); @@ -575,131 +944,6 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) return 0; } -static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL); -} - -static int gfx_v9_0_kiq_init(struct amdgpu_device *adev) -{ - int r; - u32 *hpd; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - - r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj, - &kiq->eop_gpu_addr, (void **)&hpd); - if (r) { - dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r); - return r; - } - - memset(hpd, 0, MEC_HPD_SIZE); - - r = amdgpu_bo_reserve(kiq->eop_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r); - amdgpu_bo_kunmap(kiq->eop_obj); - amdgpu_bo_unreserve(kiq->eop_obj); - - return 0; -} - -static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq; - int r = 0; - - r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); - if (r) - return r; - - ring->adev = NULL; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; - if (adev->gfx.mec2_fw) { - ring->me = 2; - ring->pipe = 0; - } else { - ring->me = 1; - ring->pipe = 1; - } - - ring->queue = 0; - ring->eop_gpu_addr = kiq->eop_gpu_addr; - sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue); - r = amdgpu_ring_init(adev, ring, 1024, - irq, AMDGPU_CP_KIQ_IRQ_DRIVER0); - if (r) - dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r); - - return r; -} -static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq) -{ - amdgpu_wb_free(ring->adev, 
ring->adev->virt.reg_val_offs); - amdgpu_ring_fini(ring); -} - -/* create MQD for each compute queue */ -static int gfx_v9_0_compute_mqd_sw_init(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int r, i; - - /* create MQD for KIQ */ - ring = &adev->gfx.kiq.ring; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /*TODO: prepare MQD backup */ - } - - /* create MQD for each KCQ */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - if (!ring->mqd_obj) { - r = amdgpu_bo_create_kernel(adev, sizeof(struct v9_mqd), PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj, - &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - if (r) { - dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r); - return r; - } - - /* TODO: prepare MQD backup */ - } - } - - return 0; -} - -static void gfx_v9_0_compute_mqd_sw_fini(struct amdgpu_device *adev) -{ - struct amdgpu_ring *ring = NULL; - int i; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - ring = &adev->gfx.compute_ring[i]; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); - } - - ring = &adev->gfx.kiq.ring; - amdgpu_bo_free_kernel(&ring->mqd_obj, &ring->mqd_gpu_addr, (void **)&ring->mqd_ptr); -} - static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { WREG32_SOC15(GC, 0, mmSQ_IND_INDEX, @@ -770,23 +1014,21 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: - adev->gfx.config.max_shader_engines = 4; - adev->gfx.config.max_cu_per_sh = 16; - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_backends_per_se = 4; - adev->gfx.config.max_texture_channel_caches = 16; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 32; adev->gfx.config.max_hw_contexts = 8; - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - adev->gfx.config.gs_vgt_table_depth = 32; - adev->gfx.config.gs_prim_buffer_depth = 1792; gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN; break; + case CHIP_RAVEN: + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; + break; default: BUG(); break; @@ -1023,13 +1265,61 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) return 0; } +static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + int mec, int pipe, int queue) +{ + int r; + unsigned irq_type; + struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id]; + + ring = &adev->gfx.compute_ring[ring_id]; + + /* mec0 is me1 */ + ring->me = mec + 1; + ring->pipe = pipe; + ring->queue = queue; + + ring->ring_obj = NULL; + ring->use_doorbell = true; + ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1; + ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + + (ring_id * GFX9_MEC_HPD_SIZE); + sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); + + irq_type = 
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + + ring->pipe; + + /* type-2 packets are deprecated on MEC, use type-3 instead */ + r = amdgpu_ring_init(adev, ring, 1024, + &adev->gfx.eop_irq, irq_type); + if (r) + return r; + + + return 0; +} + static int gfx_v9_0_sw_init(void *handle) { - int i, r; + int i, j, k, r, ring_id; struct amdgpu_ring *ring; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_RAVEN: + adev->gfx.mec.num_mec = 2; + break; + default: + adev->gfx.mec.num_mec = 1; + break; + } + + adev->gfx.mec.num_pipe_per_mec = 4; + adev->gfx.mec.num_queue_per_pipe = 8; + /* KIQ event */ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq); if (r) @@ -1062,6 +1352,12 @@ static int gfx_v9_0_sw_init(void *handle) return r; } + r = gfx_v9_0_rlc_init(adev); + if (r) { + DRM_ERROR("Failed to init rlc BOs!\n"); + return r; + } + r = gfx_v9_0_mec_init(adev); if (r) { DRM_ERROR("Failed to init MEC BOs!\n"); @@ -1081,49 +1377,40 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - /* set up the compute queues */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - unsigned irq_type; - - /* max 32 queues per MEC */ - if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) { - DRM_ERROR("Too many (%d) compute rings!\n", i); - break; + /* set up the compute queues - allocate horizontally across pipes */ + ring_id = 0; + for (i = 0; i < adev->gfx.mec.num_mec; ++i) { + for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { + for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { + if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + continue; + + r = gfx_v9_0_compute_ring_init(adev, + ring_id, + i, k, j); + if (r) + return r; + + ring_id++; + } } - ring = &adev->gfx.compute_ring[i]; - ring->ring_obj = NULL; - ring->use_doorbell = true; - ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1; - ring->me = 1; /* first MEC */ - ring->pipe = i / 8; - ring->queue = i % 8; - ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE); - sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); - irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe; - /* type-2 packets are deprecated on MEC, use type-3 instead */ - r = amdgpu_ring_init(adev, ring, 1024, - &adev->gfx.eop_irq, irq_type); - if (r) - return r; } - if (amdgpu_sriov_vf(adev)) { - r = gfx_v9_0_kiq_init(adev); - if (r) { - DRM_ERROR("Failed to init KIQ BOs!\n"); - return r; - } + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); + if (r) { + DRM_ERROR("Failed to init KIQ BOs!\n"); + return r; + } - kiq = &adev->gfx.kiq; - r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq); - if (r) - return r; + kiq = &adev->gfx.kiq; + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + if (r) + return r; - /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = gfx_v9_0_compute_mqd_sw_init(adev); - if (r) - return r; - } + /* create MQD for all compute queues as wel as KIQ for SRIOV case */ + r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct v9_mqd)); + if (r) + return r; /* reserve GDS, GWS and OA resource for gfx */ r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, @@ -1170,11 +1457,9 @@ static int gfx_v9_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - if (amdgpu_sriov_vf(adev)) { - 
gfx_v9_0_compute_mqd_sw_fini(adev); - gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); - gfx_v9_0_kiq_fini(adev); - } + amdgpu_gfx_compute_mqd_sw_fini(adev); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq); + amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); gfx_v9_0_ngg_fini(adev); @@ -1208,11 +1493,6 @@ static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, data); } -static u32 gfx_v9_0_create_bitmask(u32 bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; @@ -1223,8 +1503,8 @@ static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev) data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_backends_per_se / - adev->gfx.config.max_sh_per_se); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); return (~data) & mask; } @@ -1272,7 +1552,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) sh_mem_config = SH_MEM_ADDRESS_MODE_64 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << - SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; mutex_lock(&adev->srbm_mutex); for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) { @@ -1370,9 +1650,6 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, { u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0); - if (enable) - return; - tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0); tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 
1 : 0); @@ -1381,6 +1658,373 @@ static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp); } +static void gfx_v9_0_init_csb(struct amdgpu_device *adev) +{ + /* csib */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI), + adev->gfx.rlc.clear_state_gpu_addr >> 32); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO), + adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH), + adev->gfx.rlc.clear_state_size); +} + +static void gfx_v9_0_parse_ind_reg_list(int *register_list_format, + int indirect_offset, + int list_size, + int *unique_indirect_regs, + int *unique_indirect_reg_count, + int max_indirect_reg_count, + int *indirect_start_offsets, + int *indirect_start_offsets_count, + int max_indirect_start_offsets_count) +{ + int idx; + bool new_entry = true; + + for (; indirect_offset < list_size; indirect_offset++) { + + if (new_entry) { + new_entry = false; + indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset; + *indirect_start_offsets_count = *indirect_start_offsets_count + 1; + BUG_ON(*indirect_start_offsets_count >= max_indirect_start_offsets_count); + } + + if (register_list_format[indirect_offset] == 0xFFFFFFFF) { + new_entry = true; + continue; + } + + indirect_offset += 2; + + /* look for the matching indice */ + for (idx = 0; idx < *unique_indirect_reg_count; idx++) { + if (unique_indirect_regs[idx] == + register_list_format[indirect_offset]) + break; + } + + if (idx >= *unique_indirect_reg_count) { + unique_indirect_regs[*unique_indirect_reg_count] = + register_list_format[indirect_offset]; + idx = *unique_indirect_reg_count; + *unique_indirect_reg_count = *unique_indirect_reg_count + 1; + BUG_ON(*unique_indirect_reg_count >= max_indirect_reg_count); + } + + register_list_format[indirect_offset] = idx; + } +} + +static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) +{ + int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + int unique_indirect_reg_count = 0; + + int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}; + int indirect_start_offsets_count = 0; + + int list_size = 0; + int i = 0; + u32 tmp = 0; + + u32 *register_list_format = + kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL); + if (!register_list_format) + return -ENOMEM; + memcpy(register_list_format, adev->gfx.rlc.register_list_format, + adev->gfx.rlc.reg_list_format_size_bytes); + + /* setup unique_indirect_regs array and indirect_start_offsets array */ + gfx_v9_0_parse_ind_reg_list(register_list_format, + GFX9_RLC_FORMAT_DIRECT_REG_LIST_LENGTH, + adev->gfx.rlc.reg_list_format_size_bytes >> 2, + unique_indirect_regs, + &unique_indirect_reg_count, + sizeof(unique_indirect_regs)/sizeof(int), + indirect_start_offsets, + &indirect_start_offsets_count, + sizeof(indirect_start_offsets)/sizeof(int)); + + /* enable auto inc in case it is disabled */ + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); + + /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), + RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), + adev->gfx.rlc.register_restore[i]); + + /* load direct register */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR), 
0); + for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA), + adev->gfx.rlc.register_restore[i]); + + /* load indirect register */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.reg_list_format_start); + for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), + register_list_format[i]); + + /* set save/restore list size */ + list_size = adev->gfx.rlc.reg_list_size_bytes >> 2; + list_size = list_size >> 1; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.reg_restore_list_size); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size); + + /* write the starting offsets to RLC scratch ram */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), + adev->gfx.rlc.starting_offsets_start); + for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), + indirect_start_offsets[i]); + + /* load unique indirect regs*/ + for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, + unique_indirect_regs[i] & 0x3FFFF); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, + unique_indirect_regs[i] >> 20); + } + + kfree(register_list_format); + return 0; +} + +static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev) +{ + u32 tmp = 0; + + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); + tmp |= RLC_SRM_CNTL__SRM_ENABLE_MASK; + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp); +} + +static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS)); + if (enable == true) { + /* enable GFXIP control over CGPG */ + data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + + /* update status */ + data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK; + data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT); + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + } else { + /* restore GFXIP control over GCPG */ + data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data); + } +} + +static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev) +{ + uint32_t data = 0; + + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG)) { + /* init IDLE_POLL_COUNT = 60 */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL)); + data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK; + data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data); + + /* init RLC PG Delay */ + data = 0; + data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT); + data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT); + data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2)); + data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK; + data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT); + 
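/* write the adjusted SERDES command delay back */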
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3)); + data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK; + data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data); + + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL)); + data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK; + + /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */ + data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data); + + pwr_10_0_gfxip_control_over_cgpg(adev, true); + } +} + +static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; + if (default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data = 0; + uint32_t default_data = 0; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + + if (enable == true) { + data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } else { + data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + } +} + +static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); + + if (!enable) + /* read any GFX register to wake up GFX */ + data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL)); +} + +void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= 
RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, default_data; + + default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL)); + if (enable == true) + data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + else + data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK; + if(default_data != data) + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data); +} + +static void gfx_v9_0_init_pg(struct amdgpu_device *adev) +{ + if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | + AMD_PG_SUPPORT_GFX_SMG | + AMD_PG_SUPPORT_GFX_DMG | + AMD_PG_SUPPORT_CP | + AMD_PG_SUPPORT_GDS | + AMD_PG_SUPPORT_RLC_SMU_HS)) { + gfx_v9_0_init_csb(adev); + gfx_v9_0_init_rlc_save_restore_list(adev); + gfx_v9_0_enable_save_restore_machine(adev); + + if (adev->asic_type == CHIP_RAVEN) { + WREG32(mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); + gfx_v9_0_init_gfx_power_gating(adev); + + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); + } else { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_CP) + gfx_v9_0_enable_cp_power_gating(adev, true); + else + gfx_v9_0_enable_cp_power_gating(adev, false); + } + } +} + void gfx_v9_0_rlc_stop(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(GC, 0, mmRLC_CNTL); @@ -1425,7 +2069,7 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) * default is 0x9C4 to create a 100us interval */ WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4); /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr - * to disable the page fault retry interrupts, default is + * to disable the page fault retry interrupts, default is * 0x100 (256) */ WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100); } @@ -1474,6 +2118,8 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) gfx_v9_0_rlc_reset(adev); + gfx_v9_0_init_pg(adev); + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* legacy rlc firmware loading */ r = gfx_v9_0_rlc_load_microcode(adev); @@ -1481,6 +2127,13 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } + if (adev->asic_type == CHIP_RAVEN) { + if (amdgpu_lbpw != 0) + gfx_v9_0_enable_lbpw(adev, true); + else + gfx_v9_0_enable_lbpw(adev, false); + } + gfx_v9_0_rlc_start(adev); return 0; @@ -1559,35 +2212,6 @@ static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev) return 0; } -static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) -{ - u32 count = 0; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; - - /* begin clear state */ - count += 2; - /* context control state */ - count += 3; - - for (sect = gfx9_cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) - count += 2 + ext->reg_count; - else - return 0; - } - } - /* pa_sc_raster_config/pa_sc_raster_config1 */ - count += 4; - /* end clear state */ - count += 2; - /* clear state */ - count += 2; - - return count; -} - static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0]; @@ -1730,13 +2354,6 @@ static void 
gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) udelay(50); } -static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev) -{ - gfx_v9_0_cp_compute_enable(adev, true); - - return 0; -} - static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) { const struct gfx_firmware_header_v1_0 *mec_hdr; @@ -1764,7 +2381,7 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000); WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI, upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr)); - + /* MEC1 */ WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR, mec_hdr->jt_offset); @@ -1779,45 +2396,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev) return 0; } -static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev) -{ - int i, r; - - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - - if (ring->mqd_obj) { - r = amdgpu_bo_reserve(ring->mqd_obj, true); - if (unlikely(r != 0)) - dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r); - - amdgpu_bo_unpin(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - amdgpu_bo_unref(&ring->mqd_obj); - ring->mqd_obj = NULL; - } - } -} - -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring); - -static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev) -{ - int i, r; - for (i = 0; i < adev->gfx.num_compute_rings; i++) { - struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; - if (gfx_v9_0_init_queue(ring)) - dev_warn(adev->dev, "compute queue %d init failed!\n", i); - } - - r = gfx_v9_0_cp_compute_start(adev); - if (r) - return r; - - return 0; -} - /* KIQ functions */ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) { @@ -1833,51 +2411,145 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); } -static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring) +static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) { - amdgpu_ring_alloc(ring, 8); + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + uint64_t queue_mask = 0; + int r, i; + + for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) { + if (!test_bit(i, adev->gfx.mec.queue_bitmap)) + continue; + + /* This situation may be hit in the future if a new HW + * generation exposes more than 64 queues. 
If so, the + * definition of queue_mask needs updating */ + if (WARN_ON(i > (sizeof(queue_mask)*8))) { + DRM_ERROR("Invalid KCQ enabled: %d\n", i); + break; + } + + queue_mask |= (1ull << i); + } + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + /* set resources */ - amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6)); - amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */ - amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */ - amdgpu_ring_write(ring, 0); /* queue mask hi */ - amdgpu_ring_write(ring, 0); /* gws mask lo */ - amdgpu_ring_write(ring, 0); /* gws mask hi */ - amdgpu_ring_write(ring, 0); /* oac mask */ - amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */ - amdgpu_ring_commit(ring); - udelay(50); + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6)); + amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) | + PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */ + amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */ + amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */ + amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */ + amdgpu_ring_write(kiq_ring, 0); /* oac mask */ + amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); + uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); + /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */ + PACKET3_MAP_QUEUES_VMID(0) | /* VMID */ + PACKET3_MAP_QUEUES_QUEUE(ring->queue) | + PACKET3_MAP_QUEUES_PIPE(ring->pipe) | + PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 
0 : 1)) | + PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */ + PACKET3_MAP_QUEUES_ALLOC_FORMAT(1) | /* alloc format: all_on_one_pipe */ + PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */ + PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */ + amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); + amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); + amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); + } + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); + amdgpu_ring_commit(kiq_ring); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } -static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring, - struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = kiq_ring->adev; - uint64_t mqd_addr, wptr_addr; - - mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj); - wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - amdgpu_ring_alloc(kiq_ring, 8); - - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5)); - /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/ - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ - (0 << 4) | /* Queue_Sel */ - (0 << 8) | /* VMID */ - (ring->queue << 13 ) | - (ring->pipe << 16) | - ((ring->me == 1 ? 
0 : 1) << 18) | - (0 << 21) | /*queue_type: normal compute queue */ - (1 << 24) | /* alloc format: all_on_one_pipe */ - (0 << 26) | /* engine_sel: compute */ - (1 << 29)); /* num_queues: must be 1 */ - amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2)); - amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr)); - amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); - amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); +static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev) +{ + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; + uint32_t scratch, tmp = 0; + int r, i; + + r = amdgpu_gfx_scratch_get(adev, &scratch); + if (r) { + DRM_ERROR("Failed to get scratch reg (%d).\n", r); + return r; + } + WREG32(scratch, 0xCAFEDEAD); + + r = amdgpu_ring_alloc(kiq_ring, 6 + 3); + if (r) { + DRM_ERROR("Failed to lock KIQ (%d).\n", r); + amdgpu_gfx_scratch_free(adev, scratch); + return r; + } + /* unmap queues */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, + PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */ + PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */ + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + /* write to scratch for completion */ + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); + amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); + amdgpu_ring_write(kiq_ring, 0xDEADBEEF); amdgpu_ring_commit(kiq_ring); - udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + tmp = RREG32(scratch); + if (tmp == 0xDEADBEEF) + break; + DRM_UDELAY(1); + } + if (i >= adev->usec_timeout) { + DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n", + scratch, tmp); + r = -EINVAL; + } + amdgpu_gfx_scratch_free(adev, scratch); + + return r; } static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) @@ -1902,7 +2574,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); + (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; @@ -2119,47 +2791,69 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct v9_mqd *mqd = ring->mqd_ptr; - bool is_kiq = (ring->funcs->type == AMDGPU_RING_TYPE_KIQ); int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; - if (is_kiq) { - gfx_v9_0_kiq_setting(&kiq->ring); + gfx_v9_0_kiq_setting(ring); + + if (adev->gfx.in_reset) { /* for GPU_RESET case */ + /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); + + /* reset ring buffer */ + ring->wptr = 0; + amdgpu_ring_clear_ring(ring); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_kiq_init_register(ring); + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); } else { - mqd_idx = ring - &adev->gfx.compute_ring[0]; + memset((void *)mqd, 0, sizeof(*mqd)); + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + gfx_v9_0_mqd_init(ring); + gfx_v9_0_kiq_init_register(ring); + 
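/* restore the default GRBM me/pipe/queue selection before releasing srbm_mutex */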
soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); } - if (!adev->gfx.in_reset) { + return 0; +} + +static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct v9_mqd *mqd = ring->mqd_ptr; + int mqd_idx = ring - &adev->gfx.compute_ring[0]; + + if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { memset((void *)mqd, 0, sizeof(*mqd)); mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); gfx_v9_0_mqd_init(ring); - if (is_kiq) - gfx_v9_0_kiq_init_register(ring); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - } else { /* for GPU_RESET case */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); + } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ /* reset MQD to a clean status */ + if (adev->gfx.mec.mqd_backup[mqd_idx]) + memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); /* reset ring buffer */ ring->wptr = 0; - - if (is_kiq) { - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - } + amdgpu_ring_clear_ring(ring); + } else { + amdgpu_ring_clear_ring(ring); } - if (is_kiq) - gfx_v9_0_kiq_enable(ring); - else - gfx_v9_0_map_queue_enable(&kiq->ring, ring); - return 0; } @@ -2194,7 +2888,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) goto done; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); if (!r) { - r = gfx_v9_0_kiq_init_queue(ring); + r = gfx_v9_0_kcq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; } @@ -2203,13 +2897,14 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) goto done; } + r = gfx_v9_0_kiq_kcq_enable(adev); done: return r; } static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) { - int r,i; + int r, i; struct amdgpu_ring *ring; if (!(adev->flags & AMD_IS_APU)) @@ -2230,10 +2925,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) if (r) return r; - if (amdgpu_sriov_vf(adev)) - r = gfx_v9_0_kiq_resume(adev); - else - r = gfx_v9_0_cp_compute_resume(adev); + r = gfx_v9_0_kiq_resume(adev); if (r) return r; @@ -2243,6 +2935,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) ring->ready = false; return r; } + + ring = &adev->gfx.kiq.ring; + ring->ready = true; + r = amdgpu_ring_test_ring(ring); + if (r) + ring->ready = false; + for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -2252,14 +2951,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) ring->ready = false; } - if (amdgpu_sriov_vf(adev)) { - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; - } - gfx_v9_0_enable_gui_idle_interrupt(adev, true); return 0; @@ -2305,9 +2996,9 @@ static int gfx_v9_0_hw_fini(void *handle) pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } + gfx_v9_0_kiq_kcq_disable(adev); gfx_v9_0_cp_enable(adev, false); gfx_v9_0_rlc_stop(adev); - gfx_v9_0_cp_compute_fini(adev); return 0; } @@ -2316,14 +3007,18 @@ static int gfx_v9_0_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + adev->gfx.in_suspend = true; return gfx_v9_0_hw_fini(adev); } static int gfx_v9_0_resume(void *handle) { struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; - return gfx_v9_0_hw_init(adev); + r = gfx_v9_0_hw_init(adev); + adev->gfx.in_suspend = false; + return r; } static bool gfx_v9_0_is_idle(void *handle) @@ -2470,7 +3165,7 @@ static int gfx_v9_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS; + adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS; gfx_v9_0_set_ring_funcs(adev); gfx_v9_0_set_irq_funcs(adev); gfx_v9_0_set_gds_init(adev); @@ -2549,6 +3244,43 @@ static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) } } +static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + /* TODO: double check if we need to perform under safe mode */ + /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { + gfx_v9_0_enable_gfx_cg_power_gating(adev, true); + if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE) + gfx_v9_0_enable_gfx_pipeline_powergating(adev, true); + } else { + gfx_v9_0_enable_gfx_cg_power_gating(adev, false); + gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); + } + + /* gfx_v9_0_exit_rlc_safe_mode(adev); */ +} + +static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, + bool enable) +{ + /* TODO: double check if we need to perform under safe mode */ + /* gfx_v9_0_enter_rlc_safe_mode(adev); */ + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable) + gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true); + else + gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false); + + if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable) + gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true); + else + gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false); + + /* gfx_v9_0_exit_rlc_safe_mode(adev); */ +} + static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { @@ -2739,6 +3471,34 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { static int gfx_v9_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = (state == AMD_PG_STATE_GATE) ? true : false; + + switch (adev->asic_type) { + case CHIP_RAVEN: + if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); + } else { + gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false); + gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false); + } + + if (adev->pg_flags & AMD_PG_SUPPORT_CP) + gfx_v9_0_enable_cp_power_gating(adev, true); + else + gfx_v9_0_enable_cp_power_gating(adev, false); + + /* update gfx cgpg state */ + gfx_v9_0_update_gfx_cg_power_gating(adev, enable); + + /* update mgcg state */ + gfx_v9_0_update_gfx_mg_power_gating(adev, enable); + break; + default: + break; + } + return 0; } @@ -2752,6 +3512,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: gfx_v9_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); break; @@ -2879,31 +3640,33 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) { - u32 header, control = 0; + u32 header, control = 0; - if (ib->flags & AMDGPU_IB_FLAG_CE) - header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); - else - header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); + if (ib->flags & AMDGPU_IB_FLAG_CE) + header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2); + else + header = PACKET3(PACKET3_INDIRECT_BUFFER, 2); - control |= ib->length_dw | (vm_id << 24); + control |= ib->length_dw | (vm_id << 24); - if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) - control |= INDIRECT_BUFFER_PRE_ENB(1); + if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { + control |= INDIRECT_BUFFER_PRE_ENB(1); - amdgpu_ring_write(ring, header); - BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ - amdgpu_ring_write(ring, + if (!(ib->flags & AMDGPU_IB_FLAG_CE)) + gfx_v9_0_ring_emit_de_meta(ring); + } + + amdgpu_ring_write(ring, header); +BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ + amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN - (2 << 0) | + (2 << 0) | #endif - lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, control); + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); } -#define INDIRECT_BUFFER_VALID (1 << 23) - static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_ib *ib, unsigned vm_id, bool ctx_switch) @@ -2971,9 +3734,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; gfx_v9_0_write_data_to_reg(ring, usepfp, true, hub->ctx0_ptb_addr_lo32 + (2 * vm_id), @@ -3130,9 +3892,6 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); amdgpu_ring_write(ring, dw2); amdgpu_ring_write(ring, 0); - - if (amdgpu_sriov_vf(ring->adev)) - gfx_v9_0_ring_emit_de_meta(ring); } static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring) @@ -3160,6 +3919,12 @@ static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne ring->ring[offset] = (ring->ring_size>>2) - offset + cur; } +static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start) +{ + amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0)); + amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* frame_end */ +} + static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg) { struct amdgpu_device *adev = ring->adev; @@ -3208,8 +3973,8 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, u32 mec_int_cntl, mec_int_cntl_reg; /* - * amdgpu controls only pipe 0 of MEC1. That's why this function only - * handles the setting of interrupts for this specific pipe. All other + * amdgpu controls only the first MEC. That's why this function only + * handles the setting of interrupts for this specific MEC. All other * pipes' interrupts are set by amdkfd. 
*/ @@ -3218,6 +3983,15 @@ static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev, case 0: mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); break; + case 1: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL); + break; + case 2: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL); + break; + case 3: + mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL); + break; default: DRM_DEBUG("invalid pipe %d\n", pipe); return; @@ -3494,6 +4268,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, + .emit_tmz = gfx_v9_0_ring_emit_tmz, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -3605,6 +4380,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) { switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs; break; default: @@ -3650,7 +4426,7 @@ static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev) data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - mask = gfx_v9_0_create_bitmask(adev->gfx.config.max_cu_per_sh); + mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh); return (~data) & mask; } @@ -3664,8 +4440,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, if (!adev || !cu_info) return -EINVAL; - memset(cu_info, 0, sizeof(*cu_info)); - mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { @@ -3676,9 +4450,9 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, bitmap = gfx_v9_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < 16; k ++) { + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (bitmap & mask) { - if (counter < 2) + if (counter < adev->gfx.config.max_cu_per_sh) ao_bitmap |= mask; counter ++; } @@ -3697,218 +4471,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, return 0; } -static int gfx_v9_0_init_queue(struct amdgpu_ring *ring) -{ - int r, j; - u32 tmp; - bool use_doorbell = true; - u64 hqd_gpu_addr; - u64 mqd_gpu_addr; - u64 eop_gpu_addr; - u64 wb_gpu_addr; - u32 *buf; - struct v9_mqd *mqd; - struct amdgpu_device *adev; - - adev = ring->adev; - if (ring->mqd_obj == NULL) { - r = amdgpu_bo_create(adev, - sizeof(struct v9_mqd), - PAGE_SIZE,true, - AMDGPU_GEM_DOMAIN_GTT, 0, NULL, - NULL, &ring->mqd_obj); - if (r) { - dev_warn(adev->dev, "(%d) create MQD bo failed\n", r); - return r; - } - } - - r = amdgpu_bo_reserve(ring->mqd_obj, false); - if (unlikely(r != 0)) { - gfx_v9_0_cp_compute_fini(adev); - return r; - } - - r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT, - &mqd_gpu_addr); - if (r) { - dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r); - gfx_v9_0_cp_compute_fini(adev); - return r; - } - r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf); - if (r) { - dev_warn(adev->dev, "(%d) map MQD bo failed\n", r); - gfx_v9_0_cp_compute_fini(adev); - return r; - } - - /* init the mqd struct */ - memset(buf, 0, sizeof(struct v9_mqd)); - - mqd = (struct v9_mqd *)buf; - mqd->header = 0xC0310800; - mqd->compute_pipelinestat_enable = 0x00000001; - mqd->compute_static_thread_mgmt_se0 = 0xffffffff; - mqd->compute_static_thread_mgmt_se1 = 0xffffffff; - mqd->compute_static_thread_mgmt_se2 = 
0xffffffff; - mqd->compute_static_thread_mgmt_se3 = 0xffffffff; - mqd->compute_misc_reserved = 0x00000003; - mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, - ring->pipe, - ring->queue, 0); - /* disable wptr polling */ - WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0); - - /* write the EOP addr */ - BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */ - eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE); - eop_gpu_addr >>= 8; - - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr)); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr)); - mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr); - mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr); - - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, - (order_base_2(MEC_HPD_SIZE / 4) - 1)); - WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp); - - /* enable doorbell? */ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - if (use_doorbell) - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - else - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0); - - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp); - mqd->cp_hqd_pq_doorbell_control = tmp; - - /* disable the queue if it's active */ - ring->wptr = 0; - mqd->cp_hqd_dequeue_request = 0; - mqd->cp_hqd_pq_rptr = 0; - mqd->cp_hqd_pq_wptr_lo = 0; - mqd->cp_hqd_pq_wptr_hi = 0; - if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) { - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1); - for (j = 0; j < adev->usec_timeout; j++) { - if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); - } - - /* set the pointer to the MQD */ - mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc; - mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo); - WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi); - - /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); - WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp); - mqd->cp_mqd_control = tmp; - - /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */ - hqd_gpu_addr = ring->gpu_addr >> 8; - mqd->cp_hqd_pq_base_lo = hqd_gpu_addr; - mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi); - - /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, - (order_base_2(ring->ring_size / 4) - 1)); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, - ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8)); -#ifdef __BIG_ENDIAN - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1); -#endif - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1); - 
tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp); - mqd->cp_hqd_pq_control = tmp; - - /* set the wb address wether it's enabled or not */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4); - mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_rptr_report_addr_hi = - upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR, - mqd->cp_hqd_pq_rptr_report_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, - mqd->cp_hqd_pq_rptr_report_addr_hi); - - /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */ - wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc; - mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff; - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR, - mqd->cp_hqd_pq_wptr_poll_addr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, - mqd->cp_hqd_pq_wptr_poll_addr_hi); - - /* enable the doorbell if requested */ - if (use_doorbell) { - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, - (AMDGPU_DOORBELL64_KIQ * 2) << 2); - WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2); - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, - DOORBELL_OFFSET, ring->doorbell_index); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0); - tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - mqd->cp_hqd_pq_doorbell_control = tmp; - - } else { - mqd->cp_hqd_pq_doorbell_control = 0; - } - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, - mqd->cp_hqd_pq_doorbell_control); - - /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo); - WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi); - - /* set the vmid for the queue */ - mqd->cp_hqd_vmid = 0; - WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid); - - tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE); - tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53); - WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp); - mqd->cp_hqd_persistent_state = tmp; - - /* activate the queue */ - mqd->cp_hqd_active = 1; - WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active); - - soc15_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - - amdgpu_bo_kunmap(ring->mqd_obj); - amdgpu_bo_unreserve(ring->mqd_obj); - - if (use_doorbell) - WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1); - - return 0; -} - const struct amdgpu_ip_block_version gfx_v9_0_ip_block = { .type = AMD_IP_BLOCK_TYPE_GFX, diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 005075ff00f7..a42f483767e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -31,178 +31,161 @@ #include "soc15_common.h" -int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) { - u32 tmp; - u64 value; - u32 i; + return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; +} - /* Program MC. 
*/ - /* Update configuration */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), - adev->mc.vram_start >> 18); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), - adev->mc.vram_end >> 18); +static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +{ + uint64_t value; - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + adev->vm_manager.vram_base_offset; - WREG32(SOC15_REG_OFFSET(GC, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), - (u32)(value >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), - (u32)(value >> 44)); + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /*valid bit*/ - if (amdgpu_sriov_vf(adev)) { - /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so - vbios post doesn't program them, for SRIOV driver need to program them */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_BASE), - adev->mc.vram_start >> 24); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_FB_LOCATION_TOP), - adev->mc.vram_end >> 24); - } + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + gfxhub_v1_0_init_gart_pt_regs(adev); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->mc.gtt_start >> 12)); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->mc.gtt_start >> 44)); + + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->mc.gtt_end >> 12)); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->mc.gtt_end >> 44)); +} + +static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + uint64_t value; /* Disable AGP. */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BASE), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_TOP), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_AGP_BOT), 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF); - /* GART Enable. */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->mc.vram_start >> 18); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->mc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". 
*/ + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page.addr >> 12)); + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page.addr >> 44)); + + WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); +} + +static void gfxhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_ACCESS_MODE, - 3); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - MTYPE, - MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ATC_EN, - 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. */ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - ENABLE_L2_FRAGMENT_PROCESSING, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - CONTEXT1_IDENTITY_ACCESS_MODE, - 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - IDENTITY_MODE_FRAGMENT_SIZE, - 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL2), tmp); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), tmp); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, tmp); - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL4)); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, - 0); - WREG32(SOC15_REG_OFFSET(GC, 
0, mmVM_L2_CNTL4), tmp); - - /* setup context0 */ - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), - (u32)(adev->mc.gtt_start >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), - (u32)(adev->mc.gtt_start >> 44)); - - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), - (u32)(adev->mc.gtt_end >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), - (u32)(adev->mc.gtt_end >> 44)); + tmp = mmVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL4, tmp); +} - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start - + adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /*valid bit*/ +static void gfxhub_v1_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), - (u32)value); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), - (u32)(value >> 32)); - - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), - (u32)(adev->dummy_page.addr >> 12)); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), - (u32)((u64)adev->dummy_page.addr >> 44)); - - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, - 1); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); - - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL), tmp); + WREG32_SOC15(GC, 0, mmVM_CONTEXT0_CNTL, tmp); +} + +static void gfxhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, + 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, + 0); - /* Disable identity aperture.*/ - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); +} - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); +static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i); + tmp = RREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = 
REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, adev->vm_manager.num_level); @@ -223,15 +206,52 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, upper_32_bits(adev->vm_manager.max_pfn - 1)); } +} + +static void gfxhub_v1_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0 ; i < 18; ++i) { + WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(GC, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} + +int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) { + /* + * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are + * VF copy registers, so the vbios post doesn't program them; + * the SRIOV driver needs to program them + */ + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE, + adev->mc.vram_start >> 24); + WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP, + adev->mc.vram_end >> 24); + } + /* GART Enable. 
*/ + gfxhub_v1_0_init_gart_aperture_regs(adev); + gfxhub_v1_0_init_system_aperture_regs(adev); + gfxhub_v1_0_init_tlb_regs(adev); + gfxhub_v1_0_init_cache_regs(adev); + + gfxhub_v1_0_enable_system_domain(adev); + gfxhub_v1_0_disable_identity_aperture(adev); + gfxhub_v1_0_setup_vmid_config(adev); + gfxhub_v1_0_program_invalidation(adev); return 0; } @@ -243,22 +263,20 @@ void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev) /* Disable all tables */ for (i = 0; i < 16; i++) - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL) + i, 0); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_CNTL, i, 0); /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + WREG32_SOC15(GC, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL)); - tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL), tmp); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_CNTL3), 0); + WREG32_FIELD15(GC, 0, VM_L2_CNTL, ENABLE_L2_CACHE, 0); + WREG32_SOC15(GC, 0, mmVM_L2_CNTL3, 0); } /** @@ -271,7 +289,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL)); + tmp = RREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, @@ -296,22 +314,11 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp); -} - -static int gfxhub_v1_0_early_init(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_late_init(void *handle) -{ - return 0; + WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp); } -static int gfxhub_v1_0_sw_init(void *handle) +void gfxhub_v1_0_init(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB]; hub->ctx0_ptb_addr_lo32 = @@ -330,96 +337,4 @@ static int gfxhub_v1_0_sw_init(void *handle) SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_STATUS); hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL); - - return 0; -} - -static int gfxhub_v1_0_sw_fini(void *handle) -{ - return 0; } - -static int gfxhub_v1_0_hw_init(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - unsigned i; - - for (i = 0 ; i < 18; ++i) { - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + - 2 * i, 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + - 2 * i, 0x1f); - } - - return 0; -} - -static int gfxhub_v1_0_hw_fini(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_suspend(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_resume(void *handle) -{ - return 0; -} - -static bool gfxhub_v1_0_is_idle(void *handle) -{ - return true; -} - -static int gfxhub_v1_0_wait_for_idle(void *handle) -{ - return 0; -} - -static int gfxhub_v1_0_soft_reset(void *handle) 
-{ - return 0; -} - -static int gfxhub_v1_0_set_clockgating_state(void *handle, - enum amd_clockgating_state state) -{ - return 0; -} - -static int gfxhub_v1_0_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -const struct amd_ip_funcs gfxhub_v1_0_ip_funcs = { - .name = "gfxhub_v1_0", - .early_init = gfxhub_v1_0_early_init, - .late_init = gfxhub_v1_0_late_init, - .sw_init = gfxhub_v1_0_sw_init, - .sw_fini = gfxhub_v1_0_sw_fini, - .hw_init = gfxhub_v1_0_hw_init, - .hw_fini = gfxhub_v1_0_hw_fini, - .suspend = gfxhub_v1_0_suspend, - .resume = gfxhub_v1_0_resume, - .is_idle = gfxhub_v1_0_is_idle, - .wait_for_idle = gfxhub_v1_0_wait_for_idle, - .soft_reset = gfxhub_v1_0_soft_reset, - .set_clockgating_state = gfxhub_v1_0_set_clockgating_state, - .set_powergating_state = gfxhub_v1_0_set_powergating_state, -}; - -const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_GFXHUB, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &gfxhub_v1_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 5129a8ff0932..d2dbb085f480 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -28,7 +28,8 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev); void gfxhub_v1_0_gart_disable(struct amdgpu_device *adev); void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); - +void gfxhub_v1_0_init(struct amdgpu_device *adev); +u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); extern const struct amd_ip_funcs gfxhub_v1_0_ip_funcs; extern const struct amdgpu_ip_block_version gfxhub_v1_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 9776ad3d2d71..ce68d609b619 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -395,6 +395,12 @@ static uint64_t gmc_v6_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v6_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + static void gmc_v6_0_set_fault_enable_default(struct amdgpu_device *adev, bool value) { @@ -614,33 +620,6 @@ static void gmc_v6_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -static int gmc_v6_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -static void gmc_v6_0_vm_fini(struct amdgpu_device *adev) -{ -} - static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev, u32 status, u32 addr, u32 mc_client) { @@ -855,6 +834,8 @@ static int gmc_v6_0_sw_init(void *handle) adev->mc.mc_mask = 0xffffffffffULL; + adev->mc.stolen_size = 256 * 1024; + adev->need_dma32 = false; dma_bits = adev->need_dma32 ? 
32 : 40; r = pci_set_dma_mask(adev->pdev, DMA_BIT_MASK(dma_bits)); @@ -887,26 +868,34 @@ static int gmc_v6_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v6_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v6_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - gmc_v6_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v6_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -984,16 +973,10 @@ static bool gmc_v6_0_is_idle(void *handle) static int gmc_v6_0_wait_for_idle(void *handle) { unsigned i; - u32 tmp; struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__MCB_BUSY_MASK | - SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | - SRBM_STATUS__MCD_BUSY_MASK | - SRBM_STATUS__VMC_BUSY_MASK); - if (!tmp) + if (gmc_v6_0_is_idle(handle)) return 0; udelay(1); } @@ -1146,6 +1129,7 @@ static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = { .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v6_0_gart_set_pte_pde, .set_prt = gmc_v6_0_set_prt, + .get_vm_pde = gmc_v6_0_get_vm_pde, .get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index fca8e77182c9..7e9ea53edf8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -472,6 +472,12 @@ static uint64_t gmc_v7_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v7_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -724,55 +730,6 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. - */ -/** - * gmc_v7_0_vm_init - cik vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits cik specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (CIK). - * Returns 0 for success. 
- */ -static int gmc_v7_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v7_0_vm_fini - cik vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup (CIK). - */ -static void gmc_v7_0_vm_fini(struct amdgpu_device *adev) -{ -} - /** * gmc_v7_0_vm_decode_fault - print human readable fault info * @@ -1013,6 +970,8 @@ static int gmc_v7_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->mc.stolen_size = 256 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. * IGP - can handle 40-bits @@ -1051,27 +1010,34 @@ static int gmc_v7_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v7_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v7_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v7_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v7_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -1335,7 +1301,8 @@ static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = { .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v7_0_gart_set_pte_pde, .set_prt = gmc_v7_0_set_prt, - .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags + .get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags, + .get_vm_pde = gmc_v7_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index e9c127037b39..cc9f88057cd5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -656,6 +656,12 @@ static uint64_t gmc_v8_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } +static uint64_t gmc_v8_0_get_vm_pde(struct amdgpu_device *adev, uint64_t addr) +{ + BUG_ON(addr & 0xFFFFFF0000000FFFULL); + return addr; +} + /** * gmc_v8_0_set_fault_enable_default - update VM fault handling * @@ -927,55 +933,6 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev) amdgpu_gart_fini(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. 
- */ -/** - * gmc_v8_0_vm_init - cik vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits cik specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (CIK). - * Returns 0 for success. - */ -static int gmc_v8_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.num_level = 1; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - if (adev->flags & AMD_IS_APU) { - u64 tmp = RREG32(mmMC_VM_FB_OFFSET); - tmp <<= 22; - adev->vm_manager.vram_base_offset = tmp; - } else - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v8_0_vm_fini - cik vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup (CIK). - */ -static void gmc_v8_0_vm_fini(struct amdgpu_device *adev) -{ -} - /** * gmc_v8_0_vm_decode_fault - print human readable fault info * @@ -1097,6 +1054,8 @@ static int gmc_v8_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */ + adev->mc.stolen_size = 256 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 40-bits. * IGP - can handle 40-bits @@ -1135,27 +1094,34 @@ static int gmc_v8_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v8_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[0].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_level = 1; + amdgpu_vm_manager_init(adev); + + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) { + u64 tmp = RREG32(mmMC_VM_FB_OFFSET); + + tmp <<= 22; + adev->vm_manager.vram_base_offset = tmp; + } else { + adev->vm_manager.vram_base_offset = 0; } - return r; + return 0; } static int gmc_v8_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v8_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v8_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -1654,7 +1620,8 @@ static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = { .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v8_0_gart_set_pte_pde, .set_prt = gmc_v8_0_set_prt, - .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags + .get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags, + .get_vm_pde = gmc_v8_0_get_vm_pde }; static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index f936332a069d..68172aace3ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -33,6 +33,7 @@ #include "soc15_common.h" #include "nbio_v6_1.h" +#include "nbio_v7_0.h" #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" @@ -215,7 +216,10 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev, unsigned i, j; /* flush hdp cache */ - nbio_v6_1_hdp_flush(adev); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_hdp_flush(adev); + else + nbio_v6_1_hdp_flush(adev); spin_lock(&adev->mc.invalidate_lock); @@ -354,17 +358,19 @@ static uint64_t 
gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, return pte_flag; } -static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr) +static u64 gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, u64 addr) { - return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start; + addr = adev->vm_manager.vram_base_offset + addr - adev->mc.vram_start; + BUG_ON(addr & 0xFFFF00000000003FULL); + return addr; } static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = { .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb, .set_pte_pde = gmc_v9_0_gart_set_pte_pde, - .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, - .adjust_mc_addr = gmc_v9_0_adjust_mc_addr, .get_invalidate_req = gmc_v9_0_get_invalidate_req, + .get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags, + .get_vm_pde = gmc_v9_0_get_vm_pde }; static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev) @@ -415,6 +421,11 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_vram_location(adev, &adev->mc, base); adev->mc.gtt_base_align = 0; amdgpu_gtt_location(adev, mc); + /* base offset of vram pages */ + if (adev->flags & AMD_IS_APU) + adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); + else + adev->vm_manager.vram_base_offset = 0; } /** @@ -434,7 +445,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) /* hbm memory channel size */ chansize = 128; - tmp = RREG32(SOC15_REG_OFFSET(DF, 0, mmDF_CS_AON0_DramBaseAddress0)); + tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0); tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK; tmp >>= DF_CS_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; switch (tmp) { @@ -474,7 +485,8 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) adev->mc.aper_size = pci_resource_len(adev->pdev, 0); /* size in MB on si */ adev->mc.mc_vram_size = - nbio_v6_1_get_memsize(adev) * 1024ULL * 1024ULL; + ((adev->flags & AMD_IS_APU) ? nbio_v7_0_get_memsize(adev) : + nbio_v6_1_get_memsize(adev)) * 1024ULL * 1024ULL; adev->mc.real_vram_size = adev->mc.mc_vram_size; adev->mc.visible_vram_size = adev->mc.aper_size; @@ -514,64 +526,15 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) return amdgpu_gart_table_vram_alloc(adev); } -/* - * vm - * VMID 0 is the physical GPU addresses as used by the kernel. - * VMIDs 1-15 are used for userspace clients and are handled - * by the amdgpu vm/hsa code. - */ -/** - * gmc_v9_0_vm_init - vm init callback - * - * @adev: amdgpu_device pointer - * - * Inits vega10 specific vm parameters (number of VMs, base of vram for - * VMIDs 1-15) (vega10). - * Returns 0 for success. - */ -static int gmc_v9_0_vm_init(struct amdgpu_device *adev) -{ - /* - * number of VMs - * VMID 0 is reserved for System - * amdgpu graphics/compute will use VMIDs 1-7 - * amdkfd will use VMIDs 8-15 - */ - adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; - - /* TODO: fix num_level for APU when updating vm size and block size */ - if (adev->flags & AMD_IS_APU) - adev->vm_manager.num_level = 1; - else - adev->vm_manager.num_level = 3; - amdgpu_vm_manager_init(adev); - - /* base offset of vram pages */ - /*XXX This value is not zero for APU*/ - adev->vm_manager.vram_base_offset = 0; - - return 0; -} - -/** - * gmc_v9_0_vm_fini - vm fini callback - * - * @adev: amdgpu_device pointer - * - * Tear down any asic specific VM setup. 
- */ -static void gmc_v9_0_vm_fini(struct amdgpu_device *adev) -{ - return; -} - static int gmc_v9_0_sw_init(void *handle) { int r; int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gfxhub_v1_0_init(adev); + mmhub_v1_0_init(adev); + spin_lock_init(&adev->mc.invalidate_lock); if (adev->flags & AMD_IS_APU) { @@ -609,6 +572,12 @@ static int gmc_v9_0_sw_init(void *handle) */ adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */ + /* + * It needs to reserve 8M stolen memory for vega10 + * TODO: Figure out how to avoid that... + */ + adev->mc.stolen_size = 8 * 1024 * 1024; + /* set DMA mask + need_dma32 flags. * PCIE - can handle 44-bits. * IGP - can handle 44-bits @@ -641,15 +610,23 @@ static int gmc_v9_0_sw_init(void *handle) if (r) return r; - if (!adev->vm_manager.enabled) { - r = gmc_v9_0_vm_init(adev); - if (r) { - dev_err(adev->dev, "vm manager initialization failed (%d).\n", r); - return r; - } - adev->vm_manager.enabled = true; - } - return r; + /* + * number of VMs + * VMID 0 is reserved for System + * amdgpu graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + adev->vm_manager.id_mgr[AMDGPU_GFXHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.id_mgr[AMDGPU_MMHUB].num_ids = AMDGPU_NUM_OF_VMIDS; + + /* TODO: fix num_level for APU when updating vm size and block size */ + if (adev->flags & AMD_IS_APU) + adev->vm_manager.num_level = 1; + else + adev->vm_manager.num_level = 3; + amdgpu_vm_manager_init(adev); + + return 0; } /** @@ -669,11 +646,7 @@ static int gmc_v9_0_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->vm_manager.enabled) { - amdgpu_vm_manager_fini(adev); - gmc_v9_0_vm_fini(adev); - adev->vm_manager.enabled = false; - } + amdgpu_vm_manager_fini(adev); gmc_v9_0_gart_fini(adev); amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); @@ -686,6 +659,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: break; + case CHIP_RAVEN: + break; default: break; } @@ -715,7 +690,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) return r; /* After HDP is initialized, flush HDP.*/ - nbio_v6_1_hdp_flush(adev); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_hdp_flush(adev); + else + nbio_v6_1_hdp_flush(adev); r = gfxhub_v1_0_gart_enable(adev); if (r) @@ -725,12 +703,12 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL)); + tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MISC_CNTL), tmp); + WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL)); - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_HOST_PATH_CNTL), tmp); + tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); + WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) @@ -781,6 +759,12 @@ static int gmc_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (amdgpu_sriov_vf(adev)) { + /* full access mode, so don't touch any GMC register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + return 0; + } + amdgpu_irq_put(adev, &adev->mc.vm_fault, 0); gmc_v9_0_gart_disable(adev); @@ -831,7 +815,16 @@ static int gmc_v9_0_soft_reset(void *handle) static int gmc_v9_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) { - return 0; + 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return mmhub_v1_0_set_clockgating(adev, state); +} + +static void gmc_v9_0_get_clockgating_state(void *handle, u32 *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + mmhub_v1_0_get_clockgating(adev, flags); } static int gmc_v9_0_set_powergating_state(void *handle, @@ -855,6 +848,7 @@ const struct amd_ip_funcs gmc_v9_0_ip_funcs = { .soft_reset = gmc_v9_0_soft_reset, .set_clockgating_state = gmc_v9_0_set_clockgating_state, .set_powergating_state = gmc_v9_0_set_powergating_state, + .get_clockgating_state = gmc_v9_0_get_clockgating_state, }; const struct amdgpu_ip_block_version gmc_v9_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index dbfe48d1207a..f50b5a77f45a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -34,9 +34,12 @@ #include "soc15_common.h" +#define mmDAGB0_CNTL_MISC2_RV 0x008f +#define mmDAGB0_CNTL_MISC2_RV_BASE_IDX 0 + u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { - u64 base = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE)); + u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; base <<= 24; @@ -44,184 +47,160 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) return base; } -int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) { - u32 tmp; - u64 value; - uint64_t addr; - u32 i; + uint64_t value; - /* Program MC. */ - /* Update configuration */ - DRM_INFO("%s -- in\n", __func__); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR), - adev->mc.vram_start >> 18); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR), - adev->mc.vram_end >> 18); - value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); + value = adev->gart.table_addr - adev->mc.vram_start + adev->vm_manager.vram_base_offset; - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB), - (u32)(value >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB), - (u32)(value >> 44)); + value &= 0x0000FFFFFFFFF000ULL; + value |= 0x1; /* valid bit */ - if (amdgpu_sriov_vf(adev)) { - /* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are VF copy registers so - vbios post doesn't program them, for SRIOV driver need to program them */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE), - adev->mc.vram_start >> 24); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP), - adev->mc.vram_end >> 24); - } + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + lower_32_bits(value)); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + upper_32_bits(value)); +} + +static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) +{ + mmhub_v1_0_init_gart_pt_regs(adev); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, + (u32)(adev->mc.gtt_start >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32, + (u32)(adev->mc.gtt_start >> 44)); + + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32, + (u32)(adev->mc.gtt_end >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32, + (u32)(adev->mc.gtt_end >> 44)); +} + +static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) +{ + 
uint64_t value; + uint32_t tmp; /* Disable AGP. */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BASE), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_TOP), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_AGP_BOT), 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF); - /* GART Enable. */ + /* Program the system aperture low logical page number. */ + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, + adev->mc.vram_start >> 18); + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + adev->mc.vram_end >> 18); + + /* Set default page address. */ + value = adev->vram_scratch.gpu_addr - adev->mc.vram_start + + adev->vm_manager.vram_base_offset; + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB, + (u32)(value >> 12)); + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_MSB, + (u32)(value >> 44)); + + /* Program "protection fault". */ + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32, + (u32)(adev->dummy_page.addr >> 12)); + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32, + (u32)((u64)adev->dummy_page.addr >> 44)); + + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2); + tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, + ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1); + WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2, tmp); +} + +static void mmhub_v1_0_init_tlb_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup TLB control */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_ACCESS_MODE, - 3); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ENABLE_ADVANCED_DRIVER_MODEL, - 1); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - SYSTEM_APERTURE_UNMAPPED_ACCESS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ECO_BITS, - 0); - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - MTYPE, - MTYPE_UC);/* XXX for emulation. */ - tmp = REG_SET_FIELD(tmp, - MC_VM_MX_L1_TLB_CNTL, - ATC_EN, - 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE, 3); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + ENABLE_ADVANCED_DRIVER_MODEL, 1); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + SYSTEM_APERTURE_UNMAPPED_ACCESS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ECO_BITS, 0); + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, + MTYPE, MTYPE_UC);/* XXX for emulation. 
*/ + tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ATC_EN, 1); + + WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp); +} + +static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; /* Setup L2 cache */ - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - ENABLE_L2_FRAGMENT_PROCESSING, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - L2_PDE0_CACHE_TAG_GENERATION_MODE, - 0);/* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_FRAGMENT_PROCESSING, 0); + /* XXX for emulation, Refer to closed source code.*/ + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, L2_PDE0_CACHE_TAG_GENERATION_MODE, + 0); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, PDE_FAULT_CLASSIFICATION, 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - CONTEXT1_IDENTITY_ACCESS_MODE, - 1); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL, - IDENTITY_MODE_FRAGMENT_SIZE, - 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, CONTEXT1_IDENTITY_ACCESS_MODE, 1); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, IDENTITY_MODE_FRAGMENT_SIZE, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp); - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2)); + tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS, 1); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL2), tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp); tmp = mmVM_L2_CNTL3_DEFAULT; - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), tmp); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, tmp); - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4)); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PDE_REQUEST_PHYSICAL, - 0); - tmp = REG_SET_FIELD(tmp, - VM_L2_CNTL4, - VMC_TAP_PTE_REQUEST_PHYSICAL, - 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL4), tmp); - - /* setup context0 */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32), - (u32)(adev->mc.gtt_start >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32), - (u32)(adev->mc.gtt_start >> 44)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32), - (u32)(adev->mc.gtt_end >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32), - (u32)(adev->mc.gtt_end >> 44)); - - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->mc.vram_start + - adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /* valid bit */ - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32), - (u32)value); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32), - (u32)(value >> 32)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32), - (u32)(adev->dummy_page.addr >> 12)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32), - (u32)((u64)adev->dummy_page.addr >> 44)); - - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2)); - tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2, - ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, - 1); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2), tmp); + tmp = mmVM_L2_CNTL4_DEFAULT; + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, 
VMC_TAP_PDE_REQUEST_PHYSICAL, 0); + tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_PTE_REQUEST_PHYSICAL, 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL4, tmp); +} - addr = SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL); - tmp = RREG32(addr); +static void mmhub_v1_0_enable_system_domain(struct amdgpu_device *adev) +{ + uint32_t tmp; + tmp = RREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL), tmp); - - tmp = RREG32(addr); - - /* Disable identity aperture.*/ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32), 0XFFFFFFFF); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32), 0x0000000F); + WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_CNTL, tmp); +} - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32), 0); +static void mmhub_v1_0_disable_identity_aperture(struct amdgpu_device *adev) +{ + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, + 0XFFFFFFFF); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_HI32, + 0x0000000F); + + WREG32_SOC15(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_LO32, 0); + WREG32_SOC15(MMHUB, 0, + mmVM_L2_CONTEXT1_IDENTITY_APERTURE_HIGH_ADDR_HI32, 0); + + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32, + 0); + WREG32_SOC15(MMHUB, 0, mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32, + 0); +} - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_LO32), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_L2_CONTEXT_IDENTITY_PHYSICAL_OFFSET_HI32), 0); +static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) +{ + int i; + uint32_t tmp; for (i = 0; i <= 14; i++) { - tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) - + i); + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, @@ -243,14 +222,52 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, adev->vm_manager.block_size - 9); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32) + i*2, + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL, i, tmp); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32, i*2, 0); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_LO32, i*2, lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32) + i*2, + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_END_ADDR_HI32, i*2, upper_32_bits(adev->vm_manager.max_pfn - 1)); } +} + +static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) +{ + unsigned i; + + for (i = 0; i < 18; ++i) { + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, + 2 * i, 0xffffffff); + WREG32_SOC15_OFFSET(MMHUB, 0, 
mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32, + 2 * i, 0x1f); + } +} +
+int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) +{
+ if (amdgpu_sriov_vf(adev)) {
+ /*
+ * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
+ * VF copy registers, so the vbios post doesn't program them; the
+ * SRIOV driver needs to program them
+ */
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE, + adev->mc.vram_start >> 24);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP, + adev->mc.vram_end >> 24);
+ } +
+ /* GART Enable. */
+ mmhub_v1_0_init_gart_aperture_regs(adev);
+ mmhub_v1_0_init_system_aperture_regs(adev);
+ mmhub_v1_0_init_tlb_regs(adev);
+ mmhub_v1_0_init_cache_regs(adev); +
+ mmhub_v1_0_enable_system_domain(adev);
+ mmhub_v1_0_disable_identity_aperture(adev);
+ mmhub_v1_0_setup_vmid_config(adev);
+ mmhub_v1_0_program_invalidation(adev);
return 0; }
@@ -262,22 +279,22 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
/* Disable all tables */ for (i = 0; i < 16; i++)
- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL) + i, 0);
+ WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_CNTL, i, 0);
/* Setup TLB control */
- tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL));
+ tmp = RREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 0);
tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_ADVANCED_DRIVER_MODEL, 0);
- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL), tmp);
+ WREG32_SOC15(MMHUB, 0, mmMC_VM_MX_L1_TLB_CNTL, tmp);
/* Setup L2 cache */
- tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL));
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 0);
- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL), tmp);
- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_CNTL3), 0);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL, tmp);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL3, 0);
}
/** @@ -289,7 +306,7 @@ void mmhub_v1_0_gart_disable(struct amdgpu_device *adev)
void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
{ u32 tmp;
- tmp = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL));
+ tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL,
@@ -314,22 +331,11 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, value);
- WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
+ WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL, tmp);
}
-static int mmhub_v1_0_early_init(void *handle)
+void mmhub_v1_0_init(struct amdgpu_device *adev)
{
- return 0;
-}
-
-static int mmhub_v1_0_late_init(void *handle)
-{
- return 0;
-}
-
-static int mmhub_v1_0_sw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB];
hub->ctx0_ptb_addr_lo32 =
@@ -349,69 +355,20 @@ static int mmhub_v1_0_sw_init(void *handle)
hub->vm_l2_pro_fault_cntl = SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
- return 0;
-}
-
-static int mmhub_v1_0_sw_fini(void *handle)
-{
- return 0;
-}
-
-static int mmhub_v1_0_hw_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- unsigned i;
-
- for (i = 0; i < 18; ++i) {
-
WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_LO32) + - 2 * i, 0xffffffff); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmVM_INVALIDATE_ENG0_ADDR_RANGE_HI32) + - 2 * i, 0x1f); - } - - return 0; -} - -static int mmhub_v1_0_hw_fini(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_suspend(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_resume(void *handle) -{ - return 0; -} - -static bool mmhub_v1_0_is_idle(void *handle) -{ - return true; -} - -static int mmhub_v1_0_wait_for_idle(void *handle) -{ - return 0; -} - -static int mmhub_v1_0_soft_reset(void *handle) -{ - return 0; } static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, bool enable) { - uint32_t def, data, def1, data1, def2, data2; + uint32_t def, data, def1, data1, def2 = 0, data2 = 0; - def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); - def1 = data1 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2)); - def2 = data2 = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2)); + def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); + + if (adev->asic_type != CHIP_RAVEN) { + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2); + def2 = data2 = RREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2); + } else + def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) { data |= ATC_L2_MISC_CG__ENABLE_MASK; @@ -423,12 +380,13 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); - data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + if (adev->asic_type != CHIP_RAVEN) + data2 &= ~(DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } else { data &= ~ATC_L2_MISC_CG__ENABLE_MASK; @@ -439,22 +397,27 @@ static void mmhub_v1_0_update_medium_grain_clock_gating(struct amdgpu_device *ad DAGB0_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | DAGB0_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); - data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | - DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); + if (adev->asic_type != CHIP_RAVEN) + data2 |= (DAGB1_CNTL_MISC2__DISABLE_WRREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_WRRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDREQ_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_RDRET_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBWR_CG_MASK | + DAGB1_CNTL_MISC2__DISABLE_TLBRD_CG_MASK); } if (def != data) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); - if (def1 != data1) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB0_CNTL_MISC2), data1); + if (def1 != data1) { + if (adev->asic_type != CHIP_RAVEN) + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2, data1); + else + WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_RV, data1); + } - if (def2 != data2) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmDAGB1_CNTL_MISC2), data2); + if (adev->asic_type != CHIP_RAVEN && def2 != data2) + 
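/* Judging by these guards, Raven has no second DAGB instance to
 * program (its single DAGB is reached through the Raven-specific
 * mmDAGB0_CNTL_MISC2_RV offset defined at the top of this file), so
 * the DAGB1 write below is skipped on that ASIC. */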
WREG32_SOC15(MMHUB, 0, mmDAGB1_CNTL_MISC2, data2); } static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -462,7 +425,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG)) data |= ATHUB_MISC_CNTL__CG_ENABLE_MASK; @@ -470,7 +433,7 @@ static void athub_update_medium_grain_clock_gating(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_ENABLE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); } static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *adev, @@ -478,7 +441,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + def = data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS)) data |= ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; @@ -486,7 +449,7 @@ static void mmhub_v1_0_update_medium_grain_light_sleep(struct amdgpu_device *ade data &= ~ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK; if (def != data) - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG), data); + WREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG, data); } static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, @@ -494,7 +457,7 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + def = data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS) && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) @@ -503,19 +466,18 @@ static void athub_update_medium_grain_light_sleep(struct amdgpu_device *adev, data &= ~ATHUB_MISC_CNTL__CG_MEM_LS_ENABLE_MASK; if(def != data) - WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL), data); + WREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL, data); } -static int mmhub_v1_0_set_clockgating_state(void *handle, - enum amd_clockgating_state state) +int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (amdgpu_sriov_vf(adev)) return 0; switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN: mmhub_v1_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? 
true : false); athub_update_medium_grain_clock_gating(adev, @@ -532,54 +494,20 @@ static int mmhub_v1_0_set_clockgating_state(void *handle, return 0; } -static void mmhub_v1_0_get_clockgating_state(void *handle, u32 *flags) +void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; int data; if (amdgpu_sriov_vf(adev)) *flags = 0; /* AMD_CG_SUPPORT_MC_MGCG */ - data = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATHUB_MISC_CNTL)); + data = RREG32_SOC15(ATHUB, 0, mmATHUB_MISC_CNTL); if (data & ATHUB_MISC_CNTL__CG_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_MGCG; /* AMD_CG_SUPPORT_MC_LS */ - data = RREG32(SOC15_REG_OFFSET(MMHUB, 0, mmATC_L2_MISC_CG)); + data = RREG32_SOC15(MMHUB, 0, mmATC_L2_MISC_CG); if (data & ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK) *flags |= AMD_CG_SUPPORT_MC_LS; } - -static int mmhub_v1_0_set_powergating_state(void *handle, - enum amd_powergating_state state) -{ - return 0; -} - -const struct amd_ip_funcs mmhub_v1_0_ip_funcs = { - .name = "mmhub_v1_0", - .early_init = mmhub_v1_0_early_init, - .late_init = mmhub_v1_0_late_init, - .sw_init = mmhub_v1_0_sw_init, - .sw_fini = mmhub_v1_0_sw_fini, - .hw_init = mmhub_v1_0_hw_init, - .hw_fini = mmhub_v1_0_hw_fini, - .suspend = mmhub_v1_0_suspend, - .resume = mmhub_v1_0_resume, - .is_idle = mmhub_v1_0_is_idle, - .wait_for_idle = mmhub_v1_0_wait_for_idle, - .soft_reset = mmhub_v1_0_soft_reset, - .set_clockgating_state = mmhub_v1_0_set_clockgating_state, - .set_powergating_state = mmhub_v1_0_set_powergating_state, - .get_clockgating_state = mmhub_v1_0_get_clockgating_state, -}; - -const struct amdgpu_ip_block_version mmhub_v1_0_ip_block = -{ - .type = AMD_IP_BLOCK_TYPE_MMHUB, - .major = 1, - .minor = 0, - .rev = 0, - .funcs = &mmhub_v1_0_ip_funcs, -}; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index aadedf99c028..bbfacbcdc4a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -28,6 +28,10 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev); void mmhub_v1_0_gart_disable(struct amdgpu_device *adev); void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); +void mmhub_v1_0_init(struct amdgpu_device *adev); +int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, + enum amd_clockgating_state state); +void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); extern const struct amd_ip_funcs mmhub_v1_0_ip_funcs; extern const struct amdgpu_ip_block_version mmhub_v1_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 1493301b6a94..bde3ca3c21c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -124,8 +124,8 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev) r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL)); @@ -141,12 +141,12 @@ static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event) r = xgpu_ai_mailbox_rcv_msg(adev, event); while (r) { if (timeout <= 0) { - pr_err("Doesn't get ack from pf.\n"); + pr_err("Doesn't get msg:%d from pf.\n", event); r = -ETIME; break; } - msleep(1); - timeout -= 1; + mdelay(5); + timeout -= 5; r = xgpu_ai_mailbox_rcv_msg(adev, event); } @@ -165,7 +165,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, /* start to poll ack */ r = 
xgpu_ai_poll_ack(adev); if (r)
- return r;
+ pr_err("Doesn't get ack from pf, continue\n");
xgpu_ai_mailbox_set_valid(adev, false);
@@ -174,8 +174,10 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
req == IDH_REQ_GPU_FINI_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
- if (r)
+ if (r) {
+ pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n");
return r;
+ }
} return 0;
@@ -241,7 +243,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
} /* Trigger recovery due to world switch failure */
- amdgpu_sriov_gpu_reset(adev, false);
+ amdgpu_sriov_gpu_reset(adev, NULL);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -264,12 +266,15 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
{ int r;
- /* see what event we get */
- r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+ /* trigger gpu-reset by hypervisor only if TDR disabled */
+ if (amdgpu_lockup_timeout == 0) {
+ /* see what event we get */
+ r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
- /* only handle FLR_NOTIFY now */
- if (!r)
- schedule_work(&adev->virt.flr_work);
+ /* only handle FLR_NOTIFY now */
+ if (!r)
+ schedule_work(&adev->virt.flr_work);
+ }
return 0; }
@@ -296,11 +301,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
{ int r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
if (r) return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
if (r) { amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 7bdc51b02326..171a658135b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -398,8 +398,8 @@ static int xgpu_vi_poll_ack(struct amdgpu_device *adev)
r = -ETIME; break; }
- msleep(1);
- timeout -= 1;
+ mdelay(5);
+ timeout -= 5;
reg = RREG32_NO_KIQ(mmMAILBOX_CONTROL); }
@@ -418,8 +418,8 @@ static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
r = -ETIME; break; }
- msleep(1);
- timeout -= 1;
+ mdelay(5);
+ timeout -= 5;
r = xgpu_vi_mailbox_rcv_msg(adev, event); }
@@ -447,7 +447,7 @@ static int xgpu_vi_send_access_requests(struct amdgpu_device *adev,
request == IDH_REQ_GPU_RESET_ACCESS) {
r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
if (r)
- return r;
+ pr_err("Doesn't get ack from pf, continue\n");
} return 0;
@@ -514,7 +514,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
} /* Trigger recovery due to world switch failure */
- amdgpu_sriov_gpu_reset(adev, false);
+ amdgpu_sriov_gpu_reset(adev, NULL);
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -537,12 +537,15 @@ static int xgpu_vi_mailbox_rcv_irq(struct amdgpu_device *adev,
{ int r;
- /* see what event we get */
- r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+ /* trigger gpu-reset by hypervisor only if TDR disabled */
+ if (amdgpu_lockup_timeout == 0) {
+ /* see what event we get */
+ r = xgpu_vi_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
- /* only handle FLR_NOTIFY now */
- if (!r)
- schedule_work(&adev->virt.flr_work);
+ /* only handle FLR_NOTIFY now */
+ if (!r)
+ schedule_work(&adev->virt.flr_work);
+ }
return 0; }
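One pattern in the mailbox hunks above deserves a note: every msleep(1) poll was replaced by mdelay(5). msleep(1) routinely oversleeps (timer granularity can stretch it to many milliseconds, so the decremented timeout bears little relation to wall time) and must not be used in atomic context; mdelay() busy-waits for a bounded, predictable interval. A minimal sketch of the resulting loop follows; the timeout budget and ack-bit constant are illustrative assumptions, not values taken from this patch:

	#define ACK_BIT_SKETCH		(1 << 1)	/* hypothetical TRN_MSG_ACK position */
	#define TIMEOUT_MS_SKETCH	12000		/* hypothetical total budget, in ms */

	static int poll_ack_sketch(struct amdgpu_device *adev)
	{
		int timeout = TIMEOUT_MS_SKETCH;
		u32 reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
					mmBIF_BX_PF0_MAILBOX_CONTROL));

		while (!(reg & ACK_BIT_SKETCH)) {
			if (timeout <= 0)
				return -ETIME;
			mdelay(5);	/* busy-wait: predictable, atomic-safe */
			timeout -= 5;
			reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
						mmBIF_BX_PF0_MAILBOX_CONTROL));
		}
		return 0;
	}

The behavioural change is also worth flagging: xgpu_*_mailbox_rcv_irq() now schedules FLR recovery only when amdgpu_lockup_timeout == 0, i.e. the hypervisor-triggered reset path is used only while the driver's own TDR is disabled.

diff --git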
a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 97057f4a10de..1e272f785def 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -35,7 +35,7 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { - u32 tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0)); + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -46,32 +46,33 @@ u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) u32 nbio_v6_1_get_atombios_scratch_regs(struct amdgpu_device *adev, uint32_t idx) { - return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx); + return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); } void nbio_v6_1_set_atombios_scratch_regs(struct amdgpu_device *adev, uint32_t idx, uint32_t val) { - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0) + idx, val); + WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); } void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) { if (enable) - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), - BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | + BIF_FB_EN__FB_WRITE_EN_MASK); else - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_FB_EN), 0); + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } void nbio_v6_1_hdp_flush(struct amdgpu_device *adev) { - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) { - return RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE)); + return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); } static const u32 nbio_sdma_doorbell_range_reg[] = @@ -97,15 +98,7 @@ void nbio_v6_1_sdma_doorbell_range(struct amdgpu_device *adev, int instance, void nbio_v6_1_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - u32 tmp; - - tmp = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN)); - if (enable) - tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 1); - else - tmp = REG_SET_FIELD(tmp, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, 0); - - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmRCC_PF_0_0_RCC_DOORBELL_APER_EN), tmp); + WREG32_FIELD15(NBIO, 0, RCC_PF_0_0_RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 
1 : 0); } void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, @@ -115,23 +108,23 @@ void nbio_v6_1_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, if (enable) { tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_EN, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | - REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_MODE, 1) | + REG_SET_FIELD(tmp, BIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, DOORBELL_SELFRING_GPA_APER_SIZE, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW), - lower_32_bits(adev->doorbell.base)); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH), - upper_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_LOW, + lower_32_bits(adev->doorbell.base)); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_BASE_HIGH, + upper_32_bits(adev->doorbell.base)); } - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL), tmp); + WREG32_SOC15(NBIO, 0, mmBIF_BX_PF0_DOORBELL_SELFRING_GPA_APER_CNTL, tmp); } void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, bool use_doorbell, int doorbell_index) { - u32 ih_doorbell_range = RREG32(SOC15_REG_OFFSET(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE)); + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); if (use_doorbell) { ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); @@ -139,7 +132,7 @@ void nbio_v6_1_ih_doorbell_range(struct amdgpu_device *adev, } else ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmBIF_IH_DOORBELL_RANGE), ih_doorbell_range); + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); } void nbio_v6_1_ih_control(struct amdgpu_device *adev) @@ -147,15 +140,15 @@ void nbio_v6_1_ih_control(struct amdgpu_device *adev) u32 interrupt_cntl; /* setup interrupt control */ - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL2), adev->dummy_page.addr >> 8); - interrupt_cntl = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL)); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN */ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmINTERRUPT_CNTL), interrupt_cntl); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); } void nbio_v6_1_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -251,8 +244,7 @@ void nbio_v6_1_detect_hw_virt(struct amdgpu_device *adev) { uint32_t reg; - reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, - mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER)); + reg = RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_IOV_FUNC_IDENTIFIER); if (reg & 1) adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; diff --git 
a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c new file mode 100644 index 000000000000..aa04632523fa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -0,0 +1,212 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_0.h" + +#include "vega10/soc15ip.h" +#include "raven1/NBIO/nbio_7_0_default.h" +#include "raven1/NBIO/nbio_7_0_offset.h" +#include "raven1/NBIO/nbio_7_0_sh_mask.h" +#include "vega10/vega10_enum.h" + +#define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c + +u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx) +{ + return RREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx); +} + +void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val) +{ + WREG32_SOC15_OFFSET(NBIO, 0, mmBIOS_SCRATCH_0, idx, val); +} + +void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); +} + +void nbio_v7_0_hdp_flush(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); +} + +u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); +} + +static const u32 nbio_sdma_doorbell_range_reg[] = +{ + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE), + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE) +}; + +void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) +{ + u32 doorbell_range = RREG32(nbio_sdma_doorbell_range_reg[instance]); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); + + WREG32(nbio_sdma_doorbell_range_reg[instance], doorbell_range); +} + +void nbio_v7_0_enable_doorbell_aperture(struct 
amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); +} + +static uint32_t nbio_7_0_read_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset) +{ + uint32_t data; + + WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset); + data = RREG32_SOC15(NBIO, 0, mmSYSHUB_DATA); + + return data; +} + +static void nbio_7_0_write_syshub_ind_mmr(struct amdgpu_device *adev, uint32_t offset, + uint32_t data) +{ + WREG32_SOC15(NBIO, 0, mmSYSHUB_INDEX, offset); + WREG32_SOC15(NBIO, 0, mmSYSHUB_DATA, data); +} + +void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + /* NBIF_MGCG_CTRL_LCLK */ + def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK; + else + data &= ~NBIF_MGCG_CTRL_LCLK__NBIF_MGCG_EN_LCLK_MASK; + + if (def != data) + WREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK, data); + + /* SYSHUB_MGCG_CTRL_SOCCLK */ + def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK; + else + data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SOCCLK__SYSHUB_MGCG_EN_SOCCLK_MASK; + + if (def != data) + nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SOCCLK, data); + + /* SYSHUB_MGCG_CTRL_SHUBCLK */ + def = data = nbio_7_0_read_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_MGCG)) + data |= SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK; + else + data &= ~SYSHUB_MMREG_DIRECT_SYSHUB_MGCG_CTRL_SHUBCLK__SYSHUB_MGCG_EN_SHUBCLK_MASK; + + if (def != data) + nbio_7_0_write_syshub_ind_mmr(adev, ixSYSHUB_MMREG_IND_SYSHUB_MGCG_CTRL_SHUBCLK, data); +} + +void nbio_v7_0_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8); + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); + /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); +} + +struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; +struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; + +int nbio_v7_0_init(struct amdgpu_device *adev) +{ + nbio_v7_0_hdp_flush_reg.hdp_flush_req_offset = 
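/* This table appears to exist so ring code can stay NBIO-agnostic:
 * a producer writes its ref_and_mask bit to the REQ register and then
 * polls the same bit in the DONE register, taking both offsets from
 * this struct (see the sdma_v4_0.c hunks below) rather than
 * hard-coding v6.1 or v7.0 registers. */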
SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); + nbio_v7_0_hdp_flush_reg.hdp_flush_done_offset = SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK; + nbio_v7_0_hdp_flush_reg.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK; + + nbio_v7_0_pcie_index_data.index_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); + nbio_v7_0_pcie_index_data.data_offset = SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h new file mode 100644 index 000000000000..054ff49427e6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.h @@ -0,0 +1,49 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __NBIO_V7_0_H__ +#define __NBIO_V7_0_H__ + +#include "soc15_common.h" + +extern struct nbio_hdp_flush_reg nbio_v7_0_hdp_flush_reg; +extern struct nbio_pcie_index_data nbio_v7_0_pcie_index_data; +int nbio_v7_0_init(struct amdgpu_device *adev); +u32 nbio_v7_0_get_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx); +void nbio_v7_0_set_atombios_scratch_regs(struct amdgpu_device *adev, + uint32_t idx, uint32_t val); +void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable); +void nbio_v7_0_hdp_flush(struct amdgpu_device *adev); +u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev); +void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index); +void nbio_v7_0_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable); +void nbio_v7_0_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index); +void nbio_v7_0_ih_control(struct amdgpu_device *adev); +u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev); +void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable); +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c new file mode 100644 index 000000000000..20c1e539ff35 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -0,0 +1,308 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Author: Huang Rui + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v10_0.h" + +#include "vega10/soc15ip.h" +#include "raven1/MP/mp_10_0_offset.h" +#include "raven1/GC/gc_9_1_offset.h" +#include "raven1/SDMA0/sdma0_4_1_offset.h" + +static int +psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) +{ + switch(ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + struct common_firmware_header *header; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + header = (struct common_firmware_header *)ucode->fw; + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = (uint32_t)fw_mem_mc_addr; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = (uint32_t)((uint64_t)fw_mem_mc_addr >> 32); + cmd->cmd.cmd_load_ip_fw.fw_size = le32_to_cpu(header->ucode_size_bytes); + + ret = psp_v10_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + +int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + /* Wait for response flag (bit 31) in C2PMSG_64 */ + psp_ring_reg = 0; + while ((psp_ring_reg & 0x80000000) == 0) { + psp_ring_reg = 
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64); + } + + return 0; +} + +int psp_v10_0_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + /* KM (GPCOM) prepare write pointer */ + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + /* Update KM RB frame pointer to new frame */ + if ((psp_write_ptr_reg % ring->ring_size) == 0) + write_frame = ring->ring_mem; + else + write_frame = ring->ring_mem + (psp_write_ptr_reg / (sizeof(struct psp_gfx_rb_frame) / 4)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = (unsigned int)(cmd_buf_mc_addr >> 32); + write_frame->cmd_buf_addr_lo = (unsigned int)(cmd_buf_mc_addr); + write_frame->fence_addr_hi = (unsigned int)(fence_mc_addr >> 32); + write_frame->fence_addr_lo = (unsigned int)(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg += sizeof(struct psp_gfx_rb_frame) / 4; + psp_write_ptr_reg = (psp_write_ptr_reg >= ring->ring_size) ? 0 : psp_write_ptr_reg; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + + return 0; +} + +static int +psp_v10_0_sram_map(unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch(ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case 
AMDGPU_UCODE_ID_MAXIMUM:
+ default:
+ ret = -EINVAL;
+ break;
+ } +
+ return ret;
+} +
+bool psp_v10_0_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type)
+{
+ int err = 0;
+ unsigned int fw_sram_reg_val = 0;
+ unsigned int fw_sram_addr_reg_offset = 0;
+ unsigned int fw_sram_data_reg_offset = 0;
+ unsigned int ucode_size;
+ uint32_t *ucode_mem = NULL;
+ struct amdgpu_device *adev = psp->adev; +
+ err = psp_v10_0_sram_map(&fw_sram_reg_val, &fw_sram_addr_reg_offset,
+ &fw_sram_data_reg_offset, ucode_type);
+ if (err)
+ return false; +
+ WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); +
+ ucode_size = ucode->ucode_size;
+ ucode_mem = (uint32_t *)ucode->kaddr;
+ while (ucode_size) {
+ fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); +
+ if (*ucode_mem != fw_sram_reg_val)
+ return false; +
+ ucode_mem++;
+ /* 4 bytes */
+ ucode_size -= 4;
+ } +
+ return true;
+} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h new file mode 100644 index 000000000000..2022b7b7151e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Huang Rui
+ *
+ */
+#ifndef __PSP_V10_0_H__
+#define __PSP_V10_0_H__ +
+#include "amdgpu_psp.h" +
+extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd);
+extern int psp_v10_0_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type);
+extern int psp_v10_0_cmd_submit(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+ int index);
+extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type);
+#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index eef89abc0cee..6e5c6edabb84 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -172,7 +172,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
/* Check sOS sign of life register to confirm sys driver and sOS * have already been loaded.
*/
- sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81));
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg) return 0;
@@ -188,10 +188,10 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size);
/* Provide the sys driver to bootrom */
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 1 << 16;
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
/* there might be handshake issue with hardware which needs delay */
@@ -213,7 +213,7 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
/* Check sOS sign of life register to confirm sys driver and sOS * have already been loaded. */
- sol_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81));
+ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
if (sol_reg) return 0;
@@ -229,17 +229,17 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size);
/* Provide the PSP secure OS to bootrom */
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_36),
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36,
(uint32_t)(psp->fw_pri_mc_addr >> 20));
psp_gfxdrv_command_reg = 2 << 16;
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35,
psp_gfxdrv_command_reg);
/* there might be handshake issue with hardware which needs delay */
mdelay(20); #if 0
ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81),
- RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81)),
+ RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81),
0, true); #endif
@@ -299,17 +299,17 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
/* Write low address of the ring to C2PMSG_69 */
psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_69), psp_ring_reg);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
/* Write high address of the ring to C2PMSG_70 */
psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_70), psp_ring_reg);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
/* Write size of ring to C2PMSG_71 */
psp_ring_reg = ring->ring_size;
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_71), psp_ring_reg);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
/* Write the ring initialization command to C2PMSG_64 */
psp_ring_reg = ring_type; psp_ring_reg = psp_ring_reg << 16;
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
@@ -332,7 +332,7 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
/* Write the ring destroy command to C2PMSG_64 */
psp_ring_reg = 3 << 16;
- WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), psp_ring_reg);
+ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
/* there might be handshake issue with hardware which needs delay */
mdelay(20);
@@ -361,7 +361,7 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
/* KM (GPCOM) prepare write pointer */
- psp_write_ptr_reg = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67));
+ psp_write_ptr_reg =
RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -383,7 +383,7 @@ int psp_v3_1_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_67), psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -515,7 +515,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp) uint32_t reg; reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000; - WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg); - reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2)); + WREG32_SOC15(NBIO, 0, mmPCIE_INDEX2, reg); + reg = RREG32_SOC15(NBIO, 0, mmPCIE_DATA2); return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a69e5d4e1d2a..1d766ae98dc8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -643,8 +643,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); /* Initialize the ring buffer's read and write pointers */ + ring->wptr = 0; WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0); - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0); + sdma_v3_0_ring_set_wptr(ring); WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0); WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0); @@ -659,9 +660,6 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8); WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40); - ring->wptr = 0; - WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); - doorbell = RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]); if (ring->use_doorbell) { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ecc70a730a54..4a65697ccc94 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -35,6 +35,7 @@ #include "vega10/MMHUB/mmhub_1_0_offset.h" #include "vega10/MMHUB/mmhub_1_0_sh_mask.h" #include "vega10/HDP/hdp_4_0_offset.h" +#include "raven1/SDMA0/sdma0_4_1_default.h" #include "soc15_common.h" #include "soc15.h" @@ -42,6 +43,10 @@ MODULE_FIRMWARE("amdgpu/vega10_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin"); +MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); + +#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L +#define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); @@ -82,6 +87,26 @@ static const u32 golden_settings_sdma_vg10[] = { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00104002 }; +static const u32 golden_settings_sdma_4_1[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CHICKEN_BITS), 0xfe931f07, 0x02831f07, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), 0xffffffff, 0x3f000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), 0xfc3fffff, 0x40000051, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC0_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, 
mmSDMA0_RLC0_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_IB_CNTL), 0x800f0111, 0x00000100, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL), 0xfffffff7, 0x00403000, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UTCL1_PAGE), 0x000003ff, 0x000003c0 +}; + +static const u32 golden_settings_sdma_rv1[] = +{ + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG), 0x0018773f, 0x00000002, + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ), 0x0018773f, 0x00000002 +}; + static u32 sdma_v4_0_get_reg_offset(u32 instance, u32 internal_offset) { u32 base = 0; @@ -112,25 +137,19 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) golden_settings_sdma_vg10, (const u32)ARRAY_SIZE(golden_settings_sdma_vg10)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + golden_settings_sdma_4_1, + (const u32)ARRAY_SIZE(golden_settings_sdma_4_1)); + amdgpu_program_register_sequence(adev, + golden_settings_sdma_rv1, + (const u32)ARRAY_SIZE(golden_settings_sdma_rv1)); + break; default: break; } } -static void sdma_v4_0_print_ucode_regs(void *handle) -{ - int i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - dev_info(adev->dev, "VEGA10 SDMA ucode registers\n"); - for (i = 0; i < adev->sdma.num_instances; i++) { - dev_info(adev->dev, " SDMA%d_UCODE_ADDR=0x%08X\n", - i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR))); - dev_info(adev->dev, " SDMA%d_UCODE_CHECKSUM=0x%08X\n", - i, RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_CHECKSUM))); - } -} - /** * sdma_v4_0_init_microcode - load ucode images from disk * @@ -158,6 +177,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) case CHIP_VEGA10: chip_name = "vega10"; break; + case CHIP_RAVEN: + chip_name = "raven"; + break; default: BUG(); } @@ -350,7 +372,9 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) u32 ref_and_mask = 0; struct nbio_hdp_flush_reg *nbio_hf_reg; - if (ring->adev->asic_type == CHIP_VEGA10) + if (ring->adev->flags & AMD_IS_APU) + nbio_hf_reg = &nbio_v7_0_hdp_flush_reg; + else nbio_hf_reg = &nbio_v6_1_hdp_flush_reg; if (ring == &ring->adev->sdma.instance[0].ring) @@ -581,7 +605,10 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) } WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL), doorbell); WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + if (adev->flags & AMD_IS_APU) + nbio_v7_0_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); + else + nbio_v6_1_sdma_doorbell_range(adev, i, ring->use_doorbell, ring->doorbell_index); if (amdgpu_sriov_vf(adev)) sdma_v4_0_ring_set_wptr(ring); @@ -633,6 +660,69 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) return 0; } +static void +sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable) +{ + uint32_t def, data; + + if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) { + /* enable idle interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + } else { + /* disable idle interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data &= ~SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + } +} +
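/*
 * A hedged sketch of how the {offset, and_mask, or_value} triples in
 * golden_settings_sdma_4_1/_rv1 above are assumed to be consumed by
 * amdgpu_program_register_sequence(): a read-modify-write that clears the
 * masked field and ORs in the golden value. The helper name below is
 * hypothetical and illustrative only.
 */
#if 0 /* illustrative only */
static void example_apply_golden_triples(struct amdgpu_device *adev,
					 const u32 *regs, u32 count)
{
	u32 i, tmp;

	for (i = 0; i < count; i += 3) {
		tmp = RREG32(regs[i]);
		tmp &= ~regs[i + 1];	/* clear the bits being overridden */
		tmp |= regs[i + 2];	/* apply the golden value */
		WREG32(regs[i], tmp);
	}
}
#endif
+static void sdma_v4_1_init_power_gating(struct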
amdgpu_device *adev) +{ + uint32_t def, data; + + /* Enable HW based PG. */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data |= SDMA0_POWER_CNTL__PG_CNTL_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); + + /* enable interrupt */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); + data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL), data); + + /* Configure hold time to filter invalid power on/off requests. Use default right now */ + def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL)); + data &= ~SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK; + data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK); + /* Configure switch time for hysteresis purposes. Use default right now */ + data &= ~SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK; + data |= (mmSDMA0_POWER_CNTL_DEFAULT & SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK); + if (data != def) + WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data); +} + +static void sdma_v4_0_init_pg(struct amdgpu_device *adev) +{ + if (!(adev->pg_flags & AMD_PG_SUPPORT_SDMA)) + return; + + switch (adev->asic_type) { + case CHIP_RAVEN: + sdma_v4_1_init_power_gating(adev); + sdma_v4_1_update_power_gating(adev, true); + break; + default: + break; + } +} + /** * sdma_v4_0_rlc_resume - setup and start the async dma engines * @@ -643,7 +733,8 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) */ static int sdma_v4_0_rlc_resume(struct amdgpu_device *adev) { - /* XXX todo */ + sdma_v4_0_init_pg(adev); + return 0; } @@ -699,8 +790,6 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); } - sdma_v4_0_print_ucode_regs(adev); - return 0; } @@ -726,7 +815,6 @@ static int sdma_v4_0_start(struct amdgpu_device *adev) } if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - DRM_INFO("Loading via direct write\n"); r = sdma_v4_0_load_microcode(adev); if (r) return r; @@ -764,8 +852,6 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u32 tmp; u64 gpu_addr; - DRM_INFO("In Ring test func\n"); - r = amdgpu_wb_get(adev, &index); if (r) { dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); @@ -1038,9 +1124,8 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) | SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf)); @@ -1074,7 +1159,10 @@ static int sdma_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->sdma.num_instances = 2; + if (adev->asic_type == CHIP_RAVEN) + adev->sdma.num_instances = 1; + else + adev->sdma.num_instances = 2; sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); @@ -1406,6 +1494,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle, switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_RAVEN:
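/*
 * The vm_flush hunk above swaps an open-coded "pd_addr | 0x1" (plus a
 * BUG_ON guarding stray bits) for the ASIC's own PDE encoding. A minimal
 * sketch of the resulting composition, assuming amdgpu_gart_get_vm_pde()
 * returns the hardware encoding of the page-directory base for this
 * generation; illustrative only:
 */
#if 0 /* illustrative only */
	/* encode the page-directory address per ASIC, then mark the entry
	 * valid so the GPUVM walker will actually follow it */
	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
	pd_addr |= AMDGPU_PTE_VALID;
#endif
sdma_v4_0_update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ?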
true : false); sdma_v4_0_update_medium_grain_light_sleep(adev, @@ -1420,6 +1509,17 @@ static int sdma_v4_0_set_clockgating_state(void *handle, static int sdma_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + switch (adev->asic_type) { + case CHIP_RAVEN: + sdma_v4_1_update_power_gating(adev, + state == AMD_PG_STATE_GATE ? true : false); + break; + default: + break; + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 2431639baf47..f45fb0f022b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -971,44 +971,44 @@ static void si_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) } static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { - {GRBM_STATUS, false}, - {GB_ADDR_CONFIG, false}, - {MC_ARB_RAMCFG, false}, - {GB_TILE_MODE0, false}, - {GB_TILE_MODE1, false}, - {GB_TILE_MODE2, false}, - {GB_TILE_MODE3, false}, - {GB_TILE_MODE4, false}, - {GB_TILE_MODE5, false}, - {GB_TILE_MODE6, false}, - {GB_TILE_MODE7, false}, - {GB_TILE_MODE8, false}, - {GB_TILE_MODE9, false}, - {GB_TILE_MODE10, false}, - {GB_TILE_MODE11, false}, - {GB_TILE_MODE12, false}, - {GB_TILE_MODE13, false}, - {GB_TILE_MODE14, false}, - {GB_TILE_MODE15, false}, - {GB_TILE_MODE16, false}, - {GB_TILE_MODE17, false}, - {GB_TILE_MODE18, false}, - {GB_TILE_MODE19, false}, - {GB_TILE_MODE20, false}, - {GB_TILE_MODE21, false}, - {GB_TILE_MODE22, false}, - {GB_TILE_MODE23, false}, - {GB_TILE_MODE24, false}, - {GB_TILE_MODE25, false}, - {GB_TILE_MODE26, false}, - {GB_TILE_MODE27, false}, - {GB_TILE_MODE28, false}, - {GB_TILE_MODE29, false}, - {GB_TILE_MODE30, false}, - {GB_TILE_MODE31, false}, - {CC_RB_BACKEND_DISABLE, false, true}, - {GC_USER_RB_BACKEND_DISABLE, false, true}, - {PA_SC_RASTER_CONFIG, false, true}, + {GRBM_STATUS}, + {GB_ADDR_CONFIG}, + {MC_ARB_RAMCFG}, + {GB_TILE_MODE0}, + {GB_TILE_MODE1}, + {GB_TILE_MODE2}, + {GB_TILE_MODE3}, + {GB_TILE_MODE4}, + {GB_TILE_MODE5}, + {GB_TILE_MODE6}, + {GB_TILE_MODE7}, + {GB_TILE_MODE8}, + {GB_TILE_MODE9}, + {GB_TILE_MODE10}, + {GB_TILE_MODE11}, + {GB_TILE_MODE12}, + {GB_TILE_MODE13}, + {GB_TILE_MODE14}, + {GB_TILE_MODE15}, + {GB_TILE_MODE16}, + {GB_TILE_MODE17}, + {GB_TILE_MODE18}, + {GB_TILE_MODE19}, + {GB_TILE_MODE20}, + {GB_TILE_MODE21}, + {GB_TILE_MODE22}, + {GB_TILE_MODE23}, + {GB_TILE_MODE24}, + {GB_TILE_MODE25}, + {GB_TILE_MODE26}, + {GB_TILE_MODE27}, + {GB_TILE_MODE28}, + {GB_TILE_MODE29}, + {GB_TILE_MODE30}, + {GB_TILE_MODE31}, + {CC_RB_BACKEND_DISABLE, true}, + {GC_USER_RB_BACKEND_DISABLE, true}, + {PA_SC_RASTER_CONFIG, true}, }; static uint32_t si_get_register_value(struct amdgpu_device *adev, @@ -1093,13 +1093,13 @@ static int si_read_register(struct amdgpu_device *adev, u32 se_num, *value = 0; for (i = 0; i < ARRAY_SIZE(si_allowed_read_registers); i++) { + bool indexed = si_allowed_read_registers[i].grbm_indexed; + if (reg_offset != si_allowed_read_registers[i].reg_offset) continue; - if (!si_allowed_read_registers[i].untouched) - *value = si_get_register_value(adev, - si_allowed_read_registers[i].grbm_indexed, - se_num, sh_num, reg_offset); + *value = si_get_register_value(adev, indexed, se_num, sh_num, + reg_offset); return 0; } return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e945f8b07487..5fdb05a0c88a 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -57,6 +57,7 @@ #include 
"sdma_v4_0.h" #include "uvd_v7_0.h" #include "vce_v4_0.h" +#include "vcn_v1_0.h" #include "amdgpu_powerplay.h" #include "dce_virtual.h" #include "mxgpu_ai.h" @@ -104,10 +105,10 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg) u32 r; struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->asic_type == CHIP_VEGA10) - nbio_pcie_id = &nbio_v6_1_pcie_index_data; + if (adev->flags & AMD_IS_APU) + nbio_pcie_id = &nbio_v7_0_pcie_index_data; else - BUG(); + nbio_pcie_id = &nbio_v6_1_pcie_index_data; address = nbio_pcie_id->index_offset; data = nbio_pcie_id->data_offset; @@ -125,10 +126,10 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags, address, data; struct nbio_pcie_index_data *nbio_pcie_id; - if (adev->asic_type == CHIP_VEGA10) - nbio_pcie_id = &nbio_v6_1_pcie_index_data; + if (adev->flags & AMD_IS_APU) + nbio_pcie_id = &nbio_v7_0_pcie_index_data; else - BUG(); + nbio_pcie_id = &nbio_v6_1_pcie_index_data; address = nbio_pcie_id->index_offset; data = nbio_pcie_id->data_offset; @@ -199,13 +200,20 @@ static void soc15_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v) static u32 soc15_get_config_memsize(struct amdgpu_device *adev) { - return nbio_v6_1_get_memsize(adev); + if (adev->flags & AMD_IS_APU) + return nbio_v7_0_get_memsize(adev); + else + return nbio_v6_1_get_memsize(adev); } static const u32 vega10_golden_init[] = { }; +static const u32 raven_golden_init[] = +{ +}; + static void soc15_init_golden_registers(struct amdgpu_device *adev) { /* Some of the registers might be dependent on GRBM_GFX_INDEX */ @@ -217,6 +225,11 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) vega10_golden_init, (const u32)ARRAY_SIZE(vega10_golden_init)); break; + case CHIP_RAVEN: + amdgpu_program_register_sequence(adev, + raven_golden_init, + (const u32)ARRAY_SIZE(raven_golden_init)); + break; default: break; } @@ -280,29 +293,25 @@ static bool soc15_read_bios_from_rom(struct amdgpu_device *adev, return true; } -static struct amdgpu_allowed_register_entry vega10_allowed_read_registers[] = { - /* todo */ -}; - static struct amdgpu_allowed_register_entry soc15_allowed_read_registers[] = { - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2), false}, - { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3), false}, - { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG), false}, - { SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STAT), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1), false}, - { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS), false}, - { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG), false}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)}, + { SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)}, + { SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_STATUS_REG)}, + { 
SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_STATUS_REG)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STAT)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)}, + { SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)}, + { SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)}, }; static uint32_t soc15_read_indexed_register(struct amdgpu_device *adev, u32 se_num, @@ -341,41 +350,16 @@ static uint32_t soc15_get_register_value(struct amdgpu_device *adev, static int soc15_read_register(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 reg_offset, u32 *value) { - struct amdgpu_allowed_register_entry *asic_register_table = NULL; - struct amdgpu_allowed_register_entry *asic_register_entry; - uint32_t size, i; + uint32_t i; *value = 0; - switch (adev->asic_type) { - case CHIP_VEGA10: - asic_register_table = vega10_allowed_read_registers; - size = ARRAY_SIZE(vega10_allowed_read_registers); - break; - default: - return -EINVAL; - } - - if (asic_register_table) { - for (i = 0; i < size; i++) { - asic_register_entry = asic_register_table + i; - if (reg_offset != asic_register_entry->reg_offset) - continue; - if (!asic_register_entry->untouched) - *value = soc15_get_register_value(adev, - asic_register_entry->grbm_indexed, - se_num, sh_num, reg_offset); - return 0; - } - } - for (i = 0; i < ARRAY_SIZE(soc15_allowed_read_registers); i++) { if (reg_offset != soc15_allowed_read_registers[i].reg_offset) continue; - if (!soc15_allowed_read_registers[i].untouched) - *value = soc15_get_register_value(adev, - soc15_allowed_read_registers[i].grbm_indexed, - se_num, sh_num, reg_offset); + *value = soc15_get_register_value(adev, + soc15_allowed_read_registers[i].grbm_indexed, + se_num, sh_num, reg_offset); return 0; } return -EINVAL; @@ -396,7 +380,10 @@ static void soc15_gpu_pci_config_reset(struct amdgpu_device *adev) /* wait for asic to come out of reset */ for (i = 0; i < adev->usec_timeout; i++) { - if (nbio_v6_1_get_memsize(adev) != 0xffffffff) + u32 memsize = (adev->flags & AMD_IS_APU) ? 
+ nbio_v7_0_get_memsize(adev) : + nbio_v6_1_get_memsize(adev); + if (memsize != 0xffffffff) break; udelay(1); } @@ -470,8 +457,12 @@ static void soc15_program_aspm(struct amdgpu_device *adev) static void soc15_enable_doorbell_aperture(struct amdgpu_device *adev, bool enable) { - nbio_v6_1_enable_doorbell_aperture(adev, enable); - nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); + if (adev->flags & AMD_IS_APU) { + nbio_v7_0_enable_doorbell_aperture(adev, enable); + } else { + nbio_v6_1_enable_doorbell_aperture(adev, enable); + nbio_v6_1_enable_doorbell_selfring_aperture(adev, enable); + } } static const struct amdgpu_ip_block_version vega10_common_ip_block = @@ -493,8 +484,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: amdgpu_ip_block_add(adev, &vega10_common_ip_block); - amdgpu_ip_block_add(adev, &gfxhub_v1_0_ip_block); - amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block); amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_ip_block_add(adev, &vega10_ih_ip_block); if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1) @@ -508,6 +497,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_ip_block_add(adev, &uvd_v7_0_ip_block); amdgpu_ip_block_add(adev, &vce_v4_0_ip_block); break; + case CHIP_RAVEN: + amdgpu_ip_block_add(adev, &vega10_common_ip_block); + amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block); + amdgpu_ip_block_add(adev, &vega10_ih_ip_block); + amdgpu_ip_block_add(adev, &psp_v10_0_ip_block); + amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block); + if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) + amdgpu_ip_block_add(adev, &dce_virtual_ip_block); + amdgpu_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_ip_block_add(adev, &sdma_v4_0_ip_block); + amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block); + break; default: return -EINVAL; } @@ -517,7 +518,10 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) { - return nbio_v6_1_get_rev_id(adev); + if (adev->flags & AMD_IS_APU) + return nbio_v7_0_get_rev_id(adev); + else + return nbio_v6_1_get_rev_id(adev); } @@ -560,11 +564,6 @@ static int soc15_common_early_init(void *handle) (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP))) psp_enabled = true; - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_init_setting(adev); - xgpu_ai_mailbox_set_irq_funcs(adev); - } - /* * nbio needs to be used by both sdma and gfx9, but is only * initialized once */ switch (adev->asic_type) { case CHIP_VEGA10: nbio_v6_1_init(adev); break; + case CHIP_RAVEN: + nbio_v7_0_init(adev); + break; default: return -EINVAL; } @@ -603,11 +605,39 @@ static int soc15_common_early_init(void *handle) adev->pg_flags = 0; adev->external_rev_id = 0x1; break; + case CHIP_RAVEN: + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_DRM_MGCG | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; + adev->pg_flags = AMD_PG_SUPPORT_SDMA; + adev->external_rev_id = 0x1; + break; default: /* FIXME: not supported yet */ return -EINVAL; }
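/*
 * Raven is an APU, so every NBIO touch point in soc15.c now picks between
 * the nbio_v7_0 (APU) and nbio_v6_1 (dGPU) backends instead of assuming
 * Vega10. A hedged sketch of the recurring dispatch pattern; the wrapper
 * name below is hypothetical and illustrative only.
 */
#if 0 /* illustrative only */
static u32 example_nbio_get_memsize(struct amdgpu_device *adev)
{
	/* APUs (Raven) carry NBIO 7.0; dGPUs (Vega10) carry NBIO 6.1 */
	if (adev->flags & AMD_IS_APU)
		return nbio_v7_0_get_memsize(adev);
	return nbio_v6_1_get_memsize(adev);
}
#endif
+ if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_init_setting(adev);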
+ xgpu_ai_mailbox_set_irq_funcs(adev); + } + adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type); amdgpu_get_pcie_info(adev); @@ -825,6 +855,20 @@ static int soc15_common_set_clockgating_state(void *handle, soc15_update_df_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE ? true : false); break; + case CHIP_RAVEN: + nbio_v7_0_update_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + nbio_v6_1_update_medium_grain_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_hdp_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_drm_light_sleep(adev, + state == AMD_CG_STATE_GATE ? true : false); + soc15_update_rom_medium_grain_clock_gating(adev, + state == AMD_CG_STATE_GATE ? true : false); + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 378a46da585a..acb3cdb119f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -25,6 +25,7 @@ #define __SOC15_H__ #include "nbio_v6_1.h" +#include "nbio_v7_0.h" extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index e8df6d820dbe..e2d330eed952 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -63,6 +63,13 @@ struct nbio_pcie_index_data { (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg)))))) +#define RREG32_SOC15_OFFSET(ip, inst, reg, offset) \ + RREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset) + #define WREG32_SOC15(ip, inst, reg, value) \ WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ @@ -70,6 +77,13 @@ struct nbio_pcie_index_data { (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ (ip##_BASE__INST##inst##_SEG4 + reg))))), value) +#define WREG32_SOC15_OFFSET(ip, inst, reg, offset, value) \ + WREG32( (0 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG0 + reg : \ + (1 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG1 + reg : \ + (2 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG2 + reg : \ + (3 == reg##_BASE_IDX ? ip##_BASE__INST##inst##_SEG3 + reg : \ + (ip##_BASE__INST##inst##_SEG4 + reg))))) + offset, value) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 75403c7c8c9e..e79befd80eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -132,6 +132,7 @@ * 1 - pfp */ #define PACKET3_INDIRECT_BUFFER 0x3F +#define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) /* 0 - LRU * 1 - Stream @@ -259,8 +260,97 @@ #define PACKET3_WAIT_ON_CE_COUNTER 0x86 #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SWITCH_BUFFER 0x8B +#define PACKET3_FRAME_CONTROL 0x90 +# define FRAME_CMD(x) ((x) << 28) + /* + * x=0: tmz_begin + * x=1: tmz_end + */ + #define PACKET3_SET_RESOURCES 0xA0 +/* 1. header + * 2. CONTROL + * 3. QUEUE_MASK_LO [31:0] + * 4. QUEUE_MASK_HI [31:0] + * 5. 
GWS_MASK_LO [31:0] + * 6. GWS_MASK_HI [31:0] + * 7. OAC_MASK [15:0] + * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0] + */ +# define PACKET3_SET_RESOURCES_VMID_MASK(x) ((x) << 0) +# define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16) +# define PACKET3_SET_RESOURCES_QUEUE_TYPE(x) ((x) << 29) #define PACKET3_MAP_QUEUES 0xA2 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. MQD_ADDR_LO [31:0] + * 5. MQD_ADDR_HI [31:0] + * 6. WPTR_ADDR_LO [31:0] + * 7. WPTR_ADDR_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_MAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_MAP_QUEUES_VMID(x) ((x) << 8) +# define PACKET3_MAP_QUEUES_QUEUE(x) ((x) << 13) +# define PACKET3_MAP_QUEUES_PIPE(x) ((x) << 16) +# define PACKET3_MAP_QUEUES_ME(x) ((x) << 18) +# define PACKET3_MAP_QUEUES_QUEUE_TYPE(x) ((x) << 21) +# define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x) ((x) << 24) +# define PACKET3_MAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_MAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2 */ +# define PACKET3_MAP_QUEUES_CHECK_DISABLE(x) ((x) << 1) +# define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2) +#define PACKET3_UNMAP_QUEUES 0xA3 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. CONTROL3 + * 5. CONTROL4 + * 6. CONTROL5 + */ +/* CONTROL */ +# define PACKET3_UNMAP_QUEUES_ACTION(x) ((x) << 0) + /* 0 - PREEMPT_QUEUES + * 1 - RESET_QUEUES + * 2 - DISABLE_PROCESS_QUEUES + * 3 - PREEMPT_QUEUES_NO_UNMAP + */ +# define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x) ((x) << 4) +# define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x) ((x) << 26) +# define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x) ((x) << 29) +/* CONTROL2a */ +# define PACKET3_UNMAP_QUEUES_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2) +/* CONTROL3a */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2) +/* CONTROL3b */ +# define PACKET3_UNMAP_QUEUES_RB_WPTR(x) ((x) << 0) +/* CONTROL4 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2) +/* CONTROL5 */ +# define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2) +#define PACKET3_QUERY_STATUS 0xA4 +/* 1. header + * 2. CONTROL + * 3. CONTROL2 + * 4. ADDR_LO [31:0] + * 5. ADDR_HI [31:0] + * 6. DATA_LO [31:0] + * 7. 
DATA_HI [31:0] + */ +/* CONTROL */ +# define PACKET3_QUERY_STATUS_CONTEXT_ID(x) ((x) << 0) +# define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x) ((x) << 28) +# define PACKET3_QUERY_STATUS_COMMAND(x) ((x) << 30) +/* CONTROL2a */ +# define PACKET3_QUERY_STATUS_PASID(x) ((x) << 0) +/* CONTROL2b */ +# define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x) ((x) << 2) +# define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) + #define VCE_CMD_NO_OP 0x00000000 #define VCE_CMD_END 0x00000001 diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index eca8f6e01e97..987b958368ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -58,7 +58,7 @@ static uint64_t uvd_v7_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); } /** @@ -73,9 +73,9 @@ static uint64_t uvd_v7_0_enc_ring_get_rptr(struct amdgpu_ring *ring) struct amdgpu_device *adev = ring->adev; if (ring == &adev->uvd.ring_enc[0]) - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR); else - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2); } /** @@ -89,7 +89,7 @@ static uint64_t uvd_v7_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR); } /** @@ -107,9 +107,9 @@ static uint64_t uvd_v7_0_enc_ring_get_wptr(struct amdgpu_ring *ring) return adev->wb.wb[ring->wptr_offs]; if (ring == &adev->uvd.ring_enc[0]) - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); else - return RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2)); + return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); } /** @@ -123,7 +123,7 @@ static void uvd_v7_0_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } /** @@ -145,10 +145,10 @@ static void uvd_v7_0_enc_ring_set_wptr(struct amdgpu_ring *ring) } if (ring == &adev->uvd.ring_enc[0]) - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); else - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); } @@ -562,7 +562,13 @@ static int uvd_v7_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = &adev->uvd.ring; - uvd_v7_0_stop(adev); + if (!amdgpu_sriov_vf(adev)) + uvd_v7_0_stop(adev); + else { + /* full access mode, so don't touch any UVD register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + } + ring->ready = false; return 0; @@ -611,46 +617,46 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) uint32_t offset; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); 
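	/* Note (added comment): in this PSP branch the VCPU cache window 0
	 * points at the firmware copy the PSP placed, so the heap and stack
	 * windows programmed below start at offset 0 of adev->uvd.gpu_addr;
	 * in the non-PSP branch the firmware image sits at the start of the
	 * BO itself, so they start past it (offset = size). */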
offset = 0; } else { - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr)); offset = size; } - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE0), size); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr + offset)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr + offset)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1), (1 << 21)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_UVD_HEAP_SIZE); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, (1 << 21)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_UVD_HEAP_SIZE); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, lower_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.gpu_addr + offset + AMDGPU_UVD_HEAP_SIZE)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2), (2 << 21)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CACHE_SIZE2), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, (2 << 21)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_UVD_STACK_SIZE + (AMDGPU_UVD_SESSION_SIZE * 40)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG), + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH4), adev->uvd.max_handles); + WREG32_SOC15(UVD, 0, mmUVD_GP_SCRATCH4, adev->uvd.max_handles); } static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, @@ -664,29 +670,29 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, size = header->header_size + header->vce_table_size + header->uvd_table_size; /* 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of memory descriptor location */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO), lower_32_bits(addr)); - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI), upper_32_bits(addr)); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr)); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr)); /* 2, update vmid of descriptor */ - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID); data &= ~VCE_MMSCH_VF_VMID__VF_CTX_VMID_MASK; data |= (0 << VCE_MMSCH_VF_VMID__VF_CTX_VMID__SHIFT); /* use domain0 for MM scheduler */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_VMID), data); + 
WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_VMID, data); /* 3, notify mmsch about the size of this descriptor */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE), size); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_CTX_SIZE, size); /* 4, set resp to zero */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP), 0); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP, 0); /* 5, kick off the initialization and wait until VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero */ - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST), 0x10000001); + WREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_HOST, 0x10000001); - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); loop = 1000; while ((data & 0x10000002) != 0x10000002) { udelay(10); - data = RREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP)); + data = RREG32_SOC15(VCE, 0, mmVCE_MMSCH_VF_MAILBOX_RESP); loop--; if (!loop) break; @@ -696,6 +702,7 @@ static int uvd_v7_0_mmsch_start(struct amdgpu_device *adev, dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); return -EBUSY; } + WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0); return 0; } @@ -928,7 +935,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) mdelay(1); /* put LMI, VCPU, RBC etc... into reset */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | @@ -940,7 +947,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) mdelay(5); /* initialize UVD memory controller */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL), + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | @@ -953,23 +960,23 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) lmi_swap_cntl = 0xa; mp_swap_cntl = 0; #endif - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_SWAP_CNTL), lmi_swap_cntl); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MP_SWAP_CNTL), mp_swap_cntl); + WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); + WREG32_SOC15(UVD, 0, mmUVD_MP_SWAP_CNTL, mp_swap_cntl); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA0), 0x40c2040); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXA1), 0x0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB0), 0x40c2040); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUXB1), 0x0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_ALU), 0); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_MPC_SET_MUX), 0x88); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); /* take all subblocks out of reset, except VCPU */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(5); /* enable VCPU clock */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); /* enable UMC */ @@ -977,14 +984,14 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); /* boot up the VCPU */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0); + WREG32_SOC15(UVD, 0, 
mmUVD_SOFT_RESET, 0); mdelay(10); for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { - status = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS)); + status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); if (status & 2) break; mdelay(10); @@ -1025,44 +1032,44 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), tmp); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); /* set the write pointer delay */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL), 0); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); /* set the wb address */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR), + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, (upper_32_bits(ring->gpu_addr) >> 2)); /* program the RB_BASE for the ring buffer */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW), + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH), + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); /* Initialize the ring buffer's read and write pointers */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR), 0); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); - ring->wptr = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_RPTR)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_WPTR), + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); ring = &adev->uvd.ring_enc[0]; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO), ring->gpu_addr); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI), upper_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE), ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); ring = &adev->uvd.ring_enc[1]; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_RPTR2), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_WPTR2), lower_32_bits(ring->wptr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_LO2), ring->gpu_addr); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_BASE_HI2), upper_32_bits(ring->gpu_addr)); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RB_SIZE2), ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); return 0; } @@ -1077,7 +1084,7 @@ static int uvd_v7_0_start(struct amdgpu_device *adev) static void uvd_v7_0_stop(struct amdgpu_device *adev) { /* force RBC into idle state */
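/*
 * A minimal sketch of the ring-programming pattern used in
 * uvd_v7_0_start() above, assuming only that ring->gpu_addr is a 64-bit MC
 * address and that the RB base registers take a low/high 32-bit pair;
 * illustrative only.
 */
#if 0 /* illustrative only */
	/* split the 64-bit ring base across the LOW/HIGH BAR pair */
	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
			lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
			upper_32_bits(ring->gpu_addr));
	/* start the driver's wptr copy from the hardware rptr so the ring
	 * comes up empty */
	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
#endif
- WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0x11010101); + WREG32_SOC15(UVD, 0,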
mmUVD_RBC_RB_CNTL, 0x11010101); /* Stall UMC and register bus before resetting VCPU */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), @@ -1086,12 +1093,12 @@ static void uvd_v7_0_stop(struct amdgpu_device *adev) mdelay(1); /* put VCPU into reset */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); mdelay(5); /* disable VCPU clock */ - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0x0); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); /* Unstall UMC and register bus */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, @@ -1196,7 +1203,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -1208,7 +1215,7 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32_SOC15(UVD, 0, mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); @@ -1309,9 +1316,8 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t data0, data1, mask; unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2; data1 = upper_32_bits(pd_addr); @@ -1350,9 +1356,8 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE); amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); @@ -1408,8 +1413,8 @@ static bool uvd_v7_0_check_soft_reset(void *handle) if (REG_GET_FIELD(tmp, SRBM_STATUS, UVD_RQ_PENDING) || REG_GET_FIELD(tmp, SRBM_STATUS, UVD_BUSY) || - (RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS) & - AMDGPU_UVD_STATUS_BUSY_MASK))) + (RREG32_SOC15(UVD, 0, mmUVD_STATUS) & + AMDGPU_UVD_STATUS_BUSY_MASK)) srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); @@ -1516,9 +1521,9 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, data2, suvd_flags; - data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL)); - data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); - data2 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL)); + data = RREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL); + data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); + data2 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL); data &= ~(UVD_CGC_CTRL__CLK_OFF_DELAY_MASK | UVD_CGC_CTRL__CLK_GATE_DLY_TIMER_MASK); @@ -1562,18 +1567,18 @@ static void uvd_v7_0_set_sw_clock_gating(struct amdgpu_device *adev) UVD_SUVD_CGC_CTRL__SDB_MODE_MASK); data1 |= suvd_flags; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_CTRL), data); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), 0); - 
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_CTRL), data2); + WREG32_SOC15(UVD, 0, mmUVD_CGC_CTRL, data); + WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, 0); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_CTRL, data2); } static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) { uint32_t data, data1, cgc_flags, suvd_flags; - data = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE)); - data1 = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE)); + data = RREG32_SOC15(UVD, 0, mmUVD_CGC_GATE); + data1 = RREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE); cgc_flags = UVD_CGC_GATE__SYS_MASK | UVD_CGC_GATE__UDEC_MASK | @@ -1605,8 +1610,8 @@ static void uvd_v7_0_set_hw_clock_gating(struct amdgpu_device *adev) data |= cgc_flags; data1 |= suvd_flags; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CGC_GATE), data); - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SUVD_CGC_GATE), data1); + WREG32_SOC15(UVD, 0, mmUVD_CGC_GATE, data); + WREG32_SOC15(UVD, 0, mmUVD_SUVD_CGC_GATE, data1); } static void uvd_v7_0_set_bypass_mode(struct amdgpu_device *adev, bool enable) @@ -1665,7 +1670,7 @@ static int uvd_v7_0_set_powergating_state(void *handle, if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD)) return 0; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), UVD_POWER_STATUS__UVD_PG_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); if (state == AMD_PG_STATE_GATE) { uvd_v7_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index fb0819359909..90332f55cfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -77,13 +77,26 @@ static int vce_v3_0_set_clockgating_state(void *handle, static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_RPTR); + v = RREG32(mmVCE_RB_RPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_RPTR2); + v = RREG32(mmVCE_RB_RPTR2); else - return RREG32(mmVCE_RB_RPTR3); + v = RREG32(mmVCE_RB_RPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -96,13 +109,26 @@ static uint64_t vce_v3_0_ring_get_rptr(struct amdgpu_ring *ring) static uint64_t vce_v3_0_ring_get_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + u32 v; + + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); if (ring == &adev->vce.ring[0]) - return RREG32(mmVCE_RB_WPTR); + v = RREG32(mmVCE_RB_WPTR); else if (ring == &adev->vce.ring[1]) - return RREG32(mmVCE_RB_WPTR2); + v = RREG32(mmVCE_RB_WPTR2); else - return RREG32(mmVCE_RB_WPTR3); + v = RREG32(mmVCE_RB_WPTR3); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); + + return v; } /** @@ -116,12 +142,22 @@ static void vce_v3_0_ring_set_wptr(struct amdgpu_ring *ring) { 
struct amdgpu_device *adev = ring->adev; + mutex_lock(&adev->grbm_idx_mutex); + if (adev->vce.harvest_config == 0 || + adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0)); + else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) + WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1)); + if (ring == &adev->vce.ring[0]) WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); else if (ring == &adev->vce.ring[1]) WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); else WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + + WREG32(mmGRBM_GFX_INDEX, mmGRBM_GFX_INDEX_DEFAULT); + mutex_unlock(&adev->grbm_idx_mutex); } static void vce_v3_0_override_vce_clock_gating(struct amdgpu_device *adev, bool override) @@ -231,33 +267,38 @@ static int vce_v3_0_start(struct amdgpu_device *adev) struct amdgpu_ring *ring; int idx, r; - ring = &adev->vce.ring[0]; - WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); - - ring = &adev->vce.ring[1]; - WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); - - ring = &adev->vce.ring[2]; - WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); - WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); - WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); - WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); - mutex_lock(&adev->grbm_idx_mutex); for (idx = 0; idx < 2; ++idx) { if (adev->vce.harvest_config & (1 << idx)) continue; WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(idx)); + + /* Program the instance 0 reg space when both instances or only + instance 0 are available; program the instance 1 reg space when + only instance 1 is available */ + if (idx != 1 || adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0) { + ring = &adev->vce.ring[0]; + WREG32(mmVCE_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE, ring->ring_size / 4); + + ring = &adev->vce.ring[1]; + WREG32(mmVCE_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4); + + ring = &adev->vce.ring[2]; + WREG32(mmVCE_RB_RPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_WPTR3, lower_32_bits(ring->wptr)); + WREG32(mmVCE_RB_BASE_LO3, ring->gpu_addr); + WREG32(mmVCE_RB_BASE_HI3, upper_32_bits(ring->gpu_addr)); + WREG32(mmVCE_RB_SIZE3, ring->ring_size / 4); + } + vce_v3_0_mc_resume(adev, idx); WREG32_FIELD(VCE_STATUS, JOB_BUSY, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 139f964196b4..1ecd6bb90c1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -190,6 +190,7 @@ static int vce_v4_0_mmsch_start(struct amdgpu_device *adev, dev_err(adev->dev, "failed to init MMSCH, mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data); return -EBUSY; } + WDOORBELL32(adev->vce.ring[0].doorbell_index, 0); return 0; }
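/*
 * The vce_v3_0 hunks above serialize ring-pointer access behind
 * grbm_idx_mutex because the VCE_RB_* registers live in per-instance
 * register space selected through GRBM_GFX_INDEX. A sketch of the
 * selection rule used there, factored into a hypothetical helper;
 * illustrative only.
 */
#if 0 /* illustrative only */
static void example_vce_select_ring_instance(struct amdgpu_device *adev)
{
	/* both instances present, or only VCE0: use instance 0 space */
	if (adev->vce.harvest_config == 0 ||
	    adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE1)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(0));
	/* only VCE1 present: its registers are reached via instance 1 */
	else if (adev->vce.harvest_config == AMDGPU_VCE_HARVEST_VCE0)
		WREG32(mmGRBM_GFX_INDEX, GET_VCE_INSTANCE(1));
}
#endif
@@ -418,15 +419,19 @@ static int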
vce_v4_0_sw_init(void *handle) if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + + adev->vce.saved_bo = kmalloc(size, GFP_KERNEL); + if (!adev->vce.saved_bo) + return -ENOMEM; + hdr = (const struct common_firmware_header *)adev->vce.fw->data; adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].ucode_id = AMDGPU_UCODE_ID_VCE; adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].fw = adev->vce.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); DRM_INFO("PSP loading VCE firmware\n"); - } - - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + } else { r = amdgpu_vce_resume(adev); if (r) return r; @@ -465,6 +470,11 @@ static int vce_v4_0_sw_fini(void *handle) /* free MM table */ amdgpu_virt_free_mm_table(adev); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + kfree(adev->vce.saved_bo); + adev->vce.saved_bo = NULL; + } + r = amdgpu_vce_suspend(adev); if (r) return r; @@ -505,8 +515,14 @@ static int vce_v4_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - /* vce_v4_0_wait_for_idle(handle); */ - vce_v4_0_stop(adev); + if (!amdgpu_sriov_vf(adev)) { + /* vce_v4_0_wait_for_idle(handle); */ + vce_v4_0_stop(adev); + } else { + /* full access mode, so don't touch any VCE register */ + DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); + } + for (i = 0; i < adev->vce.num_rings; i++) adev->vce.ring[i].ready = false; @@ -515,8 +531,18 @@ static int vce_v4_0_hw_fini(void *handle) static int vce_v4_0_suspend(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + if (adev->vce.vcpu_bo == NULL) + return 0; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; + + memcpy_fromio(adev->vce.saved_bo, ptr, size); + } r = vce_v4_0_hw_fini(adev); if (r) @@ -527,12 +553,22 @@ static int vce_v4_0_suspend(void *handle) static int vce_v4_0_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; - r = amdgpu_vce_resume(adev); - if (r) - return r; + if (adev->vce.vcpu_bo == NULL) + return -EINVAL; + + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo); + void *ptr = adev->vce.cpu_addr; + + memcpy_toio(ptr, adev->vce.saved_bo, size); + } else { + r = amdgpu_vce_resume(adev); + if (r) + return r; + } return vce_v4_0_hw_init(adev); } @@ -919,9 +955,8 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring, uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id); unsigned eng = ring->vm_inv_eng; - pd_addr = pd_addr | 0x1; /* valid bit */ - /* now only use physical base address of PDE and valid */ - BUG_ON(pd_addr & 0xFFFF00000000003EULL); + pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr); + pd_addr |= AMDGPU_PTE_VALID; amdgpu_ring_write(ring, VCE_CMD_REG_WRITE); amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c new file mode 100644 index 000000000000..21e7b88401e1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -0,0 +1,1189 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
new file mode 100644
index 000000000000..21e7b88401e1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -0,0 +1,1189 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_vcn.h"
+#include "soc15d.h"
+#include "soc15_common.h"
+
+#include "vega10/soc15ip.h"
+#include "raven1/VCN/vcn_1_0_offset.h"
+#include "raven1/VCN/vcn_1_0_sh_mask.h"
+#include "vega10/HDP/hdp_4_0_offset.h"
+#include "raven1/MMHUB/mmhub_9_1_offset.h"
+#include "raven1/MMHUB/mmhub_9_1_sh_mask.h"
+
+static int vcn_v1_0_start(struct amdgpu_device *adev);
+static int vcn_v1_0_stop(struct amdgpu_device *adev);
+static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
+static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
+
+/**
+ * vcn_v1_0_early_init - set function pointers
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Set ring and irq function pointers
+ */
+static int vcn_v1_0_early_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	adev->vcn.num_enc_rings = 2;
+
+	vcn_v1_0_set_dec_ring_funcs(adev);
+	vcn_v1_0_set_enc_ring_funcs(adev);
+	vcn_v1_0_set_irq_funcs(adev);
+
+	return 0;
+}
+
+/**
+ * vcn_v1_0_sw_init - sw init for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Load firmware and sw initialization
+ */
+static int vcn_v1_0_sw_init(void *handle)
+{
+	struct amdgpu_ring *ring;
+	int i, r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	/* VCN DEC TRAP */
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
+	if (r)
+		return r;
+
+	/* VCN ENC TRAP */
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+		r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119,
+					&adev->vcn.irq);
+		if (r)
+			return r;
+	}
+
+	r = amdgpu_vcn_sw_init(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_vcn_resume(adev);
+	if (r)
+		return r;
+
+	ring = &adev->vcn.ring_dec;
+	sprintf(ring->name, "vcn_dec");
+	r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+		ring = &adev->vcn.ring_enc[i];
+		sprintf(ring->name, "vcn_enc%d", i);
+		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.irq, 0);
+		if (r)
+			return r;
+	}
+
+	return r;
+}
+
+/**
+ * vcn_v1_0_sw_fini - sw fini for VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * VCN suspend and free up sw allocation
+ */
+static int vcn_v1_0_sw_fini(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_vcn_suspend(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_vcn_sw_fini(adev);
+
+	return r;
+}
+
+/**
+ * vcn_v1_0_hw_init - start and test VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Initialize the hardware, boot up the VCPU and do some testing
+ */
+static int vcn_v1_0_hw_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+	int i, r;
+
+	r = vcn_v1_0_start(adev);
+	if (r)
+		goto done;
+
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r) {
+		ring->ready = false;
+		goto done;
+	}
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+		ring = &adev->vcn.ring_enc[i];
+		ring->ready = true;
+		r = amdgpu_ring_test_ring(ring);
+		if (r) {
+			ring->ready = false;
+			goto done;
+		}
+	}
+
+done:
+	if (!r)
+		DRM_INFO("VCN decode and encode initialized successfully.\n");
+
+	return r;
+}
+
+/**
+ * vcn_v1_0_hw_fini - stop the hardware block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Stop the VCN block, mark ring as not ready any more
+ */
+static int vcn_v1_0_hw_fini(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+	int r;
+
+	r = vcn_v1_0_stop(adev);
+	if (r)
+		return r;
+
+	ring->ready = false;
+
+	return 0;
+}
+
+/**
+ * vcn_v1_0_suspend - suspend VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * HW fini and suspend VCN block
+ */
+static int vcn_v1_0_suspend(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = vcn_v1_0_hw_fini(adev);
+	if (r)
+		return r;
+
+	r = amdgpu_vcn_suspend(adev);
+
+	return r;
+}
+
+/**
+ * vcn_v1_0_resume - resume VCN block
+ *
+ * @handle: amdgpu_device pointer
+ *
+ * Resume firmware and hw init VCN block
+ */
+static int vcn_v1_0_resume(void *handle)
+{
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = amdgpu_vcn_resume(adev);
+	if (r)
+		return r;
+
+	r = vcn_v1_0_hw_init(adev);
+
+	return r;
+}
+
+/**
+ * vcn_v1_0_mc_resume - memory controller programming
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Let the VCN memory controller know its offsets
+ */
+static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
+{
+	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
+
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+			lower_32_bits(adev->vcn.gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+			upper_32_bits(adev->vcn.gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+			AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
+
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
+			lower_32_bits(adev->vcn.gpu_addr + size));
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
+			upper_32_bits(adev->vcn.gpu_addr + size));
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);
+
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
+			lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
+			upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
+			AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40));
+
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG,
+			adev->gfx.config.gb_addr_config);
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG,
+			adev->gfx.config.gb_addr_config);
+	WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG,
+			adev->gfx.config.gb_addr_config);
+}
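vcn_v1_0_mc_resume() lays three windows over one VCPU buffer, each with a 64-bit base split across BAR_LOW/BAR_HIGH plus an offset and size register. The arithmetic, pulled out for illustration (the AMDGPU_VCN_* constants are the ones referenced above; the helper name is invented):

	/* Illustration of the window layout programmed above (names invented). */
	static void vcn_cache_windows(uint64_t fw_base, uint32_t fw_size)
	{
		uint64_t win0 = fw_base;			/* CACHE0: firmware image */
		uint64_t win1 = fw_base + fw_size;		/* CACHE1: heap */
		uint64_t win2 = win1 + AMDGPU_VCN_HEAP_SIZE;	/* CACHE2: stack + sessions */
		uint32_t win2_size = AMDGPU_VCN_STACK_SIZE +
				     AMDGPU_VCN_SESSION_SIZE * 40;	/* room for 40 sessions */

		/* each winN feeds one BAR_LOW/BAR_HIGH pair plus a SIZE register */
		(void)win0; (void)win2; (void)win2_size;
	}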
+
+/**
+ * vcn_v1_0_disable_clock_gating - disable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @sw: enable SW clock gating
+ *
+ * Disable clock gating for VCN block
+ */
+static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev, bool sw)
+{
+	uint32_t data;
+
+	/* JPEG disable CGC */
+	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+
+	if (sw)
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data &= ~JPEG_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+	data &= ~(JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
+	WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
+
+	/* UVD disable CGC */
+	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+	if (sw)
+		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data &= ~ UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
+
+	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_GATE);
+	data &= ~(UVD_CGC_GATE__SYS_MASK
+		| UVD_CGC_GATE__UDEC_MASK
+		| UVD_CGC_GATE__MPEG2_MASK
+		| UVD_CGC_GATE__REGS_MASK
+		| UVD_CGC_GATE__RBC_MASK
+		| UVD_CGC_GATE__LMI_MC_MASK
+		| UVD_CGC_GATE__LMI_UMC_MASK
+		| UVD_CGC_GATE__IDCT_MASK
+		| UVD_CGC_GATE__MPRD_MASK
+		| UVD_CGC_GATE__MPC_MASK
+		| UVD_CGC_GATE__LBSI_MASK
+		| UVD_CGC_GATE__LRBBM_MASK
+		| UVD_CGC_GATE__UDEC_RE_MASK
+		| UVD_CGC_GATE__UDEC_CM_MASK
+		| UVD_CGC_GATE__UDEC_IT_MASK
+		| UVD_CGC_GATE__UDEC_DB_MASK
+		| UVD_CGC_GATE__UDEC_MP_MASK
+		| UVD_CGC_GATE__WCB_MASK
+		| UVD_CGC_GATE__VCPU_MASK
+		| UVD_CGC_GATE__SCPU_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_CGC_GATE, data);
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+	data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+		| UVD_CGC_CTRL__SYS_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_MODE_MASK
+		| UVD_CGC_CTRL__MPEG2_MODE_MASK
+		| UVD_CGC_CTRL__REGS_MODE_MASK
+		| UVD_CGC_CTRL__RBC_MODE_MASK
+		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
+		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+		| UVD_CGC_CTRL__IDCT_MODE_MASK
+		| UVD_CGC_CTRL__MPRD_MODE_MASK
+		| UVD_CGC_CTRL__MPC_MODE_MASK
+		| UVD_CGC_CTRL__LBSI_MODE_MASK
+		| UVD_CGC_CTRL__LRBBM_MODE_MASK
+		| UVD_CGC_CTRL__WCB_MODE_MASK
+		| UVD_CGC_CTRL__VCPU_MODE_MASK
+		| UVD_CGC_CTRL__SCPU_MODE_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+	/* turn on */
+	data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE);
+	data |= (UVD_SUVD_CGC_GATE__SRE_MASK
+		| UVD_SUVD_CGC_GATE__SIT_MASK
+		| UVD_SUVD_CGC_GATE__SMP_MASK
+		| UVD_SUVD_CGC_GATE__SCM_MASK
+		| UVD_SUVD_CGC_GATE__SDB_MASK
+		| UVD_SUVD_CGC_GATE__SRE_H264_MASK
+		| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
+		| UVD_SUVD_CGC_GATE__SIT_H264_MASK
+		| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
+		| UVD_SUVD_CGC_GATE__SCM_H264_MASK
+		| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
+		| UVD_SUVD_CGC_GATE__SDB_H264_MASK
+		| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
+		| UVD_SUVD_CGC_GATE__SCLR_MASK
+		| UVD_SUVD_CGC_GATE__UVD_SC_MASK
+		| UVD_SUVD_CGC_GATE__ENT_MASK
+		| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
+		| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
+		| UVD_SUVD_CGC_GATE__SITE_MASK
+		| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
+		| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
+		| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
+		| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
+		| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_GATE, data);
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+	data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
+
+/**
+ * vcn_v1_0_enable_clock_gating - enable VCN clock gating
+ *
+ * @adev: amdgpu_device pointer
+ * @sw: enable SW clock gating
+ *
+ * Enable clock gating for VCN block
+ */
+static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev, bool sw)
+{
+	uint32_t data = 0;
+
+	/* enable JPEG CGC */
+	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL);
+	if (sw)
+		data |= 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data |= 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(VCN, 0, mmJPEG_CGC_CTRL, data);
+
+	data = RREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE);
+	data |= (JPEG_CGC_GATE__JPEG_MASK | JPEG_CGC_GATE__JPEG2_MASK);
+	WREG32_SOC15(VCN, 0, mmJPEG_CGC_GATE, data);
+
+	/* enable UVD CGC */
+	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+	if (sw)
+		data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	else
+		data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
+	data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
+	data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
+	WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL);
+	data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
+		| UVD_CGC_CTRL__SYS_MODE_MASK
+		| UVD_CGC_CTRL__UDEC_MODE_MASK
+		| UVD_CGC_CTRL__MPEG2_MODE_MASK
+		| UVD_CGC_CTRL__REGS_MODE_MASK
+		| UVD_CGC_CTRL__RBC_MODE_MASK
+		| UVD_CGC_CTRL__LMI_MC_MODE_MASK
+		| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
+		| UVD_CGC_CTRL__IDCT_MODE_MASK
+		| UVD_CGC_CTRL__MPRD_MODE_MASK
+		| UVD_CGC_CTRL__MPC_MODE_MASK
+		| UVD_CGC_CTRL__LBSI_MODE_MASK
+		| UVD_CGC_CTRL__LRBBM_MODE_MASK
+		| UVD_CGC_CTRL__WCB_MODE_MASK
+		| UVD_CGC_CTRL__VCPU_MODE_MASK
+		| UVD_CGC_CTRL__SCPU_MODE_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_CGC_CTRL, data);
+
+	data = RREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL);
+	data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
+		| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
+	WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data);
+}
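Both gating helpers are read-modify-write sequences over the same CGC control registers; only the polarity of the gate masks differs. The recurring shape, factored out for illustration (the mask/shift parameters stand in for the JPEG_/UVD_ *_MASK and *__SHIFT pairs used above):

	/* Illustrative RMW helper matching the CGC_CTRL updates above. */
	static uint32_t cgc_ctrl_rmw(uint32_t val, bool sw,
				     uint32_t mode_mask, unsigned mode_shift,
				     unsigned dly_shift, unsigned off_shift)
	{
		if (sw)
			val |= 1 << mode_shift;	/* dynamic (software) gating */
		else
			val &= ~mode_mask;	/* hardware-managed gating */

		val |= 1 << dly_shift;		/* CLK_GATE_DLY_TIMER = 1 */
		val |= 4 << off_shift;		/* CLK_OFF_DELAY = 4 */
		return val;
	}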
+
+/**
+ * vcn_v1_0_start - start VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Setup and start the VCN block
+ */
+static int vcn_v1_0_start(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
+	uint32_t rb_bufsz, tmp;
+	uint32_t lmi_swap_cntl;
+	int i, j, r;
+
+	/* disable byte swapping */
+	lmi_swap_cntl = 0;
+
+	vcn_v1_0_mc_resume(adev);
+
+	/* disable clock gating */
+	vcn_v1_0_disable_clock_gating(adev, true);
+
+	/* disable interrupt */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0,
+			~UVD_MASTINT_EN__VCPU_EN_MASK);
+
+	/* stall UMC and register bus before resetting VCPU */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+			UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+	mdelay(1);
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+	mdelay(5);
+
+	/* initialize VCN memory controller */
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL,
+		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		0x00100000L);
+
+#ifdef __BIG_ENDIAN
+	/* swap (8 in 32) RB and IB */
+	lmi_swap_cntl = 0xa;
+#endif
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
+
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0);
+	WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88);
+
+	/* take all subblocks out of reset, except VCPU */
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+	mdelay(5);
+
+	/* enable VCPU clock */
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL,
+			UVD_VCPU_CNTL__CLK_EN_MASK);
+
+	/* enable UMC */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+	/* boot up the VCPU */
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0);
+	mdelay(10);
+
+	for (i = 0; i < 10; ++i) {
+		uint32_t status;
+
+		for (j = 0; j < 100; ++j) {
+			status = RREG32_SOC15(UVD, 0, mmUVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(10);
+		}
+		r = 0;
+		if (status & 2)
+			break;
+
+		DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
+		WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+				UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+				~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+		mdelay(10);
+		WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0,
+				~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+		mdelay(10);
+		r = -1;
+	}
+
+	if (r) {
+		DRM_ERROR("VCN decode not responding, giving up!!!\n");
+		return r;
+	}
+	/* enable master interrupt */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN),
+		(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+		~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
+
+	/* clear the bit 4 of VCN_STATUS */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0,
+			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
+
+	/* force RBC into idle state */
+	rb_bufsz = order_base_2(ring->ring_size);
+	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
+	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp);
+
+	/* set the write pointer delay */
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR,
+			(upper_32_bits(ring->gpu_addr) >> 2));
+
+	/* program the RB_BASE for ring buffer */
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
+			lower_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
+			upper_32_bits(ring->gpu_addr));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0);
+
+	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
+			lower_32_bits(ring->wptr));
+
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
+			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
+
+	ring = &adev->vcn.ring_enc[0];
+	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4);
+
+	ring = &adev->vcn.ring_enc[1];
+	WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr);
+	WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+	WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4);
+
+	return 0;
+}
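The bring-up tail of vcn_v1_0_start() is a bounded retry around a bounded poll: up to ten attempts, each waiting about a second for the VCPU-report bit (0x2) in UVD_STATUS, with a soft-reset pulse between attempts. Its control flow reduced to a skeleton, with hypothetical helpers standing in for the register traffic:

	/* Skeleton of the boot poll above; the two helpers are hypothetical. */
	extern uint32_t vcpu_status(void);	/* would read mmUVD_STATUS */
	extern void vcpu_reset_pulse(void);	/* would toggle VCPU_SOFT_RESET */

	static int vcpu_boot_wait(void)
	{
		int i, j;

		for (i = 0; i < 10; ++i) {
			for (j = 0; j < 100; ++j) {
				if (vcpu_status() & 2)	/* VCPU reported in */
					return 0;
				mdelay(10);		/* ~1s per attempt */
			}
			vcpu_reset_pulse();		/* retry after a reset */
		}
		return -1;	/* matches the "giving up" path above */
	}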
+
+/**
+ * vcn_v1_0_stop - stop VCN block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * stop the VCN block
+ */
+static int vcn_v1_0_stop(struct amdgpu_device *adev)
+{
+	/* force RBC into idle state */
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101);
+
+	/* Stall UMC and register bus before resetting VCPU */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+			UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+	mdelay(1);
+
+	/* put VCPU into reset */
+	WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET,
+			UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+	mdelay(5);
+
+	/* disable VCPU clock */
+	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0);
+
+	/* Unstall UMC and register bus */
+	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0,
+			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
+
+	/* enable clock gating */
+	vcn_v1_0_enable_clock_gating(adev, true);
+
+	return 0;
+}
+
+static int vcn_v1_0_set_clockgating_state(void *handle,
+					  enum amd_clockgating_state state)
+{
+	/* needed for driver unload*/
+	return 0;
+}
+
+/**
+ * vcn_v1_0_dec_ring_get_rptr - get read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware read pointer
+ */
+static uint64_t vcn_v1_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR);
+}
+
+/**
+ * vcn_v1_0_dec_ring_get_wptr - get write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware write pointer
+ */
+static uint64_t vcn_v1_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	return RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR);
+}
+
+/**
+ * vcn_v1_0_dec_ring_set_wptr - set write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the write pointer to the hardware
+ */
+static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_dec_ring_insert_start - insert a start command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write a start command to the ring.
+ */
+static void vcn_v1_0_dec_ring_insert_start(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_START << 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_insert_end - insert an end command
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Write an end command to the ring.
+ */
+static void vcn_v1_0_dec_ring_insert_end(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_PACKET_END << 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_emit_fence - emit a fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
+				     unsigned flags)
+{
+	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0));
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, addr & 0xffffffff);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_FENCE << 1);
+
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_TRAP << 1);
+}
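Every decode-ring command above reduces to the same primitive: PACKET0 writes that load GPCOM_VCPU_DATA0/DATA1, then a command code shifted left by one into GPCOM_VCPU_CMD. The fence, the start/end markers, and the VM flush below are all compositions of this triplet, which the file itself factors out as vcn_v1_0_dec_vm_reg_write() below; shown here in isolation:

	/* The data0/data1/cmd triplet underlying the dec-ring commands above. */
	static void dec_emit_triplet(struct amdgpu_ring *ring,
				     uint32_t data0, uint32_t data1, uint32_t cmd)
	{
		amdgpu_ring_write(ring,
			PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
		amdgpu_ring_write(ring, data0);
		amdgpu_ring_write(ring,
			PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
		amdgpu_ring_write(ring, data1);
		amdgpu_ring_write(ring,
			PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
		amdgpu_ring_write(ring, cmd << 1);	/* command codes sit above bit 0 */
	}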
+
+/**
+ * vcn_v1_0_dec_ring_emit_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_DEBUG0), 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
+ * vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write ring commands to execute the indirect buffer
+ */
+static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
+				  struct amdgpu_ib *ib,
+				  unsigned vm_id, bool ctx_switch)
+{
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0));
+	amdgpu_ring_write(ring, vm_id);
+
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_LOW), 0));
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH), 0));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_IB_SIZE), 0));
+	amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
+				uint32_t data0, uint32_t data1)
+{
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, data0);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, data1);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
+}
+
+static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
+				uint32_t data0, uint32_t data1, uint32_t mask)
+{
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+	amdgpu_ring_write(ring, data0);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+	amdgpu_ring_write(ring, data1);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
+	amdgpu_ring_write(ring, mask);
+	amdgpu_ring_write(ring,
+		PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+	amdgpu_ring_write(ring, VCN_DEC_CMD_REG_READ_COND_WAIT << 1);
+}
+
+static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					unsigned vm_id, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	uint32_t data0, data1, mask;
+	unsigned eng = ring->vm_inv_eng;
+
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
+
+	data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
+	data1 = upper_32_bits(pd_addr);
+	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
+
+	data0 = (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2;
+	data1 = lower_32_bits(pd_addr);
+	mask = 0xffffffff;
+	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+
+	/* flush TLB */
+	data0 = (hub->vm_inv_eng0_req + eng) << 2;
+	data1 = req;
+	vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
+
+	/* wait for flush */
+	data0 = (hub->vm_inv_eng0_ack + eng) << 2;
+	data1 = 1 << vm_id;
+	mask = 1 << vm_id;
+	vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+}
+
+/**
+ * vcn_v1_0_enc_ring_get_rptr - get enc read pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc read pointer
+ */
+static uint64_t vcn_v1_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring == &adev->vcn.ring_enc[0])
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR);
+	else
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2);
+}
+
+/**
+ * vcn_v1_0_enc_ring_get_wptr - get enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Returns the current hardware enc write pointer
+ */
+static uint64_t vcn_v1_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring == &adev->vcn.ring_enc[0])
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+	else
+		return RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+}
+
+/**
+ * vcn_v1_0_enc_ring_set_wptr - set enc write pointer
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Commits the enc write pointer to the hardware
+ */
+static void vcn_v1_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+
+	if (ring == &adev->vcn.ring_enc[0])
+		WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR,
+			lower_32_bits(ring->wptr));
+	else
+		WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2,
+			lower_32_bits(ring->wptr));
+}
+
+/**
+ * vcn_v1_0_enc_ring_emit_fence - emit an enc fence & trap command
+ *
+ * @ring: amdgpu_ring pointer
+ * @fence: fence to emit
+ *
+ * Write an enc fence and a trap command to the ring.
+ */
+static void vcn_v1_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
+			u64 seq, unsigned flags)
+{
+	WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_FENCE);
+	amdgpu_ring_write(ring, addr);
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, VCN_ENC_CMD_TRAP);
+}
+
+static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
+}
+
+/**
+ * vcn_v1_0_enc_ring_emit_ib - enc execute indirect buffer
+ *
+ * @ring: amdgpu_ring pointer
+ * @ib: indirect buffer to execute
+ *
+ * Write enc ring commands to execute the indirect buffer
+ */
+static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
+		struct amdgpu_ib *ib, unsigned int vm_id, bool ctx_switch)
+{
+	amdgpu_ring_write(ring, VCN_ENC_CMD_IB);
+	amdgpu_ring_write(ring, vm_id);
+	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	amdgpu_ring_write(ring, ib->length_dw);
+}
+
+static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
+			 unsigned int vm_id, uint64_t pd_addr)
+{
+	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
+	unsigned eng = ring->vm_inv_eng;
+
+	pd_addr = amdgpu_gart_get_vm_pde(ring->adev, pd_addr);
+	pd_addr |= AMDGPU_PTE_VALID;
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,
+			  (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring,
+			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring,
+			  (hub->ctx0_ptb_addr_lo32 + vm_id * 2) << 2);
+	amdgpu_ring_write(ring, 0xffffffff);
+	amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+
+	/* flush TLB */
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
+	amdgpu_ring_write(ring, req);
+
+	/* wait for flush */
+	amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, 1 << vm_id);
+}
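Unlike the decode ring, the encode ring takes bare VCN_ENC_CMD_* opcodes followed directly by their operands, so a register write is three dwords and a masked wait is four. The two sequences used by the enc VM flush above, factored out for illustration (helper names are invented):

	/* Illustrative enc-ring register write/wait, mirroring the flush above. */
	static void enc_reg_write(struct amdgpu_ring *ring, uint32_t reg, uint32_t val)
	{
		amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
		amdgpu_ring_write(ring, reg << 2);	/* dword index -> byte offset */
		amdgpu_ring_write(ring, val);
	}

	static void enc_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
				 uint32_t mask, uint32_t val)
	{
		amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
		amdgpu_ring_write(ring, reg << 2);
		amdgpu_ring_write(ring, mask);		/* compare under this mask */
		amdgpu_ring_write(ring, val);		/* until it equals val */
	}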
+
+static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	return 0;
+}
+
+static int vcn_v1_0_process_interrupt(struct amdgpu_device *adev,
+				      struct amdgpu_irq_src *source,
+				      struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("IH: VCN TRAP\n");
+
+	switch (entry->src_id) {
+	case 124:
+		amdgpu_fence_process(&adev->vcn.ring_dec);
+		break;
+	case 119:
+		amdgpu_fence_process(&adev->vcn.ring_enc[0]);
+		break;
+	case 120:
+		amdgpu_fence_process(&adev->vcn.ring_enc[1]);
+		break;
+	default:
+		DRM_ERROR("Unhandled interrupt: %d %d\n",
+			  entry->src_id, entry->src_data[0]);
+		break;
+	}
+
+	return 0;
+}
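The trap handler fans out purely on the IH source id, and the ids match what sw_init registered: 124 for the decode ring and 119/120 for the two encode rings. The same fan-out written as a table, which is one way it could scale with num_enc_rings (purely illustrative):

	/* Illustrative table form of the src_id fan-out above. */
	static void vcn_fence_dispatch(struct amdgpu_device *adev, unsigned src_id)
	{
		struct { unsigned src_id; struct amdgpu_ring *ring; } map[] = {
			{ 124, &adev->vcn.ring_dec },
			{ 119, &adev->vcn.ring_enc[0] },
			{ 120, &adev->vcn.ring_enc[1] },
		};
		unsigned i;

		for (i = 0; i < ARRAY_SIZE(map); ++i)
			if (map[i].src_id == src_id)
				amdgpu_fence_process(map[i].ring);
	}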
+
+static const struct amd_ip_funcs vcn_v1_0_ip_funcs = {
+	.name = "vcn_v1_0",
+	.early_init = vcn_v1_0_early_init,
+	.late_init = NULL,
+	.sw_init = vcn_v1_0_sw_init,
+	.sw_fini = vcn_v1_0_sw_fini,
+	.hw_init = vcn_v1_0_hw_init,
+	.hw_fini = vcn_v1_0_hw_fini,
+	.suspend = vcn_v1_0_suspend,
+	.resume = vcn_v1_0_resume,
+	.is_idle = NULL /* vcn_v1_0_is_idle */,
+	.wait_for_idle = NULL /* vcn_v1_0_wait_for_idle */,
+	.check_soft_reset = NULL /* vcn_v1_0_check_soft_reset */,
+	.pre_soft_reset = NULL /* vcn_v1_0_pre_soft_reset */,
+	.soft_reset = NULL /* vcn_v1_0_soft_reset */,
+	.post_soft_reset = NULL /* vcn_v1_0_post_soft_reset */,
+	.set_clockgating_state = vcn_v1_0_set_clockgating_state,
+	.set_powergating_state = NULL /* vcn_v1_0_set_powergating_state */,
+};
+
+static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_DEC,
+	.align_mask = 0xf,
+	.nop = PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP), 0),
+	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
+	.get_rptr = vcn_v1_0_dec_ring_get_rptr,
+	.get_wptr = vcn_v1_0_dec_ring_get_wptr,
+	.set_wptr = vcn_v1_0_dec_ring_set_wptr,
+	.emit_frame_size =
+		2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */
+		34 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
+		6,
+	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
+	.emit_ib = vcn_v1_0_dec_ring_emit_ib,
+	.emit_fence = vcn_v1_0_dec_ring_emit_fence,
+	.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
+	.emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
+	.test_ring = amdgpu_vcn_dec_ring_test_ring,
+	.test_ib = amdgpu_vcn_dec_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_start = vcn_v1_0_dec_ring_insert_start,
+	.insert_end = vcn_v1_0_dec_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vcn_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
+	.type = AMDGPU_RING_TYPE_VCN_ENC,
+	.align_mask = 0x3f,
+	.nop = VCN_ENC_CMD_NO_OP,
+	.support_64bit_ptrs = false,
+	.vmhub = AMDGPU_MMHUB,
+	.get_rptr = vcn_v1_0_enc_ring_get_rptr,
+	.get_wptr = vcn_v1_0_enc_ring_get_wptr,
+	.set_wptr = vcn_v1_0_enc_ring_set_wptr,
+	.emit_frame_size =
+		17 + /* vcn_v1_0_enc_ring_emit_vm_flush */
+		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
+		1, /* vcn_v1_0_enc_ring_insert_end */
+	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
+	.emit_ib = vcn_v1_0_enc_ring_emit_ib,
+	.emit_fence = vcn_v1_0_enc_ring_emit_fence,
+	.emit_vm_flush = vcn_v1_0_enc_ring_emit_vm_flush,
+	.test_ring = amdgpu_vcn_enc_ring_test_ring,
+	.test_ib = amdgpu_vcn_enc_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.insert_end = vcn_v1_0_enc_ring_insert_end,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vcn_ring_begin_use,
+	.end_use = amdgpu_vcn_ring_end_use,
+};
+
+static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
+{
+	adev->vcn.ring_dec.funcs = &vcn_v1_0_dec_ring_vm_funcs;
+	DRM_INFO("VCN decode is enabled in VM mode\n");
+}
+
+static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev)
+{
+	int i;
+
+	for (i = 0; i < adev->vcn.num_enc_rings; ++i)
+		adev->vcn.ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs;
+
+	DRM_INFO("VCN encode is enabled in VM mode\n");
+}
+
+static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = {
+	.set = vcn_v1_0_set_interrupt_state,
+	.process = vcn_v1_0_process_interrupt,
+};
+
+static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1;
+	adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs;
+}
+
+const struct amdgpu_ip_block_version vcn_v1_0_ip_block =
+{
+	.type = AMD_IP_BLOCK_TYPE_VCN,
+	.major = 1,
+	.minor = 0,
+	.rev = 0,
+	.funcs = &vcn_v1_0_ip_funcs,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
new file mode 100644
index 000000000000..2a497a7a4840
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __VCN_V1_0_H__
+#define __VCN_V1_0_H__
+
+extern const struct amdgpu_ip_block_version vcn_v1_0_ip_block;
+
+#endif
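vcn_v1_0_ip_block is the only symbol the header exports; it is what the SoC setup code consumes to hook VCN into the per-device IP list, after which the amd_ip_funcs callbacks above are driven by the common init/fini/suspend machinery. A hedged sketch of the registration (the actual call site would live in soc15.c, outside this excerpt):

	/* Sketch: how an IP block table entry is typically consumed. */
	#include "vcn_v1_0.h"

	static int raven_register_vcn(struct amdgpu_device *adev)
	{
		/* appends to adev->ip_blocks; the core then walks the list,
		 * calling early_init/sw_init/hw_init/... via vcn_v1_0_ip_funcs */
		return amdgpu_ip_block_add(adev, &vcn_v1_0_ip_block);
	}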
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 3b9740fb2c41..56150e8d1ed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -97,7 +97,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 	/* disable irqs */
 	vega10_ih_disable_interrupts(adev);
 
-	nbio_v6_1_ih_control(adev);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_ih_control(adev);
+	else
+		nbio_v6_1_ih_control(adev);
 
 	ih_rb_cntl = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL));
 	/* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/
@@ -148,7 +151,10 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev)
 					   ENABLE, 0);
 	}
 	WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR), ih_doorbell_rtpr);
-	nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	if (adev->flags & AMD_IS_APU)
+		nbio_v7_0_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
+	else
+		nbio_v6_1_ih_doorbell_range(adev, adev->irq.ih.use_doorbell, adev->irq.ih.doorbell_index);
 
 	tmp = RREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL));
 	tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL,
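Both vega10_ih.c hunks make the same decision: Raven is an APU carrying NBIO 7.0, while Vega10 is a dGPU on NBIO 6.1, so each NBIO touchpoint now branches on the APU flag. The dispatch, isolated for clarity (the driver open-codes this branch at each call site rather than wrapping it):

	/* The APU/dGPU NBIO dispatch used at both call sites above. */
	static void ih_control(struct amdgpu_device *adev)
	{
		if (adev->flags & AMD_IS_APU)
			nbio_v7_0_ih_control(adev);	/* Raven */
		else
			nbio_v6_1_ih_control(adev);	/* Vega10 */
	}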
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 3a187619286f..6cac291c96da 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -463,89 +463,83 @@ static void vi_detect_hw_virtualization(struct amdgpu_device *adev)
 	}
 }
 
-static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
-};
-
-static const struct amdgpu_allowed_register_entry cz_allowed_read_registers[] = {
-};
-
 static const struct amdgpu_allowed_register_entry vi_allowed_read_registers[] = {
-	{mmGRBM_STATUS, false},
-	{mmGRBM_STATUS2, false},
-	{mmGRBM_STATUS_SE0, false},
-	{mmGRBM_STATUS_SE1, false},
-	{mmGRBM_STATUS_SE2, false},
-	{mmGRBM_STATUS_SE3, false},
-	{mmSRBM_STATUS, false},
-	{mmSRBM_STATUS2, false},
-	{mmSRBM_STATUS3, false},
-	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET, false},
-	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET, false},
-	{mmCP_STAT, false},
-	{mmCP_STALLED_STAT1, false},
-	{mmCP_STALLED_STAT2, false},
-	{mmCP_STALLED_STAT3, false},
-	{mmCP_CPF_BUSY_STAT, false},
-	{mmCP_CPF_STALLED_STAT1, false},
-	{mmCP_CPF_STATUS, false},
-	{mmCP_CPC_BUSY_STAT, false},
-	{mmCP_CPC_STALLED_STAT1, false},
-	{mmCP_CPC_STATUS, false},
-	{mmGB_ADDR_CONFIG, false},
-	{mmMC_ARB_RAMCFG, false},
-	{mmGB_TILE_MODE0, false},
-	{mmGB_TILE_MODE1, false},
-	{mmGB_TILE_MODE2, false},
-	{mmGB_TILE_MODE3, false},
-	{mmGB_TILE_MODE4, false},
-	{mmGB_TILE_MODE5, false},
-	{mmGB_TILE_MODE6, false},
-	{mmGB_TILE_MODE7, false},
-	{mmGB_TILE_MODE8, false},
-	{mmGB_TILE_MODE9, false},
-	{mmGB_TILE_MODE10, false},
-	{mmGB_TILE_MODE11, false},
-	{mmGB_TILE_MODE12, false},
-	{mmGB_TILE_MODE13, false},
-	{mmGB_TILE_MODE14, false},
-	{mmGB_TILE_MODE15, false},
-	{mmGB_TILE_MODE16, false},
-	{mmGB_TILE_MODE17, false},
-	{mmGB_TILE_MODE18, false},
-	{mmGB_TILE_MODE19, false},
-	{mmGB_TILE_MODE20, false},
-	{mmGB_TILE_MODE21, false},
-	{mmGB_TILE_MODE22, false},
-	{mmGB_TILE_MODE23, false},
-	{mmGB_TILE_MODE24, false},
-	{mmGB_TILE_MODE25, false},
-	{mmGB_TILE_MODE26, false},
-	{mmGB_TILE_MODE27, false},
-	{mmGB_TILE_MODE28, false},
-	{mmGB_TILE_MODE29, false},
-	{mmGB_TILE_MODE30, false},
-	{mmGB_TILE_MODE31, false},
-	{mmGB_MACROTILE_MODE0, false},
-	{mmGB_MACROTILE_MODE1, false},
-	{mmGB_MACROTILE_MODE2, false},
-	{mmGB_MACROTILE_MODE3, false},
-	{mmGB_MACROTILE_MODE4, false},
-	{mmGB_MACROTILE_MODE5, false},
-	{mmGB_MACROTILE_MODE6, false},
-	{mmGB_MACROTILE_MODE7, false},
-	{mmGB_MACROTILE_MODE8, false},
-	{mmGB_MACROTILE_MODE9, false},
-	{mmGB_MACROTILE_MODE10, false},
-	{mmGB_MACROTILE_MODE11, false},
-	{mmGB_MACROTILE_MODE12, false},
-	{mmGB_MACROTILE_MODE13, false},
-	{mmGB_MACROTILE_MODE14, false},
-	{mmGB_MACROTILE_MODE15, false},
-	{mmCC_RB_BACKEND_DISABLE, false, true},
-	{mmGC_USER_RB_BACKEND_DISABLE, false, true},
-	{mmGB_BACKEND_MAP, false, false},
-	{mmPA_SC_RASTER_CONFIG, false, true},
-	{mmPA_SC_RASTER_CONFIG_1, false, true},
+	{mmGRBM_STATUS},
+	{mmGRBM_STATUS2},
+	{mmGRBM_STATUS_SE0},
+	{mmGRBM_STATUS_SE1},
+	{mmGRBM_STATUS_SE2},
+	{mmGRBM_STATUS_SE3},
+	{mmSRBM_STATUS},
+	{mmSRBM_STATUS2},
+	{mmSRBM_STATUS3},
+	{mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET},
+	{mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET},
+	{mmCP_STAT},
+	{mmCP_STALLED_STAT1},
+	{mmCP_STALLED_STAT2},
+	{mmCP_STALLED_STAT3},
+	{mmCP_CPF_BUSY_STAT},
+	{mmCP_CPF_STALLED_STAT1},
+	{mmCP_CPF_STATUS},
+	{mmCP_CPC_BUSY_STAT},
+	{mmCP_CPC_STALLED_STAT1},
+	{mmCP_CPC_STATUS},
+	{mmGB_ADDR_CONFIG},
+	{mmMC_ARB_RAMCFG},
+	{mmGB_TILE_MODE0},
+	{mmGB_TILE_MODE1},
+	{mmGB_TILE_MODE2},
+	{mmGB_TILE_MODE3},
+	{mmGB_TILE_MODE4},
+	{mmGB_TILE_MODE5},
+	{mmGB_TILE_MODE6},
+	{mmGB_TILE_MODE7},
+	{mmGB_TILE_MODE8},
+	{mmGB_TILE_MODE9},
+	{mmGB_TILE_MODE10},
+	{mmGB_TILE_MODE11},
+	{mmGB_TILE_MODE12},
+	{mmGB_TILE_MODE13},
+	{mmGB_TILE_MODE14},
+	{mmGB_TILE_MODE15},
+	{mmGB_TILE_MODE16},
+	{mmGB_TILE_MODE17},
+	{mmGB_TILE_MODE18},
+	{mmGB_TILE_MODE19},
+	{mmGB_TILE_MODE20},
+	{mmGB_TILE_MODE21},
+	{mmGB_TILE_MODE22},
+	{mmGB_TILE_MODE23},
+	{mmGB_TILE_MODE24},
+	{mmGB_TILE_MODE25},
+	{mmGB_TILE_MODE26},
+	{mmGB_TILE_MODE27},
+	{mmGB_TILE_MODE28},
+	{mmGB_TILE_MODE29},
+	{mmGB_TILE_MODE30},
+	{mmGB_TILE_MODE31},
+	{mmGB_MACROTILE_MODE0},
+	{mmGB_MACROTILE_MODE1},
+	{mmGB_MACROTILE_MODE2},
+	{mmGB_MACROTILE_MODE3},
+	{mmGB_MACROTILE_MODE4},
+	{mmGB_MACROTILE_MODE5},
+	{mmGB_MACROTILE_MODE6},
+	{mmGB_MACROTILE_MODE7},
+	{mmGB_MACROTILE_MODE8},
+	{mmGB_MACROTILE_MODE9},
+	{mmGB_MACROTILE_MODE10},
+	{mmGB_MACROTILE_MODE11},
+	{mmGB_MACROTILE_MODE12},
+	{mmGB_MACROTILE_MODE13},
+	{mmGB_MACROTILE_MODE14},
+	{mmGB_MACROTILE_MODE15},
+	{mmCC_RB_BACKEND_DISABLE, true},
+	{mmGC_USER_RB_BACKEND_DISABLE, true},
+	{mmGB_BACKEND_MAP, false},
+	{mmPA_SC_RASTER_CONFIG, true},
+	{mmPA_SC_RASTER_CONFIG_1, true},
 };
 
 static uint32_t vi_get_register_value(struct amdgpu_device *adev,
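With the untouched flag gone, the whitelist leans on C's partial brace initialization: an entry written as {mmGRBM_STATUS} zero-fills the remaining members, so grbm_indexed defaults to false and only the per-SE registers spell out true. A sketch of the entry shape this assumes (the real struct is declared in amdgpu.h and is not shown in this diff):

	/* Assumed entry shape after this patch (see amdgpu.h). */
	struct allowed_register_entry {
		uint32_t reg_offset;
		bool grbm_indexed;	/* true: read via the SE/SH-indexed path */
	};

	static const struct allowed_register_entry example[] = {
		{mmGRBM_STATUS},		 /* grbm_indexed implicitly false */
		{mmCC_RB_BACKEND_DISABLE, true}, /* needs an indexed read */
	};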
@@ -647,51 +641,17 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 			    u32 sh_num, u32 reg_offset, u32 *value)
 {
-	const struct amdgpu_allowed_register_entry *asic_register_table = NULL;
-	const struct amdgpu_allowed_register_entry *asic_register_entry;
-	uint32_t size, i;
+	uint32_t i;
 
 	*value = 0;
-	switch (adev->asic_type) {
-	case CHIP_TOPAZ:
-		asic_register_table = tonga_allowed_read_registers;
-		size = ARRAY_SIZE(tonga_allowed_read_registers);
-		break;
-	case CHIP_FIJI:
-	case CHIP_TONGA:
-	case CHIP_POLARIS11:
-	case CHIP_POLARIS10:
-	case CHIP_POLARIS12:
-	case CHIP_CARRIZO:
-	case CHIP_STONEY:
-		asic_register_table = cz_allowed_read_registers;
-		size = ARRAY_SIZE(cz_allowed_read_registers);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	if (asic_register_table) {
-		for (i = 0; i < size; i++) {
-			asic_register_entry = asic_register_table + i;
-			if (reg_offset != asic_register_entry->reg_offset)
-				continue;
-			if (!asic_register_entry->untouched)
-				*value = vi_get_register_value(adev,
-					asic_register_entry->grbm_indexed,
-					se_num, sh_num, reg_offset);
-			return 0;
-		}
-	}
-
 	for (i = 0; i < ARRAY_SIZE(vi_allowed_read_registers); i++) {
+		bool indexed = vi_allowed_read_registers[i].grbm_indexed;
+
 		if (reg_offset != vi_allowed_read_registers[i].reg_offset)
 			continue;
-		if (!vi_allowed_read_registers[i].untouched)
-			*value = vi_get_register_value(adev,
-				vi_allowed_read_registers[i].grbm_indexed,
-				se_num, sh_num, reg_offset);
+
+		*value = vi_get_register_value(adev, indexed, se_num, sh_num,
+					       reg_offset);
 		return 0;
 	}
 	return -EINVAL;
@@ -934,11 +894,6 @@ static int vi_common_early_init(void *handle)
 	    (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
 		smc_enabled = true;
 
-	if (amdgpu_sriov_vf(adev)) {
-		amdgpu_virt_init_setting(adev);
-		xgpu_vi_mailbox_set_irq_funcs(adev);
-	}
-
 	adev->rev_id = vi_get_rev_id(adev);
 	adev->external_rev_id = 0xFF;
 	switch (adev->asic_type) {
@@ -1073,7 +1028,7 @@ static int vi_common_early_init(void *handle)
 		/* rev0 hardware requires workarounds to support PG */
 		adev->pg_flags = 0;
 		if (adev->rev_id != 0x00 || CZ_REV_BRISTOL(adev->pdev->revision)) {
-			adev->pg_flags |=
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
 				AMD_PG_SUPPORT_GFX_SMG |
 				AMD_PG_SUPPORT_GFX_PIPELINE |
 				AMD_PG_SUPPORT_CP |
@@ -1111,6 +1066,11 @@ static int vi_common_early_init(void *handle)
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_vi_mailbox_set_irq_funcs(adev);
+	}
+
 	/* vi use smc load by default */
 	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 5f2ab9c1609a..a6485254a169 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -361,6 +361,12 @@
 #define	PACKET3_WAIT_ON_CE_COUNTER			0x86
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SWITCH_BUFFER				0x8B
+#define	PACKET3_FRAME_CONTROL				0x90
+#	define	FRAME_CMD(x) ((x) << 28)
+			/*
+			 * x=0: tmz_begin
+			 * x=1: tmz_end
+			 */
 #define	PACKET3_SET_RESOURCES				0xA0
 /* 1. header
  * 2. CONTROL |