Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 152
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 160
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 98
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 179
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 81
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 83
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1577
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 550
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c | 99
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 742
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 127
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_display.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 54
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 112
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 58
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 412
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 45
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 59
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 110
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 79
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 130
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 16
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 566
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c | 290
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 164
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 43
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c | 62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 23
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 303
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 9
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 137
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 15
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 87
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 95
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 399
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/atombios_encoders.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.c | 307
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ci_dpm.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.c | 42
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_dpm.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 41
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/cz_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 88
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v6_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 89
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/emu_soc.c (renamed from drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h) | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 78
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 98
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 86
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 374
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 28
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 139
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 189
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 216
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 188
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/iceland_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/kv_dpm.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 93
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 200
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v10_0.h | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c | 70
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_v3_1.h | 24
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 44
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 125
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.c | 53
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dma.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.c | 86
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/si_dpm.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 66
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/tonga_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 30
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 68
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 148
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 2
-rw-r--r-- [-rwxr-xr-x]  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 56
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 159
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 46
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.h | 2
122 files changed, 6823 insertions, 4582 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index d6e5b7273853..2ca2b5154d52 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -30,7 +30,6 @@ FULL_AMD_DISPLAY_PATH = $(FULL_AMD_PATH)/$(DISPLAY_FOLDER_NAME)
ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/include \
-I$(FULL_AMD_PATH)/amdgpu \
- -I$(FULL_AMD_PATH)/scheduler \
-I$(FULL_AMD_PATH)/powerplay/inc \
-I$(FULL_AMD_PATH)/acp/include \
-I$(FULL_AMD_DISPLAY_PATH) \
@@ -63,7 +62,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o
amdgpu-y += \
- vi.o mxgpu_vi.o nbio_v6_1.o soc15.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
+ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o
# add GMC block
amdgpu-y += \
@@ -88,8 +87,7 @@ amdgpu-y += \
# add SMC block
amdgpu-y += \
- amdgpu_dpm.o \
- amdgpu_powerplay.o
+ amdgpu_dpm.o
# add DCE block
amdgpu-y += \
@@ -130,6 +128,8 @@ amdgpu-y += \
# add amdkfd interfaces
amdgpu-y += \
amdgpu_amdkfd.o \
+ amdgpu_amdkfd_fence.o \
+ amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o
# add cgs
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d5a2eefd6c3e..f44a83ab2bf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -68,6 +68,7 @@
#include "amdgpu_vce.h"
#include "amdgpu_vcn.h"
#include "amdgpu_mn.h"
+#include "amdgpu_gmc.h"
#include "amdgpu_dm.h"
#include "amdgpu_virt.h"
#include "amdgpu_gart.h"
@@ -127,6 +128,7 @@ extern int amdgpu_job_hang_limit;
extern int amdgpu_lbpw;
extern int amdgpu_compute_multipipe;
extern int amdgpu_gpu_recovery;
+extern int amdgpu_emu_mode;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@@ -179,10 +181,6 @@ extern int amdgpu_cik_support;
#define CIK_CURSOR_WIDTH 128
#define CIK_CURSOR_HEIGHT 128
-/* GPU RESET flags */
-#define AMDGPU_RESET_INFO_VRAM_LOST (1 << 0)
-#define AMDGPU_RESET_INFO_FULLRESET (1 << 1)
-
struct amdgpu_device;
struct amdgpu_ib;
struct amdgpu_cs_parser;
@@ -318,13 +316,6 @@ struct amdgpu_vm_pte_funcs {
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
uint64_t value, unsigned count,
uint32_t incr);
-
- /* maximum nums of PTEs/PDEs in a single operation */
- uint32_t set_max_nums_pte_pde;
-
- /* number of dw to reserve per operation */
- unsigned set_pte_pde_num_dw;
-
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
@@ -332,28 +323,6 @@ struct amdgpu_vm_pte_funcs {
uint32_t incr, uint64_t flags);
};
-/* provided by the gmc block */
-struct amdgpu_gart_funcs {
- /* flush the vm tlb via mmio */
- void (*flush_gpu_tlb)(struct amdgpu_device *adev,
- uint32_t vmid);
- /* write pte/pde updates using the cpu */
- int (*set_pte_pde)(struct amdgpu_device *adev,
- void *cpu_pt_addr, /* cpu addr of page table */
- uint32_t gpu_page_idx, /* pte/pde to update */
- uint64_t addr, /* addr to write into pte/pde */
- uint64_t flags); /* access flags */
- /* enable/disable PRT support */
- void (*set_prt)(struct amdgpu_device *adev, bool enable);
- /* set pte flags based per asic */
- uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
- uint32_t flags);
- /* get the pde for a given mc addr */
- void (*get_vm_pde)(struct amdgpu_device *adev, int level,
- u64 *dst, u64 *flags);
- uint32_t (*get_invalidate_req)(unsigned int vmid);
-};
-
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
@@ -371,14 +340,6 @@ bool amdgpu_get_bios(struct amdgpu_device *adev);
bool amdgpu_read_bios(struct amdgpu_device *adev);
/*
- * Dummy page
- */
-struct amdgpu_dummy_page {
- struct page *page;
- dma_addr_t addr;
-};
-
-/*
* Clocks
*/
@@ -418,8 +379,8 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
-int amdgpu_gem_prime_pin(struct drm_gem_object *obj);
-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj);
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
@@ -480,7 +441,7 @@ struct amdgpu_sa_bo {
void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
- u64 flags, bool kernel,
+ u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj);
@@ -494,56 +455,6 @@ int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);
/*
- * VMHUB structures, functions & helpers
- */
-struct amdgpu_vmhub {
- uint32_t ctx0_ptb_addr_lo32;
- uint32_t ctx0_ptb_addr_hi32;
- uint32_t vm_inv_eng0_req;
- uint32_t vm_inv_eng0_ack;
- uint32_t vm_context0_cntl;
- uint32_t vm_l2_pro_fault_status;
- uint32_t vm_l2_pro_fault_cntl;
-};
-
-/*
- * GPU MC structures, functions & helpers
- */
-struct amdgpu_mc {
- resource_size_t aper_size;
- resource_size_t aper_base;
- resource_size_t agp_base;
- /* for some chips with <= 32MB we need to lie
- * about vram size near mc fb location */
- u64 mc_vram_size;
- u64 visible_vram_size;
- u64 gart_size;
- u64 gart_start;
- u64 gart_end;
- u64 vram_start;
- u64 vram_end;
- unsigned vram_width;
- u64 real_vram_size;
- int vram_mtrr;
- u64 mc_mask;
- const struct firmware *fw; /* MC firmware */
- uint32_t fw_version;
- struct amdgpu_irq_src vm_fault;
- uint32_t vram_type;
- uint32_t srbm_soft_reset;
- bool prt_warning;
- uint64_t stolen_size;
- /* apertures */
- u64 shared_aperture_start;
- u64 shared_aperture_end;
- u64 private_aperture_start;
- u64 private_aperture_end;
- /* protects concurrent invalidation */
- spinlock_t invalidate_lock;
- bool translate_further;
-};
-
-/*
* GPU doorbell structures, functions & helpers
*/
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
@@ -1125,8 +1036,9 @@ struct amdgpu_job {
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
- unsigned vmid;
uint64_t vm_pd_addr;
+ unsigned vmid;
+ unsigned pasid;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
@@ -1156,7 +1068,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
/*
* Writeback
*/
-#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */
+#define AMDGPU_MAX_WB 128 /* Reserve at most 128 WB slots for amdgpu-owned rings. */
struct amdgpu_wb {
struct amdgpu_bo *wb_obj;
@@ -1169,8 +1081,6 @@ struct amdgpu_wb {
int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb);
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb);
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
-
/*
* SDMA
*/
@@ -1288,6 +1198,11 @@ struct amdgpu_asic_funcs {
void (*set_pcie_lanes)(struct amdgpu_device *adev, int lanes);
/* get config memsize register */
u32 (*get_config_memsize)(struct amdgpu_device *adev);
+ /* flush hdp write queue */
+ void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
+ /* invalidate hdp read cache */
+ void (*invalidate_hdp)(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring);
};
/*
@@ -1431,7 +1346,7 @@ struct amdgpu_nbio_funcs {
u32 (*get_pcie_data_offset)(struct amdgpu_device *adev);
u32 (*get_rev_id)(struct amdgpu_device *adev);
void (*mc_access_enable)(struct amdgpu_device *adev, bool enable);
- void (*hdp_flush)(struct amdgpu_device *adev);
+ void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index);
@@ -1478,9 +1393,7 @@ enum amd_hw_ip_block_type {
#define HWIP_MAX_INSTANCE 6
struct amd_powerplay {
- struct cgs_device *cgs_device;
void *pp_handle;
- const struct amd_ip_funcs *ip_funcs;
const struct amd_pm_funcs *pp_funcs;
};
@@ -1504,6 +1417,7 @@ struct amdgpu_device {
const struct amdgpu_asic_funcs *asic_funcs;
bool shutdown;
bool need_dma32;
+ bool need_swiotlb;
bool accel_working;
struct work_struct reset_work;
struct notifier_block acpi_nb;
@@ -1573,9 +1487,9 @@ struct amdgpu_device {
struct amdgpu_clock clock;
/* MC */
- struct amdgpu_mc mc;
+ struct amdgpu_gmc gmc;
struct amdgpu_gart gart;
- struct amdgpu_dummy_page dummy_page;
+ dma_addr_t dummy_page_addr;
struct amdgpu_vm_manager vm_manager;
struct amdgpu_vmhub vmhub[AMDGPU_MAX_VMHUBS];
@@ -1714,6 +1628,9 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags);
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags);
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value);
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
+
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
@@ -1725,6 +1642,8 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
+int emu_soc_asic_init(struct amdgpu_device *adev);
+
/*
* Registers read & write functions.
*/
@@ -1735,6 +1654,9 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
#define RREG32_NO_KIQ(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_NO_KIQ)
#define WREG32_NO_KIQ(reg, v) amdgpu_mm_wreg(adev, (reg), (v), AMDGPU_REGS_NO_KIQ)
+#define RREG8(reg) amdgpu_mm_rreg8(adev, (reg))
+#define WREG8(reg, v) amdgpu_mm_wreg8(adev, (reg), (v))
+
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), 0)
#define RREG32_IDX(reg) amdgpu_mm_rreg(adev, (reg), AMDGPU_REGS_IDX)
#define DREG32(reg) printk(KERN_INFO "REGISTER: " #reg " : 0x%08X\n", amdgpu_mm_rreg(adev, (reg), 0))
@@ -1837,13 +1759,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
#define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
#define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
-#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
-#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
-#define amdgpu_gart_get_vm_pde(adev, level, dst, flags) (adev)->gart.gart_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
+#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid))
+#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
+#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
+#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
+#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
+#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_get_pte_flags(adev, flags) (adev)->gart.gart_funcs->get_vm_pte_flags((adev),(flags))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
@@ -1856,11 +1782,11 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
-#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
+#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
@@ -1870,7 +1796,6 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc))
-#define amdgpu_display_vblank_wait(adev, crtc) (adev)->mode_info.funcs->vblank_wait((adev), (crtc))
#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l))
#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e))
#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h))
@@ -1893,20 +1818,17 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job* job, bool force);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
-void amdgpu_update_display_priority(struct amdgpu_device *adev);
+void amdgpu_display_update_priority(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_device_vram_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc, u64 base);
+ struct amdgpu_gmc *mc, u64 base);
void amdgpu_device_gart_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc);
+ struct amdgpu_gmc *mc);
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev);
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size);
-int amdgpu_ttm_init(struct amdgpu_device *adev);
-void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 *registers,
const u32 array_size);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 57afad79f55d..8fa850a070e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -540,6 +540,9 @@ int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
size_t size;
u32 retry = 3;
+ if (amdgpu_acpi_pcie_notify_device_ready(adev))
+ return -EINVAL;
+
/* Get the device handle */
handle = ACPI_HANDLE(&adev->pdev->dev);
if (!handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 1d605e1c1d66..4d36203ffb11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,6 +30,8 @@
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**);
+static const unsigned int compute_vmid_bitmap = 0xFF00;
+
int amdgpu_amdkfd_init(void)
{
int ret;
@@ -56,6 +58,7 @@ int amdgpu_amdkfd_init(void)
#else
ret = -ENOENT;
#endif
+ amdgpu_amdkfd_gpuvm_init_mem_limits();
return ret;
}
@@ -78,10 +81,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
+ case CHIP_HAWAII:
kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
break;
#endif
case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
default:
@@ -132,9 +140,13 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
- .compute_vmid_bitmap = 0xFF00,
+ .compute_vmid_bitmap = compute_vmid_bitmap,
.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
- .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
+ .gpuvm_size = min(adev->vm_manager.max_pfn
+ << AMDGPU_GPU_PAGE_SHIFT,
+ AMDGPU_VA_HOLE_START),
+ .drm_render_minor = adev->ddev->render->index
};
/* this is going to have a few of the MSBs set that we need to
@@ -204,20 +216,14 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **cpu_ptr)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
- struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+ struct amdgpu_bo *bo = NULL;
int r;
+ uint64_t gpu_addr_tmp = 0;
+ void *cpu_ptr_tmp = NULL;
- BUG_ON(kgd == NULL);
- BUG_ON(gpu_addr == NULL);
- BUG_ON(cpu_ptr == NULL);
-
- *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
- if ((*mem) == NULL)
- return -ENOMEM;
-
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0,
- &(*mem)->bo);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, ttm_bo_type_kernel,
+ NULL, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
@@ -225,54 +231,53 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
/* map the buffer */
- r = amdgpu_bo_reserve((*mem)->bo, true);
+ r = amdgpu_bo_reserve(bo, true);
if (r) {
dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
goto allocate_mem_reserve_bo_failed;
}
- r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
- &(*mem)->gpu_addr);
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
+ &gpu_addr_tmp);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
- *gpu_addr = (*mem)->gpu_addr;
- r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+ r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
"(%d) failed to map bo to kernel for amdkfd\n", r);
goto allocate_mem_kmap_bo_failed;
}
- *cpu_ptr = (*mem)->cpu_ptr;
- amdgpu_bo_unreserve((*mem)->bo);
+ *mem_obj = bo;
+ *gpu_addr = gpu_addr_tmp;
+ *cpu_ptr = cpu_ptr_tmp;
+
+ amdgpu_bo_unreserve(bo);
return 0;
allocate_mem_kmap_bo_failed:
- amdgpu_bo_unpin((*mem)->bo);
+ amdgpu_bo_unpin(bo);
allocate_mem_pin_bo_failed:
- amdgpu_bo_unreserve((*mem)->bo);
+ amdgpu_bo_unreserve(bo);
allocate_mem_reserve_bo_failed:
- amdgpu_bo_unref(&(*mem)->bo);
+ amdgpu_bo_unref(&bo);
return r;
}
void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
{
- struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
-
- BUG_ON(mem == NULL);
+ struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
- amdgpu_bo_reserve(mem->bo, true);
- amdgpu_bo_kunmap(mem->bo);
- amdgpu_bo_unpin(mem->bo);
- amdgpu_bo_unreserve(mem->bo);
- amdgpu_bo_unref(&(mem->bo));
- kfree(mem);
+ amdgpu_bo_reserve(bo, true);
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&(bo));
}
void get_local_mem_info(struct kgd_dev *kgd,
@@ -281,24 +286,29 @@ void get_local_mem_info(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask :
~((1ULL << 32) - 1);
- resource_size_t aper_limit = adev->mc.aper_base + adev->mc.aper_size;
+ resource_size_t aper_limit = adev->gmc.aper_base + adev->gmc.aper_size;
memset(mem_info, 0, sizeof(*mem_info));
- if (!(adev->mc.aper_base & address_mask || aper_limit & address_mask)) {
- mem_info->local_mem_size_public = adev->mc.visible_vram_size;
- mem_info->local_mem_size_private = adev->mc.real_vram_size -
- adev->mc.visible_vram_size;
+ if (!(adev->gmc.aper_base & address_mask || aper_limit & address_mask)) {
+ mem_info->local_mem_size_public = adev->gmc.visible_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size -
+ adev->gmc.visible_vram_size;
} else {
mem_info->local_mem_size_public = 0;
- mem_info->local_mem_size_private = adev->mc.real_vram_size;
+ mem_info->local_mem_size_private = adev->gmc.real_vram_size;
}
- mem_info->vram_width = adev->mc.vram_width;
+ mem_info->vram_width = adev->gmc.vram_width;
pr_debug("Address base: %pap limit %pap public 0x%llx private 0x%llx\n",
- &adev->mc.aper_base, &aper_limit,
+ &adev->gmc.aper_base, &aper_limit,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
+ if (amdgpu_emu_mode == 1) {
+ mem_info->mem_clk_max = 100;
+ return;
+ }
+
if (amdgpu_sriov_vf(adev))
mem_info->mem_clk_max = adev->clock.default_mclk / 100;
else
@@ -319,6 +329,9 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
/* the sclk is in quanta of 10kHz */
+ if (amdgpu_emu_mode == 1)
+ return 100;
+
if (amdgpu_sriov_vf(adev))
return adev->clock.default_sclk / 100;
@@ -354,3 +367,68 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
}
+
+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
+ struct amdgpu_ring *ring;
+ struct dma_fence *f = NULL;
+ int ret;
+
+ switch (engine) {
+ case KGD_ENGINE_MEC1:
+ ring = &adev->gfx.compute_ring[0];
+ break;
+ case KGD_ENGINE_SDMA1:
+ ring = &adev->sdma.instance[0].ring;
+ break;
+ case KGD_ENGINE_SDMA2:
+ ring = &adev->sdma.instance[1].ring;
+ break;
+ default:
+ pr_err("Invalid engine in IB submission: %d\n", engine);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+ if (ret)
+ goto err;
+
+ ib = &job->ibs[0];
+ memset(ib, 0, sizeof(struct amdgpu_ib));
+
+ ib->gpu_addr = gpu_addr;
+ ib->ptr = ib_cmd;
+ ib->length_dw = ib_len;
+ /* This works for NO_HWS. TODO: need to handle without knowing VMID */
+ job->vmid = vmid;
+
+ ret = amdgpu_ib_schedule(ring, 1, ib, job, &f);
+ if (ret) {
+ DRM_ERROR("amdgpu: failed to schedule IB.\n");
+ goto err_ib_sched;
+ }
+
+ ret = dma_fence_wait(f, false);
+
+err_ib_sched:
+ dma_fence_put(f);
+ amdgpu_job_free(job);
+err:
+ return ret;
+}
+
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
+{
+ if (adev->kfd) {
+ if ((1 << vmid) & compute_vmid_bitmap)
+ return true;
+ }
+
+ return false;
+}
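
A minimal standalone sketch of the VMID partitioning test above, for illustration only (the 0xFF00 mask is taken from the patch; bit N set means VMID N belongs to the compute/KFD partition, i.e. VMIDs 8-15):

#include <stdbool.h>
#include <stdint.h>

/* Sketch: mirrors amdgpu_amdkfd_is_kfd_vmid() without the adev->kfd check */
static bool is_compute_vmid(uint32_t vmid)
{
	const uint32_t compute_vmid_bitmap = 0xFF00;	/* VMIDs 8..15 */

	return ((1u << vmid) & compute_vmid_bitmap) != 0;
}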
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 2a519f9062ee..c2c2bea731e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -26,15 +26,71 @@
#define AMDGPU_AMDKFD_H_INCLUDED
#include <linux/types.h>
+#include <linux/mm.h>
#include <linux/mmu_context.h>
#include <kgd_kfd_interface.h>
+#include <drm/ttm/ttm_execbuf_util.h>
+#include "amdgpu_sync.h"
+#include "amdgpu_vm.h"
+
+extern const struct kgd2kfd_calls *kgd2kfd;
struct amdgpu_device;
+struct kfd_bo_va_list {
+ struct list_head bo_list;
+ struct amdgpu_bo_va *bo_va;
+ void *kgd_dev;
+ bool is_mapped;
+ uint64_t va;
+ uint64_t pte_flags;
+};
+
struct kgd_mem {
+ struct mutex lock;
struct amdgpu_bo *bo;
- uint64_t gpu_addr;
- void *cpu_ptr;
+ struct list_head bo_va_list;
+ /* protected by amdkfd_process_info.lock */
+ struct ttm_validate_buffer validate_list;
+ struct ttm_validate_buffer resv_list;
+ uint32_t domain;
+ unsigned int mapped_to_gpu_memory;
+ uint64_t va;
+
+ uint32_t mapping_flags;
+
+ struct amdkfd_process_info *process_info;
+
+ struct amdgpu_sync sync;
+
+ bool aql_queue;
+};
+
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+ struct dma_fence base;
+ struct mm_struct *mm;
+ spinlock_t lock;
+ char timeline_name[TASK_COMM_LEN];
+};
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm);
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+
+struct amdkfd_process_info {
+ /* List head of all VMs that belong to a KFD process */
+ struct list_head vm_list_head;
+ /* List head for all KFD BOs that belong to a KFD process. */
+ struct list_head kfd_bo_list;
+ /* Lock to protect kfd_bo_list */
+ struct mutex lock;
+
+ /* Number of VMs */
+ unsigned int n_vms;
+ /* Eviction Fence */
+ struct amdgpu_amdkfd_fence *eviction_fence;
};
int amdgpu_amdkfd_init(void);
@@ -48,9 +104,15 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
+ uint32_t vmid, uint64_t gpu_addr,
+ uint32_t *ib_cmd, uint32_t ib_len);
+
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
@@ -79,4 +141,36 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
valid; \
})
+/* GPUVM API */
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+ void **process_info,
+ struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+ struct file *filp,
+ void **vm, void **process_info,
+ struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm);
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct kgd_dev *kgd, uint64_t va, uint64_t size,
+ void *vm, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags);
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem);
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+ struct kgd_mem *mem, void **kptr, uint64_t *size);
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
+ struct dma_fence **ef);
+
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
+
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
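
A hedged sketch of the intended call sequence for the GPUVM API declared above (kgd, va, size and flags are placeholders, error handling is elided, and only functions declared in this header are used; the real flag constants live in the KFD interface headers, which are not part of this patch):

static void example_gpuvm_flow(struct kgd_dev *kgd, uint64_t va,
			       uint64_t size, uint32_t flags)
{
	void *vm, *process_info;
	struct dma_fence *ef;
	struct kgd_mem *mem;
	uint64_t offset;

	/* Create the per-process VM, allocate and map a buffer, use it,
	 * then tear everything down in reverse order.
	 */
	amdgpu_amdkfd_gpuvm_create_process_vm(kgd, &vm, &process_info, &ef);
	amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
						&offset, flags);
	amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true); /* interruptible wait */
	/* ... user-mode queues access the mapping here ... */
	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
	amdgpu_amdkfd_gpuvm_destroy_process_vm(kgd, vm);
}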
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644
index 000000000000..2c14025e5e76
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ * evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_bo_driver->eviction_valuable().
+ *
+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ * to process X. Otherwise, it will return true to indicate BO can be
+ * evicted by TTM.
+ *
+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * the eviction process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up a job in the GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ * notify when the BO is free to move. fence_add_callback --> enable_signaling
+ * --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Starts a work item that will quiesce
+ * the user queues and signal the fence. The work item also schedules a
+ * delayed work item to restore the BOs.
+ */
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+ struct mm_struct *mm)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (fence == NULL)
+ return NULL;
+
+ /* This reference gets released in amdkfd_fence_release */
+ mmgrab(mm);
+ fence->mm = mm;
+ get_task_comm(fence->timeline_name, current);
+ spin_lock_init(&fence->lock);
+
+ dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+ context, atomic_inc_return(&fence_seq));
+
+ return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence;
+
+ if (!f)
+ return NULL;
+
+ fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+ if (fence && f->ops == &amdkfd_fence_ops)
+ return fence;
+
+ return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+ return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ * a KFD BO and schedules a job to move the BO.
+ * If the fence is already signaled, return true.
+ * If not, schedule an eviction work item for the KFD process.
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+
+ if (dma_fence_is_signaled(f))
+ return true;
+
+ if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
+ return true;
+
+ return false;
+}
+
+/**
+ * amdkfd_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and RCU schedules freeing up the fence.
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ /* Unconditionally signal the fence. The process is getting
+ * terminated.
+ */
+ if (WARN_ON(!fence))
+ return; /* Not an amdgpu_amdkfd_fence */
+
+ mmdrop(fence->mm);
+ kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check whether @mm is the same as the mm of
+ * fence @f. Returns true if they match, false otherwise.
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+ struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+ if (!fence)
+ return false;
+ else if (fence->mm == mm)
+ return true;
+
+ return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+ .get_driver_name = amdkfd_fence_get_driver_name,
+ .get_timeline_name = amdkfd_fence_get_timeline_name,
+ .enable_signaling = amdkfd_fence_enable_signaling,
+ .signaled = NULL,
+ .wait = dma_fence_default_wait,
+ .release = amdkfd_fence_release,
+};
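
A hedged sketch of how the eviction fence above is meant to be used (bo is a placeholder and must already be reserved; amdgpu_bo_fence() is the existing helper that adds a shared fence to a BO's reservation object, as the GPUVM code later in this patch does; a real caller would allocate one fence context per process and reuse it):

static int example_attach_eviction_fence(struct amdgpu_bo *bo)
{
	struct amdgpu_amdkfd_fence *ef;

	/* One fence per process; attaching it to each of the process' BOs
	 * routes TTM eviction through amdkfd_fence_enable_signaling() above.
	 */
	ef = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					current->mm);
	if (!ef)
		return -ENOMEM;

	amdgpu_bo_fence(bo, &ef->base, true);	/* true: add as shared fence */
	return 0;
}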
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index a9e6aea0e5f8..ea54e53172b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -139,11 +139,14 @@ static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@@ -196,12 +199,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid = get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid = get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
- .get_vram_usage = amdgpu_amdkfd_get_vram_usage
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -787,14 +804,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void set_scratch_backing_va(struct kgd_dev *kgd,
@@ -812,8 +822,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
- BUG_ON(kgd == NULL);
-
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
@@ -866,3 +874,50 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
return hdr->common.ucode_version;
}
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ unsigned int tmp;
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid\n");
+ return 0;
+ }
+
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index b127259d7d85..89264c9a5e9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -81,7 +81,6 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
@@ -99,10 +98,13 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid);
static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
static void set_scratch_backing_va(struct kgd_dev *kgd,
uint64_t va, uint32_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@@ -157,12 +159,26 @@ static const struct kfd2kgd_calls kfd2kgd = {
get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
- .write_vmid_invalidate_request = write_vmid_invalidate_request,
.get_fw_version = get_fw_version,
.set_scratch_backing_va = set_scratch_backing_va,
.get_tile_config = get_tile_config,
.get_cu_info = get_cu_info,
- .get_vram_usage = amdgpu_amdkfd_get_vram_usage
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -704,14 +720,7 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
- return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
-}
-
-static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
-
- WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
@@ -775,8 +784,6 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
const union amdgpu_firmware_header *hdr;
- BUG_ON(kgd == NULL);
-
switch (type) {
case KGD_ENGINE_PFP:
hdr = (const union amdgpu_firmware_header *)
@@ -828,3 +835,51 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
/* Only 12 bit in use*/
return hdr->common.ucode_version;
}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID\n");
+ return;
+ }
+ WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base);
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ unsigned int tmp;
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+
+ tmp = RREG32(mmATC_VMID0_PASID_MAPPING + vmid);
+ if ((tmp & ATC_VMID0_PASID_MAPPING__VALID_MASK) &&
+ (tmp & ATC_VMID0_PASID_MAPPING__PASID_MASK) == pasid) {
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return -EINVAL;
+ }
+
+ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
+ RREG32(mmVM_INVALIDATE_RESPONSE);
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
new file mode 100644
index 000000000000..1d6e1479da38
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -0,0 +1,1577 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/list.h>
+#include <drm/drmP.h>
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_amdkfd.h"
+
+/* Special VM and GART address alignment needed for VI pre-Fiji due to
+ * a HW bug.
+ */
+#define VI_BO_SIZE_ALIGN (0x8000)
+
+/* Impose limit on how much memory KFD can use */
+static struct {
+ uint64_t max_system_mem_limit;
+ int64_t system_mem_used;
+ spinlock_t mem_limit_lock;
+} kfd_mem_limit;
+
+/* Struct used for amdgpu_amdkfd_bo_validate */
+struct amdgpu_vm_parser {
+ uint32_t domain;
+ bool wait;
+};
+
+static const char * const domain_bit_to_string[] = {
+ "CPU",
+ "GTT",
+ "VRAM",
+ "GDS",
+ "GWS",
+ "OA"
+};
+
+#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
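
/* Illustrative aside (not part of the patch): domain_string() maps a
 * single-bit domain flag to its name via ffs(). Assuming the usual
 * AMDGPU_GEM_DOMAIN_VRAM == 0x4: ffs(0x4) == 3, so the macro yields
 * domain_bit_to_string[2] == "VRAM". Exactly one domain bit must be set.
 */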
+
+
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
+ struct kgd_mem *mem)
+{
+ struct kfd_bo_va_list *entry;
+
+ list_for_each_entry(entry, &mem->bo_va_list, bo_list)
+ if (entry->bo_va->base.vm == avm)
+ return false;
+
+ return true;
+}
+
+/* Set memory usage limits. Currently the limit is:
+ * system (kernel) memory - 3/8 of system RAM
+ */
+void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
+{
+ struct sysinfo si;
+ uint64_t mem;
+
+ si_meminfo(&si);
+ mem = si.totalram - si.totalhigh;
+ mem *= si.mem_unit;
+
+ spin_lock_init(&kfd_mem_limit.mem_limit_lock);
+ kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
+ pr_debug("Kernel memory limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20));
+}
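
/* Worked example (aside, not part of the patch): the limit computed above
 * is (mem >> 1) - (mem >> 3) = mem/2 - mem/8 = 3*mem/8, i.e. 3/8 of low
 * system RAM. For mem = 16 GiB: 8 GiB - 2 GiB = 6 GiB.
 */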
+
+static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain)
+{
+ size_t acc_size;
+ int ret = 0;
+
+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+ sizeof(struct amdgpu_bo));
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
+ if (kfd_mem_limit.system_mem_used + (acc_size + size) >
+ kfd_mem_limit.max_system_mem_limit) {
+ ret = -ENOMEM;
+ goto err_no_mem;
+ }
+ kfd_mem_limit.system_mem_used += (acc_size + size);
+ }
+err_no_mem:
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+ return ret;
+}
+
+static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+ uint64_t size, u32 domain)
+{
+ size_t acc_size;
+
+ acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
+ sizeof(struct amdgpu_bo));
+
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+ if (domain == AMDGPU_GEM_DOMAIN_GTT)
+ kfd_mem_limit.system_mem_used -= (acc_size + size);
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "kfd system memory accounting unbalanced");
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+{
+ spin_lock(&kfd_mem_limit.mem_limit_lock);
+
+ if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ kfd_mem_limit.system_mem_used -=
+ (bo->tbo.acc_size + amdgpu_bo_size(bo));
+ }
+ WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
+ "kfd system memory accounting unbalanced");
+
+ spin_unlock(&kfd_mem_limit.mem_limit_lock);
+}
+
+
+/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
+ * reservation object.
+ *
+ * @bo: [IN] Remove eviction fence(s) from this BO
+ * @ef: [IN] If ef is specified, then this eviction fence is removed if it
+ * is present in the shared list.
+ * @ef_list: [OUT] Returns list of eviction fences. These fences are removed
+ * from BO's reservation object shared list.
+ * @ef_count: [OUT] Number of fences in ef_list.
+ *
+ * NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
+ * called to restore the eviction fences and to avoid a memory leak. This is
+ * useful for shared BOs.
+ * NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
+ */
+static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
+ struct amdgpu_amdkfd_fence *ef,
+ struct amdgpu_amdkfd_fence ***ef_list,
+ unsigned int *ef_count)
+{
+ struct reservation_object_list *fobj;
+ struct reservation_object *resv;
+ unsigned int i = 0, j = 0, k = 0, shared_count;
+ unsigned int count = 0;
+ struct amdgpu_amdkfd_fence **fence_list;
+
+ if (!ef && !ef_list)
+ return -EINVAL;
+
+ if (ef_list) {
+ *ef_list = NULL;
+ *ef_count = 0;
+ }
+
+ resv = bo->tbo.resv;
+ fobj = reservation_object_get_list(resv);
+
+ if (!fobj)
+ return 0;
+
+ preempt_disable();
+ write_seqcount_begin(&resv->seq);
+
+ /* Go through all the shared fences in the reservation object. If
+ * ef is specified and it exists in the list, remove it and reduce the
+ * count. If ef is not specified, then get the count of eviction fences
+ * present.
+ */
+ shared_count = fobj->shared_count;
+ for (i = 0; i < shared_count; ++i) {
+ struct dma_fence *f;
+
+ f = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(resv));
+
+ if (ef) {
+ if (f->context == ef->base.context) {
+ dma_fence_put(f);
+ fobj->shared_count--;
+ } else {
+ RCU_INIT_POINTER(fobj->shared[j++], f);
+ }
+ } else if (to_amdgpu_amdkfd_fence(f)) {
+ count++;
+ }
+ }
+ write_seqcount_end(&resv->seq);
+ preempt_enable();
+
+ if (ef || !count)
+ return 0;
+
+ /* Allocate memory for 'count' eviction fence pointers, then fill
+ * in the ef_list array and ef_count.
+ */
+ fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
+ GFP_KERNEL);
+ if (!fence_list)
+ return -ENOMEM;
+
+ preempt_disable();
+ write_seqcount_begin(&resv->seq);
+
+ j = 0;
+ for (i = 0; i < shared_count; ++i) {
+ struct dma_fence *f;
+ struct amdgpu_amdkfd_fence *efence;
+
+ f = rcu_dereference_protected(fobj->shared[i],
+ reservation_object_held(resv));
+
+ efence = to_amdgpu_amdkfd_fence(f);
+ if (efence) {
+ fence_list[k++] = efence;
+ fobj->shared_count--;
+ } else {
+ RCU_INIT_POINTER(fobj->shared[j++], f);
+ }
+ }
+
+ write_seqcount_end(&resv->seq);
+ preempt_enable();
+
+ *ef_list = fence_list;
+ *ef_count = k;
+
+ return 0;
+}
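+
+/* Typical usage when a wait must not trigger the eviction fence (a
+ * sketch; amdgpu_amdkfd_bo_validate() below follows this exact pattern):
+ *
+ * struct amdgpu_amdkfd_fence **ef_list;
+ * unsigned int ef_count;
+ *
+ * amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list, &ef_count);
+ * ttm_bo_wait(&bo->tbo, false, false);
+ * amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
+ */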
+
+/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
+ * reservation object.
+ *
+ * @bo: [IN] Add eviction fences to this BO
+ * @ef_list: [IN] List of eviction fences to be added
+ * @ef_count: [IN] Number of fences in ef_list.
+ *
+ * NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
+ * function.
+ */
+static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
+ struct amdgpu_amdkfd_fence **ef_list,
+ unsigned int ef_count)
+{
+ int i;
+
+ if (!ef_list || !ef_count)
+ return;
+
+ for (i = 0; i < ef_count; i++) {
+ amdgpu_bo_fence(bo, &ef_list[i]->base, true);
+ /* Re-adding the fence takes an additional reference. Drop that
+ * reference.
+ */
+ dma_fence_put(&ef_list[i]->base);
+ }
+
+ kfree(ef_list);
+}
+
+static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
+ bool wait)
+{
+ struct ttm_operation_ctx ctx = { false, false };
+ int ret;
+
+ if (WARN(amdgpu_ttm_tt_get_usermm(bo->tbo.ttm),
+ "Called with userptr BO"))
+ return -EINVAL;
+
+ amdgpu_ttm_placement_from_domain(bo, domain);
+
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ goto validate_fail;
+ if (wait) {
+ struct amdgpu_amdkfd_fence **ef_list;
+ unsigned int ef_count;
+
+ ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
+ &ef_count);
+ if (ret)
+ goto validate_fail;
+
+ ttm_bo_wait(&bo->tbo, false, false);
+ amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
+ }
+
+validate_fail:
+ return ret;
+}
+
+static int amdgpu_amdkfd_validate(void *param, struct amdgpu_bo *bo)
+{
+ struct amdgpu_vm_parser *p = param;
+
+ return amdgpu_amdkfd_bo_validate(bo, p->domain, p->wait);
+}
+
+/* vm_validate_pt_pd_bos - Validate page table and directory BOs
+ *
+ * Page directories are not updated here because huge page handling
+ * during page table updates can invalidate page directory entries
+ * again. Page directories are only updated after updating page
+ * tables.
+ */
+static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
+{
+ struct amdgpu_bo *pd = vm->root.base.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ struct amdgpu_vm_parser param;
+ uint64_t addr, flags = AMDGPU_PTE_VALID;
+ int ret;
+
+ param.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ param.wait = false;
+
+ ret = amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_amdkfd_validate,
+ &param);
+ if (ret) {
+ pr_err("amdgpu: failed to validate PT BOs\n");
+ return ret;
+ }
+
+ ret = amdgpu_amdkfd_validate(&param, pd);
+ if (ret) {
+ pr_err("amdgpu: failed to validate PD\n");
+ return ret;
+ }
+
+ addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
+ amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags);
+ vm->pd_phys_addr = addr;
+
+ if (vm->use_cpu_for_update) {
+ ret = amdgpu_bo_kmap(pd, NULL);
+ if (ret) {
+ pr_err("amdgpu: failed to kmap PD, ret=%d\n", ret);
+ return ret;
+ }
+ }
+
+ return 0;
+}
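+
+/* The PD address cached in vm->pd_phys_addr above is what
+ * amdgpu_amdkfd_gpuvm_get_process_page_dir() later returns to KFD,
+ * shifted down by AMDGPU_GPU_PAGE_SHIFT to a page frame number.
+ */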
+
+static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
+ struct dma_fence *f)
+{
+ int ret = amdgpu_sync_fence(adev, sync, f, false);
+
+ /* Sync objects can't handle multiple GPUs (contexts) updating
+ * sync->last_vm_update. Fortunately we don't need it for
+ * KFD's purposes, so we can just drop that fence.
+ */
+ if (sync->last_vm_update) {
+ dma_fence_put(sync->last_vm_update);
+ sync->last_vm_update = NULL;
+ }
+
+ return ret;
+}
+
+static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo *pd = vm->root.base.bo;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
+ int ret;
+
+ ret = amdgpu_vm_update_directories(adev, vm);
+ if (ret)
+ return ret;
+
+ return sync_vm_fence(adev, sync, vm->last_update);
+}
+
+/* add_bo_to_vm - Add a BO to a VM
+ *
+ * Everything that needs to be done only once when a BO is first added
+ * to a VM. It can later be mapped and unmapped many times without
+ * repeating these steps.
+ *
+ * 1. Allocate and initialize BO VA entry data structure
+ * 2. Add BO to the VM
+ * 3. Determine ASIC-specific PTE flags
+ * 4. Alloc page tables and directories if needed
+ * 4a. Validate new page tables and directories
+ */
+static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
+ struct amdgpu_vm *vm, bool is_aql,
+ struct kfd_bo_va_list **p_bo_va_entry)
+{
+ int ret;
+ struct kfd_bo_va_list *bo_va_entry;
+ struct amdgpu_bo *pd = vm->root.base.bo;
+ struct amdgpu_bo *bo = mem->bo;
+ uint64_t va = mem->va;
+ struct list_head *list_bo_va = &mem->bo_va_list;
+ unsigned long bo_size = bo->tbo.mem.size;
+
+ if (!va) {
+ pr_err("Invalid VA when adding BO to VM\n");
+ return -EINVAL;
+ }
+
+ if (is_aql)
+ va += bo_size;
+
+ bo_va_entry = kzalloc(sizeof(*bo_va_entry), GFP_KERNEL);
+ if (!bo_va_entry)
+ return -ENOMEM;
+
+ pr_debug("\t add VA 0x%llx - 0x%llx to vm %p\n", va,
+ va + bo_size, vm);
+
+ /* Add BO to VM internal data structures */
+ bo_va_entry->bo_va = amdgpu_vm_bo_add(adev, vm, bo);
+ if (!bo_va_entry->bo_va) {
+ ret = -EINVAL;
+ pr_err("Failed to add BO to VM, ret %d\n", ret);
+ goto err_vmadd;
+ }
+
+ bo_va_entry->va = va;
+ bo_va_entry->pte_flags = amdgpu_gmc_get_pte_flags(adev,
+ mem->mapping_flags);
+ bo_va_entry->kgd_dev = (void *)adev;
+ list_add(&bo_va_entry->bo_list, list_bo_va);
+
+ if (p_bo_va_entry)
+ *p_bo_va_entry = bo_va_entry;
+
+ /* Allocate new page tables if needed and validate
+ * them. Clearing of new page tables and validate need to wait
+ * on move fences. We don't want that to trigger the eviction
+ * fence, so remove it temporarily.
+ */
+ amdgpu_amdkfd_remove_eviction_fence(pd,
+ vm->process_info->eviction_fence,
+ NULL, NULL);
+
+ ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
+ if (ret) {
+ pr_err("Failed to allocate pts, err=%d\n", ret);
+ goto err_alloc_pts;
+ }
+
+ ret = vm_validate_pt_pd_bos(vm);
+ if (ret) {
+ pr_err("validate_pt_pd_bos() failed\n");
+ goto err_alloc_pts;
+ }
+
+ /* Add the eviction fence back */
+ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+
+ return 0;
+
+err_alloc_pts:
+ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+ amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
+ list_del(&bo_va_entry->bo_list);
+err_vmadd:
+ kfree(bo_va_entry);
+ return ret;
+}
+
+static void remove_bo_from_vm(struct amdgpu_device *adev,
+ struct kfd_bo_va_list *entry, unsigned long size)
+{
+ pr_debug("\t remove VA 0x%llx - 0x%llx in entry %p\n",
+ entry->va,
+ entry->va + size, entry);
+ amdgpu_vm_bo_rmv(adev, entry->bo_va);
+ list_del(&entry->bo_list);
+ kfree(entry);
+}
+
+static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
+ struct amdkfd_process_info *process_info)
+{
+ struct ttm_validate_buffer *entry = &mem->validate_list;
+ struct amdgpu_bo *bo = mem->bo;
+
+ INIT_LIST_HEAD(&entry->head);
+ entry->shared = true;
+ entry->bo = &bo->tbo;
+ mutex_lock(&process_info->lock);
+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ mutex_unlock(&process_info->lock);
+}
+
+/* Reserving a BO and its page table BOs must happen atomically to
+ * avoid deadlocks. Some operations update multiple VMs at once. Track
+ * all the reservation info in a context structure. Optionally a sync
+ * object can track VM updates.
+ */
+struct bo_vm_reservation_context {
+ struct amdgpu_bo_list_entry kfd_bo; /* BO list entry for the KFD BO */
+ unsigned int n_vms; /* Number of VMs reserved */
+ struct amdgpu_bo_list_entry *vm_pd; /* Array of VM BO list entries */
+ struct ww_acquire_ctx ticket; /* Reservation ticket */
+ struct list_head list, duplicates; /* BO lists */
+ struct amdgpu_sync *sync; /* Pointer to sync object */
+ bool reserved; /* Whether BOs are reserved */
+};
+
+enum bo_vm_match {
+ BO_VM_NOT_MAPPED = 0, /* Match VMs where a BO is not mapped */
+ BO_VM_MAPPED, /* Match VMs where a BO is mapped */
+ BO_VM_ALL, /* Match all VMs a BO was added to */
+};
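+
+/* Typical lifetime of a reservation context (a sketch of how the
+ * helpers below fit together):
+ *
+ * struct bo_vm_reservation_context ctx;
+ *
+ * ret = reserve_bo_and_vm(mem, vm, &ctx);
+ * if (ret)
+ * return ret;
+ * ... update mappings, collecting fences in ctx.sync ...
+ * ret = unreserve_bo_and_vms(&ctx, true, false);
+ */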
+
+/**
+ * reserve_bo_and_vm - reserve a BO and a VM unconditionally.
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ */
+static int reserve_bo_and_vm(struct kgd_mem *mem,
+ struct amdgpu_vm *vm,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct amdgpu_bo *bo = mem->bo;
+ int ret;
+
+ WARN_ON(!vm);
+
+ ctx->reserved = false;
+ ctx->n_vms = 1;
+ ctx->sync = &mem->sync;
+
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->duplicates);
+
+ ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd), GFP_KERNEL);
+ if (!ctx->vm_pd)
+ return -ENOMEM;
+
+ ctx->kfd_bo.robj = bo;
+ ctx->kfd_bo.priority = 0;
+ ctx->kfd_bo.tv.bo = &bo->tbo;
+ ctx->kfd_bo.tv.shared = true;
+ ctx->kfd_bo.user_pages = NULL;
+ list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
+ amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
+
+ ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+ false, &ctx->duplicates);
+ if (!ret) {
+ ctx->reserved = true;
+ } else {
+ pr_err("Failed to reserve buffers in ttm\n");
+ kfree(ctx->vm_pd);
+ ctx->vm_pd = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * reserve_bo_and_cond_vms - reserve a BO and some VMs conditionally
+ * @mem: KFD BO structure.
+ * @vm: the VM to reserve. If NULL, all VMs associated with the BO are
+ * reserved. Otherwise, only the given VM is reserved.
+ * @map_type: the mapping status that will be used to filter the VMs.
+ * @ctx: the struct that will be used in unreserve_bo_and_vms().
+ *
+ * Returns 0 for success, negative for failure.
+ */
+static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
+ struct amdgpu_vm *vm, enum bo_vm_match map_type,
+ struct bo_vm_reservation_context *ctx)
+{
+ struct amdgpu_bo *bo = mem->bo;
+ struct kfd_bo_va_list *entry;
+ unsigned int i;
+ int ret;
+
+ ctx->reserved = false;
+ ctx->n_vms = 0;
+ ctx->vm_pd = NULL;
+ ctx->sync = &mem->sync;
+
+ INIT_LIST_HEAD(&ctx->list);
+ INIT_LIST_HEAD(&ctx->duplicates);
+
+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type &&
+ map_type != BO_VM_ALL))
+ continue;
+
+ ctx->n_vms++;
+ }
+
+ if (ctx->n_vms != 0) {
+ ctx->vm_pd = kcalloc(ctx->n_vms, sizeof(*ctx->vm_pd),
+ GFP_KERNEL);
+ if (!ctx->vm_pd)
+ return -ENOMEM;
+ }
+
+ ctx->kfd_bo.robj = bo;
+ ctx->kfd_bo.priority = 0;
+ ctx->kfd_bo.tv.bo = &bo->tbo;
+ ctx->kfd_bo.tv.shared = true;
+ ctx->kfd_bo.user_pages = NULL;
+ list_add(&ctx->kfd_bo.tv.head, &ctx->list);
+
+ i = 0;
+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+ if ((vm && vm != entry->bo_va->base.vm) ||
+ (entry->is_mapped != map_type &&
+ map_type != BO_VM_ALL))
+ continue;
+
+ amdgpu_vm_get_pd_bo(entry->bo_va->base.vm, &ctx->list,
+ &ctx->vm_pd[i]);
+ i++;
+ }
+
+ ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
+ false, &ctx->duplicates);
+ if (!ret) {
+ ctx->reserved = true;
+ } else {
+ pr_err("Failed to reserve buffers in ttm\n");
+ kfree(ctx->vm_pd);
+ ctx->vm_pd = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * unreserve_bo_and_vms - Unreserve BO and VMs from a reservation context
+ * @ctx: Reservation context to unreserve
+ * @wait: Optionally wait for a sync object representing pending VM updates
+ * @intr: Whether the wait is interruptible
+ *
+ * Also frees any resources allocated in
+ * reserve_bo_and_(cond_)vm(s). Returns the status from
+ * amdgpu_sync_wait.
+ */
+static int unreserve_bo_and_vms(struct bo_vm_reservation_context *ctx,
+ bool wait, bool intr)
+{
+ int ret = 0;
+
+ if (wait)
+ ret = amdgpu_sync_wait(ctx->sync, intr);
+
+ if (ctx->reserved)
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->list);
+ kfree(ctx->vm_pd);
+
+ ctx->sync = NULL;
+
+ ctx->reserved = false;
+ ctx->vm_pd = NULL;
+
+ return ret;
+}
+
+static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
+ struct kfd_bo_va_list *entry,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_bo_va *bo_va = entry->bo_va;
+ struct amdgpu_vm *vm = bo_va->base.vm;
+ struct amdgpu_bo *pd = vm->root.base.bo;
+
+ /* Remove eviction fence from PD (and thereby from PTs too as
+ * they share the resv. object). Otherwise during PT update
+ * job (see amdgpu_vm_bo_update_mapping), eviction fence would
+ * get added to job->sync object and job execution would
+ * trigger the eviction fence.
+ */
+ amdgpu_amdkfd_remove_eviction_fence(pd,
+ vm->process_info->eviction_fence,
+ NULL, NULL);
+ amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
+
+ amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
+
+ /* Add the eviction fence back */
+ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
+
+ sync_vm_fence(adev, sync, bo_va->last_pt_update);
+
+ return 0;
+}
+
+static int update_gpuvm_pte(struct amdgpu_device *adev,
+ struct kfd_bo_va_list *entry,
+ struct amdgpu_sync *sync)
+{
+ int ret;
+ struct amdgpu_vm *vm;
+ struct amdgpu_bo_va *bo_va;
+ struct amdgpu_bo *bo;
+
+ bo_va = entry->bo_va;
+ vm = bo_va->base.vm;
+ bo = bo_va->base.bo;
+
+ /* Update the page tables */
+ ret = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (ret) {
+ pr_err("amdgpu_vm_bo_update failed\n");
+ return ret;
+ }
+
+ return sync_vm_fence(adev, sync, bo_va->last_pt_update);
+}
+
+static int map_bo_to_gpuvm(struct amdgpu_device *adev,
+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+{
+ int ret;
+
+ /* Set virtual address for the allocation */
+ ret = amdgpu_vm_bo_map(adev, entry->bo_va, entry->va, 0,
+ amdgpu_bo_size(entry->bo_va->base.bo),
+ entry->pte_flags);
+ if (ret) {
+ pr_err("Failed to map VA 0x%llx in vm. ret %d\n",
+ entry->va, ret);
+ return ret;
+ }
+
+ ret = update_gpuvm_pte(adev, entry, sync);
+ if (ret) {
+ pr_err("update_gpuvm_pte() failed\n");
+ goto update_gpuvm_pte_failed;
+ }
+
+ return 0;
+
+update_gpuvm_pte_failed:
+ unmap_bo_from_gpuvm(adev, entry, sync);
+ return ret;
+}
+
+static int process_validate_vms(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_validate_pt_pd_bos(peer_vm);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int process_update_pds(struct amdkfd_process_info *process_info,
+ struct amdgpu_sync *sync)
+{
+ struct amdgpu_vm *peer_vm;
+ int ret;
+
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ ret = vm_update_pds(peer_vm, sync);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdkfd_process_info *info = NULL;
+ int ret;
+
+ if (!*process_info) {
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ mutex_init(&info->lock);
+ INIT_LIST_HEAD(&info->vm_list_head);
+ INIT_LIST_HEAD(&info->kfd_bo_list);
+
+ info->eviction_fence =
+ amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
+ current->mm);
+ if (!info->eviction_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto create_evict_fence_fail;
+ }
+
+ *process_info = info;
+ *ef = dma_fence_get(&info->eviction_fence->base);
+ }
+
+ vm->process_info = *process_info;
+
+ /* Validate page directory and attach eviction fence */
+ ret = amdgpu_bo_reserve(vm->root.base.bo, true);
+ if (ret)
+ goto reserve_pd_fail;
+ ret = vm_validate_pt_pd_bos(vm);
+ if (ret) {
+ pr_err("validate_pt_pd_bos() failed\n");
+ goto validate_pd_fail;
+ }
+ ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
+ if (ret)
+ goto wait_pd_fail;
+ amdgpu_bo_fence(vm->root.base.bo,
+ &vm->process_info->eviction_fence->base, true);
+ amdgpu_bo_unreserve(vm->root.base.bo);
+
+ /* Update process info */
+ mutex_lock(&vm->process_info->lock);
+ list_add_tail(&vm->vm_list_node,
+ &(vm->process_info->vm_list_head));
+ vm->process_info->n_vms++;
+ mutex_unlock(&vm->process_info->lock);
+
+ return 0;
+
+wait_pd_fail:
+validate_pd_fail:
+ amdgpu_bo_unreserve(vm->root.base.bo);
+reserve_pd_fail:
+ vm->process_info = NULL;
+ if (info) {
+ /* Two fence references: one in info and one in *ef */
+ dma_fence_put(&info->eviction_fence->base);
+ dma_fence_put(*ef);
+ *ef = NULL;
+ *process_info = NULL;
+create_evict_fence_fail:
+ mutex_destroy(&info->lock);
+ kfree(info);
+ }
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+ void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *new_vm;
+ int ret;
+
+ new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+ if (!new_vm)
+ return -ENOMEM;
+
+ /* Initialize AMDGPU part of the VM */
+ ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+ if (ret) {
+ pr_err("Failed init vm ret %d\n", ret);
+ goto amdgpu_vm_init_fail;
+ }
+
+ /* Initialize KFD part of the VM and process info */
+ ret = init_kfd_vm(new_vm, process_info, ef);
+ if (ret)
+ goto init_kfd_vm_fail;
+
+ *vm = (void *) new_vm;
+
+ return 0;
+
+init_kfd_vm_fail:
+ amdgpu_vm_fini(adev, new_vm);
+amdgpu_vm_init_fail:
+ kfree(new_vm);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+ struct file *filp,
+ void **vm, void **process_info,
+ struct dma_fence **ef)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct drm_file *drm_priv = filp->private_data;
+ struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
+ struct amdgpu_vm *avm = &drv_priv->vm;
+ int ret;
+
+ /* Already a compute VM? */
+ if (avm->process_info)
+ return -EINVAL;
+
+ /* Convert VM into a compute VM */
+ ret = amdgpu_vm_make_compute(adev, avm);
+ if (ret)
+ return ret;
+
+ /* Initialize KFD part of the VM and process info */
+ ret = init_kfd_vm(avm, process_info, ef);
+ if (ret)
+ return ret;
+
+ *vm = (void *)avm;
+
+ return 0;
+}
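+
+/* Unlike amdgpu_amdkfd_gpuvm_create_process_vm() above, this variant
+ * does not allocate a new VM; it converts the VM already attached to
+ * the process's DRM file descriptor, so graphics and compute mappings
+ * share one GPUVM address space.
+ */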
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm)
+{
+ struct amdkfd_process_info *process_info = vm->process_info;
+ struct amdgpu_bo *pd = vm->root.base.bo;
+
+ if (!process_info)
+ return;
+
+ /* Release eviction fence from PD */
+ amdgpu_bo_reserve(pd, false);
+ amdgpu_bo_fence(pd, NULL, false);
+ amdgpu_bo_unreserve(pd);
+
+ /* Update process info */
+ mutex_lock(&process_info->lock);
+ process_info->n_vms--;
+ list_del(&vm->vm_list_node);
+ mutex_unlock(&process_info->lock);
+
+ /* Release per-process resources when last compute VM is destroyed */
+ if (!process_info->n_vms) {
+ WARN_ON(!list_empty(&process_info->kfd_bo_list));
+
+ dma_fence_put(&process_info->eviction_fence->base);
+ mutex_destroy(&process_info->lock);
+ kfree(process_info);
+ }
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ if (WARN_ON(!kgd || !vm))
+ return;
+
+ pr_debug("Destroying process vm %p\n", vm);
+
+ /* Release the VM context */
+ amdgpu_vm_fini(adev, avm);
+ kfree(vm);
+}
+
+uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+{
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+ return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT;
+}
+
+int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
+ struct kgd_dev *kgd, uint64_t va, uint64_t size,
+ void *vm, struct kgd_mem **mem,
+ uint64_t *offset, uint32_t flags)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_bo *bo;
+ int byte_align;
+ u32 alloc_domain;
+ u64 alloc_flags;
+ uint32_t mapping_flags;
+ int ret;
+
+ /*
+ * Check on which domain to allocate BO
+ */
+ if (flags & ALLOC_MEM_FLAGS_VRAM) {
+ alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
+ AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
+ } else if (flags & ALLOC_MEM_FLAGS_GTT) {
+ alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else {
+ return -EINVAL;
+ }
+
+ *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+ if (!*mem)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&(*mem)->bo_va_list);
+ mutex_init(&(*mem)->lock);
+ (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
+
+ /* Workaround for AQL queue wraparound bug. Map the same
+ * memory twice. That means we only actually allocate half
+ * the memory.
+ */
+ if ((*mem)->aql_queue)
+ size = size >> 1;
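+
+ /* E.g. a 64 KiB AQL queue request allocates a 32 KiB BO that
+ * add_bo_to_vm() maps at both va and va + 32 KiB.
+ */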
+
+ /* Workaround for TLB bug on older VI chips */
+ byte_align = (adev->family == AMDGPU_FAMILY_VI &&
+ adev->asic_type != CHIP_FIJI &&
+ adev->asic_type != CHIP_POLARIS10 &&
+ adev->asic_type != CHIP_POLARIS11) ?
+ VI_BO_SIZE_ALIGN : 1;
+
+ mapping_flags = AMDGPU_VM_PAGE_READABLE;
+ if (flags & ALLOC_MEM_FLAGS_WRITABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
+ if (flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+ mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
+ if (flags & ALLOC_MEM_FLAGS_COHERENT)
+ mapping_flags |= AMDGPU_VM_MTYPE_UC;
+ else
+ mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ (*mem)->mapping_flags = mapping_flags;
+
+ amdgpu_sync_create(&(*mem)->sync);
+
+ ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
+ if (ret) {
+ pr_debug("Insufficient system memory\n");
+ goto err_reserve_system_mem;
+ }
+
+ pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
+ va, size, domain_string(alloc_domain));
+
+ ret = amdgpu_bo_create(adev, size, byte_align,
+ alloc_domain, alloc_flags, ttm_bo_type_device, NULL, &bo);
+ if (ret) {
+ pr_debug("Failed to create BO on domain %s. ret %d\n",
+ domain_string(alloc_domain), ret);
+ goto err_bo_create;
+ }
+ bo->kfd_bo = *mem;
+ (*mem)->bo = bo;
+
+ (*mem)->va = va;
+ (*mem)->domain = alloc_domain;
+ (*mem)->mapped_to_gpu_memory = 0;
+ (*mem)->process_info = avm->process_info;
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+
+ if (offset)
+ *offset = amdgpu_bo_mmap_offset(bo);
+
+ return 0;
+
+err_bo_create:
+ unreserve_system_mem_limit(adev, size, alloc_domain);
+err_reserve_system_mem:
+ mutex_destroy(&(*mem)->lock);
+ kfree(*mem);
+ return ret;
+}
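+
+/* Expected KFD call sequence around this allocator (a sketch using the
+ * exported functions in this file):
+ *
+ * amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kgd, va, size, vm, &mem,
+ * &offset, flags);
+ * amdgpu_amdkfd_gpuvm_map_memory_to_gpu(kgd, mem, vm);
+ * amdgpu_amdkfd_gpuvm_sync_memory(kgd, mem, true);
+ * ...
+ * amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(kgd, mem, vm);
+ * amdgpu_amdkfd_gpuvm_free_memory_of_gpu(kgd, mem);
+ */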
+
+int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ unsigned long bo_size = mem->bo->tbo.mem.size;
+ struct kfd_bo_va_list *entry, *tmp;
+ struct bo_vm_reservation_context ctx;
+ struct ttm_validate_buffer *bo_list_entry;
+ int ret;
+
+ mutex_lock(&mem->lock);
+
+ if (mem->mapped_to_gpu_memory > 0) {
+ pr_debug("BO VA 0x%llx size 0x%lx is still mapped.\n",
+ mem->va, bo_size);
+ mutex_unlock(&mem->lock);
+ return -EBUSY;
+ }
+
+ mutex_unlock(&mem->lock);
+ /* lock is not needed after this, since mem is unused and will
+ * be freed anyway
+ */
+
+ /* Make sure restore workers don't access the BO any more */
+ bo_list_entry = &mem->validate_list;
+ mutex_lock(&process_info->lock);
+ list_del(&bo_list_entry->head);
+ mutex_unlock(&process_info->lock);
+
+ ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
+ if (unlikely(ret))
+ return ret;
+
+ /* The eviction fence should be removed by the last unmap.
+ * TODO: Log an error condition if the bo still has the eviction fence
+ * attached
+ */
+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+ pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue));
+
+ /* Remove from VM internal data structures */
+ list_for_each_entry_safe(entry, tmp, &mem->bo_va_list, bo_list)
+ remove_bo_from_vm((struct amdgpu_device *)entry->kgd_dev,
+ entry, bo_size);
+
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ /* Free the sync object */
+ amdgpu_sync_free(&mem->sync);
+
+ /* Free the BO*/
+ amdgpu_bo_unref(&mem->bo);
+ mutex_destroy(&mem->lock);
+ kfree(mem);
+
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ int ret;
+ struct amdgpu_bo *bo;
+ uint32_t domain;
+ struct kfd_bo_va_list *entry;
+ struct bo_vm_reservation_context ctx;
+ struct kfd_bo_va_list *bo_va_entry = NULL;
+ struct kfd_bo_va_list *bo_va_entry_aql = NULL;
+ unsigned long bo_size;
+
+ /* Make sure restore is not running concurrently.
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ mutex_lock(&mem->lock);
+
+ bo = mem->bo;
+
+ if (!bo) {
+ pr_err("Invalid BO when mapping memory to GPU\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ domain = mem->domain;
+ bo_size = bo->tbo.mem.size;
+
+ pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ vm, domain_string(domain));
+
+ ret = reserve_bo_and_vm(mem, vm, &ctx);
+ if (unlikely(ret))
+ goto out;
+
+ if (check_if_add_bo_to_vm(avm, mem)) {
+ ret = add_bo_to_vm(adev, mem, avm, false,
+ &bo_va_entry);
+ if (ret)
+ goto add_bo_to_vm_failed;
+ if (mem->aql_queue) {
+ ret = add_bo_to_vm(adev, mem, avm,
+ true, &bo_va_entry_aql);
+ if (ret)
+ goto add_bo_to_vm_failed_aql;
+ }
+ } else {
+ ret = vm_validate_pt_pd_bos(avm);
+ if (unlikely(ret))
+ goto add_bo_to_vm_failed;
+ }
+
+ if (mem->mapped_to_gpu_memory == 0) {
+ /* Validate BO only once. The eviction fence gets added to BO
+ * the first time it is mapped. Validate will wait for all
+ * background evictions to complete.
+ */
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, true);
+ if (ret) {
+ pr_debug("Validate failed\n");
+ goto map_bo_to_gpuvm_failed;
+ }
+ }
+
+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+ if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+ pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
+ entry->va, entry->va + bo_size,
+ entry);
+
+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+ if (ret) {
+ pr_err("Failed to map radeon bo to gpuvm\n");
+ goto map_bo_to_gpuvm_failed;
+ }
+
+ ret = vm_update_pds(vm, ctx.sync);
+ if (ret) {
+ pr_err("Failed to update page directories\n");
+ goto map_bo_to_gpuvm_failed;
+ }
+
+ entry->is_mapped = true;
+ mem->mapped_to_gpu_memory++;
+ pr_debug("\t INC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+ }
+
+ if (!amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && !bo->pin_count)
+ amdgpu_bo_fence(bo,
+ &avm->process_info->eviction_fence->base,
+ true);
+ ret = unreserve_bo_and_vms(&ctx, false, false);
+
+ goto out;
+
+map_bo_to_gpuvm_failed:
+ if (bo_va_entry_aql)
+ remove_bo_from_vm(adev, bo_va_entry_aql, bo_size);
+add_bo_to_vm_failed_aql:
+ if (bo_va_entry)
+ remove_bo_from_vm(adev, bo_va_entry, bo_size);
+add_bo_to_vm_failed:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->process_info->lock);
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct amdkfd_process_info *process_info =
+ ((struct amdgpu_vm *)vm)->process_info;
+ unsigned long bo_size = mem->bo->tbo.mem.size;
+ struct kfd_bo_va_list *entry;
+ struct bo_vm_reservation_context ctx;
+ int ret;
+
+ mutex_lock(&mem->lock);
+
+ ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+ if (unlikely(ret))
+ goto out;
+ /* If no VMs were reserved, it means the BO wasn't actually mapped */
+ if (ctx.n_vms == 0) {
+ ret = -EINVAL;
+ goto unreserve_out;
+ }
+
+ ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+ if (unlikely(ret))
+ goto unreserve_out;
+
+ pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
+ mem->va,
+ mem->va + bo_size * (1 + mem->aql_queue),
+ vm);
+
+ list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
+ if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+ pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
+ entry->va,
+ entry->va + bo_size,
+ entry);
+
+ ret = unmap_bo_from_gpuvm(adev, entry, ctx.sync);
+ if (ret == 0) {
+ entry->is_mapped = false;
+ } else {
+ pr_err("failed to unmap VA 0x%llx\n",
+ mem->va);
+ goto unreserve_out;
+ }
+
+ mem->mapped_to_gpu_memory--;
+ pr_debug("\t DEC mapping count %d\n",
+ mem->mapped_to_gpu_memory);
+ }
+ }
+
+ /* If BO is unmapped from all VMs, unfence it. It can be evicted if
+ * required.
+ */
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
+ amdgpu_amdkfd_remove_eviction_fence(mem->bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+
+unreserve_out:
+ unreserve_bo_and_vms(&ctx, false, false);
+out:
+ mutex_unlock(&mem->lock);
+ return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_sync_memory(
+ struct kgd_dev *kgd, struct kgd_mem *mem, bool intr)
+{
+ struct amdgpu_sync sync;
+ int ret;
+
+ amdgpu_sync_create(&sync);
+
+ mutex_lock(&mem->lock);
+ amdgpu_sync_clone(&mem->sync, &sync);
+ mutex_unlock(&mem->lock);
+
+ ret = amdgpu_sync_wait(&sync, intr);
+ amdgpu_sync_free(&sync);
+ return ret;
+}
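+
+/* The sync object is cloned above so that the potentially long wait
+ * happens without holding mem->lock, leaving map and unmap operations
+ * unblocked in the meantime.
+ */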
+
+int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
+ struct kgd_mem *mem, void **kptr, uint64_t *size)
+{
+ int ret;
+ struct amdgpu_bo *bo = mem->bo;
+
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ pr_err("userptr can't be mapped to kernel\n");
+ return -EINVAL;
+ }
+
+ /* Remove kgd_mem from the kfd_bo_list to avoid re-validating
+ * this BO when BOs are restored after an eviction.
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("Failed to reserve bo. ret %d\n", ret);
+ goto bo_reserve_failed;
+ }
+
+ ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ if (ret) {
+ pr_err("Failed to pin bo. ret %d\n", ret);
+ goto pin_failed;
+ }
+
+ ret = amdgpu_bo_kmap(bo, kptr);
+ if (ret) {
+ pr_err("Failed to map bo to kernel. ret %d\n", ret);
+ goto kmap_failed;
+ }
+
+ amdgpu_amdkfd_remove_eviction_fence(
+ bo, mem->process_info->eviction_fence, NULL, NULL);
+ list_del_init(&mem->validate_list.head);
+
+ if (size)
+ *size = amdgpu_bo_size(bo);
+
+ amdgpu_bo_unreserve(bo);
+
+ mutex_unlock(&mem->process_info->lock);
+ return 0;
+
+kmap_failed:
+ amdgpu_bo_unpin(bo);
+pin_failed:
+ amdgpu_bo_unreserve(bo);
+bo_reserve_failed:
+ mutex_unlock(&mem->process_info->lock);
+
+ return ret;
+}
+
+/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
+ * KFD process identified by process_info
+ *
+ * @process_info: amdkfd_process_info of the KFD process
+ *
+ * After a memory eviction, the restore thread calls this function. It
+ * must be called while the process is still valid. BO restore involves:
+ *
+ * 1. Release old eviction fence and create new one
+ * 2. Get two copies of the PD BO list from all the VMs. Keep one copy as pd_bo_list.
+ * 3. Use the second PD list and kfd_bo_list to create a list (ctx.list) of
+ * BOs that need to be reserved.
+ * 4. Reserve all the BOs
+ * 5. Validate PD and PT BOs.
+ * 6. Validate all KFD BOs using kfd_bo_list, map them and add new fences
+ * 7. Add fences to all PD and PT BOs.
+ * 8. Unreserve all BOs
+ */
+int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
+{
+ struct amdgpu_bo_list_entry *pd_bo_list;
+ struct amdkfd_process_info *process_info = info;
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem;
+ struct bo_vm_reservation_context ctx;
+ struct amdgpu_amdkfd_fence *new_fence;
+ int ret = 0, i;
+ struct list_head duplicate_save;
+ struct amdgpu_sync sync_obj;
+
+ INIT_LIST_HEAD(&duplicate_save);
+ INIT_LIST_HEAD(&ctx.list);
+ INIT_LIST_HEAD(&ctx.duplicates);
+
+ pd_bo_list = kcalloc(process_info->n_vms,
+ sizeof(struct amdgpu_bo_list_entry),
+ GFP_KERNEL);
+ if (!pd_bo_list)
+ return -ENOMEM;
+
+ i = 0;
+ mutex_lock(&process_info->lock);
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_vm_get_pd_bo(peer_vm, &ctx.list, &pd_bo_list[i++]);
+
+ /* Reserve all BOs and page tables/directory. Add all BOs from
+ * kfd_bo_list to ctx.list
+ */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list.head) {
+ list_add_tail(&mem->resv_list.head, &ctx.list);
+ mem->resv_list.bo = mem->validate_list.bo;
+ mem->resv_list.shared = mem->validate_list.shared;
+ }
+
+ ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
+ false, &duplicate_save);
+ if (ret) {
+ pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
+ goto ttm_reserve_fail;
+ }
+
+ amdgpu_sync_create(&sync_obj);
+
+ /* Validate PDs and PTs */
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto validate_map_fail;
+
+ /* Wait for PD/PTs validate to finish */
+ /* FIXME: I think this isn't needed */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *bo = peer_vm->root.base.bo;
+
+ ttm_bo_wait(&bo->tbo, false, false);
+ }
+
+ /* Validate BOs and map them to GPUVM (update VM page tables). */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list.head) {
+ struct amdgpu_bo *bo = mem->bo;
+ uint32_t domain = mem->domain;
+ struct kfd_bo_va_list *bo_va_entry;
+
+ ret = amdgpu_amdkfd_bo_validate(bo, domain, false);
+ if (ret) {
+ pr_debug("Memory eviction: Validate BOs failed. Try again\n");
+ goto validate_map_fail;
+ }
+
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list,
+ bo_list) {
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry,
+ &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PTE failed. Try again\n");
+ goto validate_map_fail;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync_obj);
+ if (ret) {
+ pr_debug("Memory eviction: update PDs failed. Try again\n");
+ goto validate_map_fail;
+ }
+
+ amdgpu_sync_wait(&sync_obj, false);
+
+ /* Release old eviction fence and create new one, because fence only
+ * goes from unsignaled to signaled, fence cannot be reused.
+ * Use context and mm from the old fence.
+ */
+ new_fence = amdgpu_amdkfd_fence_create(
+ process_info->eviction_fence->base.context,
+ process_info->eviction_fence->mm);
+ if (!new_fence) {
+ pr_err("Failed to create eviction fence\n");
+ ret = -ENOMEM;
+ goto validate_map_fail;
+ }
+ dma_fence_put(&process_info->eviction_fence->base);
+ process_info->eviction_fence = new_fence;
+ *ef = dma_fence_get(&new_fence->base);
+
+ /* Wait for validate to finish and attach new eviction fence */
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list.head)
+ ttm_bo_wait(&mem->bo->tbo, false, false);
+ list_for_each_entry(mem, &process_info->kfd_bo_list,
+ validate_list.head)
+ amdgpu_bo_fence(mem->bo,
+ &process_info->eviction_fence->base, true);
+
+ /* Attach eviction fence to PD / PT BOs */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node) {
+ struct amdgpu_bo *bo = peer_vm->root.base.bo;
+
+ amdgpu_bo_fence(bo, &process_info->eviction_fence->base, true);
+ }
+
+validate_map_fail:
+ ttm_eu_backoff_reservation(&ctx.ticket, &ctx.list);
+ amdgpu_sync_free(&sync_obj);
+ttm_reserve_fail:
+ mutex_unlock(&process_info->lock);
+ kfree(pd_bo_list);
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index ff8efd0f8fd5..a0f48cb9b8f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -114,6 +114,9 @@ union igp_info {
struct atom_integrated_system_info_v1_11 v11;
};
+union umc_info {
+ struct atom_umc_info_v3_1 v31;
+};
/*
* Return vram width from integrated system info table, if available,
* or 0 if not.
@@ -143,6 +146,94 @@ int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
return 0;
}
+static int convert_atom_mem_type_to_vram_type(struct amdgpu_device *adev,
+ int atom_mem_type)
+{
+ int vram_type;
+
+ if (adev->flags & AMD_IS_APU) {
+ switch (atom_mem_type) {
+ case Ddr2MemType:
+ case LpDdr2MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR2;
+ break;
+ case Ddr3MemType:
+ case LpDdr3MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR3;
+ break;
+ case Ddr4MemType:
+ case LpDdr4MemType:
+ vram_type = AMDGPU_VRAM_TYPE_DDR4;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ } else {
+ switch (atom_mem_type) {
+ case ATOM_DGPU_VRAM_TYPE_GDDR5:
+ vram_type = AMDGPU_VRAM_TYPE_GDDR5;
+ break;
+ case ATOM_DGPU_VRAM_TYPE_HBM:
+ vram_type = AMDGPU_VRAM_TYPE_HBM;
+ break;
+ default:
+ vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ break;
+ }
+ }
+
+ return vram_type;
+}
+
+/*
+ * Return vram type from either integrated system info table
+ * or umc info table, if available, or 0 (TYPE_UNKNOWN) if not
+ */
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
+{
+ struct amdgpu_mode_info *mode_info = &adev->mode_info;
+ int index;
+ u16 data_offset, size;
+ union igp_info *igp_info;
+ union umc_info *umc_info;
+ u8 frev, crev;
+ u8 mem_type;
+
+ if (adev->flags & AMD_IS_APU)
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ integratedsysteminfo);
+ else
+ index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
+ umc_info);
+ if (amdgpu_atom_parse_data_header(mode_info->atom_context,
+ index, &size,
+ &frev, &crev, &data_offset)) {
+ if (adev->flags & AMD_IS_APU) {
+ igp_info = (union igp_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 11:
+ mem_type = igp_info->v11.memorytype;
+ return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ default:
+ return 0;
+ }
+ } else {
+ umc_info = (union umc_info *)
+ (mode_info->atom_context->bios + data_offset);
+ switch (crev) {
+ case 1:
+ mem_type = umc_info->v31.vram_type;
+ return convert_atom_mem_type_to_vram_type(adev, mem_type);
+ default:
+ return 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
union firmware_info {
struct atom_firmware_info_v3_1 v31;
};
@@ -151,10 +242,6 @@ union smu_info {
struct atom_smu_info_v3_1 v31;
};
-union umc_info {
- struct atom_umc_info_v3_1 v31;
-};
-
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
index 288b97e54347..7689c961c4ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.h
@@ -28,6 +28,7 @@ bool amdgpu_atomfirmware_gpu_supports_virtualization(struct amdgpu_device *adev)
void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev);
int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev);
+int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev);
int amdgpu_atomfirmware_get_clock_info(struct amdgpu_device *adev);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index e2c3c5ec42d1..1ae5ae8c45a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -568,6 +568,8 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
/* HG _PR3 doesn't seem to work on this A+A weston board */
{ 0x1002, 0x6900, 0x1002, 0x0124, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
+ { 0x1002, 0x67DF, 0x1028, 0x0774, AMDGPU_PX_QUIRK_FORCE_ATPX },
{ 0, 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 63ec1e1bb6aa..02b849be083b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -80,8 +80,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
int time;
n = AMDGPU_BENCHMARK_ITERATIONS;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
- NULL, 0, &sobj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, sdomain, 0,
+ ttm_bo_type_kernel, NULL, &sobj);
if (r) {
goto out_cleanup;
}
@@ -93,8 +93,8 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
if (r) {
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
- NULL, 0, &dobj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, ddomain, 0,
+ ttm_bo_type_kernel, NULL, &dobj);
if (r) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 59089e027f4d..92be7f6de197 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -233,8 +233,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
for (i = 0; i < list->num_entries; i++) {
unsigned priority = list->array[i].priority;
- list_add_tail(&list->array[i].tv.head,
- &bucket[priority]);
+ if (!list->array[i].robj->parent)
+ list_add_tail(&list->array[i].tv.head,
+ &bucket[priority]);
+
list->array[i].user_pages = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 4466f3535e2d..71a57b2f7f04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -24,12 +24,10 @@
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/pci.h>
-#include <linux/acpi.h>
#include <drm/drmP.h>
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
-#include "cgs_linux.h"
#include "atom.h"
#include "amdgpu_ucode.h"
@@ -42,152 +40,6 @@ struct amdgpu_cgs_device {
struct amdgpu_device *adev = \
((struct amdgpu_cgs_device *)cgs_device)->adev
-static void *amdgpu_cgs_register_pp_handle(struct cgs_device *cgs_device,
- int (*call_back_func)(struct amd_pp_init *, void **))
-{
- CGS_FUNC_ADEV;
- struct amd_pp_init pp_init;
- struct amd_powerplay *amd_pp;
-
- if (call_back_func == NULL)
- return NULL;
-
- amd_pp = &(adev->powerplay);
- pp_init.chip_family = adev->family;
- pp_init.chip_id = adev->asic_type;
- pp_init.pm_en = (amdgpu_dpm != 0 && !amdgpu_sriov_vf(adev)) ? true : false;
- pp_init.feature_mask = amdgpu_pp_feature_mask;
- pp_init.device = cgs_device;
- if (call_back_func(&pp_init, &(amd_pp->pp_handle)))
- return NULL;
-
- return adev->powerplay.pp_handle;
-}
-
-static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device,
- enum cgs_gpu_mem_type type,
- uint64_t size, uint64_t align,
- cgs_handle_t *handle)
-{
- CGS_FUNC_ADEV;
- uint16_t flags = 0;
- int ret = 0;
- uint32_t domain = 0;
- struct amdgpu_bo *obj;
-
- /* fail if the alignment is not a power of 2 */
- if (((align != 1) && (align & (align - 1)))
- || size == 0 || align == 0)
- return -EINVAL;
-
-
- switch(type) {
- case CGS_GPU_MEM_TYPE__VISIBLE_CONTIG_FB:
- case CGS_GPU_MEM_TYPE__VISIBLE_FB:
- flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- domain = AMDGPU_GEM_DOMAIN_VRAM;
- break;
- case CGS_GPU_MEM_TYPE__INVISIBLE_CONTIG_FB:
- case CGS_GPU_MEM_TYPE__INVISIBLE_FB:
- flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
- domain = AMDGPU_GEM_DOMAIN_VRAM;
- break;
- case CGS_GPU_MEM_TYPE__GART_CACHEABLE:
- domain = AMDGPU_GEM_DOMAIN_GTT;
- break;
- case CGS_GPU_MEM_TYPE__GART_WRITECOMBINE:
- flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- domain = AMDGPU_GEM_DOMAIN_GTT;
- break;
- default:
- return -EINVAL;
- }
-
-
- *handle = 0;
-
- ret = amdgpu_bo_create(adev, size, align, true, domain, flags,
- NULL, NULL, 0, &obj);
- if (ret) {
- DRM_ERROR("(%d) bo create failed\n", ret);
- return ret;
- }
- *handle = (cgs_handle_t)obj;
-
- return ret;
-}
-
-static int amdgpu_cgs_free_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
- struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
- if (obj) {
- int r = amdgpu_bo_reserve(obj, true);
- if (likely(r == 0)) {
- amdgpu_bo_kunmap(obj);
- amdgpu_bo_unpin(obj);
- amdgpu_bo_unreserve(obj);
- }
- amdgpu_bo_unref(&obj);
-
- }
- return 0;
-}
-
-static int amdgpu_cgs_gmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
- uint64_t *mcaddr)
-{
- int r;
- struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
-
- WARN_ON_ONCE(obj->placement.num_placement > 1);
-
- r = amdgpu_bo_reserve(obj, true);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_pin(obj, obj->preferred_domains, mcaddr);
- amdgpu_bo_unreserve(obj);
- return r;
-}
-
-static int amdgpu_cgs_gunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
- int r;
- struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
- r = amdgpu_bo_reserve(obj, true);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_unpin(obj);
- amdgpu_bo_unreserve(obj);
- return r;
-}
-
-static int amdgpu_cgs_kmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle,
- void **map)
-{
- int r;
- struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
- r = amdgpu_bo_reserve(obj, true);
- if (unlikely(r != 0))
- return r;
- r = amdgpu_bo_kmap(obj, map);
- amdgpu_bo_unreserve(obj);
- return r;
-}
-
-static int amdgpu_cgs_kunmap_gpu_mem(struct cgs_device *cgs_device, cgs_handle_t handle)
-{
- int r;
- struct amdgpu_bo *obj = (struct amdgpu_bo *)handle;
- r = amdgpu_bo_reserve(obj, true);
- if (unlikely(r != 0))
- return r;
- amdgpu_bo_kunmap(obj);
- amdgpu_bo_unreserve(obj);
- return r;
-}
static uint32_t amdgpu_cgs_read_register(struct cgs_device *cgs_device, unsigned offset)
{
@@ -329,109 +181,6 @@ static int amdgpu_cgs_atom_exec_cmd_table(struct cgs_device *cgs_device, unsigne
adev->mode_info.atom_context, table, args);
}
-struct cgs_irq_params {
- unsigned src_id;
- cgs_irq_source_set_func_t set;
- cgs_irq_handler_func_t handler;
- void *private_data;
-};
-
-static int cgs_set_irq_state(struct amdgpu_device *adev,
- struct amdgpu_irq_src *src,
- unsigned type,
- enum amdgpu_interrupt_state state)
-{
- struct cgs_irq_params *irq_params =
- (struct cgs_irq_params *)src->data;
- if (!irq_params)
- return -EINVAL;
- if (!irq_params->set)
- return -EINVAL;
- return irq_params->set(irq_params->private_data,
- irq_params->src_id,
- type,
- (int)state);
-}
-
-static int cgs_process_irq(struct amdgpu_device *adev,
- struct amdgpu_irq_src *source,
- struct amdgpu_iv_entry *entry)
-{
- struct cgs_irq_params *irq_params =
- (struct cgs_irq_params *)source->data;
- if (!irq_params)
- return -EINVAL;
- if (!irq_params->handler)
- return -EINVAL;
- return irq_params->handler(irq_params->private_data,
- irq_params->src_id,
- entry->iv_entry);
-}
-
-static const struct amdgpu_irq_src_funcs cgs_irq_funcs = {
- .set = cgs_set_irq_state,
- .process = cgs_process_irq,
-};
-
-static int amdgpu_cgs_add_irq_source(void *cgs_device,
- unsigned client_id,
- unsigned src_id,
- unsigned num_types,
- cgs_irq_source_set_func_t set,
- cgs_irq_handler_func_t handler,
- void *private_data)
-{
- CGS_FUNC_ADEV;
- int ret = 0;
- struct cgs_irq_params *irq_params;
- struct amdgpu_irq_src *source =
- kzalloc(sizeof(struct amdgpu_irq_src), GFP_KERNEL);
- if (!source)
- return -ENOMEM;
- irq_params =
- kzalloc(sizeof(struct cgs_irq_params), GFP_KERNEL);
- if (!irq_params) {
- kfree(source);
- return -ENOMEM;
- }
- source->num_types = num_types;
- source->funcs = &cgs_irq_funcs;
- irq_params->src_id = src_id;
- irq_params->set = set;
- irq_params->handler = handler;
- irq_params->private_data = private_data;
- source->data = (void *)irq_params;
- ret = amdgpu_irq_add_id(adev, client_id, src_id, source);
- if (ret) {
- kfree(irq_params);
- kfree(source);
- }
-
- return ret;
-}
-
-static int amdgpu_cgs_irq_get(void *cgs_device, unsigned client_id,
- unsigned src_id, unsigned type)
-{
- CGS_FUNC_ADEV;
-
- if (!adev->irq.client[client_id].sources)
- return -EINVAL;
-
- return amdgpu_irq_get(adev, adev->irq.client[client_id].sources[src_id], type);
-}
-
-static int amdgpu_cgs_irq_put(void *cgs_device, unsigned client_id,
- unsigned src_id, unsigned type)
-{
- CGS_FUNC_ADEV;
-
- if (!adev->irq.client[client_id].sources)
- return -EINVAL;
-
- return amdgpu_irq_put(adev, adev->irq.client[client_id].sources[src_id], type);
-}
-
static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
@@ -801,11 +550,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
else
strcpy(fw_name, "amdgpu/vega10_smc.bin");
break;
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_RAVEN:
- adev->pm.fw_version = info->version;
- return 0;
+ case CHIP_VEGA12:
+ strcpy(fw_name, "amdgpu/vega12_smc.bin");
+ break;
default:
DRM_ERROR("SMC firmware not supported\n");
return -EINVAL;
@@ -857,61 +604,6 @@ static int amdgpu_cgs_is_virtualization_enabled(void *cgs_device)
return amdgpu_sriov_vf(adev);
}
-static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
- struct cgs_system_info *sys_info)
-{
- CGS_FUNC_ADEV;
-
- if (NULL == sys_info)
- return -ENODEV;
-
- if (sizeof(struct cgs_system_info) != sys_info->size)
- return -ENODEV;
-
- switch (sys_info->info_id) {
- case CGS_SYSTEM_INFO_ADAPTER_BDF_ID:
- sys_info->value = adev->pdev->devfn | (adev->pdev->bus->number << 8);
- break;
- case CGS_SYSTEM_INFO_PCIE_GEN_INFO:
- sys_info->value = adev->pm.pcie_gen_mask;
- break;
- case CGS_SYSTEM_INFO_PCIE_MLW:
- sys_info->value = adev->pm.pcie_mlw_mask;
- break;
- case CGS_SYSTEM_INFO_PCIE_DEV:
- sys_info->value = adev->pdev->device;
- break;
- case CGS_SYSTEM_INFO_PCIE_REV:
- sys_info->value = adev->pdev->revision;
- break;
- case CGS_SYSTEM_INFO_CG_FLAGS:
- sys_info->value = adev->cg_flags;
- break;
- case CGS_SYSTEM_INFO_PG_FLAGS:
- sys_info->value = adev->pg_flags;
- break;
- case CGS_SYSTEM_INFO_GFX_CU_INFO:
- sys_info->value = adev->gfx.cu_info.number;
- break;
- case CGS_SYSTEM_INFO_GFX_SE_INFO:
- sys_info->value = adev->gfx.config.max_shader_engines;
- break;
- case CGS_SYSTEM_INFO_PCIE_SUB_SYS_ID:
- sys_info->value = adev->pdev->subsystem_device;
- break;
- case CGS_SYSTEM_INFO_PCIE_SUB_SYS_VENDOR_ID:
- sys_info->value = adev->pdev->subsystem_vendor;
- break;
- case CGS_SYSTEM_INFO_PCIE_BUS_DEVFN:
- sys_info->value = adev->pdev->devfn;
- break;
- default:
- return -ENODEV;
- }
-
- return 0;
-}
-
static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
struct cgs_display_info *info)
{
@@ -922,12 +614,9 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
return -EINVAL;
mode_info = info->mode_info;
- if (mode_info) {
+ if (mode_info)
/* if the displays are off, vblank time is max */
mode_info->vblank_time_us = 0xffffffff;
- /* always set the reference clock */
- mode_info->ref_clock = adev->clock.spll.reference_freq;
- }
if (!amdgpu_device_has_dc_support(adev)) {
struct amdgpu_crtc *amdgpu_crtc;
@@ -953,6 +642,11 @@ static int amdgpu_cgs_get_active_displays_info(struct cgs_device *cgs_device,
(amdgpu_crtc->v_border * 2);
mode_info->vblank_time_us = vblank_lines * line_time_us;
mode_info->refresh_rate = drm_mode_vrefresh(&amdgpu_crtc->hw_mode);
+ /* We have issues with mclk switching with refresh rates
+ * over 120 Hz on the non-DC code.
+ */
+ if (mode_info->refresh_rate > 120)
+ mode_info->vblank_time_us = 0;
mode_info = NULL;
}
}
@@ -977,223 +671,7 @@ static int amdgpu_cgs_notify_dpm_enabled(struct cgs_device *cgs_device, bool ena
return 0;
}
-/** \brief evaluate acpi namespace object, handle or pathname must be valid
- * \param cgs_device
- * \param info input/output arguments for the control method
- * \return status
- */
-
-#if defined(CONFIG_ACPI)
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
- struct cgs_acpi_method_info *info)
-{
- CGS_FUNC_ADEV;
- acpi_handle handle;
- struct acpi_object_list input;
- struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
- union acpi_object *params, *obj;
- uint8_t name[5] = {'\0'};
- struct cgs_acpi_method_argument *argument;
- uint32_t i, count;
- acpi_status status;
- int result;
-
- handle = ACPI_HANDLE(&adev->pdev->dev);
- if (!handle)
- return -ENODEV;
-
- memset(&input, 0, sizeof(struct acpi_object_list));
-
- /* validate input info */
- if (info->size != sizeof(struct cgs_acpi_method_info))
- return -EINVAL;
-
- input.count = info->input_count;
- if (info->input_count > 0) {
- if (info->pinput_argument == NULL)
- return -EINVAL;
- argument = info->pinput_argument;
- for (i = 0; i < info->input_count; i++) {
- if (((argument->type == ACPI_TYPE_STRING) ||
- (argument->type == ACPI_TYPE_BUFFER)) &&
- (argument->pointer == NULL))
- return -EINVAL;
- argument++;
- }
- }
-
- if (info->output_count > 0) {
- if (info->poutput_argument == NULL)
- return -EINVAL;
- argument = info->poutput_argument;
- for (i = 0; i < info->output_count; i++) {
- if (((argument->type == ACPI_TYPE_STRING) ||
- (argument->type == ACPI_TYPE_BUFFER))
- && (argument->pointer == NULL))
- return -EINVAL;
- argument++;
- }
- }
-
- /* The path name passed to acpi_evaluate_object should be null terminated */
- if ((info->field & CGS_ACPI_FIELD_METHOD_NAME) != 0) {
- strncpy(name, (char *)&(info->name), sizeof(uint32_t));
- name[4] = '\0';
- }
-
- /* parse input parameters */
- if (input.count > 0) {
- input.pointer = params =
- kzalloc(sizeof(union acpi_object) * input.count, GFP_KERNEL);
- if (params == NULL)
- return -EINVAL;
-
- argument = info->pinput_argument;
-
- for (i = 0; i < input.count; i++) {
- params->type = argument->type;
- switch (params->type) {
- case ACPI_TYPE_INTEGER:
- params->integer.value = argument->value;
- break;
- case ACPI_TYPE_STRING:
- params->string.length = argument->data_length;
- params->string.pointer = argument->pointer;
- break;
- case ACPI_TYPE_BUFFER:
- params->buffer.length = argument->data_length;
- params->buffer.pointer = argument->pointer;
- break;
- default:
- break;
- }
- params++;
- argument++;
- }
- }
-
- /* parse output info */
- count = info->output_count;
- argument = info->poutput_argument;
-
- /* evaluate the acpi method */
- status = acpi_evaluate_object(handle, name, &input, &output);
-
- if (ACPI_FAILURE(status)) {
- result = -EIO;
- goto free_input;
- }
-
- /* return the output info */
- obj = output.pointer;
-
- if (count > 1) {
- if ((obj->type != ACPI_TYPE_PACKAGE) ||
- (obj->package.count != count)) {
- result = -EIO;
- goto free_obj;
- }
- params = obj->package.elements;
- } else
- params = obj;
-
- if (params == NULL) {
- result = -EIO;
- goto free_obj;
- }
-
- for (i = 0; i < count; i++) {
- if (argument->type != params->type) {
- result = -EIO;
- goto free_obj;
- }
- switch (params->type) {
- case ACPI_TYPE_INTEGER:
- argument->value = params->integer.value;
- break;
- case ACPI_TYPE_STRING:
- if ((params->string.length != argument->data_length) ||
- (params->string.pointer == NULL)) {
- result = -EIO;
- goto free_obj;
- }
- strncpy(argument->pointer,
- params->string.pointer,
- params->string.length);
- break;
- case ACPI_TYPE_BUFFER:
- if (params->buffer.pointer == NULL) {
- result = -EIO;
- goto free_obj;
- }
- memcpy(argument->pointer,
- params->buffer.pointer,
- argument->data_length);
- break;
- default:
- break;
- }
- argument++;
- params++;
- }
-
- result = 0;
-free_obj:
- kfree(obj);
-free_input:
- kfree((void *)input.pointer);
- return result;
-}
-#else
-static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
- struct cgs_acpi_method_info *info)
-{
- return -EIO;
-}
-#endif
-
-static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
- uint32_t acpi_method,
- uint32_t acpi_function,
- void *pinput, void *poutput,
- uint32_t output_count,
- uint32_t input_size,
- uint32_t output_size)
-{
- struct cgs_acpi_method_argument acpi_input[2] = { {0}, {0} };
- struct cgs_acpi_method_argument acpi_output = {0};
- struct cgs_acpi_method_info info = {0};
-
- acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
- acpi_input[0].data_length = sizeof(uint32_t);
- acpi_input[0].value = acpi_function;
-
- acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
- acpi_input[1].data_length = input_size;
- acpi_input[1].pointer = pinput;
-
- acpi_output.type = CGS_ACPI_TYPE_BUFFER;
- acpi_output.data_length = output_size;
- acpi_output.pointer = poutput;
-
- info.size = sizeof(struct cgs_acpi_method_info);
- info.field = CGS_ACPI_FIELD_METHOD_NAME | CGS_ACPI_FIELD_INPUT_ARGUMENT_COUNT;
- info.input_count = 2;
- info.name = acpi_method;
- info.pinput_argument = acpi_input;
- info.output_count = output_count;
- info.poutput_argument = &acpi_output;
-
- return amdgpu_cgs_acpi_eval_object(cgs_device, &info);
-}
-
static const struct cgs_ops amdgpu_cgs_ops = {
- .alloc_gpu_mem = amdgpu_cgs_alloc_gpu_mem,
- .free_gpu_mem = amdgpu_cgs_free_gpu_mem,
- .gmap_gpu_mem = amdgpu_cgs_gmap_gpu_mem,
- .gunmap_gpu_mem = amdgpu_cgs_gunmap_gpu_mem,
- .kmap_gpu_mem = amdgpu_cgs_kmap_gpu_mem,
- .kunmap_gpu_mem = amdgpu_cgs_kunmap_gpu_mem,
.read_register = amdgpu_cgs_read_register,
.write_register = amdgpu_cgs_write_register,
.read_ind_register = amdgpu_cgs_read_ind_register,
@@ -1208,18 +686,9 @@ static const struct cgs_ops amdgpu_cgs_ops = {
.set_clockgating_state = amdgpu_cgs_set_clockgating_state,
.get_active_displays_info = amdgpu_cgs_get_active_displays_info,
.notify_dpm_enabled = amdgpu_cgs_notify_dpm_enabled,
- .call_acpi_method = amdgpu_cgs_call_acpi_method,
- .query_system_info = amdgpu_cgs_query_system_info,
.is_virtualization_enabled = amdgpu_cgs_is_virtualization_enabled,
.enter_safe_mode = amdgpu_cgs_enter_safe_mode,
.lock_grbm_idx = amdgpu_cgs_lock_grbm_idx,
- .register_pp_handle = amdgpu_cgs_register_pp_handle,
-};
-
-static const struct cgs_os_ops amdgpu_cgs_os_ops = {
- .add_irq_source = amdgpu_cgs_add_irq_source,
- .irq_get = amdgpu_cgs_irq_get,
- .irq_put = amdgpu_cgs_irq_put
};
struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
@@ -1233,7 +702,6 @@ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev)
}
cgs_device->base.ops = &amdgpu_cgs_ops;
- cgs_device->base.os_ops = &amdgpu_cgs_os_ops;
cgs_device->adev = adev;
return (struct cgs_device *)cgs_device;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 8ca3783f2deb..96501ff0e55b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -69,25 +69,18 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
/* don't do anything if sink is not display port, i.e.,
* passive dp->(dvi|hdmi) adaptor
*/
- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
- int saved_dpms = connector->dpms;
- /* Only turn off the display if it's physically disconnected */
- if (!amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd)) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- } else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
- /* Don't try to start link training before we
- * have the dpcd */
- if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
- return;
-
- /* set it to OFF so that drm_helper_connector_dpms()
- * won't return immediately since the current state
- * is ON at this point.
- */
- connector->dpms = DRM_MODE_DPMS_OFF;
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
- connector->dpms = saved_dpms;
+ if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT &&
+ amdgpu_display_hpd_sense(adev, amdgpu_connector->hpd.hpd) &&
+ amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
+ /* Don't start link training before we have the DPCD */
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ return;
+
+ /* Turn the connector off and back on immediately, which
+ * will trigger link training
+ */
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
}
}
@@ -736,9 +729,11 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (encoder) {
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
@@ -757,8 +752,12 @@ amdgpu_connector_lvds_detect(struct drm_connector *connector, bool force)
/* check acpi lid status ??? */
amdgpu_connector_update_scratch_regs(connector, ret);
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
+
return ret;
}
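
The guard introduced here (and in the other detect callbacks below) avoids a potential deadlock: runtime suspend flushes the output-poll worker, so calling pm_runtime_get_sync() from inside that worker could end up waiting on itself. A minimal sketch of the resulting pattern, with a hypothetical callback name (example_detect is not part of the patch):

    static enum drm_connector_status
    example_detect(struct drm_connector *connector, bool force)
    {
            enum drm_connector_status status = connector_status_disconnected;

            /* only take a runtime-PM reference outside the poll worker;
             * the poll worker only runs while the device is awake anyway
             */
            if (!drm_kms_helper_is_poll_worker()) {
                    if (pm_runtime_get_sync(connector->dev->dev) < 0)
                            return connector_status_disconnected;
            }

            /* ... probe DDC/HPD and set status ... */

            if (!drm_kms_helper_is_poll_worker()) {
                    pm_runtime_mark_last_busy(connector->dev->dev);
                    pm_runtime_put_autosuspend(connector->dev->dev);
            }
            return status;
    }
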
@@ -868,16 +867,18 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
encoder = amdgpu_connector_best_single_encoder(connector);
if (!encoder)
ret = connector_status_disconnected;
if (amdgpu_connector->ddc_bus)
- dret = amdgpu_ddc_probe(amdgpu_connector, false);
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);
@@ -924,8 +925,10 @@ amdgpu_connector_vga_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -988,9 +991,11 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
@@ -998,7 +1003,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
}
if (amdgpu_connector->ddc_bus)
- dret = amdgpu_ddc_probe(amdgpu_connector, false);
+ dret = amdgpu_display_ddc_probe(amdgpu_connector, false);
if (dret) {
amdgpu_connector->detected_by_load = false;
amdgpu_connector_free_edid(connector);
@@ -1115,8 +1120,10 @@ out:
amdgpu_connector_update_scratch_regs(connector, ret);
exit:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
@@ -1359,9 +1366,11 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector);
int r;
- r = pm_runtime_get_sync(connector->dev->dev);
- if (r < 0)
- return connector_status_disconnected;
+ if (!drm_kms_helper_is_poll_worker()) {
+ r = pm_runtime_get_sync(connector->dev->dev);
+ if (r < 0)
+ return connector_status_disconnected;
+ }
if (!force && amdgpu_connector_check_hpd_status_unchanged(connector)) {
ret = connector->status;
@@ -1401,7 +1410,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
/* setup ddc on the bridge */
amdgpu_atombios_encoder_setup_ext_encoder_ddc(encoder);
/* bridge chips are always aux */
- if (amdgpu_ddc_probe(amdgpu_connector, true)) /* try DDC */
+ /* try DDC */
+ if (amdgpu_display_ddc_probe(amdgpu_connector, true))
ret = connector_status_connected;
else if (amdgpu_connector->dac_load_detect) { /* try load detection */
const struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
@@ -1421,7 +1431,8 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
ret = connector_status_connected;
} else {
/* try non-aux ddc (DP to DVI/HDMI/etc. adapter) */
- if (amdgpu_ddc_probe(amdgpu_connector, false))
+ if (amdgpu_display_ddc_probe(amdgpu_connector,
+ false))
ret = connector_status_connected;
}
}
@@ -1429,8 +1440,10 @@ amdgpu_connector_dp_detect(struct drm_connector *connector, bool force)
amdgpu_connector_update_scratch_regs(connector, ret);
out:
- pm_runtime_mark_last_busy(connector->dev->dev);
- pm_runtime_put_autosuspend(connector->dev->dev);
+ if (!drm_kms_helper_is_poll_worker()) {
+ pm_runtime_mark_last_busy(connector->dev->dev);
+ pm_runtime_put_autosuspend(connector->dev->dev);
+ }
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index e80fc38141b5..dc34b50e6b29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -257,7 +257,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
- total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
+ total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@@ -302,8 +302,8 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
- if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
- u64 total_vis_vram = adev->mc.visible_vram_size;
+ if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
+ u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -346,8 +346,8 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
- .allow_reserved_eviction = false,
- .resv = bo->tbo.resv
+ .resv = bo->tbo.resv,
+ .flags = 0
};
uint32_t domain;
int r;
@@ -359,7 +359,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
- if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@@ -381,9 +381,9 @@ retry:
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
- if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+ bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
@@ -437,9 +437,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
update_bytes_moved_vis =
- adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
+ bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT;
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
bytes_moved = atomic64_read(&adev->num_bytes_moved) -
@@ -542,7 +542,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
- if (p->uf_entry.robj)
+ if (p->uf_entry.robj && !p->uf_entry.robj->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index ee76b468774a..369beb5041a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -767,10 +767,21 @@ static int amdgpu_debugfs_evict_vram(struct seq_file *m, void *data)
return 0;
}
+static int amdgpu_debugfs_evict_gtt(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct amdgpu_device *adev = dev->dev_private;
+
+ seq_printf(m, "(%d)\n", ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_TT));
+ return 0;
+}
+
static const struct drm_info_list amdgpu_debugfs_list[] = {
{"amdgpu_vbios", amdgpu_debugfs_get_vbios_dump},
{"amdgpu_test_ib", &amdgpu_debugfs_test_ib},
- {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram}
+ {"amdgpu_evict_vram", &amdgpu_debugfs_evict_vram},
+ {"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
};
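
Both eviction nodes are read-triggered: reading the file runs the eviction and prints the return code of ttm_bo_evict_mm(). A hedged userspace sketch; the debugfs mount point and DRM minor number are assumptions:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[32];
            ssize_t n;
            int fd = open("/sys/kernel/debug/dri/0/amdgpu_evict_gtt", O_RDONLY);

            if (fd < 0)
                    return 1;
            n = read(fd, buf, sizeof(buf) - 1);     /* triggers the eviction */
            if (n > 0) {
                    buf[n] = '\0';
                    printf("evict_gtt returned %s", buf);
            }
            close(fd);
            return 0;
    }
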
int amdgpu_debugfs_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 00a50cc5ec9a..34af664b9f93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -59,6 +59,7 @@
#include "amdgpu_pm.h"
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
+MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@@ -83,10 +84,21 @@ static const char *amdgpu_asic_name[] = {
"POLARIS11",
"POLARIS12",
"VEGA10",
+ "VEGA12",
"RAVEN",
"LAST",
};
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
+
+/**
+ * amdgpu_device_is_px - Is the device a dGPU with HG/PX power control
+ *
+ * @dev: drm_device pointer
+ *
+ * Returns true if the device is a dGPU with HG/PX power control,
+ * otherwise returns false.
+ */
bool amdgpu_device_is_px(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -99,6 +111,15 @@ bool amdgpu_device_is_px(struct drm_device *dev)
/*
* MMIO register access helper functions.
*/
+/**
+ * amdgpu_mm_rreg - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @acc_flags: access flags which require special behavior
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
uint32_t acc_flags)
{
@@ -121,6 +142,58 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
return ret;
}
+/*
+ * Byte-wide MMIO register read helper
+ * @offset: byte offset from MMIO start
+ */
+
+/**
+ * amdgpu_mm_rreg8 - read a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ *
+ * Returns the 8 bit value from the offset specified.
+ */
+uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
+{
+ if (offset < adev->rmmio_size)
+ return readb(adev->rmmio + offset);
+ BUG();
+}
+
+/*
+ * Byte-wide MMIO register write helper
+ * @offset: byte offset from MMIO start
+ * @value: the value to be written to the register
+ */
+/**
+ * amdgpu_mm_wreg8 - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @offset: byte aligned register offset
+ * @value: 8 bit value to write
+ *
+ * Writes the value specified to the offset specified.
+ */
+void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
+{
+ if (offset < adev->rmmio_size)
+ writeb(value, adev->rmmio + offset);
+ else
+ BUG();
+}
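
A hedged usage sketch for the new byte-wide helpers; the offset and flag bit are made-up examples, not real register definitions:

    static void example_set_flag(struct amdgpu_device *adev, uint32_t byte_off)
    {
            uint8_t v = amdgpu_mm_rreg8(adev, byte_off);

            v |= 0x01;                         /* set a hypothetical flag bit */
            amdgpu_mm_wreg8(adev, byte_off, v);
    }
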
+
+/**
+ * amdgpu_mm_wreg - write to a memory mapped IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ * @acc_flags: access flags which require special behavior
+ *
+ * Writes the value specified to the offset specified.
+ */
void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
uint32_t acc_flags)
{
@@ -149,6 +222,14 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
}
}
+/**
+ * amdgpu_io_rreg - read an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ *
+ * Returns the 32 bit value from the offset specified.
+ */
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
{
if ((reg * 4) < adev->rio_mem_size)
@@ -159,6 +240,15 @@ u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg)
}
}
+/**
+ * amdgpu_io_wreg - write to an IO register
+ *
+ * @adev: amdgpu_device pointer
+ * @reg: dword aligned register offset
+ * @v: 32 bit value to write to the register
+ *
+ * Writes the value specified to the offset specified.
+ */
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
{
if (adev->asic_type >= CHIP_VEGA10 && reg == 0) {
@@ -327,6 +417,14 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
BUG();
}
+/**
+ * amdgpu_device_vram_scratch_init - allocate the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Allocates a scratch page of VRAM for use by various things in the
+ * driver.
+ */
static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
{
return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE,
@@ -336,6 +434,13 @@ static int amdgpu_device_vram_scratch_init(struct amdgpu_device *adev)
(void **)&adev->vram_scratch.ptr);
}
+/**
+ * amdgpu_device_vram_scratch_fini - Free the VRAM scratch page
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Frees the VRAM scratch page.
+ */
static void amdgpu_device_vram_scratch_fini(struct amdgpu_device *adev)
{
amdgpu_bo_free_kernel(&adev->vram_scratch.robj, NULL, NULL);
@@ -377,6 +482,14 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
}
}
+/**
+ * amdgpu_device_pci_config_reset - reset the GPU
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Resets the GPU using the pci config reset sequence.
+ * Only applicable to asics prior to vega10.
+ */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
@@ -492,7 +605,7 @@ static int amdgpu_device_wb_init(struct amdgpu_device *adev)
memset(&adev->wb.used, 0, sizeof(adev->wb.used));
/* clear wb memory */
- memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t));
+ memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
}
return 0;
@@ -530,12 +643,14 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
*/
void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
{
+ wb >>= 3;
if (wb < adev->wb.num_wb)
- __clear_bit(wb >> 3, adev->wb.used);
+ __clear_bit(wb, adev->wb.used);
}
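
With this change a writeback handle is a dword offset rather than a slot index: each bit in adev->wb.used covers an 8-dword (32-byte) slot, wb_get presumably hands out slot << 3, and wb_free shifts the handle back down before clearing the bit. A sketch of the round trip under that assumption:

    static int example_wb_roundtrip(struct amdgpu_device *adev)
    {
            u32 wb;
            int r = amdgpu_device_wb_get(adev, &wb);

            if (r)
                    return r;
            /* wb is in dword units: the GPU writes to gpu_addr + wb * 4,
             * the CPU reads the same dword back through adev->wb.wb[wb]
             */
            (void)adev->wb.wb[wb];
            amdgpu_device_wb_free(adev, wb);   /* clears bit wb >> 3 */
            return 0;
    }
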
/**
* amdgpu_device_vram_location - try to find VRAM location
+ *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
* @base: base address at which to put VRAM
@@ -544,7 +659,7 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
* as parameter.
*/
void amdgpu_device_vram_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc, u64 base)
+ struct amdgpu_gmc *mc, u64 base)
{
uint64_t limit = (uint64_t)amdgpu_vram_limit << 20;
@@ -559,6 +674,7 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
/**
* amdgpu_device_gart_location - try to find GTT location
+ *
 * @adev: amdgpu device structure holding all necessary information
 * @mc: memory controller structure holding memory information
*
@@ -570,11 +686,11 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
* FIXME: when reducing GTT size align new size on power of 2.
*/
void amdgpu_device_gart_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc)
+ struct amdgpu_gmc *mc)
{
u64 size_af, size_bf;
- size_af = adev->mc.mc_mask - mc->vram_end;
+ size_af = adev->gmc.mc_mask - mc->vram_end;
size_bf = mc->vram_start;
if (size_bf > size_af) {
if (mc->gart_size > size_bf) {
@@ -608,7 +724,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
*/
int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
{
- u64 space_needed = roundup_pow_of_two(adev->mc.real_vram_size);
+ u64 space_needed = roundup_pow_of_two(adev->gmc.real_vram_size);
u32 rbar_size = order_base_2(((space_needed >> 20) | 1)) - 1;
struct pci_bus *root;
struct resource *res;
@@ -745,6 +861,16 @@ static unsigned int amdgpu_device_vga_set_decode(void *cookie, bool state)
return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
}
+/**
+ * amdgpu_device_check_block_size - validate the vm block size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm block size specified via module parameter.
+ * The vm block size defines number of bits in page table versus page directory,
+ * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
+ * page table and the remaining bits are in the page directory.
+ */
static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
{
/* defines number of bits in page table versus page directory,
@@ -760,6 +886,14 @@ static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
}
}
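
A worked example of the bit split described above, with illustrative numbers: for a 64 GB (36-bit) VM space and 4 KB pages, 12 bits are the page offset, vm_block_size bits index the page table, and the remainder indexes the page directory:

    static unsigned int example_pd_bits(unsigned int vm_total_bits,
                                        unsigned int vm_block_size)
    {
            const unsigned int page_offset_bits = 12;      /* 4 KB pages */

            /* e.g. 36 total bits, block size 9: 36 - 12 - 9 = 15 PD bits */
            return vm_total_bits - page_offset_bits - vm_block_size;
    }
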
+/**
+ * amdgpu_device_check_vm_size - validate the vm size
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Validates the vm size in GB specified via module parameter.
+ * The VM size is the size of the GPU virtual memory space in GB.
+ */
static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
{
/* no need to check the default value */
@@ -829,6 +963,8 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
 dev_warn(adev->dev, "lockup_timeout must be > 0, adjusting to 10000\n");
amdgpu_lockup_timeout = 10000;
}
+
+ adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
}
/**
@@ -892,6 +1028,17 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
.can_switch = amdgpu_switcheroo_can_switch,
};
+/**
+ * amdgpu_device_ip_set_clockgating_state - set the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: clockgating state (gate or ungate)
+ *
+ * Sets the requested clockgating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
enum amd_ip_block_type block_type,
enum amd_clockgating_state state)
@@ -914,6 +1061,17 @@ int amdgpu_device_ip_set_clockgating_state(struct amdgpu_device *adev,
return r;
}
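
A hedged caller sketch: gate clocks for every instance of one IP type and report the last error, which is what the helper above returns; example_gate_gfx is hypothetical:

    static void example_gate_gfx(struct amdgpu_device *adev)
    {
            int r = amdgpu_device_ip_set_clockgating_state(adev,
                                                           AMD_IP_BLOCK_TYPE_GFX,
                                                           AMD_CG_STATE_GATE);

            if (r)
                    dev_err(adev->dev, "failed to gate GFX clocks (%d)\n", r);
    }
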
+/**
+ * amdgpu_device_ip_set_powergating_state - set the PG state
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ * @state: powergating state (gate or ungate)
+ *
+ * Sets the requested powergating state for all instances of
+ * the hardware IP specified.
+ * Returns the error code from the last instance.
+ */
int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
enum amd_ip_block_type block_type,
enum amd_powergating_state state)
@@ -936,6 +1094,17 @@ int amdgpu_device_ip_set_powergating_state(struct amdgpu_device *adev,
return r;
}
+/**
+ * amdgpu_device_ip_get_clockgating_state - get the CG state
+ *
+ * @adev: amdgpu_device pointer
+ * @flags: clockgating feature flags
+ *
+ * Walks the list of IPs on the device and updates the clockgating
+ * flags for each IP.
+ * Updates @flags with the feature flags for each hardware IP where
+ * clockgating is enabled.
+ */
void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
u32 *flags)
{
@@ -949,6 +1118,15 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
}
}
+/**
+ * amdgpu_device_ip_wait_for_idle - wait for idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Waits for the requested hardware IP to be idle.
+ * Returns 0 for success or a negative error code on failure.
+ */
int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type)
{
@@ -968,6 +1146,15 @@ int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
}
+/**
+ * amdgpu_device_ip_is_idle - is the hardware IP idle
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Check if the hardware IP is idle or not.
+ * Returns true if the IP is idle, false if not.
+ */
bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
enum amd_ip_block_type block_type)
{
@@ -983,6 +1170,15 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
}
+/**
+ * amdgpu_device_ip_get_ip_block - get a hw IP pointer
+ *
+ * @adev: amdgpu_device pointer
+ * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
+ *
+ * Returns a pointer to the hardware IP block structure
+ * if it exists for the asic, otherwise NULL.
+ */
struct amdgpu_ip_block *
amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
enum amd_ip_block_type type)
@@ -1036,7 +1232,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
if (!ip_block_version)
return -EINVAL;
- DRM_DEBUG("add ip block number %d <%s>\n", adev->num_ip_blocks,
+ DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
ip_block_version->funcs->name);
adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
@@ -1044,6 +1240,18 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
return 0;
}
+/**
+ * amdgpu_device_enable_virtual_display - enable virtual display feature
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Enables the virtual display feature if the user has enabled it via
+ * the module parameter virtual_display. This feature provides a virtual
+ * display hardware on headless boards or in virtualized environments.
+ * This function parses and validates the configuration string specified by
+ * the user and configures the virtual display configuration (number of
+ * virtual connectors, crtcs, etc.) specified.
+ */
static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
{
adev->enable_virtual_display = false;
@@ -1089,6 +1297,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Parses the asic configuration parameters specified in the gpu info
+ * firmware and makes them available to the driver for use in configuring
+ * the asic.
+ * Returns 0 on success, -EINVAL on failure.
+ */
static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
{
const char *chip_name;
@@ -1126,6 +1344,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
case CHIP_VEGA10:
chip_name = "vega10";
break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -1187,6 +1408,16 @@ out:
return err;
}
+/**
+ * amdgpu_device_ip_early_init - run early init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Early initialization pass for hardware IPs. The hardware IPs that make
+ * up each asic are discovered and each IP's early_init callback is run. This
+ * is the first stage in initializing the asic.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
{
int i, r;
@@ -1239,8 +1470,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return r;
break;
#endif
- case CHIP_VEGA10:
- case CHIP_RAVEN:
+ case CHIP_VEGA10:
+ case CHIP_VEGA12:
+ case CHIP_RAVEN:
if (adev->asic_type == CHIP_RAVEN)
adev->family = AMDGPU_FAMILY_RV;
else
@@ -1296,6 +1528,17 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_init - run init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the sw_init and hw_init callbacks
+ * are run. sw_init initializes the software state associated with each IP
+ * and hw_init initializes the hardware associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_init(struct amdgpu_device *adev)
{
int i, r;
@@ -1310,6 +1553,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
return r;
}
adev->ip_blocks[i].status.sw = true;
+
/* need to do gmc hw init early so we can allocate gpu mem */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
r = amdgpu_device_vram_scratch_init(adev);
@@ -1343,8 +1587,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.sw)
continue;
- /* gmc hw init is done early */
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC)
+ if (adev->ip_blocks[i].status.hw)
continue;
r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
if (r) {
@@ -1363,27 +1606,61 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Snapshots the GART table contents at the gart pointer in VRAM. The driver
+ * calls this function before a GPU reset. If the value is retained after a
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
+ */
static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
{
memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
}
+/**
+ * amdgpu_device_check_vram_lost - check if vram is valid
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Checks the reset magic value written to the gart pointer in VRAM.
+ * The driver calls this after a GPU reset to see if the contents of
+ * VRAM are lost or not.
+ * Returns true if vram is lost, false if not.
+ */
static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
{
return !!memcmp(adev->gart.ptr, adev->reset_magic,
AMDGPU_RESET_MAGIC_NUM);
}
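
The two helpers pair up around a reset; a simplified sketch of the sequence used later in the reset path (example_reset_vram_check is illustrative only):

    static bool example_reset_vram_check(struct amdgpu_device *adev)
    {
            amdgpu_device_fill_reset_magic(adev);  /* snapshot bytes from GART */

            /* ... perform the ASIC reset ... */

            if (amdgpu_device_check_vram_lost(adev)) {
                    atomic_inc(&adev->vram_lost_counter);
                    return true;    /* shadowed buffers must be restored */
            }
            return false;
    }
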
+/**
+ * amdgpu_device_ip_late_set_cg_state - late init for clockgating
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass enabling clockgating for hardware IPs.
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * set_clockgating_state callbacks are run. This stage is run late
+ * in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
{
int i = 0, r;
+ if (amdgpu_emu_mode == 1)
+ return 0;
+
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* skip CG for VCE/UVD, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
- adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* enable clockgating to save power */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_GATE);
@@ -1397,6 +1674,18 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_late_init - run late init for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Late initialization pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the late_init callbacks are run.
+ * late_init covers any special initialization that an IP requires
+ * after all of the IPs have been initialized or something that needs to happen
+ * late in the init process.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
{
int i = 0, r;
@@ -1423,6 +1712,17 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_fini - run fini for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main teardown pass for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
+ * are run. hw_fini tears down the hardware associated with each IP
+ * and sw_fini tears down any software state associated with each IP.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
{
int i, r;
@@ -1432,7 +1732,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.hw)
continue;
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
@@ -1455,14 +1756,10 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.hw)
continue;
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
- amdgpu_free_static_csa(adev);
- amdgpu_device_wb_fini(adev);
- amdgpu_device_vram_scratch_fini(adev);
- }
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
- adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE) {
+ adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
/* ungate blocks before hw fini so that we can shutdown the blocks safely */
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
@@ -1483,9 +1780,17 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = false;
}
+
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.sw)
continue;
+
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
+ amdgpu_free_static_csa(adev);
+ amdgpu_device_wb_fini(adev);
+ amdgpu_device_vram_scratch_fini(adev);
+ }
+
r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@@ -1511,6 +1816,15 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_late_init_func_handler - work handler for clockgating
+ *
+ * @work: work_struct
+ *
+ * Work handler for amdgpu_device_ip_late_set_cg_state. We put the
+ * clockgating setup into a worker thread to speed up driver init and
+ * resume from suspend.
+ */
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
@@ -1518,6 +1832,17 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
amdgpu_device_ip_late_set_cg_state(adev);
}
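
The handler recovers the device with the usual container_of() idiom for embedded work items; a minimal self-contained sketch of the pattern, with made-up names:

    struct example_dev {
            struct work_struct late_init_work;
    };

    static void example_handler(struct work_struct *work)
    {
            struct example_dev *edev =
                    container_of(work, struct example_dev, late_init_work);

            /* expensive clockgating setup runs here, off the init path */
            (void)edev;
    }
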
+/**
+ * amdgpu_device_ip_suspend - run suspend for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main suspend function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked, clockgating is disabled and the
+ * suspend callbacks are run. suspend puts the hardware and software state
+ * in each IP into a state suitable for suspend.
+ * Returns 0 on success, negative error code on failure.
+ */
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int i, r;
@@ -1536,7 +1861,8 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.valid)
continue;
/* ungate blocks so that suspend can properly shut them down */
- if (i != AMD_IP_BLOCK_TYPE_SMC) {
+ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
+ adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
@@ -1582,6 +1908,8 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ if (r)
+ return r;
}
}
@@ -1615,12 +1943,26 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
+ if (r)
+ return r;
}
}
return 0;
}
+/**
+ * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * First resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * COMMON, GMC, and IH. resume puts the hardware into a functional state
+ * after a suspend and updates the software state as necessary. This
+ * function is also used for restoring the GPU after a GPU reset.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
{
int i, r;
@@ -1629,9 +1971,8 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type ==
- AMD_IP_BLOCK_TYPE_IH) {
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
DRM_ERROR("resume of IP block <%s> failed %d\n",
@@ -1644,6 +1985,19 @@ static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+ * necessary. This function is also used for restoring the GPU after a GPU
+ * reset.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
{
int i, r;
@@ -1652,8 +2006,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
if (!adev->ip_blocks[i].status.valid)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
- adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH )
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)
continue;
r = adev->ip_blocks[i].version->funcs->resume(adev);
if (r) {
@@ -1666,6 +2020,18 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Main resume function for hardware IPs. The hardware IPs
+ * are split into two resume functions because they are
+ * also used in recovering from a GPU reset and some additional
+ * steps need to be taken between them. In this case (S3/S4) they are
+ * run sequentially.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
{
int r;
@@ -1678,6 +2044,13 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
return r;
}
+/**
+ * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Query the VBIOS data tables to determine if the board supports SR-IOV.
+ */
static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
{
if (amdgpu_sriov_vf(adev)) {
@@ -1694,6 +2067,14 @@ static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
}
}
+/**
+ * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
+ *
+ * @asic_type: AMD asic type
+ *
+ * Check if there is DC (new modesetting infrastructure) support for an asic.
+ * returns true if DC has support, false if not.
+ */
bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
{
switch (asic_type) {
@@ -1701,6 +2082,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_BONAIRE:
case CHIP_HAWAII:
case CHIP_KAVERI:
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
case CHIP_CARRIZO:
case CHIP_STONEY:
case CHIP_POLARIS11:
@@ -1711,10 +2094,8 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
#if defined(CONFIG_DRM_AMD_DC_PRE_VEGA)
return amdgpu_dc != 0;
#endif
- case CHIP_KABINI:
- case CHIP_MULLINS:
- return amdgpu_dc > 0;
case CHIP_VEGA10:
+ case CHIP_VEGA12:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case CHIP_RAVEN:
#endif
@@ -1768,14 +2149,16 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->flags = flags;
adev->asic_type = flags & AMD_ASIC_MASK;
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
- adev->mc.gart_size = 512 * 1024 * 1024;
+ if (amdgpu_emu_mode == 1)
+ adev->usec_timeout *= 2;
+ adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
adev->vm_manager.vm_pte_num_rings = 0;
- adev->gart.gart_funcs = NULL;
+ adev->gmc.gmc_funcs = NULL;
adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
@@ -1864,6 +2247,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->rio_mem == NULL)
DRM_INFO("PCI I/O BAR is not found.\n");
+ amdgpu_device_get_pcie_info(adev);
+
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
@@ -1882,6 +2267,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (runtime)
vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
+ if (amdgpu_emu_mode == 1) {
+ /* post the asic on emulation mode */
+ emu_soc_asic_init(adev);
+ goto fence_driver_init;
+ }
+
/* Read BIOS */
if (!amdgpu_get_bios(adev)) {
r = -EINVAL;
@@ -1934,6 +2325,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
amdgpu_atombios_i2c_init(adev);
}
+fence_driver_init:
/* Fence driver */
r = amdgpu_fence_driver_init(adev);
if (r) {
@@ -1961,7 +2353,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
}
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
- amdgpu_device_ip_fini(adev);
goto failed;
}
@@ -2060,11 +2451,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
DRM_INFO("amdgpu: finishing device.\n");
adev->shutdown = true;
- if (adev->mode_info.mode_config_initialized)
- drm_crtc_force_disable_all(adev->ddev);
-
+ /* disable all interrupts */
+ amdgpu_irq_disable_all(adev);
+ if (adev->mode_info.mode_config_initialized) {
+ if (!amdgpu_device_has_dc_support(adev))
+ drm_crtc_force_disable_all(adev->ddev);
+ else
+ drm_atomic_helper_shutdown(adev->ddev);
+ }
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
+ amdgpu_pm_sysfs_fini(adev);
amdgpu_fbdev_fini(adev);
r = amdgpu_device_ip_fini(adev);
if (adev->firmware.gpu_info_fw) {
@@ -2076,7 +2473,10 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
- amdgpu_atombios_fini(adev);
+
+ if (amdgpu_emu_mode != 1)
+ amdgpu_atombios_fini(adev);
+
kfree(adev->bios);
adev->bios = NULL;
if (!pci_is_thunderbolt_attached(adev->pdev))
@@ -2090,7 +2490,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
- amdgpu_pm_sysfs_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
}
@@ -2284,14 +2683,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
}
drm_modeset_unlock_all(dev);
- } else {
- /*
- * There is no equivalent atomic helper to turn on
- * display, so we defined our own function for this,
- * once suspend resume is supported by the atomic
- * framework this will be reworked
- */
- amdgpu_dm_display_resume(adev);
}
}
@@ -2327,6 +2718,16 @@ unlock:
return r;
}
+/**
+ * amdgpu_device_ip_check_soft_reset - did soft reset succeed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and
+ * the check_soft_reset callbacks are run. check_soft_reset determines
+ * if the asic is still hung or not.
+ * Returns true if any of the IPs are still in a hung state, false if not.
+ */
static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
{
int i;
@@ -2349,6 +2750,17 @@ static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
return asic_hang;
}
+/**
+ * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary for a soft reset to succeed.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
{
int i, r = 0;
@@ -2367,6 +2779,15 @@ static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Some hardware IPs cannot be soft reset. If they are hung, a full gpu
+ * reset is necessary to recover.
+ * Returns true if a full asic reset is required, false if not.
+ */
static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
{
int i;
@@ -2388,6 +2809,17 @@ static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
return false;
}
+/**
+ * amdgpu_device_ip_soft_reset - do a soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * soft_reset callbacks are run if the block is hung. soft_reset handles any
+ * IP specific hardware or software state changes that are necessary to soft
+ * reset the IP.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
{
int i, r = 0;
@@ -2406,6 +2838,17 @@ static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_ip_post_soft_reset - clean up from soft reset
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * The list of all the hardware IPs that make up the asic is walked and the
+ * post_soft_reset callbacks are run if the asic was hung. post_soft_reset
+ * handles any IP specific hardware or software state changes that are
+ * necessary after the IP has been soft reset.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
{
int i, r = 0;
@@ -2423,6 +2866,19 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
return 0;
}
+/**
+ * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
+ *
+ * @adev: amdgpu_device pointer
+ * @ring: amdgpu_ring for the engine handling the buffer operations
+ * @bo: amdgpu_bo buffer whose shadow is being restored
+ * @fence: dma_fence associated with the operation
+ *
+ * Restores the VRAM buffer contents from the shadow in GTT. Used to
+ * restore things like GPUVM page tables after a GPU reset where
+ * the contents of VRAM might be lost.
+ * Returns 0 on success, negative error code on failure.
+ */
static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@@ -2458,17 +2914,81 @@ err:
return r;
}
-/*
+/**
+ * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Restores the contents of VRAM buffers from the shadows in GTT. Used to
+ * restore things like GPUVM page tables after a GPU reset where
+ * the contents of VRAM might be lost.
+ * Returns 0 on success, 1 on failure.
+ */
+static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct amdgpu_bo *bo, *tmp;
+ struct dma_fence *fence = NULL, *next = NULL;
+ long r = 1;
+ int i = 0;
+ long tmo;
+
+ if (amdgpu_sriov_runtime(adev))
+ tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
+ else
+ tmo = msecs_to_jiffies(100);
+
+ DRM_INFO("recover vram bo from shadow start\n");
+ mutex_lock(&adev->shadow_list_lock);
+ list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
+ next = NULL;
+ amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
+ if (fence) {
+ r = dma_fence_wait_timeout(fence, false, tmo);
+ if (r == 0)
+ pr_err("wait fence %p[%d] timeout\n", fence, i);
+ else if (r < 0)
+ pr_err("wait fence %p[%d] interrupted\n", fence, i);
+ if (r < 1) {
+ dma_fence_put(fence);
+ fence = next;
+ break;
+ }
+ i++;
+ }
+
+ dma_fence_put(fence);
+ fence = next;
+ }
+ mutex_unlock(&adev->shadow_list_lock);
+
+ if (fence) {
+ r = dma_fence_wait_timeout(fence, false, tmo);
+ if (r == 0)
+ pr_err("wait fence %p[%d] timeout\n", fence, i);
+ else if (r < 0)
+ pr_err("wait fence %p[%d] interrupted\n", fence, i);
+
+ }
+ dma_fence_put(fence);
+
+ if (r > 0)
+ DRM_INFO("recover vram bo from shadow done\n");
+ else
+ DRM_ERROR("recover vram bo from shadow failed\n");
+
+ return (r > 0) ? 0 : 1;
+}
+
+/**
* amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough
*
* @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
*
* attempt to do soft-reset or full-reset and reinitialize Asic
 * return 0 means succeeded otherwise failed
-*/
-static int amdgpu_device_reset(struct amdgpu_device *adev,
- uint64_t* reset_flags)
+ */
+static int amdgpu_device_reset(struct amdgpu_device *adev)
{
bool need_full_reset, vram_lost = 0;
int r;
@@ -2483,7 +3003,6 @@ static int amdgpu_device_reset(struct amdgpu_device *adev,
DRM_INFO("soft reset failed, will fallback to full reset!\n");
need_full_reset = true;
}
-
}
if (need_full_reset) {
@@ -2532,28 +3051,21 @@ out:
}
}
- if (reset_flags) {
- if (vram_lost)
- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
-
- if (need_full_reset)
- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
- }
+ if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost))
+ r = amdgpu_device_handle_vram_lost(adev);
return r;
}
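
With the reset_flags out-parameter gone, the recovery decision collapses to the single condition above; spelled out:

    /*
     *   r != 0                            -> skip, the reset itself failed
     *   full reset on an APU, !vram_lost  -> skip, carve-out memory survives
     *   full reset on a dGPU              -> restore shadow buffers
     *   vram_lost                         -> restore shadow buffers
     */
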
-/*
+/**
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu device pointer
- * @reset_flags: output param tells caller the reset result
*
* do VF FLR and reinitialize Asic
 * return 0 means succeeded otherwise failed
-*/
+ */
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
- uint64_t *reset_flags,
bool from_hypervisor)
{
int r;
@@ -2575,28 +3087,20 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
/* now we are okay to resume SMC/CP/SDMA */
r = amdgpu_device_ip_reinit_late_sriov(adev);
+ amdgpu_virt_release_full_gpu(adev, true);
if (r)
goto error;
amdgpu_irq_gpu_reset_resume_helper(adev);
r = amdgpu_ib_ring_tests(adev);
- if (r)
- dev_err(adev->dev, "[GPU_RESET] ib ring test failed (%d).\n", r);
-
-error:
- /* release full control of GPU after ib test */
- amdgpu_virt_release_full_gpu(adev, true);
-
- if (reset_flags) {
- if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
- (*reset_flags) |= AMDGPU_RESET_INFO_VRAM_LOST;
- atomic_inc(&adev->vram_lost_counter);
- }
- /* VF FLR or hotlink reset is always full-reset */
- (*reset_flags) |= AMDGPU_RESET_INFO_FULLRESET;
+ if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
+ atomic_inc(&adev->vram_lost_counter);
+ r = amdgpu_device_handle_vram_lost(adev);
}
+error:
+
return r;
}
@@ -2614,7 +3118,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job, bool force)
{
struct drm_atomic_state *state = NULL;
- uint64_t reset_flags = 0;
int i, r, resched;
if (!force && !amdgpu_device_ip_check_soft_reset(adev)) {
@@ -2636,22 +3139,23 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
+
/* store modesetting */
if (amdgpu_device_has_dc_support(adev))
state = drm_atomic_helper_suspend(adev->ddev);
- /* block scheduler */
+ /* block all schedulers and reset given job's ring */
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
- /* only focus on the ring hit timeout if &job not NULL */
+ kthread_park(ring->sched.thread);
+
if (job && job->ring->idx != i)
continue;
- kthread_park(ring->sched.thread);
drm_sched_hw_job_reset(&ring->sched, &job->base);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
@@ -2659,74 +3163,29 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
}
if (amdgpu_sriov_vf(adev))
- r = amdgpu_device_reset_sriov(adev, &reset_flags, job ? false : true);
+ r = amdgpu_device_reset_sriov(adev, job ? false : true);
else
- r = amdgpu_device_reset(adev, &reset_flags);
-
- if (!r) {
- if (((reset_flags & AMDGPU_RESET_INFO_FULLRESET) && !(adev->flags & AMD_IS_APU)) ||
- (reset_flags & AMDGPU_RESET_INFO_VRAM_LOST)) {
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
- struct amdgpu_bo *bo, *tmp;
- struct dma_fence *fence = NULL, *next = NULL;
-
- DRM_INFO("recover vram bo from shadow\n");
- mutex_lock(&adev->shadow_list_lock);
- list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
- next = NULL;
- amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
- if (fence) {
- r = dma_fence_wait(fence, false);
- if (r) {
- WARN(r, "recovery from shadow isn't completed\n");
- break;
- }
- }
-
- dma_fence_put(fence);
- fence = next;
- }
- mutex_unlock(&adev->shadow_list_lock);
- if (fence) {
- r = dma_fence_wait(fence, false);
- if (r)
- WARN(r, "recovery from shadow isn't completed\n");
- }
- dma_fence_put(fence);
- }
+ r = amdgpu_device_reset(adev);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
-
- if (!ring || !ring->sched.thread)
- continue;
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ struct amdgpu_ring *ring = adev->rings[i];
- /* only focus on the ring hit timeout if &job not NULL */
- if (job && job->ring->idx != i)
- continue;
+ if (!ring || !ring->sched.thread)
+ continue;
+ /* only need to recover the scheduler of the given job's ring,
+ * or of all rings (in the case @job is NULL),
+ * after the reset above has completed
+ */
+ if ((!job || job->ring->idx == i) && !r)
drm_sched_job_recovery(&ring->sched);
- kthread_unpark(ring->sched.thread);
- }
- } else {
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *ring = adev->rings[i];
- if (!ring || !ring->sched.thread)
- continue;
-
- /* only focus on the ring hit timeout if &job not NULL */
- if (job && job->ring->idx != i)
- continue;
-
- kthread_unpark(adev->rings[i]->sched.thread);
- }
+ kthread_unpark(ring->sched.thread);
}
if (amdgpu_device_has_dc_support(adev)) {
if (drm_atomic_helper_resume(adev->ddev, state))
dev_info(adev->dev, "drm resume failed:%d\n", r);
- amdgpu_dm_display_resume(adev);
} else {
drm_helper_resume_force_mode(adev->ddev);
}
@@ -2747,7 +3206,16 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
return r;
}
-void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
+/**
+ * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Fetches and stores in the driver the PCIE capabilities (gen speed
+ * and lanes) of the slot the device is in. Handles APUs and
+ * virtualized environments where PCIE config space may not be available.
+ */
+static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
u32 mask;
int ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 38d47559f098..93f700ab1bfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -29,6 +29,7 @@
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_connectors.h"
+#include "amdgpu_display.h"
#include <asm/div64.h>
#include <linux/pm_runtime.h>
@@ -36,7 +37,8 @@
#include <drm/drm_edid.h>
#include <drm/drm_fb_helper.h>
-static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
+static void amdgpu_display_flip_callback(struct dma_fence *f,
+ struct dma_fence_cb *cb)
{
struct amdgpu_flip_work *work =
container_of(cb, struct amdgpu_flip_work, cb);
@@ -45,8 +47,8 @@ static void amdgpu_flip_callback(struct dma_fence *f, struct dma_fence_cb *cb)
schedule_work(&work->flip_work.work);
}
-static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
- struct dma_fence **f)
+static bool amdgpu_display_flip_handle_fence(struct amdgpu_flip_work *work,
+ struct dma_fence **f)
{
 struct dma_fence *fence = *f;
@@ -55,14 +57,15 @@ static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
*f = NULL;
- if (!dma_fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+ if (!dma_fence_add_callback(fence, &work->cb,
+ amdgpu_display_flip_callback))
return true;
dma_fence_put(fence);
return false;
}
-static void amdgpu_flip_work_func(struct work_struct *__work)
+static void amdgpu_display_flip_work_func(struct work_struct *__work)
{
struct delayed_work *delayed_work =
container_of(__work, struct delayed_work, work);
@@ -76,20 +79,20 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
unsigned i;
int vpos, hpos;
- if (amdgpu_flip_handle_fence(work, &work->excl))
+ if (amdgpu_display_flip_handle_fence(work, &work->excl))
return;
for (i = 0; i < work->shared_count; ++i)
- if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+ if (amdgpu_display_flip_handle_fence(work, &work->shared[i]))
return;
/* Wait until we're out of the vertical blank period before the one
* targeted by the flip
*/
if (amdgpu_crtc->enabled &&
- (amdgpu_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
- &vpos, &hpos, NULL, NULL,
- &crtc->hwmode)
+ (amdgpu_display_get_crtc_scanoutpos(adev->ddev, work->crtc_id, 0,
+ &vpos, &hpos, NULL, NULL,
+ &crtc->hwmode)
& (DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK)) ==
(DRM_SCANOUTPOS_VALID | DRM_SCANOUTPOS_IN_VBLANK) &&
(int)(work->target_vblank -
@@ -117,7 +120,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
/*
* Handle unpin events outside the interrupt handler proper.
*/
-static void amdgpu_unpin_work_func(struct work_struct *__work)
+static void amdgpu_display_unpin_work_func(struct work_struct *__work)
{
struct amdgpu_flip_work *work =
container_of(__work, struct amdgpu_flip_work, unpin_work);
@@ -139,11 +142,11 @@ static void amdgpu_unpin_work_func(struct work_struct *__work)
kfree(work);
}
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- struct drm_pending_vblank_event *event,
- uint32_t page_flip_flags, uint32_t target,
- struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_device *dev = crtc->dev;
struct amdgpu_device *adev = dev->dev_private;
@@ -162,8 +165,8 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
if (work == NULL)
return -ENOMEM;
- INIT_DELAYED_WORK(&work->flip_work, amdgpu_flip_work_func);
- INIT_WORK(&work->unpin_work, amdgpu_unpin_work_func);
+ INIT_DELAYED_WORK(&work->flip_work, amdgpu_display_flip_work_func);
+ INIT_WORK(&work->unpin_work, amdgpu_display_unpin_work_func);
work->event = event;
work->adev = adev;
@@ -189,7 +192,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
- r = amdgpu_bo_pin(new_abo, AMDGPU_GEM_DOMAIN_VRAM, &base);
+ r = amdgpu_bo_pin(new_abo, amdgpu_display_framebuffer_domains(adev), &base);
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
@@ -207,7 +210,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
amdgpu_bo_unreserve(new_abo);
work->base = base;
- work->target_vblank = target - drm_crtc_vblank_count(crtc) +
+ work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
/* we borrow the event spin lock for protecting flip_work */
@@ -228,7 +231,7 @@ int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- amdgpu_flip_work_func(&work->flip_work.work);
+ amdgpu_display_flip_work_func(&work->flip_work.work);
return 0;
pflip_cleanup:
@@ -254,8 +257,8 @@ cleanup:
return r;
}
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
- struct drm_modeset_acquire_ctx *ctx)
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx)
{
struct drm_device *dev;
struct amdgpu_device *adev;
@@ -352,7 +355,7 @@ static const char *hpd_names[6] = {
"HPD6",
};
-void amdgpu_print_display_setup(struct drm_device *dev)
+void amdgpu_display_print_display_setup(struct drm_device *dev)
{
struct drm_connector *connector;
struct amdgpu_connector *amdgpu_connector;
@@ -429,11 +432,11 @@ void amdgpu_print_display_setup(struct drm_device *dev)
}
/**
- * amdgpu_ddc_probe
+ * amdgpu_display_ddc_probe
*
*/
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
- bool use_aux)
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux)
{
u8 out = 0x0;
u8 buf[8];
@@ -479,7 +482,7 @@ bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector,
return true;
}
-static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
+static void amdgpu_display_user_framebuffer_destroy(struct drm_framebuffer *fb)
{
struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
@@ -488,9 +491,10 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
kfree(amdgpu_fb);
}
-static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
- struct drm_file *file_priv,
- unsigned int *handle)
+static int amdgpu_display_user_framebuffer_create_handle(
+ struct drm_framebuffer *fb,
+ struct drm_file *file_priv,
+ unsigned int *handle)
{
struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
@@ -498,15 +502,28 @@ static int amdgpu_user_framebuffer_create_handle(struct drm_framebuffer *fb,
}
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
- .destroy = amdgpu_user_framebuffer_destroy,
- .create_handle = amdgpu_user_framebuffer_create_handle,
+ .destroy = amdgpu_display_user_framebuffer_destroy,
+ .create_handle = amdgpu_display_user_framebuffer_create_handle,
};
-int
-amdgpu_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj)
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev)
+{
+ uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+#if defined(CONFIG_DRM_AMD_DC)
+ if (adev->asic_type >= CHIP_CARRIZO && adev->asic_type < CHIP_RAVEN &&
+ adev->flags & AMD_IS_APU &&
+ amdgpu_device_asic_has_dc_support(adev->asic_type))
+ domain |= AMDGPU_GEM_DOMAIN_GTT;
+#endif
+
+ return domain;
+}
+
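The new helper exists because, with DC enabled, the display engines on Carrizo-era APUs (up to, but not including, Raven) can scan out from GTT as well as VRAM. A hedged usage sketch, mirroring the amdgpu_fb.c hunk later in this patch:

	/* pin a scanout buffer in any domain the display engine accepts */
	uint32_t domain = amdgpu_display_framebuffer_domains(adev);
	int r = amdgpu_bo_pin(abo, domain, NULL);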
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj)
{
int ret;
rfb->obj = obj;
@@ -520,9 +537,9 @@ amdgpu_framebuffer_init(struct drm_device *dev,
}
struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
- struct drm_file *file_priv,
- const struct drm_mode_fb_cmd2 *mode_cmd)
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd)
{
struct drm_gem_object *obj;
struct amdgpu_framebuffer *amdgpu_fb;
@@ -547,7 +564,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
return ERR_PTR(-ENOMEM);
}
- ret = amdgpu_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
+ ret = amdgpu_display_framebuffer_init(dev, amdgpu_fb, mode_cmd, obj);
if (ret) {
kfree(amdgpu_fb);
drm_gem_object_put_unlocked(obj);
@@ -558,7 +575,7 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
}
const struct drm_mode_config_funcs amdgpu_mode_funcs = {
- .fb_create = amdgpu_user_framebuffer_create,
+ .fb_create = amdgpu_display_user_framebuffer_create,
.output_poll_changed = drm_fb_helper_output_poll_changed,
};
@@ -580,7 +597,7 @@ static const struct drm_prop_enum_list amdgpu_dither_enum_list[] =
{ AMDGPU_FMT_DITHER_ENABLE, "on" },
};
-int amdgpu_modeset_create_props(struct amdgpu_device *adev)
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
{
int sz;
@@ -629,7 +646,7 @@ int amdgpu_modeset_create_props(struct amdgpu_device *adev)
return 0;
}
-void amdgpu_update_display_priority(struct amdgpu_device *adev)
+void amdgpu_display_update_priority(struct amdgpu_device *adev)
{
/* adjustment options for the display watermarks */
if ((amdgpu_disp_priority == 0) || (amdgpu_disp_priority > 2))
@@ -639,7 +656,7 @@ void amdgpu_update_display_priority(struct amdgpu_device *adev)
}
-static bool is_hdtv_mode(const struct drm_display_mode *mode)
+static bool amdgpu_display_is_hdtv_mode(const struct drm_display_mode *mode)
{
/* try and guess if this is a tv or a monitor */
if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */
@@ -651,9 +668,9 @@ static bool is_hdtv_mode(const struct drm_display_mode *mode)
return false;
}
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode)
{
struct drm_device *dev = crtc->dev;
struct drm_encoder *encoder;
@@ -696,7 +713,7 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
((amdgpu_encoder->underscan_type == UNDERSCAN_ON) ||
((amdgpu_encoder->underscan_type == UNDERSCAN_AUTO) &&
drm_detect_hdmi_monitor(amdgpu_connector_edid(connector)) &&
- is_hdtv_mode(mode)))) {
+ amdgpu_display_is_hdtv_mode(mode)))) {
if (amdgpu_encoder->underscan_hborder != 0)
amdgpu_crtc->h_border = amdgpu_encoder->underscan_hborder;
else
@@ -764,10 +781,10 @@ bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
* unknown small number of scanlines wrt. real scanout position.
*
*/
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
- unsigned int flags, int *vpos, int *hpos,
- ktime_t *stime, ktime_t *etime,
- const struct drm_display_mode *mode)
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode)
{
u32 vbl = 0, position = 0;
int vbl_start, vbl_end, vtotal, ret = 0;
@@ -859,7 +876,7 @@ int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
return ret;
}
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc)
{
if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
return AMDGPU_CRTC_IRQ_NONE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 0bcb6c6e0ca9..2b11d808f297 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -23,9 +23,10 @@
#ifndef __AMDGPU_DISPLAY_H__
#define __AMDGPU_DISPLAY_H__
+uint32_t amdgpu_display_framebuffer_domains(struct amdgpu_device *adev);
struct drm_framebuffer *
-amdgpu_user_framebuffer_create(struct drm_device *dev,
- struct drm_file *file_priv,
- const struct drm_mode_fb_cmd2 *mode_cmd);
+amdgpu_display_user_framebuffer_create(struct drm_device *dev,
+ struct drm_file *file_priv,
+ const struct drm_mode_fb_cmd2 *mode_cmd);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
index a8437a3296a6..643d008410c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h
@@ -265,9 +265,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_read_sensor(adev, idx, value, size) \
((adev)->powerplay.pp_funcs->read_sensor((adev)->powerplay.pp_handle, (idx), (value), (size)))
-#define amdgpu_dpm_get_temperature(adev) \
- ((adev)->powerplay.pp_funcs->get_temperature((adev)->powerplay.pp_handle))
-
#define amdgpu_dpm_set_fan_control_mode(adev, m) \
((adev)->powerplay.pp_funcs->set_fan_control_mode((adev)->powerplay.pp_handle, (m)))
@@ -328,8 +325,8 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_set_mclk_od(adev, value) \
((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
-#define amdgpu_dpm_dispatch_task(adev, task_id, input, output) \
- ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (input), (output))
+#define amdgpu_dpm_dispatch_task(adev, task_id, user_state) \
+ ((adev)->powerplay.pp_funcs->dispatch_tasks)((adev)->powerplay.pp_handle, (task_id), (user_state))
#define amdgpu_dpm_check_state_equal(adev, cps, rps, equal) \
((adev)->powerplay.pp_funcs->check_state_equal((adev)->powerplay.pp_handle, (cps), (rps), (equal)))
@@ -344,17 +341,9 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->reset_power_profile_state(\
(adev)->powerplay.pp_handle, request))
-#define amdgpu_dpm_get_power_profile_state(adev, query) \
- ((adev)->powerplay.pp_funcs->get_power_profile_state(\
- (adev)->powerplay.pp_handle, query))
-
-#define amdgpu_dpm_set_power_profile_state(adev, request) \
- ((adev)->powerplay.pp_funcs->set_power_profile_state(\
- (adev)->powerplay.pp_handle, request))
-
-#define amdgpu_dpm_switch_power_profile(adev, type) \
+#define amdgpu_dpm_switch_power_profile(adev, type, en) \
((adev)->powerplay.pp_funcs->switch_power_profile(\
- (adev)->powerplay.pp_handle, type))
+ (adev)->powerplay.pp_handle, type, en))
#define amdgpu_dpm_set_clockgating_by_smu(adev, msg_id) \
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
@@ -366,6 +355,22 @@ enum amdgpu_pcie_gen {
(adev)->powerplay.pp_handle, virtual_addr_low, \
virtual_addr_hi, mc_addr_low, mc_addr_hi, size)
+#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
+ ((adev)->powerplay.pp_funcs->get_power_profile_mode(\
+ (adev)->powerplay.pp_handle, buf))
+
+#define amdgpu_dpm_set_power_profile_mode(adev, parameter, size) \
+ ((adev)->powerplay.pp_funcs->set_power_profile_mode(\
+ (adev)->powerplay.pp_handle, parameter, size))
+
+#define amdgpu_dpm_odn_edit_dpm_table(adev, type, parameter, size) \
+ ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
+ (adev)->powerplay.pp_handle, type, parameter, size))
+
+#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
+ ((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
+ (adev)->powerplay.pp_handle))
+
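A hedged sketch of how a sysfs handler might use the new profile-mode macros; show_profile() is a hypothetical name, and the NULL checks matter because not every powerplay backend implements these callbacks:

	/* hypothetical sysfs read handler for pp_power_profile_mode */
	static ssize_t show_profile(struct amdgpu_device *adev, char *buf)
	{
		if (adev->powerplay.pp_funcs->get_power_profile_mode)
			return amdgpu_dpm_get_power_profile_mode(adev, buf);
		return snprintf(buf, PAGE_SIZE, "unsupported\n");
	}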
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 50afcf65181a..7379aa5a6849 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -73,9 +73,11 @@
* - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
* - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
* - 3.23.0 - Add query for VRAM lost counter
+ * - 3.24.0 - Add high priority compute support for gfx9
+ * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 23
+#define KMS_DRIVER_MINOR 25
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@@ -119,7 +121,7 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
-uint amdgpu_pp_feature_mask = 0xffffffff;
+uint amdgpu_pp_feature_mask = 0xffffbfff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@@ -129,6 +131,7 @@ int amdgpu_job_hang_limit = 0;
int amdgpu_lbpw = -1;
int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
+int amdgpu_emu_mode = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -281,9 +284,12 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
+MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
+module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
+
#ifdef CONFIG_DRM_AMDGPU_SI
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@@ -538,6 +544,12 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+ /* Vega 12 */
+ {0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
+ {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
@@ -576,6 +588,11 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
struct drm_device *dev;
unsigned long flags = ent->driver_data;
int ret, retry = 0;
+ bool supports_atomic = false;
+
+ if (!amdgpu_virtual_display &&
+ amdgpu_device_asic_has_dc_support(flags & AMD_ASIC_MASK))
+ supports_atomic = true;
if ((flags & AMD_EXP_HW_SUPPORT) && !amdgpu_exp_hw_support) {
DRM_INFO("This hardware requires experimental hardware support.\n"
@@ -596,6 +613,13 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
if (ret)
return ret;
+ /* warn the user if they mix atomic and non-atomic capable GPUs */
+ if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic)
+ DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n");
+ /* support atomic early so the atomic debugfs stuff gets created */
+ if (supports_atomic)
+ kms_driver.driver_features |= DRIVER_ATOMIC;
+
dev = drm_dev_alloc(&kms_driver, &pdev->dev);
if (IS_ERR(dev))
return PTR_ERR(dev);
@@ -720,7 +744,6 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
drm_kms_helper_poll_disable(drm_dev);
- vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_OFF);
ret = amdgpu_device_suspend(drm_dev, false, false);
pci_save_state(pdev);
@@ -757,7 +780,6 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
ret = amdgpu_device_resume(drm_dev, false, false);
drm_kms_helper_poll_enable(drm_dev);
- vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
return 0;
}
@@ -835,8 +857,8 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
ktime_t *stime, ktime_t *etime,
const struct drm_display_mode *mode)
{
- return amdgpu_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
- stime, etime, mode);
+ return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
+ stime, etime, mode);
}
static struct drm_driver kms_driver = {
@@ -854,9 +876,6 @@ static struct drm_driver kms_driver = {
.disable_vblank = amdgpu_disable_vblank_kms,
.get_vblank_timestamp = drm_calc_vbltimestamp_from_scanoutpos,
.get_scanout_position = amdgpu_get_crtc_scanout_position,
- .irq_preinstall = amdgpu_irq_preinstall,
- .irq_postinstall = amdgpu_irq_postinstall,
- .irq_uninstall = amdgpu_irq_uninstall,
.irq_handler = amdgpu_irq_handler,
.ioctls = amdgpu_ioctls_kms,
.gem_free_object_unlocked = amdgpu_gem_object_free,
@@ -869,9 +888,7 @@ static struct drm_driver kms_driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_export = amdgpu_gem_prime_export,
- .gem_prime_import = drm_gem_prime_import,
- .gem_prime_pin = amdgpu_gem_prime_pin,
- .gem_prime_unpin = amdgpu_gem_prime_unpin,
+ .gem_prime_import = amdgpu_gem_prime_import,
.gem_prime_res_obj = amdgpu_gem_prime_res_obj,
.gem_prime_get_sg_table = amdgpu_gem_prime_get_sg_table,
.gem_prime_import_sg_table = amdgpu_gem_prime_import_sg_table,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index ff3e9beb7d19..12063019751b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -38,6 +38,8 @@
#include <linux/vga_switcheroo.h>
+#include "amdgpu_display.h"
+
/* object hierarchy -
this contains a helper + a amdgpu fb
the helper contains a pointer to amdgpu framebuffer baseclass.
@@ -124,7 +126,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
struct drm_gem_object *gobj = NULL;
struct amdgpu_bo *abo = NULL;
bool fb_tiled = false; /* useful for testing */
- u32 tiling_flags = 0;
+ u32 tiling_flags = 0, domain;
int ret;
int aligned_size, size;
int height = mode_cmd->height;
@@ -135,12 +137,12 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,
fb_tiled);
+ domain = amdgpu_display_framebuffer_domains(adev);
height = ALIGN(mode_cmd->height, 8);
size = mode_cmd->pitches[0] * height;
aligned_size = ALIGN(size, PAGE_SIZE);
- ret = amdgpu_gem_object_create(adev, aligned_size, 0,
- AMDGPU_GEM_DOMAIN_VRAM,
+ ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
@@ -166,7 +168,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
}
- ret = amdgpu_bo_pin(abo, AMDGPU_GEM_DOMAIN_VRAM, NULL);
+ ret = amdgpu_bo_pin(abo, domain, NULL);
if (ret) {
amdgpu_bo_unreserve(abo);
goto out_unref;
@@ -225,7 +227,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
info->par = rfbdev;
info->skip_vt_switch = true;
- ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
+ ret = amdgpu_display_framebuffer_init(adev->ddev, &rfbdev->rfb,
+ &mode_cmd, gobj);
if (ret) {
DRM_ERROR("failed to initialize framebuffer %d\n", ret);
goto out;
@@ -242,8 +245,8 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
info->fbops = &amdgpufb_ops;
- tmp = amdgpu_bo_gpu_offset(abo) - adev->mc.vram_start;
- info->fix.smem_start = adev->mc.aper_base + tmp;
+ tmp = amdgpu_bo_gpu_offset(abo) - adev->gmc.vram_start;
+ info->fix.smem_start = adev->gmc.aper_base + tmp;
info->fix.smem_len = amdgpu_bo_size(abo);
info->screen_base = amdgpu_bo_kptr(abo);
info->screen_size = amdgpu_bo_size(abo);
@@ -252,7 +255,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
/* setup aperture base/size for vesafb takeover */
info->apertures->ranges[0].base = adev->ddev->mode_config.fb_base;
- info->apertures->ranges[0].size = adev->mc.aper_size;
+ info->apertures->ranges[0].size = adev->gmc.aper_size;
/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
@@ -262,7 +265,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
}
DRM_INFO("fb mappable at 0x%lX\n", info->fix.smem_start);
- DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->mc.aper_base);
+ DRM_INFO("vram apper at 0x%lX\n", (unsigned long)adev->gmc.aper_base);
DRM_INFO("size %lu\n", (unsigned long)amdgpu_bo_size(abo));
DRM_INFO("fb depth is %d\n", fb->format->depth);
DRM_INFO(" pitch is %d\n", fb->pitches[0]);
@@ -319,7 +322,7 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
return 0;
/* select 8 bpp console on low vram cards */
- if (adev->mc.real_vram_size <= (32*1024*1024))
+ if (adev->gmc.real_vram_size <= (32*1024*1024))
bpp_sel = 8;
rfbdev = kzalloc(sizeof(struct amdgpu_fbdev), GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 008e1984b7e3..455a81e4c246 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -435,7 +435,9 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
- msecs_to_jiffies(amdgpu_lockup_timeout), ring->name);
+ (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ?
+ MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(amdgpu_lockup_timeout),
+ ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
ring->name);
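Restated as a hypothetical helper, the effect of this hunk is that compute rings no longer time out at the scheduler level, since their user queues may legitimately run for a very long time:

	/* illustrative only: pick the scheduler timeout per ring type */
	static long amdgpu_ring_sched_timeout(struct amdgpu_ring *ring)
	{
		if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
			return MAX_SCHEDULE_TIMEOUT;	/* never time out */
		return msecs_to_jiffies(amdgpu_lockup_timeout);
	}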
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 0a4f34afaaaa..cf0f186c6092 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -68,17 +68,15 @@
*/
static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
{
- if (adev->dummy_page.page)
+ struct page *dummy_page = adev->mman.bdev.glob->dummy_read_page;
+
+ if (adev->dummy_page_addr)
return 0;
- adev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
- if (adev->dummy_page.page == NULL)
- return -ENOMEM;
- adev->dummy_page.addr = pci_map_page(adev->pdev, adev->dummy_page.page,
- 0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- if (pci_dma_mapping_error(adev->pdev, adev->dummy_page.addr)) {
+ adev->dummy_page_addr = pci_map_page(adev->pdev, dummy_page, 0,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (pci_dma_mapping_error(adev->pdev, adev->dummy_page_addr)) {
dev_err(&adev->pdev->dev, "Failed to DMA MAP the dummy page\n");
- __free_page(adev->dummy_page.page);
- adev->dummy_page.page = NULL;
+ adev->dummy_page_addr = 0;
return -ENOMEM;
}
return 0;
@@ -93,12 +91,11 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
*/
static void amdgpu_gart_dummy_page_fini(struct amdgpu_device *adev)
{
- if (adev->dummy_page.page == NULL)
+ if (!adev->dummy_page_addr)
return;
- pci_unmap_page(adev->pdev, adev->dummy_page.addr,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
- __free_page(adev->dummy_page.page);
- adev->dummy_page.page = NULL;
+ pci_unmap_page(adev->pdev, adev->dummy_page_addr,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ adev->dummy_page_addr = 0;
}
/**
@@ -116,11 +113,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
int r;
if (adev->gart.robj == NULL) {
- r = amdgpu_bo_create(adev, adev->gart.table_size,
- PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
+ r = amdgpu_bo_create(adev, adev->gart.table_size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, &adev->gart.robj);
+ ttm_bo_type_kernel, NULL,
+ &adev->gart.robj);
if (r) {
return r;
}
@@ -236,18 +234,19 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
adev->gart.pages[p] = NULL;
#endif
- page_base = adev->dummy_page.addr;
+ page_base = adev->dummy_page_addr;
if (!adev->gart.ptr)
continue;
for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
- amdgpu_gart_set_pte_pde(adev, adev->gart.ptr,
- t, page_base, flags);
+ amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
+ t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
mb();
- amdgpu_gart_flush_gpu_tlb(adev, 0);
+ amdgpu_asic_flush_hdp(adev, NULL);
+ amdgpu_gmc_flush_gpu_tlb(adev, 0);
return 0;
}
@@ -279,7 +278,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < pages; i++) {
page_base = dma_addr[i];
for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
- amdgpu_gart_set_pte_pde(adev, dst, t, page_base, flags);
+ amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
}
}
@@ -317,7 +316,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
for (i = 0; i < pages; i++, p++)
- adev->gart.pages[p] = pagelist[i];
+ adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
#endif
if (!adev->gart.ptr)
@@ -329,7 +328,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
return r;
mb();
- amdgpu_gart_flush_gpu_tlb(adev, 0);
+ amdgpu_asic_flush_hdp(adev, NULL);
+ amdgpu_gmc_flush_gpu_tlb(adev, 0);
return 0;
}
@@ -345,7 +345,7 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
{
int r;
- if (adev->dummy_page.page)
+ if (adev->dummy_page_addr)
return 0;
/* We need PAGE_SIZE >= AMDGPU_GPU_PAGE_SIZE */
@@ -357,8 +357,8 @@ int amdgpu_gart_init(struct amdgpu_device *adev)
if (r)
return r;
/* Compute table size */
- adev->gart.num_cpu_pages = adev->mc.gart_size / PAGE_SIZE;
- adev->gart.num_gpu_pages = adev->mc.gart_size / AMDGPU_GPU_PAGE_SIZE;
+ adev->gart.num_cpu_pages = adev->gmc.gart_size / PAGE_SIZE;
+ adev->gart.num_gpu_pages = adev->gmc.gart_size / AMDGPU_GPU_PAGE_SIZE;
DRM_INFO("GART: num cpu pages %u, num gpu pages %u\n",
adev->gart.num_cpu_pages, adev->gart.num_gpu_pages);
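The CPU-page/GPU-page distinction above exists because PAGE_SIZE may be larger than AMDGPU_GPU_PAGE_SIZE (4096). An illustrative fragment of the index math this file relies on:

	/* one CPU page backs PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE GPU ptes */
	unsigned t = offset / AMDGPU_GPU_PAGE_SIZE;          /* first GPU pte */
	unsigned p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); /* backing CPU page */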
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
index d4a43302c2be..456295c00291 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h
@@ -31,7 +31,6 @@
*/
struct amdgpu_device;
struct amdgpu_bo;
-struct amdgpu_gart_funcs;
#define AMDGPU_GPU_PAGE_SIZE 4096
#define AMDGPU_GPU_PAGE_MASK (AMDGPU_GPU_PAGE_SIZE - 1)
@@ -52,8 +51,6 @@ struct amdgpu_gart {
/* Asic default pte flags */
uint64_t gart_pte_flags;
-
- const struct amdgpu_gart_funcs *gart_funcs;
};
int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index e48b4ec88c8c..28c2706e48d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -36,8 +36,6 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
if (robj) {
- if (robj->gem_base.import_attach)
- drm_prime_gem_destroy(&robj->gem_base, robj->tbo.sg);
amdgpu_mn_unregister(robj);
amdgpu_bo_unref(&robj);
}
@@ -45,7 +43,7 @@ void amdgpu_gem_object_free(struct drm_gem_object *gobj)
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
- u64 flags, bool kernel,
+ u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
struct drm_gem_object **obj)
{
@@ -58,23 +56,11 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
alignment = PAGE_SIZE;
}
-retry:
- r = amdgpu_bo_create(adev, size, alignment, kernel, initial_domain,
- flags, NULL, resv, 0, &bo);
+ r = amdgpu_bo_create(adev, size, alignment, initial_domain,
+ flags, type, resv, &bo);
if (r) {
- if (r != -ERESTARTSYS) {
- if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
- flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- goto retry;
- }
-
- if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
- initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
- goto retry;
- }
- DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
- size, initial_domain, alignment, r);
- }
+ DRM_DEBUG("Failed to allocate GEM object (%ld, %d, %u, %d)\n",
+ size, initial_domain, alignment, r);
return r;
}
*obj = &bo->gem_base;
@@ -523,12 +509,13 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
goto error;
if (operation == AMDGPU_VA_OP_MAP ||
- operation == AMDGPU_VA_OP_REPLACE)
+ operation == AMDGPU_VA_OP_REPLACE) {
r = amdgpu_vm_bo_update(adev, bo_va, false);
+ if (r)
+ goto error;
+ }
r = amdgpu_vm_update_directories(adev, vm);
- if (r)
- goto error;
error:
if (r && r != -ERESTARTSYS)
@@ -634,7 +621,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
if (r)
goto error_backoff;
- va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
@@ -654,7 +641,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
if (r)
goto error_backoff;
- va_flags = amdgpu_vm_get_pte_flags(adev, args->flags);
+ va_flags = amdgpu_gmc_get_pte_flags(adev, args->flags);
r = amdgpu_vm_bo_replace_map(adev, bo_va, args->va_address,
args->offset_in_bo, args->map_size,
va_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
new file mode 100644
index 000000000000..893c2490b783
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef __AMDGPU_GMC_H__
+#define __AMDGPU_GMC_H__
+
+#include <linux/types.h>
+
+#include "amdgpu_irq.h"
+
+struct firmware;
+
+/*
+ * VMHUB structures, functions & helpers
+ */
+struct amdgpu_vmhub {
+ uint32_t ctx0_ptb_addr_lo32;
+ uint32_t ctx0_ptb_addr_hi32;
+ uint32_t vm_inv_eng0_req;
+ uint32_t vm_inv_eng0_ack;
+ uint32_t vm_context0_cntl;
+ uint32_t vm_l2_pro_fault_status;
+ uint32_t vm_l2_pro_fault_cntl;
+};
+
+/*
+ * GPU MC structures, functions & helpers
+ */
+struct amdgpu_gmc_funcs {
+ /* flush the vm tlb via mmio */
+ void (*flush_gpu_tlb)(struct amdgpu_device *adev,
+ uint32_t vmid);
+ /* flush the vm tlb via ring */
+ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
+ uint64_t pd_addr);
+ /* Change the VMID -> PASID mapping */
+ void (*emit_pasid_mapping)(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid);
+ /* write pte/pde updates using the cpu */
+ int (*set_pte_pde)(struct amdgpu_device *adev,
+ void *cpu_pt_addr, /* cpu addr of page table */
+ uint32_t gpu_page_idx, /* pte/pde to update */
+ uint64_t addr, /* addr to write into pte/pde */
+ uint64_t flags); /* access flags */
+ /* enable/disable PRT support */
+ void (*set_prt)(struct amdgpu_device *adev, bool enable);
+ /* set pte flags based per asic */
+ uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
+ uint32_t flags);
+ /* get the pde for a given mc addr */
+ void (*get_vm_pde)(struct amdgpu_device *adev, int level,
+ u64 *dst, u64 *flags);
+};
+
+struct amdgpu_gmc {
+ resource_size_t aper_size;
+ resource_size_t aper_base;
+ /* for some chips with <= 32MB we need to lie
+ * about vram size near mc fb location */
+ u64 mc_vram_size;
+ u64 visible_vram_size;
+ u64 gart_size;
+ u64 gart_start;
+ u64 gart_end;
+ u64 vram_start;
+ u64 vram_end;
+ unsigned vram_width;
+ u64 real_vram_size;
+ int vram_mtrr;
+ u64 mc_mask;
+ const struct firmware *fw; /* MC firmware */
+ uint32_t fw_version;
+ struct amdgpu_irq_src vm_fault;
+ uint32_t vram_type;
+ uint32_t srbm_soft_reset;
+ bool prt_warning;
+ uint64_t stolen_size;
+ /* apertures */
+ u64 shared_aperture_start;
+ u64 shared_aperture_end;
+ u64 private_aperture_start;
+ u64 private_aperture_end;
+ /* protects concurrent invalidation */
+ spinlock_t invalidate_lock;
+ bool translate_further;
+
+ const struct amdgpu_gmc_funcs *gmc_funcs;
+};
+
+#endif
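The amdgpu_gart.c hunks above call amdgpu_gmc_flush_gpu_tlb() and amdgpu_gmc_set_pte_pde(); a hedged sketch of the wrapper macros these imply, following the usual amdgpu dispatch pattern (the real definitions live in amdgpu.h, not in this header):

	#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) \
		((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)))
	#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) \
		((adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)))
	#define amdgpu_gmc_get_pte_flags(adev, flags) \
		((adev)->gmc.gmc_funcs->get_vm_pte_flags((adev), (flags)))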
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index e14ab34d8262..da7b1b92d9cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -56,7 +56,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
return -ENOMEM;
start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
- size = (adev->mc.gart_size >> PAGE_SHIFT) - start;
+ size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
drm_mm_init(&mgr->mm, start, size);
spin_lock_init(&mgr->lock);
atomic64_set(&mgr->available, p_size);
@@ -75,7 +75,7 @@ static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man,
static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man)
{
struct amdgpu_gtt_mgr *mgr = man->priv;
-
+ spin_lock(&mgr->lock);
drm_mm_takedown(&mgr->mm);
spin_unlock(&mgr->lock);
kfree(mgr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a162d87ca0c8..311589e02d17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -181,15 +181,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
}
- if (ring->funcs->init_cond_exec)
+ if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
- if (ring->funcs->emit_hdp_flush
#ifdef CONFIG_X86_64
- && !(adev->flags & AMD_IS_APU)
+ if (!(adev->flags & AMD_IS_APU))
#endif
- )
- amdgpu_ring_emit_hdp_flush(ring);
+ {
+ if (ring->funcs->emit_hdp_flush)
+ amdgpu_ring_emit_hdp_flush(ring);
+ else
+ amdgpu_asic_flush_hdp(adev, ring);
+ }
skip_preamble = ring->current_ctx == fence_ctx;
need_ctx_switch = ring->current_ctx != fence_ctx;
@@ -219,12 +222,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (ring->funcs->emit_tmz)
amdgpu_ring_emit_tmz(ring, false);
- if (ring->funcs->emit_hdp_invalidate
#ifdef CONFIG_X86_64
- && !(adev->flags & AMD_IS_APU)
+ if (!(adev->flags & AMD_IS_APU))
#endif
- )
- amdgpu_ring_emit_hdp_invalidate(ring);
+ amdgpu_asic_invalidate_hdp(adev, ring);
r = amdgpu_fence_emit(ring, f);
if (r) {
@@ -278,11 +279,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
return r;
}
- r = amdgpu_sa_bo_manager_start(adev, &adev->ring_tmp_bo);
- if (r) {
- return r;
- }
-
adev->ib_pool_ready = true;
if (amdgpu_debugfs_sa_init(adev)) {
dev_err(adev->dev, "failed to register debugfs file for SA\n");
@@ -301,7 +297,6 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
void amdgpu_ib_pool_fini(struct amdgpu_device *adev)
{
if (adev->ib_pool_ready) {
- amdgpu_sa_bo_manager_suspend(adev, &adev->ring_tmp_bo);
amdgpu_sa_bo_manager_fini(adev, &adev->ring_tmp_bo);
adev->ib_pool_ready = false;
}
@@ -321,14 +316,45 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
{
unsigned i;
int r, ret = 0;
+ long tmo_gfx, tmo_mm;
+
+ tmo_mm = tmo_gfx = AMDGPU_IB_TEST_TIMEOUT;
+ if (amdgpu_sriov_vf(adev)) {
+ /* On the hypervisor side the MM engines are not scheduled together
+ * with the CP and SDMA engines, so even in exclusive mode an MM
+ * engine may still be running on another VF. The IB test timeout
+ * for MM engines under SR-IOV therefore needs to be long; 8 sec
+ * should be enough for the MM engine to come back to this VF.
+ */
+ tmo_mm = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ }
+
+ if (amdgpu_sriov_runtime(adev)) {
+ /* for CP & SDMA engines since they are scheduled together so
+ * need to make the timeout width enough to cover the time
+ * cost waiting for it coming back under RUNTIME only
+ */
+ tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT;
+ }
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
+ long tmo;
if (!ring || !ring->ready)
continue;
- r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
+ /* MM engines need more time */
+ if (ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
+ ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
+ ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+ tmo = tmo_mm;
+ else
+ tmo = tmo_gfx;
+
+ r = amdgpu_ring_test_ib(ring, tmo);
if (r) {
ring->ready = false;
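The ring-type test above could equally live in a small predicate; a hypothetical restatement:

	/* illustrative: MM (multimedia) rings get the longer SR-IOV timeout */
	static bool amdgpu_ring_is_mm(struct amdgpu_ring *ring)
	{
		switch (ring->funcs->type) {
		case AMDGPU_RING_TYPE_UVD:
		case AMDGPU_RING_TYPE_VCE:
		case AMDGPU_RING_TYPE_UVD_ENC:
		case AMDGPU_RING_TYPE_VCN_DEC:
		case AMDGPU_RING_TYPE_VCN_ENC:
			return true;
		default:
			return false;
		}
	}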
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 16884a0b677b..a1c78f90eadf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -40,6 +40,12 @@
*/
static DEFINE_IDA(amdgpu_pasid_ida);
+/* Helper to free pasid from a fence callback */
+struct amdgpu_pasid_cb {
+ struct dma_fence_cb cb;
+ unsigned int pasid;
+};
+
/**
* amdgpu_pasid_alloc - Allocate a PASID
* @bits: Maximum width of the PASID in bits, must be at least 1
@@ -63,6 +69,9 @@ int amdgpu_pasid_alloc(unsigned int bits)
break;
}
+ if (pasid >= 0)
+ trace_amdgpu_pasid_allocated(pasid);
+
return pasid;
}
@@ -72,9 +81,86 @@ int amdgpu_pasid_alloc(unsigned int bits)
*/
void amdgpu_pasid_free(unsigned int pasid)
{
+ trace_amdgpu_pasid_freed(pasid);
ida_simple_remove(&amdgpu_pasid_ida, pasid);
}
+static void amdgpu_pasid_free_cb(struct dma_fence *fence,
+ struct dma_fence_cb *_cb)
+{
+ struct amdgpu_pasid_cb *cb =
+ container_of(_cb, struct amdgpu_pasid_cb, cb);
+
+ amdgpu_pasid_free(cb->pasid);
+ dma_fence_put(fence);
+ kfree(cb);
+}
+
+/**
+ * amdgpu_pasid_free_delayed - free pasid when fences signal
+ *
+ * @resv: reservation object with the fences to wait for
+ * @pasid: pasid to free
+ *
+ * Free the pasid only after all the fences in resv are signaled.
+ */
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+ unsigned int pasid)
+{
+ struct dma_fence *fence, **fences;
+ struct amdgpu_pasid_cb *cb;
+ unsigned count;
+ int r;
+
+ r = reservation_object_get_fences_rcu(resv, NULL, &count, &fences);
+ if (r)
+ goto fallback;
+
+ if (count == 0) {
+ amdgpu_pasid_free(pasid);
+ return;
+ }
+
+ if (count == 1) {
+ fence = fences[0];
+ kfree(fences);
+ } else {
+ uint64_t context = dma_fence_context_alloc(1);
+ struct dma_fence_array *array;
+
+ array = dma_fence_array_create(count, fences, context,
+ 1, false);
+ if (!array) {
+ kfree(fences);
+ goto fallback;
+ }
+ fence = &array->base;
+ }
+
+ cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb) {
+ /* Last resort when we are OOM */
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ amdgpu_pasid_free(pasid);
+ } else {
+ cb->pasid = pasid;
+ if (dma_fence_add_callback(fence, &cb->cb,
+ amdgpu_pasid_free_cb))
+ amdgpu_pasid_free_cb(fence, &cb->cb);
+ }
+
+ return;
+
+fallback:
+ /* Not enough memory for the delayed delete, as last resort
+ * block for all the fences to complete.
+ */
+ reservation_object_wait_timeout_rcu(resv, true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ amdgpu_pasid_free(pasid);
+}
+
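A hedged example of the intended call site; the vm->root path is an assumption about the VM teardown code, which is not part of this hunk:

	/* free the PASID only once every fence on the root PD has signaled */
	amdgpu_pasid_free_delayed(vm->root.base.bo->tbo.resv, vm->pasid);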
/*
* VMID manager
*
@@ -96,164 +182,185 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
atomic_read(&adev->gpu_reset_counter);
}
-/* idr_mgr->lock must be held */
-static int amdgpu_vmid_grab_reserved_locked(struct amdgpu_vm *vm,
- struct amdgpu_ring *ring,
- struct amdgpu_sync *sync,
- struct dma_fence *fence,
- struct amdgpu_job *job)
-{
- struct amdgpu_device *adev = ring->adev;
- unsigned vmhub = ring->funcs->vmhub;
- uint64_t fence_context = adev->fence_context + ring->idx;
- struct amdgpu_vmid *id = vm->reserved_vmid[vmhub];
- struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- struct dma_fence *updates = sync->last_vm_update;
- int r = 0;
- struct dma_fence *flushed, *tmp;
- bool needs_flush = vm->use_cpu_for_update;
-
- flushed = id->flushed_updates;
- if ((amdgpu_vmid_had_gpu_reset(adev, id)) ||
- (atomic64_read(&id->owner) != vm->entity.fence_context) ||
- (job->vm_pd_addr != id->pd_gpu_addr) ||
- (updates && (!flushed || updates->context != flushed->context ||
- dma_fence_is_later(updates, flushed))) ||
- (!id->last_flush || (id->last_flush->context != fence_context &&
- !dma_fence_is_signaled(id->last_flush)))) {
- needs_flush = true;
- /* to prevent one context starved by another context */
- id->pd_gpu_addr = 0;
- tmp = amdgpu_sync_peek_fence(&id->active, ring);
- if (tmp) {
- r = amdgpu_sync_fence(adev, sync, tmp, false);
- return r;
- }
- }
-
- /* Good we can use this VMID. Remember this submission as
- * user of the VMID.
- */
- r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
- if (r)
- goto out;
-
- if (updates && (!flushed || updates->context != flushed->context ||
- dma_fence_is_later(updates, flushed))) {
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
- }
- id->pd_gpu_addr = job->vm_pd_addr;
- atomic64_set(&id->owner, vm->entity.fence_context);
- job->vm_needs_flush = needs_flush;
- if (needs_flush) {
- dma_fence_put(id->last_flush);
- id->last_flush = NULL;
- }
- job->vmid = id - id_mgr->ids;
- trace_amdgpu_vm_grab_id(vm, ring, job);
-out:
- return r;
-}
-
/**
- * amdgpu_vm_grab_id - allocate the next free VMID
+ * amdgpu_vmid_grab_idle - grab idle VMID
*
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
+ * @idle: resulting idle VMID
*
- * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ * Try to find an idle VMID; if none is idle, add a fence to wait on to the
+ * sync object. Returns -ENOMEM when we are out of memory.
*/
-int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct dma_fence *fence,
- struct amdgpu_job *job)
+static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync,
+ struct amdgpu_vmid **idle)
{
struct amdgpu_device *adev = ring->adev;
unsigned vmhub = ring->funcs->vmhub;
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
- uint64_t fence_context = adev->fence_context + ring->idx;
- struct dma_fence *updates = sync->last_vm_update;
- struct amdgpu_vmid *id, *idle;
struct dma_fence **fences;
unsigned i;
- int r = 0;
+ int r;
+
+ if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait))
+ return amdgpu_sync_fence(adev, sync, ring->vmid_wait, false);
- mutex_lock(&id_mgr->lock);
- if (vm->reserved_vmid[vmhub]) {
- r = amdgpu_vmid_grab_reserved_locked(vm, ring, sync, fence, job);
- mutex_unlock(&id_mgr->lock);
- return r;
- }
fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL);
- if (!fences) {
- mutex_unlock(&id_mgr->lock);
+ if (!fences)
return -ENOMEM;
- }
+
/* Check if we have an idle VMID */
i = 0;
- list_for_each_entry(idle, &id_mgr->ids_lru, list) {
- fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+ list_for_each_entry((*idle), &id_mgr->ids_lru, list) {
+ fences[i] = amdgpu_sync_peek_fence(&(*idle)->active, ring);
if (!fences[i])
break;
++i;
}
/* If we can't find an idle VMID to use, wait till one becomes available */
- if (&idle->list == &id_mgr->ids_lru) {
+ if (&(*idle)->list == &id_mgr->ids_lru) {
u64 fence_context = adev->vm_manager.fence_context + ring->idx;
unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
struct dma_fence_array *array;
unsigned j;
+ *idle = NULL;
for (j = 0; j < i; ++j)
dma_fence_get(fences[j]);
array = dma_fence_array_create(i, fences, fence_context,
- seqno, true);
+ seqno, true);
if (!array) {
for (j = 0; j < i; ++j)
dma_fence_put(fences[j]);
kfree(fences);
- r = -ENOMEM;
- goto error;
+ return -ENOMEM;
}
+ r = amdgpu_sync_fence(adev, sync, &array->base, false);
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = &array->base;
+ return r;
+ }
+ kfree(fences);
- r = amdgpu_sync_fence(ring->adev, sync, &array->base, false);
- dma_fence_put(&array->base);
- if (r)
- goto error;
+ return 0;
+}
- mutex_unlock(&id_mgr->lock);
- return 0;
+/**
+ * amdgpu_vmid_grab_reserved - try to assign reserved VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ *
+ * Try to assign a reserved VMID.
+ */
+static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync,
+ struct dma_fence *fence,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->funcs->vmhub;
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ struct dma_fence *updates = sync->last_vm_update;
+ bool needs_flush = vm->use_cpu_for_update;
+ int r = 0;
+
+ *id = vm->reserved_vmid[vmhub];
+ if (updates && (*id)->flushed_updates &&
+ updates->context == (*id)->flushed_updates->context &&
+ !dma_fence_is_later(updates, (*id)->flushed_updates))
+ updates = NULL;
+
+ if ((*id)->owner != vm->entity.fence_context ||
+ job->vm_pd_addr != (*id)->pd_gpu_addr ||
+ updates || !(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush))) {
+ struct dma_fence *tmp;
+ /* to prevent one context starved by another context */
+ (*id)->pd_gpu_addr = 0;
+ tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
+ if (tmp) {
+ *id = NULL;
+ r = amdgpu_sync_fence(adev, sync, tmp, false);
+ return r;
+ }
+ needs_flush = true;
}
- kfree(fences);
+
+ /* Good we can use this VMID. Remember this submission as
+ * user of the VMID.
+ */
+ r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
+ if (r)
+ return r;
+
+ if (updates) {
+ dma_fence_put((*id)->flushed_updates);
+ (*id)->flushed_updates = dma_fence_get(updates);
+ }
+ job->vm_needs_flush = needs_flush;
+ return 0;
+}
+
+/**
+ * amdgpu_vmid_grab_used - try to reuse a VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ * @id: resulting VMID
+ *
+ * Try to reuse a VMID for this submission.
+ */
+static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
+ struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync,
+ struct dma_fence *fence,
+ struct amdgpu_job *job,
+ struct amdgpu_vmid **id)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->funcs->vmhub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ uint64_t fence_context = adev->fence_context + ring->idx;
+ struct dma_fence *updates = sync->last_vm_update;
+ int r;
job->vm_needs_flush = vm->use_cpu_for_update;
+
/* Check if we can use a VMID already assigned to this VM */
- list_for_each_entry_reverse(id, &id_mgr->ids_lru, list) {
- struct dma_fence *flushed;
+ list_for_each_entry_reverse((*id), &id_mgr->ids_lru, list) {
bool needs_flush = vm->use_cpu_for_update;
+ struct dma_fence *flushed;
/* Check all the prerequisites to using this VMID */
- if (amdgpu_vmid_had_gpu_reset(adev, id))
- continue;
-
- if (atomic64_read(&id->owner) != vm->entity.fence_context)
+ if ((*id)->owner != vm->entity.fence_context)
continue;
- if (job->vm_pd_addr != id->pd_gpu_addr)
+ if ((*id)->pd_gpu_addr != job->vm_pd_addr)
continue;
- if (!id->last_flush ||
- (id->last_flush->context != fence_context &&
- !dma_fence_is_signaled(id->last_flush)))
+ if (!(*id)->last_flush ||
+ ((*id)->last_flush->context != fence_context &&
+ !dma_fence_is_signaled((*id)->last_flush)))
needs_flush = true;
- flushed = id->flushed_updates;
+ flushed = (*id)->flushed_updates;
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
@@ -261,47 +368,91 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
if (adev->asic_type < CHIP_VEGA10 && needs_flush)
continue;
- /* Good we can use this VMID. Remember this submission as
+ /* Good, we can use this VMID. Remember this submission as
* user of the VMID.
*/
- r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
+ r = amdgpu_sync_fence(ring->adev, &(*id)->active, fence, false);
if (r)
- goto error;
+ return r;
if (updates && (!flushed || dma_fence_is_later(updates, flushed))) {
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
+ dma_fence_put((*id)->flushed_updates);
+ (*id)->flushed_updates = dma_fence_get(updates);
}
- if (needs_flush)
- goto needs_flush;
- else
- goto no_flush_needed;
+ job->vm_needs_flush |= needs_flush;
+ return 0;
+ }
- };
+ *id = NULL;
+ return 0;
+}
- /* Still no ID to use? Then use the idle one found earlier */
- id = idle;
+/**
+ * amdgpu_vmid_grab - allocate the next free VMID
+ *
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
+ * @job: job who wants to use the VMID
+ *
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
+ */
+int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+ struct amdgpu_sync *sync, struct dma_fence *fence,
+ struct amdgpu_job *job)
+{
+ struct amdgpu_device *adev = ring->adev;
+ unsigned vmhub = ring->funcs->vmhub;
+ struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
+ struct amdgpu_vmid *idle = NULL;
+ struct amdgpu_vmid *id = NULL;
+ int r = 0;
- /* Remember this submission as user of the VMID */
- r = amdgpu_sync_fence(ring->adev, &id->active, fence, false);
- if (r)
+ mutex_lock(&id_mgr->lock);
+ r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+ if (r || !idle)
goto error;
- id->pd_gpu_addr = job->vm_pd_addr;
- dma_fence_put(id->flushed_updates);
- id->flushed_updates = dma_fence_get(updates);
- atomic64_set(&id->owner, vm->entity.fence_context);
+ if (vm->reserved_vmid[vmhub]) {
+ r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+ if (r || !id)
+ goto error;
+ } else {
+ r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+ if (r)
+ goto error;
-needs_flush:
- job->vm_needs_flush = true;
- dma_fence_put(id->last_flush);
- id->last_flush = NULL;
+ if (!id) {
+ struct dma_fence *updates = sync->last_vm_update;
-no_flush_needed:
- list_move_tail(&id->list, &id_mgr->ids_lru);
+ /* Still no ID to use? Then use the idle one found earlier */
+ id = idle;
+ /* Remember this submission as user of the VMID */
+ r = amdgpu_sync_fence(ring->adev, &id->active,
+ fence, false);
+ if (r)
+ goto error;
+
+ dma_fence_put(id->flushed_updates);
+ id->flushed_updates = dma_fence_get(updates);
+ job->vm_needs_flush = true;
+ }
+
+ list_move_tail(&id->list, &id_mgr->ids_lru);
+ }
+
+ id->pd_gpu_addr = job->vm_pd_addr;
+ id->owner = vm->entity.fence_context;
+
+ if (job->vm_needs_flush) {
+ dma_fence_put(id->last_flush);
+ id->last_flush = NULL;
+ }
job->vmid = id - id_mgr->ids;
+ job->pasid = vm->pasid;
trace_amdgpu_vm_grab_id(vm, ring, job);
error:
@@ -370,13 +521,15 @@ void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
struct amdgpu_vmid *id = &id_mgr->ids[vmid];
- atomic64_set(&id->owner, 0);
+ mutex_lock(&id_mgr->lock);
+ id->owner = 0;
id->gds_base = 0;
id->gds_size = 0;
id->gws_base = 0;
id->gws_size = 0;
id->oa_base = 0;
id->oa_size = 0;
+ mutex_unlock(&id_mgr->lock);
}
/**
@@ -454,6 +607,7 @@ void amdgpu_vmid_mgr_fini(struct amdgpu_device *adev)
amdgpu_sync_free(&id->active);
dma_fence_put(id->flushed_updates);
dma_fence_put(id->last_flush);
+ dma_fence_put(id->pasid_mapping);
}
}
}
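In outline, the refactored grab path now reads (illustrative summary, not literal code):

	/*
	 * amdgpu_vmid_grab():
	 *   1. amdgpu_vmid_grab_idle()     - find an idle VMID, or sync on one
	 *   2. amdgpu_vmid_grab_reserved() - reuse the per-VM reserved VMID, or
	 *      amdgpu_vmid_grab_used()     - reuse a VMID this VM already owns
	 *   3. otherwise take the idle VMID found in step 1 and mark the job
	 *      as needing a VM flush
	 */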
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index ad931fa570b3..7625419f0fc2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -43,7 +43,7 @@ struct amdgpu_vmid {
struct list_head list;
struct amdgpu_sync active;
struct dma_fence *last_flush;
- atomic64_t owner;
+ uint64_t owner;
uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
@@ -57,6 +57,9 @@ struct amdgpu_vmid {
uint32_t gws_size;
uint32_t oa_base;
uint32_t oa_size;
+
+ unsigned pasid;
+ struct dma_fence *pasid_mapping;
};
struct amdgpu_vmid_mgr {
@@ -69,6 +72,8 @@ struct amdgpu_vmid_mgr {
int amdgpu_pasid_alloc(unsigned int bits);
void amdgpu_pasid_free(unsigned int pasid);
+void amdgpu_pasid_free_delayed(struct reservation_object *resv,
+ unsigned int pasid);
bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
struct amdgpu_vmid *id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 29cf10927a92..0e01f115bbe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -25,51 +25,12 @@
#define __AMDGPU_IH_H__
#include <linux/chash.h>
+#include "soc15_ih_clientid.h"
struct amdgpu_device;
- /*
- * vega10+ IH clients
- */
-enum amdgpu_ih_clientid
-{
- AMDGPU_IH_CLIENTID_IH = 0x00,
- AMDGPU_IH_CLIENTID_ACP = 0x01,
- AMDGPU_IH_CLIENTID_ATHUB = 0x02,
- AMDGPU_IH_CLIENTID_BIF = 0x03,
- AMDGPU_IH_CLIENTID_DCE = 0x04,
- AMDGPU_IH_CLIENTID_ISP = 0x05,
- AMDGPU_IH_CLIENTID_PCIE0 = 0x06,
- AMDGPU_IH_CLIENTID_RLC = 0x07,
- AMDGPU_IH_CLIENTID_SDMA0 = 0x08,
- AMDGPU_IH_CLIENTID_SDMA1 = 0x09,
- AMDGPU_IH_CLIENTID_SE0SH = 0x0a,
- AMDGPU_IH_CLIENTID_SE1SH = 0x0b,
- AMDGPU_IH_CLIENTID_SE2SH = 0x0c,
- AMDGPU_IH_CLIENTID_SE3SH = 0x0d,
- AMDGPU_IH_CLIENTID_SYSHUB = 0x0e,
- AMDGPU_IH_CLIENTID_THM = 0x0f,
- AMDGPU_IH_CLIENTID_UVD = 0x10,
- AMDGPU_IH_CLIENTID_VCE0 = 0x11,
- AMDGPU_IH_CLIENTID_VMC = 0x12,
- AMDGPU_IH_CLIENTID_XDMA = 0x13,
- AMDGPU_IH_CLIENTID_GRBM_CP = 0x14,
- AMDGPU_IH_CLIENTID_ATS = 0x15,
- AMDGPU_IH_CLIENTID_ROM_SMUIO = 0x16,
- AMDGPU_IH_CLIENTID_DF = 0x17,
- AMDGPU_IH_CLIENTID_VCE1 = 0x18,
- AMDGPU_IH_CLIENTID_PWR = 0x19,
- AMDGPU_IH_CLIENTID_UTCL2 = 0x1b,
- AMDGPU_IH_CLIENTID_EA = 0x1c,
- AMDGPU_IH_CLIENTID_UTCL2LOG = 0x1d,
- AMDGPU_IH_CLIENTID_MP0 = 0x1e,
- AMDGPU_IH_CLIENTID_MP1 = 0x1f,
-
- AMDGPU_IH_CLIENTID_MAX,
-
- AMDGPU_IH_CLIENTID_VCN = AMDGPU_IH_CLIENTID_UVD
-};
#define AMDGPU_IH_CLIENTID_LEGACY 0
+#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX
#define AMDGPU_PAGEFAULT_HASH_BITS 8
struct amdgpu_retryfault_hashtable {
@@ -109,7 +70,7 @@ struct amdgpu_iv_entry {
unsigned vmid_src;
uint64_t timestamp;
unsigned timestamp_src;
- unsigned pas_id;
+ unsigned pasid;
unsigned pasid_src;
unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW];
const uint32_t *iv_entry;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 56bcd59c3399..3a5ca462abf0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -92,7 +92,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
}
/* Disable *all* interrupts */
-static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
+void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
unsigned i, j, k;
@@ -123,55 +123,6 @@ static void amdgpu_irq_disable_all(struct amdgpu_device *adev)
}
/**
- * amdgpu_irq_preinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Gets the hw ready to enable irqs (all asics).
- * This function disables all interrupt sources on the GPU.
- */
-void amdgpu_irq_preinstall(struct drm_device *dev)
-{
- struct amdgpu_device *adev = dev->dev_private;
-
- /* Disable *all* interrupts */
- amdgpu_irq_disable_all(adev);
- /* Clear bits */
- amdgpu_ih_process(adev);
-}
-
-/**
- * amdgpu_irq_postinstall - drm irq preinstall callback
- *
- * @dev: drm dev pointer
- *
- * Handles stuff to be done after enabling irqs (all asics).
- * Returns 0 on success.
- */
-int amdgpu_irq_postinstall(struct drm_device *dev)
-{
- dev->max_vblank_count = 0x00ffffff;
- return 0;
-}
-
-/**
- * amdgpu_irq_uninstall - drm irq uninstall callback
- *
- * @dev: drm dev pointer
- *
- * This function disables all interrupt sources on the GPU (all asics).
- */
-void amdgpu_irq_uninstall(struct drm_device *dev)
-{
- struct amdgpu_device *adev = dev->dev_private;
-
- if (adev == NULL) {
- return;
- }
- amdgpu_irq_disable_all(adev);
-}
-
-/**
* amdgpu_irq_handler - irq handler
*
* @int irq, void *arg: args
@@ -257,10 +208,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) {
adev->irq.installed = false;
- flush_work(&adev->hotplug_work);
+ if (!amdgpu_device_has_dc_support(adev))
+ flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
return r;
}
+ adev->ddev->max_vblank_count = 0x00ffffff;
DRM_DEBUG("amdgpu: irq initialized.\n");
return 0;
@@ -282,7 +235,8 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
adev->irq.installed = false;
if (adev->irq.msi_enabled)
pci_disable_msi(adev->pdev);
- flush_work(&adev->hotplug_work);
+ if (!amdgpu_device_has_dc_support(adev))
+ flush_work(&adev->hotplug_work);
cancel_work_sync(&adev->reset_work);
}
@@ -305,6 +259,7 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
}
}
kfree(adev->irq.client[i].sources);
+ adev->irq.client[i].sources = NULL;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index 0610cc4a9788..3375ad778edc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -78,9 +78,7 @@ struct amdgpu_irq {
uint32_t srbm_soft_reset;
};
-void amdgpu_irq_preinstall(struct drm_device *dev);
-int amdgpu_irq_postinstall(struct drm_device *dev);
-void amdgpu_irq_uninstall(struct drm_device *dev);
+void amdgpu_irq_disable_all(struct amdgpu_device *adev);
irqreturn_t amdgpu_irq_handler(int irq, void *arg);
int amdgpu_irq_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index bd6e9a40f421..4b7824d30e73 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -190,8 +190,12 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
fw_info->ver = adev->uvd.fw_version;
fw_info->feature = 0;
break;
+ case AMDGPU_INFO_FW_VCN:
+ fw_info->ver = adev->vcn.fw_version;
+ fw_info->feature = 0;
+ break;
case AMDGPU_INFO_FW_GMC:
- fw_info->ver = adev->mc.fw_version;
+ fw_info->ver = adev->gmc.fw_version;
fw_info->feature = 0;
break;
case AMDGPU_INFO_FW_GFX_ME:
@@ -470,9 +474,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_INFO_VRAM_GTT: {
struct drm_amdgpu_info_vram_gtt vram_gtt;
- vram_gtt.vram_size = adev->mc.real_vram_size;
+ vram_gtt.vram_size = adev->gmc.real_vram_size;
vram_gtt.vram_size -= adev->vram_pin_size;
- vram_gtt.vram_cpu_accessible_size = adev->mc.visible_vram_size;
+ vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
@@ -484,17 +488,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_memory_info mem;
memset(&mem, 0, sizeof(mem));
- mem.vram.total_heap_size = adev->mc.real_vram_size;
+ mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size =
- adev->mc.real_vram_size - adev->vram_pin_size;
+ adev->gmc.real_vram_size - adev->vram_pin_size;
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
- adev->mc.visible_vram_size;
+ adev->gmc.visible_vram_size;
mem.cpu_accessible_vram.usable_heap_size =
- adev->mc.visible_vram_size -
+ adev->gmc.visible_vram_size -
(adev->vram_pin_size - adev->invisible_pin_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@@ -580,11 +584,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
+ vm_size -= AMDGPU_VA_RESERVED_SIZE;
+
+ /* Older VCE FW versions are buggy and can handle only 40 bits */
+ if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45)
+ vm_size = min(vm_size, 1ULL << 40);
+
dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE;
dev_info.virtual_address_max =
min(vm_size, AMDGPU_VA_HOLE_START);
- vm_size -= AMDGPU_VA_RESERVED_SIZE;
if (vm_size > AMDGPU_VA_HOLE_START) {
dev_info.high_va_offset = AMDGPU_VA_HOLE_END;
dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size;
@@ -599,8 +608,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
sizeof(adev->gfx.cu_info.ao_cu_bitmap));
memcpy(&dev_info.cu_bitmap[0], &adev->gfx.cu_info.bitmap[0],
sizeof(adev->gfx.cu_info.bitmap));
- dev_info.vram_type = adev->mc.vram_type;
- dev_info.vram_bit_width = adev->mc.vram_width;
+ dev_info.vram_type = adev->gmc.vram_type;
+ dev_info.vram_bit_width = adev->gmc.vram_width;
dev_info.vce_harvest_config = adev->vce.harvest_config;
dev_info.gc_double_offchip_lds_buf =
adev->gfx.config.double_offchip_lds_buf;
@@ -758,6 +767,24 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
return -EINVAL;
}
break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_SCLK:
+ /* get stable pstate sclk in MHz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
+ case AMDGPU_INFO_SENSOR_STABLE_PSTATE_GFX_MCLK:
+ /* get stable pstate mclk in MHz */
+ if (amdgpu_dpm_read_sensor(adev,
+ AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK,
+ (void *)&ui32, &ui32_size)) {
+ return -EINVAL;
+ }
+ ui32 /= 100;
+ break;
default:
DRM_DEBUG_KMS("Invalid request %d\n",
info->sensor_info.type);
@@ -805,7 +832,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv;
- int r;
+ int r, pasid;
file_priv->driver_priv = NULL;
@@ -819,28 +846,25 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto out_suspend;
}
- r = amdgpu_vm_init(adev, &fpriv->vm,
- AMDGPU_VM_CONTEXT_GFX, 0);
- if (r) {
- kfree(fpriv);
- goto out_suspend;
+ pasid = amdgpu_pasid_alloc(16);
+ if (pasid < 0) {
+ dev_warn(adev->dev, "No more PASIDs available!\n");
+ pasid = 0;
}
+ r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+ if (r)
+ goto error_pasid;
fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
if (!fpriv->prt_va) {
r = -ENOMEM;
- amdgpu_vm_fini(adev, &fpriv->vm);
- kfree(fpriv);
- goto out_suspend;
+ goto error_vm;
}
if (amdgpu_sriov_vf(adev)) {
r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va);
- if (r) {
- amdgpu_vm_fini(adev, &fpriv->vm);
- kfree(fpriv);
- goto out_suspend;
- }
+ if (r)
+ goto error_vm;
}
mutex_init(&fpriv->bo_list_lock);
@@ -849,6 +873,16 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
file_priv->driver_priv = fpriv;
+ goto out_suspend;
+
+error_vm:
+ amdgpu_vm_fini(adev, &fpriv->vm);
+
+error_pasid:
+ if (pasid)
+ amdgpu_pasid_free(pasid);
+
+ kfree(fpriv);
out_suspend:
pm_runtime_mark_last_busy(dev->dev);
@@ -871,6 +905,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_bo_list *list;
+ struct amdgpu_bo *pd;
+ unsigned int pasid;
int handle;
if (!fpriv)
@@ -895,7 +931,13 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_bo_unreserve(adev->virt.csa_obj);
}
+ pasid = fpriv->vm.pasid;
+ pd = amdgpu_bo_ref(fpriv->vm.root.base.bo);
+
amdgpu_vm_fini(adev, &fpriv->vm);
+ if (pasid)
+ amdgpu_pasid_free_delayed(pd->tbo.resv, pasid);
+ amdgpu_bo_unref(&pd);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
amdgpu_bo_list_free(list);
@@ -947,11 +989,11 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
*/
do {
count = amdgpu_display_vblank_get_counter(adev, pipe);
- /* Ask amdgpu_get_crtc_scanoutpos to return vpos as
- * distance to start of vblank, instead of regular
- * vertical scanout pos.
+ /* Ask amdgpu_display_get_crtc_scanoutpos to return
+ * vpos as distance to start of vblank, instead of
+ * regular vertical scanout pos.
*/
- stat = amdgpu_get_crtc_scanoutpos(
+ stat = amdgpu_display_get_crtc_scanoutpos(
dev, pipe, GET_DISTANCE_TO_VBLANKSTART,
&vpos, &hpos, NULL, NULL,
&adev->mode_info.crtcs[pipe]->base.hwmode);
@@ -992,7 +1034,7 @@ u32 amdgpu_get_vblank_counter_kms(struct drm_device *dev, unsigned int pipe)
int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
{
struct amdgpu_device *adev = dev->dev_private;
- int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
return amdgpu_irq_get(adev, &adev->crtc_irq, idx);
}
@@ -1008,7 +1050,7 @@ int amdgpu_enable_vblank_kms(struct drm_device *dev, unsigned int pipe)
void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe)
{
struct amdgpu_device *adev = dev->dev_private;
- int idx = amdgpu_crtc_idx_to_irq_type(adev, pipe);
+ int idx = amdgpu_display_crtc_idx_to_irq_type(adev, pipe);
amdgpu_irq_put(adev, &adev->crtc_irq, idx);
}
@@ -1160,6 +1202,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
i, fw_info.feature, fw_info.ver);
}
+ /* VCN */
+ query_fw.fw_type = AMDGPU_INFO_FW_VCN;
+ ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+ if (ret)
+ return ret;
+ seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n",
+ fw_info.feature, fw_info.ver);
+
return 0;
}
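The firmware-info path above gains an AMDGPU_INFO_FW_VCN query. A hypothetical
user-space probe for it through the existing drm_amdgpu_info UAPI; the render
node path is an assumption:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/amdgpu_drm.h>

int main(void)
{
	struct drm_amdgpu_info_firmware fw;
	struct drm_amdgpu_info request;
	int fd = open("/dev/dri/renderD128", O_RDWR);

	if (fd < 0)
		return 1;

	memset(&fw, 0, sizeof(fw));
	memset(&request, 0, sizeof(request));
	request.return_pointer = (uintptr_t)&fw;	/* kernel writes here */
	request.return_size = sizeof(fw);
	request.query = AMDGPU_INFO_FW_VERSION;
	request.query_fw.fw_type = AMDGPU_INFO_FW_VCN;

	if (ioctl(fd, DRM_IOCTL_AMDGPU_INFO, &request) == 0)
		printf("VCN firmware 0x%08x, feature %u\n", fw.ver, fw.feature);

	close(fd);
	return 0;
}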
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 54f06c959340..d6416ee52e32 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -267,8 +267,6 @@ struct amdgpu_display_funcs {
void (*bandwidth_update)(struct amdgpu_device *adev);
/* get frame count */
u32 (*vblank_get_counter)(struct amdgpu_device *adev, int crtc);
- /* wait for vblank */
- void (*vblank_wait)(struct amdgpu_device *adev, int crtc);
/* set backlight level */
void (*backlight_set_level)(struct amdgpu_encoder *amdgpu_encoder,
u8 level);
@@ -352,6 +350,7 @@ struct amdgpu_mode_info {
u16 firmware_flags;
/* pointer to backlight encoder */
struct amdgpu_encoder *bl_encoder;
+ u8 bl_level; /* saved backlight level */
struct amdgpu_audio audio; /* audio stuff */
int num_crtc; /* number of crtcs */
int num_hpd; /* number of hpd pins */
@@ -552,14 +551,6 @@ struct amdgpu_connector {
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
struct edid *edid;
- /* number of modes generated from EDID at 'dc_sink' */
- int num_modes;
- /* The 'old' sink - before an HPD.
- * The 'current' sink is in dc_link->sink. */
- struct dc_sink *dc_sink;
- struct dc_link *dc_link;
- struct dc_sink *dc_em_sink;
- const struct dc_stream *stream;
void *con_priv;
bool dac_load_detect;
bool detected_by_load; /* if the connection status was determined by load */
@@ -570,27 +561,6 @@ struct amdgpu_connector {
enum amdgpu_connector_audio audio;
enum amdgpu_connector_dither dither;
unsigned pixelclock_for_modeset;
-
- struct drm_dp_mst_topology_mgr mst_mgr;
- struct amdgpu_dm_dp_aux dm_dp_aux;
- struct drm_dp_mst_port *port;
- struct amdgpu_connector *mst_port;
- struct amdgpu_encoder *mst_encoder;
- struct semaphore mst_sem;
-
- /* TODO see if we can merge with ddc_bus or make a dm_connector */
- struct amdgpu_i2c_adapter *i2c;
-
- /* Monitor range limits */
- int min_vfreq ;
- int max_vfreq ;
- int pixel_clock_mhz;
-
- /*freesync caps*/
- struct mod_freesync_caps caps;
-
- struct mutex hpd_lock;
-
};
/* TODO: start to use this struct and remove same field from base one */
@@ -608,7 +578,7 @@ struct amdgpu_mst_connector {
#define ENCODER_MODE_IS_DP(em) (((em) == ATOM_ENCODER_MODE_DP) || \
((em) == ATOM_ENCODER_MODE_DP_MST))
-/* Driver internal use only flags of amdgpu_get_crtc_scanoutpos() */
+/* Driver internal use only flags of amdgpu_display_get_crtc_scanoutpos() */
#define DRM_SCANOUTPOS_VALID (1 << 0)
#define DRM_SCANOUTPOS_IN_VBLANK (1 << 1)
#define DRM_SCANOUTPOS_ACCURATE (1 << 2)
@@ -627,30 +597,31 @@ bool amdgpu_dig_monitor_is_duallink(struct drm_encoder *encoder,
u16 amdgpu_encoder_get_dp_bridge_encoder_id(struct drm_encoder *encoder);
struct drm_encoder *amdgpu_get_external_encoder(struct drm_encoder *encoder);
-bool amdgpu_ddc_probe(struct amdgpu_connector *amdgpu_connector, bool use_aux);
+bool amdgpu_display_ddc_probe(struct amdgpu_connector *amdgpu_connector,
+ bool use_aux);
void amdgpu_encoder_set_active_device(struct drm_encoder *encoder);
-int amdgpu_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe,
- unsigned int flags, int *vpos, int *hpos,
- ktime_t *stime, ktime_t *etime,
- const struct drm_display_mode *mode);
+int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev,
+ unsigned int pipe, unsigned int flags, int *vpos,
+ int *hpos, ktime_t *stime, ktime_t *etime,
+ const struct drm_display_mode *mode);
-int amdgpu_framebuffer_init(struct drm_device *dev,
- struct amdgpu_framebuffer *rfb,
- const struct drm_mode_fb_cmd2 *mode_cmd,
- struct drm_gem_object *obj);
+int amdgpu_display_framebuffer_init(struct drm_device *dev,
+ struct amdgpu_framebuffer *rfb,
+ const struct drm_mode_fb_cmd2 *mode_cmd,
+ struct drm_gem_object *obj);
int amdgpufb_remove(struct drm_device *dev, struct drm_framebuffer *fb);
void amdgpu_enc_destroy(struct drm_encoder *encoder);
void amdgpu_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj);
-bool amdgpu_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode);
+bool amdgpu_display_crtc_scaling_mode_fixup(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode,
+ struct drm_display_mode *adjusted_mode);
void amdgpu_panel_mode_fixup(struct drm_encoder *encoder,
struct drm_display_mode *adjusted_mode);
-int amdgpu_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
+int amdgpu_display_crtc_idx_to_irq_type(struct amdgpu_device *adev, int crtc);
/* fbdev layer */
int amdgpu_fbdev_init(struct amdgpu_device *adev);
@@ -662,15 +633,15 @@ bool amdgpu_fbdev_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
int amdgpu_align_pitch(struct amdgpu_device *adev, int width, int bpp, bool tiled);
/* amdgpu_display.c */
-void amdgpu_print_display_setup(struct drm_device *dev);
-int amdgpu_modeset_create_props(struct amdgpu_device *adev);
-int amdgpu_crtc_set_config(struct drm_mode_set *set,
- struct drm_modeset_acquire_ctx *ctx);
-int amdgpu_crtc_page_flip_target(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- struct drm_pending_vblank_event *event,
- uint32_t page_flip_flags, uint32_t target,
- struct drm_modeset_acquire_ctx *ctx);
+void amdgpu_display_print_display_setup(struct drm_device *dev);
+int amdgpu_display_modeset_create_props(struct amdgpu_device *adev);
+int amdgpu_display_crtc_set_config(struct drm_mode_set *set,
+ struct drm_modeset_acquire_ctx *ctx);
+int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t page_flip_flags, uint32_t target,
+ struct drm_modeset_acquire_ctx *ctx);
extern const struct drm_mode_config_funcs amdgpu_mode_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5c4c3e0d527b..fac4b6067efd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -36,6 +36,7 @@
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
static bool amdgpu_need_backup(struct amdgpu_device *adev)
{
@@ -54,8 +55,13 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
+ if (bo->kfd_bo)
+ amdgpu_amdkfd_unreserve_system_memory_limit(bo);
+
amdgpu_bo_kunmap(bo);
+ if (bo->gem_base.import_attach)
+ drm_prime_gem_destroy(&bo->gem_base, bo->tbo.sg);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
if (!list_empty(&bo->shadow_list)) {
@@ -83,7 +89,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
u32 c = 0;
if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
- unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+ unsigned visible_pfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
places[c].fpfn = 0;
places[c].lpfn = 0;
@@ -103,7 +109,7 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
if (domain & AMDGPU_GEM_DOMAIN_GTT) {
places[c].fpfn = 0;
if (flags & AMDGPU_GEM_CREATE_SHADOW)
- places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+ places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
else
places[c].lpfn = 0;
places[c].flags = TTM_PL_FLAG_TT;
@@ -169,13 +175,15 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
* @size: size for the new BO
* @align: alignment for the new BO
* @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr: used to initialize BOs in structures
* @gpu_addr: GPU addr of the pinned BO
* @cpu_addr: optional CPU address mapping
*
* Allocates and pins a BO for kernel internal use, and returns it still
* reserved.
*
+ * Note: a new BO is only created if *bo_ptr points to NULL.
+ *
* Returns 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
@@ -187,10 +195,10 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
int r;
if (!*bo_ptr) {
- r = amdgpu_bo_create(adev, size, align, true, domain,
+ r = amdgpu_bo_create(adev, size, align, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, bo_ptr);
+ ttm_bo_type_kernel, NULL, bo_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate kernel bo\n",
r);
@@ -238,12 +246,14 @@ error_free:
* @size: size for the new BO
* @align: alignment for the new BO
* @domain: where to place it
- * @bo_ptr: resulting BO
+ * @bo_ptr: used to initialize BOs in structures
* @gpu_addr: GPU addr of the pinned BO
* @cpu_addr: optional CPU address mapping
*
* Allocates and pins a BO for kernel internal use.
*
+ * Note: a new BO is only created if *bo_ptr points to NULL.
+ *
* Returns 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
@@ -331,24 +341,22 @@ fail:
return false;
}
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
- unsigned long size, int byte_align,
- bool kernel, u32 domain, u64 flags,
- struct sg_table *sg,
+static int amdgpu_bo_do_create(struct amdgpu_device *adev, unsigned long size,
+ int byte_align, u32 domain,
+ u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
- uint64_t init_value,
struct amdgpu_bo **bo_ptr)
{
struct ttm_operation_ctx ctx = {
- .interruptible = !kernel,
+ .interruptible = (type != ttm_bo_type_kernel),
.no_wait_gpu = false,
- .allow_reserved_eviction = true,
- .resv = resv
+ .resv = resv,
+ .flags = TTM_OPT_FLAG_ALLOW_RES_EVICT
};
struct amdgpu_bo *bo;
- enum ttm_bo_type type;
unsigned long page_align;
size_t acc_size;
+ u32 domains;
int r;
page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
@@ -357,13 +365,6 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
if (!amdgpu_bo_validate_size(adev, size, domain))
return -ENOMEM;
- if (kernel) {
- type = ttm_bo_type_kernel;
- } else if (sg) {
- type = ttm_bo_type_sg;
- } else {
- type = ttm_bo_type_device;
- }
*bo_ptr = NULL;
acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
@@ -372,11 +373,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
bo = kzalloc(sizeof(struct amdgpu_bo), GFP_KERNEL);
if (bo == NULL)
return -ENOMEM;
- r = drm_gem_object_init(adev->ddev, &bo->gem_base, size);
- if (unlikely(r)) {
- kfree(bo);
- return r;
- }
+ drm_gem_private_object_init(adev->ddev, &bo->gem_base, size);
INIT_LIST_HEAD(&bo->shadow_list);
INIT_LIST_HEAD(&bo->va);
bo->preferred_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
@@ -386,7 +383,8 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA);
bo->allowed_domains = bo->preferred_domains;
- if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ if (type != ttm_bo_type_kernel &&
+ bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = flags;
@@ -420,30 +418,41 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
#endif
bo->tbo.bdev = &adev->mman.bdev;
- amdgpu_ttm_placement_from_domain(bo, domain);
-
+ domains = bo->preferred_domains;
+retry:
+ amdgpu_ttm_placement_from_domain(bo, domains);
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, type,
- &bo->placement, page_align, &ctx, NULL,
- acc_size, sg, resv, &amdgpu_ttm_bo_destroy);
- if (unlikely(r != 0))
+ &bo->placement, page_align, &ctx, acc_size,
+ NULL, resv, &amdgpu_ttm_bo_destroy);
+
+ if (unlikely(r && r != -ERESTARTSYS)) {
+ if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+ bo->flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+ goto retry;
+ } else if (domains != bo->preferred_domains) {
+ domains = bo->allowed_domains;
+ goto retry;
+ }
+ }
+ if (unlikely(r))
return r;
- if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
- bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+ bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved, 0);
- if (kernel)
+ if (type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
if (flags & AMDGPU_GEM_CREATE_VRAM_CLEARED &&
bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
struct dma_fence *fence;
- r = amdgpu_fill_buffer(bo, init_value, bo->tbo.resv, &fence);
+ r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, &fence);
if (unlikely(r))
goto fail_unreserve;
@@ -480,12 +489,11 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
if (bo->shadow)
return 0;
- r = amdgpu_bo_do_create(adev, size, byte_align, true,
- AMDGPU_GEM_DOMAIN_GTT,
+ r = amdgpu_bo_do_create(adev, size, byte_align, AMDGPU_GEM_DOMAIN_GTT,
AMDGPU_GEM_CREATE_CPU_GTT_USWC |
AMDGPU_GEM_CREATE_SHADOW,
- NULL, bo->tbo.resv, 0,
- &bo->shadow);
+ ttm_bo_type_kernel,
+ bo->tbo.resv, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@@ -496,22 +504,17 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
-/* init_value will only take effect when flags contains
- * AMDGPU_GEM_CREATE_VRAM_CLEARED.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
- unsigned long size, int byte_align,
- bool kernel, u32 domain, u64 flags,
- struct sg_table *sg,
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+ int byte_align, u32 domain,
+ u64 flags, enum ttm_bo_type type,
struct reservation_object *resv,
- uint64_t init_value,
struct amdgpu_bo **bo_ptr)
{
uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
int r;
- r = amdgpu_bo_do_create(adev, size, byte_align, kernel, domain,
- parent_flags, sg, resv, init_value, bo_ptr);
+ r = amdgpu_bo_do_create(adev, size, byte_align, domain,
+ parent_flags, type, resv, bo_ptr);
if (r)
return r;
@@ -826,31 +829,32 @@ static const char *amdgpu_vram_names[] = {
"GDDR4",
"GDDR5",
"HBM",
- "DDR3"
+ "DDR3",
+ "DDR4",
};
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* reserve PAT memory space to WC for VRAM */
- arch_io_reserve_memtype_wc(adev->mc.aper_base,
- adev->mc.aper_size);
+ arch_io_reserve_memtype_wc(adev->gmc.aper_base,
+ adev->gmc.aper_size);
/* Add an MTRR for the VRAM */
- adev->mc.vram_mtrr = arch_phys_wc_add(adev->mc.aper_base,
- adev->mc.aper_size);
+ adev->gmc.vram_mtrr = arch_phys_wc_add(adev->gmc.aper_base,
+ adev->gmc.aper_size);
DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
- adev->mc.mc_vram_size >> 20,
- (unsigned long long)adev->mc.aper_size >> 20);
+ adev->gmc.mc_vram_size >> 20,
+ (unsigned long long)adev->gmc.aper_size >> 20);
DRM_INFO("RAM width %dbits %s\n",
- adev->mc.vram_width, amdgpu_vram_names[adev->mc.vram_type]);
+ adev->gmc.vram_width, amdgpu_vram_names[adev->gmc.vram_type]);
return amdgpu_ttm_init(adev);
}
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
- arch_phys_wc_del(adev->mc.vram_mtrr);
- arch_io_free_memtype_wc(adev->mc.aper_base, adev->mc.aper_size);
+ arch_phys_wc_del(adev->gmc.vram_mtrr);
+ arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
@@ -980,7 +984,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
size = bo->mem.num_pages << PAGE_SHIFT;
offset = bo->mem.start << PAGE_SHIFT;
- if ((offset + size) <= adev->mc.visible_vram_size)
+ if ((offset + size) <= adev->gmc.visible_vram_size)
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1003,7 +1007,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
offset = bo->mem.start << PAGE_SHIFT;
/* this should never happen */
if (bo->mem.mem_type == TTM_PL_VRAM &&
- (offset + size) > adev->mc.visible_vram_size)
+ (offset + size) > adev->gmc.visible_vram_size)
return -EINVAL;
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 33615e2ea2e6..546f77cb7882 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -92,6 +92,8 @@ struct amdgpu_bo {
struct list_head mn_list;
struct list_head shadow_list;
};
+
+ struct kgd_mem *kfd_bo;
};
static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo)
@@ -201,13 +203,11 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
-int amdgpu_bo_create(struct amdgpu_device *adev,
- unsigned long size, int byte_align,
- bool kernel, u32 domain, u64 flags,
- struct sg_table *sg,
- struct reservation_object *resv,
- uint64_t init_value,
- struct amdgpu_bo **bo_ptr);
+int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size,
+ int byte_align, u32 domain,
+ u64 flags, enum ttm_bo_type type,
+ struct reservation_object *resv,
+ struct amdgpu_bo **bo_ptr);
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
u32 domain, struct amdgpu_bo **bo_ptr,
@@ -282,8 +282,6 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
struct amdgpu_sa_manager *sa_manager);
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager);
int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align);
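With this rework a caller passes an explicit ttm_bo_type instead of the old
bool kernel flag and sg_table pointer, and the separate init_value argument is
gone (VRAM buffers created with AMDGPU_GEM_CREATE_VRAM_CLEARED are now always
zero-filled). A hypothetical caller under the new signature:

static int example_alloc_kernel_bo(struct amdgpu_device *adev,
				   struct amdgpu_bo **bo)
{
	/* one page in GTT, no special flags, no shared reservation object */
	return amdgpu_bo_create(adev, PAGE_SIZE, PAGE_SIZE,
				AMDGPU_GEM_DOMAIN_GTT, 0,
				ttm_bo_type_kernel, NULL, bo);
}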
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 01a996c6b802..361975cf45a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -116,7 +116,7 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev,
}
if (adev->powerplay.pp_funcs->dispatch_tasks) {
- amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_ENABLE_USER_STATE, &state);
} else {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.user_state = state;
@@ -316,7 +316,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
state != POWER_STATE_TYPE_DEFAULT) {
amdgpu_dpm_dispatch_task(adev,
- AMD_PP_TASK_ENABLE_USER_STATE, &state, NULL);
+ AMD_PP_TASK_ENABLE_USER_STATE, &state);
adev->pp_force_state_enabled = true;
}
}
@@ -360,6 +360,90 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
return count;
}
+static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret = -EINVAL;
+ uint32_t parameter_size = 0;
+ long parameter[64];
+ char buf_cpy[128];
+ char *tmp_str;
+ char *sub_str;
+ const char delimiter[3] = {' ', '\n', '\0'};
+ uint32_t type;
+
+ if (count > 127)
+ return -EINVAL;
+
+ if (*buf == 's')
+ type = PP_OD_EDIT_SCLK_VDDC_TABLE;
+ else if (*buf == 'm')
+ type = PP_OD_EDIT_MCLK_VDDC_TABLE;
+ else if (*buf == 'r')
+ type = PP_OD_RESTORE_DEFAULT_TABLE;
+ else if (*buf == 'c')
+ type = PP_OD_COMMIT_DPM_TABLE;
+ else
+ return -EINVAL;
+
+ memcpy(buf_cpy, buf, count+1);
+
+ tmp_str = buf_cpy;
+
+ while (isspace(*++tmp_str));
+
+ while (tmp_str[0]) {
+ sub_str = strsep(&tmp_str, delimiter);
+ ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+ if (ret)
+ return -EINVAL;
+ parameter_size++;
+
+ while (isspace(*tmp_str))
+ tmp_str++;
+ }
+
+ if (adev->powerplay.pp_funcs->odn_edit_dpm_table)
+ ret = amdgpu_dpm_odn_edit_dpm_table(adev, type,
+ parameter, parameter_size);
+
+ if (ret)
+ return -EINVAL;
+
+ if (type == PP_OD_COMMIT_DPM_TABLE) {
+ if (adev->powerplay.pp_funcs->dispatch_tasks) {
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
+ return count;
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return count;
+}
+
+static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ uint32_t size = 0;
+
+ if (adev->powerplay.pp_funcs->print_clock_levels) {
+ size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf);
+ size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size);
+ return size;
+ } else {
+ return snprintf(buf, PAGE_SIZE, "\n");
+ }
+
+}
+
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
char *buf)
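The new pp_od_clk_voltage store keys on the leading character ('s'clk,
'm'clk, 'r'estore, 'c'ommit) and parses the remaining whitespace-separated
numbers with strsep()/kstrtol(), as shown above. A hypothetical user-space
sequence; the card0 path and the "level MHz mV" meaning of the three
parameters are assumptions:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int sysfs_write(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, val, strlen(val));
	close(fd);
	return n < 0 ? -1 : 0;
}

int main(void)
{
	const char *path = "/sys/class/drm/card0/device/pp_od_clk_voltage";

	if (sysfs_write(path, "s 7 1200 1050\n"))	/* edit one sclk level */
		return 1;
	return sysfs_write(path, "c\n") ? 1 : 0;	/* commit the table */
}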
@@ -530,7 +614,7 @@ static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
if (adev->powerplay.pp_funcs->dispatch_tasks) {
- amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
} else {
adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
amdgpu_pm_compute_clocks(adev);
@@ -574,7 +658,7 @@ static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
if (adev->powerplay.pp_funcs->dispatch_tasks) {
- amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL, NULL);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_READJUST_POWER_STATE, NULL);
} else {
adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
amdgpu_pm_compute_clocks(adev);
@@ -584,159 +668,70 @@ fail:
return count;
}
-static ssize_t amdgpu_get_pp_power_profile(struct device *dev,
- char *buf, struct amd_pp_profile *query)
+static ssize_t amdgpu_get_pp_power_profile_mode(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- int ret = 0xff;
- if (adev->powerplay.pp_funcs->get_power_profile_state)
- ret = amdgpu_dpm_get_power_profile_state(
- adev, query);
+ if (adev->powerplay.pp_funcs->get_power_profile_mode)
+ return amdgpu_dpm_get_power_profile_mode(adev, buf);
- if (ret)
- return ret;
-
- return snprintf(buf, PAGE_SIZE,
- "%d %d %d %d %d\n",
- query->min_sclk / 100,
- query->min_mclk / 100,
- query->activity_threshold,
- query->up_hyst,
- query->down_hyst);
+ return snprintf(buf, PAGE_SIZE, "\n");
}
-static ssize_t amdgpu_get_pp_gfx_power_profile(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct amd_pp_profile query = {0};
-
- query.type = AMD_PP_GFX_PROFILE;
-
- return amdgpu_get_pp_power_profile(dev, buf, &query);
-}
-static ssize_t amdgpu_get_pp_compute_power_profile(struct device *dev,
+static ssize_t amdgpu_set_pp_power_profile_mode(struct device *dev,
struct device_attribute *attr,
- char *buf)
-{
- struct amd_pp_profile query = {0};
-
- query.type = AMD_PP_COMPUTE_PROFILE;
-
- return amdgpu_get_pp_power_profile(dev, buf, &query);
-}
-
-static ssize_t amdgpu_set_pp_power_profile(struct device *dev,
const char *buf,
- size_t count,
- struct amd_pp_profile *request)
+ size_t count)
{
+ int ret = 0xff;
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
- uint32_t loop = 0;
- char *sub_str, buf_cpy[128], *tmp_str;
+ uint32_t parameter_size = 0;
+ long parameter[64];
+ char *sub_str, buf_cpy[128];
+ char *tmp_str;
+ uint32_t i = 0;
+ char tmp[2];
+ long int profile_mode = 0;
const char delimiter[3] = {' ', '\n', '\0'};
- long int value;
- int ret = 0xff;
-
- if (strncmp("reset", buf, strlen("reset")) == 0) {
- if (adev->powerplay.pp_funcs->reset_power_profile_state)
- ret = amdgpu_dpm_reset_power_profile_state(
- adev, request);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- return count;
- }
-
- if (strncmp("set", buf, strlen("set")) == 0) {
- if (adev->powerplay.pp_funcs->set_power_profile_state)
- ret = amdgpu_dpm_set_power_profile_state(
- adev, request);
-
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- return count;
- }
- if (count + 1 >= 128) {
- count = -EINVAL;
+ tmp[0] = *(buf);
+ tmp[1] = '\0';
+ ret = kstrtol(tmp, 0, &profile_mode);
+ if (ret)
goto fail;
- }
-
- memcpy(buf_cpy, buf, count + 1);
- tmp_str = buf_cpy;
-
- while (tmp_str[0]) {
- sub_str = strsep(&tmp_str, delimiter);
- ret = kstrtol(sub_str, 0, &value);
- if (ret) {
- count = -EINVAL;
- goto fail;
- }
- switch (loop) {
- case 0:
- /* input unit MHz convert to dpm table unit 10KHz*/
- request->min_sclk = (uint32_t)value * 100;
- break;
- case 1:
- /* input unit MHz convert to dpm table unit 10KHz*/
- request->min_mclk = (uint32_t)value * 100;
- break;
- case 2:
- request->activity_threshold = (uint16_t)value;
- break;
- case 3:
- request->up_hyst = (uint8_t)value;
- break;
- case 4:
- request->down_hyst = (uint8_t)value;
- break;
- default:
- break;
+ if (profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+ if (count < 2 || count > 127)
+ return -EINVAL;
+ while (isspace(*++buf))
+ i++;
+ memcpy(buf_cpy, buf, count-i);
+ tmp_str = buf_cpy;
+ while (tmp_str[0]) {
+ sub_str = strsep(&tmp_str, delimiter);
+ ret = kstrtol(sub_str, 0, &parameter[parameter_size]);
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+ parameter_size++;
+ while (isspace(*tmp_str))
+ tmp_str++;
}
-
- loop++;
}
- if (adev->powerplay.pp_funcs->set_power_profile_state)
- ret = amdgpu_dpm_set_power_profile_state(adev, request);
-
- if (ret)
- count = -EINVAL;
+ parameter[parameter_size] = profile_mode;
+ if (adev->powerplay.pp_funcs->set_power_profile_mode)
+ ret = amdgpu_dpm_set_power_profile_mode(adev, parameter, parameter_size);
+ if (!ret)
+ return count;
fail:
- return count;
-}
-
-static ssize_t amdgpu_set_pp_gfx_power_profile(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- struct amd_pp_profile request = {0};
-
- request.type = AMD_PP_GFX_PROFILE;
-
- return amdgpu_set_pp_power_profile(dev, buf, count, &request);
-}
-
-static ssize_t amdgpu_set_pp_compute_power_profile(struct device *dev,
- struct device_attribute *attr,
- const char *buf,
- size_t count)
-{
- struct amd_pp_profile request = {0};
-
- request.type = AMD_PP_COMPUTE_PROFILE;
-
- return amdgpu_set_pp_power_profile(dev, buf, count, &request);
+ return -EINVAL;
}
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
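pp_power_profile_mode replaces the per-gfx/per-compute profile files: reading
it lists the modes the firmware offers, and the store above takes the mode
index as the first character, with extra parameters only for the CUSTOM mode.
A hypothetical reader/selector; the card0 path and the meaning of index 1 are
assumptions:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char table[4096];
	const char *path = "/sys/class/drm/card0/device/pp_power_profile_mode";
	int fd = open(path, O_RDWR);
	ssize_t n;

	if (fd < 0)
		return 1;

	n = read(fd, table, sizeof(table) - 1);	/* list available modes */
	if (n > 0) {
		table[n] = '\0';
		fputs(table, stdout);
	}

	lseek(fd, 0, SEEK_SET);
	if (write(fd, "1\n", 2) != 2)		/* select mode index 1 */
		return 1;

	close(fd);
	return 0;
}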
@@ -766,12 +761,12 @@ static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
amdgpu_get_pp_mclk_od,
amdgpu_set_pp_mclk_od);
-static DEVICE_ATTR(pp_gfx_power_profile, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_gfx_power_profile,
- amdgpu_set_pp_gfx_power_profile);
-static DEVICE_ATTR(pp_compute_power_profile, S_IRUGO | S_IWUSR,
- amdgpu_get_pp_compute_power_profile,
- amdgpu_set_pp_compute_power_profile);
+static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_power_profile_mode,
+ amdgpu_set_pp_power_profile_mode);
+static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_od_clk_voltage,
+ amdgpu_set_pp_od_clk_voltage);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -779,17 +774,23 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
- int temp;
+ int r, temp, size = sizeof(temp);
/* Can't get temperature when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
- if (!adev->powerplay.pp_funcs->get_temperature)
- temp = 0;
- else
- temp = amdgpu_dpm_get_temperature(adev);
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* get the temperature */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+ (void *)&temp, &size);
+ if (r)
+ return r;
return snprintf(buf, PAGE_SIZE, "%d\n", temp);
}
@@ -834,6 +835,11 @@ static ssize_t amdgpu_hwmon_set_pwm1_enable(struct device *dev,
int err;
int value;
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (!adev->powerplay.pp_funcs->set_fan_control_mode)
return -EINVAL;
@@ -868,6 +874,11 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev,
int err;
u32 value;
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
err = kstrtou32(buf, 10, &value);
if (err)
return err;
@@ -891,6 +902,11 @@ static ssize_t amdgpu_hwmon_get_pwm1(struct device *dev,
int err;
u32 speed = 0;
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (adev->powerplay.pp_funcs->get_fan_speed_percent) {
err = amdgpu_dpm_get_fan_speed_percent(adev, &speed);
if (err)
@@ -910,6 +926,11 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
int err;
u32 speed = 0;
+ /* Can't adjust fan when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
err = amdgpu_dpm_get_fan_speed_rpm(adev, &speed);
if (err)
@@ -919,6 +940,175 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev,
return sprintf(buf, "%i\n", speed);
}
+static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ struct drm_device *ddev = adev->ddev;
+ u32 vddgfx;
+ int r, size = sizeof(vddgfx);
+
+ /* Can't get voltage when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* get the voltage */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX,
+ (void *)&vddgfx, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", vddgfx);
+}
+
+static ssize_t amdgpu_hwmon_show_vddgfx_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "vddgfx\n");
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ struct drm_device *ddev = adev->ddev;
+ u32 vddnb;
+ int r, size = sizeof(vddnb);
+
+ /* only APUs have vddnb */
+ if (!(adev->flags & AMD_IS_APU))
+ return -EINVAL;
+
+ /* Can't get voltage when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* get the voltage */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB,
+ (void *)&vddnb, &size);
+ if (r)
+ return r;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", vddnb);
+}
+
+static ssize_t amdgpu_hwmon_show_vddnb_label(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, PAGE_SIZE, "vddnb\n");
+}
+
+static ssize_t amdgpu_hwmon_show_power_avg(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ struct drm_device *ddev = adev->ddev;
+ struct pp_gpu_power query = {0};
+ int r, size = sizeof(query);
+ unsigned uw;
+
+ /* Can't get power when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
+ /* sanity check PP is enabled */
+ if (!(adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor))
+ return -EINVAL;
+
+ /* get the power */
+ r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_POWER,
+ (void *)&query, &size);
+ if (r)
+ return r;
+
+ /* convert to microwatts */
+ uw = (query.average_gpu_power >> 8) * 1000000;
+
+ return snprintf(buf, PAGE_SIZE, "%u\n", uw);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_min(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%i\n", 0);
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap_max(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ uint32_t limit = 0;
+
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+ adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, true);
+ return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ } else {
+ return snprintf(buf, PAGE_SIZE, "\n");
+ }
+}
+
+static ssize_t amdgpu_hwmon_show_power_cap(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ uint32_t limit = 0;
+
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->get_power_limit) {
+ adev->powerplay.pp_funcs->get_power_limit(adev->powerplay.pp_handle, &limit, false);
+ return snprintf(buf, PAGE_SIZE, "%u\n", limit * 1000000);
+ } else {
+ return snprintf(buf, PAGE_SIZE, "\n");
+ }
+}
+
+
+static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct amdgpu_device *adev = dev_get_drvdata(dev);
+ int err;
+ u32 value;
+
+ err = kstrtou32(buf, 10, &value);
+ if (err)
+ return err;
+
+ value = value / 1000000; /* convert to Watt */
+ if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_power_limit) {
+ err = adev->powerplay.pp_funcs->set_power_limit(adev->powerplay.pp_handle, value);
+ if (err)
+ return err;
+ } else {
+ return -EINVAL;
+ }
+
+ return count;
+}
+
static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, amdgpu_hwmon_show_temp, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 0);
static SENSOR_DEVICE_ATTR(temp1_crit_hyst, S_IRUGO, amdgpu_hwmon_show_temp_thresh, NULL, 1);
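The new hwmon attributes follow the standard sysfs-hwmon units, which is what
the conversions in the handlers above implement: in0/in1 report millivolts,
power1_average and the power1_cap files report microwatts. A small reader,
assuming the device registered as hwmon0:

#include <stdio.h>

int main(void)
{
	unsigned long uw, mv;
	FILE *f;

	f = fopen("/sys/class/hwmon/hwmon0/power1_average", "r");
	if (f) {
		if (fscanf(f, "%lu", &uw) == 1)	/* microwatts */
			printf("GPU power: %lu.%06lu W\n",
			       uw / 1000000, uw % 1000000);
		fclose(f);
	}

	f = fopen("/sys/class/hwmon/hwmon0/in0_input", "r");
	if (f) {
		if (fscanf(f, "%lu", &mv) == 1)	/* millivolts */
			printf("vddgfx: %lu mV\n", mv);
		fclose(f);
	}
	return 0;
}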
@@ -927,6 +1117,14 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_
static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0);
static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0);
static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0);
+static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0);
+static SENSOR_DEVICE_ATTR(in1_label, S_IRUGO, amdgpu_hwmon_show_vddnb_label, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
+static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
@@ -937,6 +1135,14 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_pwm1_min.dev_attr.attr,
&sensor_dev_attr_pwm1_max.dev_attr.attr,
&sensor_dev_attr_fan1_input.dev_attr.attr,
+ &sensor_dev_attr_in0_input.dev_attr.attr,
+ &sensor_dev_attr_in0_label.dev_attr.attr,
+ &sensor_dev_attr_in1_input.dev_attr.attr,
+ &sensor_dev_attr_in1_label.dev_attr.attr,
+ &sensor_dev_attr_power1_average.dev_attr.attr,
+ &sensor_dev_attr_power1_cap_max.dev_attr.attr,
+ &sensor_dev_attr_power1_cap_min.dev_attr.attr,
+ &sensor_dev_attr_power1_cap.dev_attr.attr,
NULL
};
@@ -947,9 +1153,19 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
struct amdgpu_device *adev = dev_get_drvdata(dev);
umode_t effective_mode = attr->mode;
- /* no skipping for powerplay */
- if (adev->powerplay.cgs_device)
- return effective_mode;
+ /* handle non-powerplay limitations */
+ if (!adev->powerplay.pp_handle) {
+ /* Skip fan attributes if fan is not present */
+ if (adev->pm.no_fan &&
+ (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
+ return 0;
+ /* requires powerplay */
+ if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+ return 0;
+ }
/* Skip limit attributes if DPM is not enabled */
if (!adev->pm.dpm_enabled &&
@@ -961,14 +1177,6 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
- /* Skip fan attributes if fan is not present */
- if (adev->pm.no_fan &&
- (attr == &sensor_dev_attr_pwm1.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_max.dev_attr.attr ||
- attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
- return 0;
-
/* mask fan attributes if we have no bindings for this asic to expose */
if ((!adev->powerplay.pp_funcs->get_fan_speed_percent &&
attr == &sensor_dev_attr_pwm1.dev_attr.attr) || /* can't query fan */
@@ -982,6 +1190,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr)) /* can't manage state */
effective_mode &= ~S_IWUSR;
+ if ((adev->flags & AMD_IS_APU) &&
+ (attr == &sensor_dev_attr_power1_cap_max.dev_attr.attr ||
+ attr == &sensor_dev_attr_power1_cap_min.dev_attr.attr ||
+ attr == &sensor_dev_attr_power1_cap.dev_attr.attr))
+ return 0;
+
/* hide max/min values if we can't both query and manage the fan */
if ((!adev->powerplay.pp_funcs->set_fan_speed_percent &&
!adev->powerplay.pp_funcs->get_fan_speed_percent) &&
@@ -989,8 +1203,10 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_pwm1_min.dev_attr.attr))
return 0;
- /* requires powerplay */
- if (attr == &sensor_dev_attr_fan1_input.dev_attr.attr)
+ /* only APUs have vddnb */
+ if (!(adev->flags & AMD_IS_APU) &&
+ (attr == &sensor_dev_attr_in1_input.dev_attr.attr ||
+ attr == &sensor_dev_attr_in1_label.dev_attr.attr))
return 0;
return effective_mode;
@@ -1013,13 +1229,15 @@ void amdgpu_dpm_thermal_work_handler(struct work_struct *work)
pm.dpm.thermal.work);
/* switch to the thermal state */
enum amd_pm_state_type dpm_state = POWER_STATE_TYPE_INTERNAL_THERMAL;
+ int temp, size = sizeof(temp);
if (!adev->pm.dpm_enabled)
return;
- if (adev->powerplay.pp_funcs->get_temperature) {
- int temp = amdgpu_dpm_get_temperature(adev);
-
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->read_sensor &&
+ !amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_TEMP,
+ (void *)&temp, &size)) {
if (temp < adev->pm.dpm.thermal.min_temp)
/* switch back the user state */
dpm_state = adev->pm.dpm.user_state;
@@ -1319,9 +1537,6 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled == 0)
return 0;
- if (adev->powerplay.pp_funcs->get_temperature == NULL)
- return 0;
-
adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,
DRIVER_NAME, adev,
hwmon_groups);
@@ -1391,20 +1606,19 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
return ret;
}
ret = device_create_file(adev->dev,
- &dev_attr_pp_gfx_power_profile);
+ &dev_attr_pp_power_profile_mode);
if (ret) {
DRM_ERROR("failed to create device file "
- "pp_gfx_power_profile\n");
+ "pp_power_profile_mode\n");
return ret;
}
ret = device_create_file(adev->dev,
- &dev_attr_pp_compute_power_profile);
+ &dev_attr_pp_od_clk_voltage);
if (ret) {
DRM_ERROR("failed to create device file "
- "pp_compute_power_profile\n");
+ "pp_od_clk_voltage\n");
return ret;
}
-
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1437,9 +1651,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
device_remove_file(adev->dev,
- &dev_attr_pp_gfx_power_profile);
+ &dev_attr_pp_power_profile_mode);
device_remove_file(adev->dev,
- &dev_attr_pp_compute_power_profile);
+ &dev_attr_pp_od_clk_voltage);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -1462,7 +1676,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
}
if (adev->powerplay.pp_funcs->dispatch_tasks) {
- amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL, NULL);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_TASK_DISPLAY_CONFIG_CHANGE, NULL);
} else {
mutex_lock(&adev->pm.mutex);
adev->pm.dpm.new_active_crtcs = 0;
@@ -1512,6 +1726,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a
seq_printf(m, "\t%u MHz (MCLK)\n", value/100);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK, (void *)&value, &size))
seq_printf(m, "\t%u MHz (SCLK)\n", value/100);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK, (void *)&value, &size))
+ seq_printf(m, "\t%u MHz (PSTATE_SCLK)\n", value/100);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK, (void *)&value, &size))
+ seq_printf(m, "\t%u MHz (PSTATE_MCLK)\n", value/100);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDGFX, (void *)&value, &size))
seq_printf(m, "\t%u mV (VDDGFX)\n", value);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_VDDNB, (void *)&value, &size))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
deleted file mode 100644
index 5f5aa5fddc16..000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ /dev/null
@@ -1,290 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: AMD
- *
- */
-#include "atom.h"
-#include "amdgpu.h"
-#include "amd_shared.h"
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include "amdgpu_pm.h"
-#include <drm/amdgpu_drm.h>
-#include "amdgpu_powerplay.h"
-#include "si_dpm.h"
-#include "cik_dpm.h"
-#include "vi_dpm.h"
-
-static int amdgpu_pp_early_init(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct amd_powerplay *amd_pp;
- int ret = 0;
-
- amd_pp = &(adev->powerplay);
- amd_pp->pp_handle = (void *)adev;
-
- switch (adev->asic_type) {
- case CHIP_POLARIS11:
- case CHIP_POLARIS10:
- case CHIP_POLARIS12:
- case CHIP_TONGA:
- case CHIP_FIJI:
- case CHIP_TOPAZ:
- case CHIP_CARRIZO:
- case CHIP_STONEY:
- case CHIP_VEGA10:
- case CHIP_RAVEN:
- amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
- amd_pp->ip_funcs = &pp_ip_funcs;
- amd_pp->pp_funcs = &pp_dpm_funcs;
- break;
- /* These chips don't have powerplay implementations */
-#ifdef CONFIG_DRM_AMDGPU_SI
- case CHIP_TAHITI:
- case CHIP_PITCAIRN:
- case CHIP_VERDE:
- case CHIP_OLAND:
- case CHIP_HAINAN:
- amd_pp->ip_funcs = &si_dpm_ip_funcs;
- amd_pp->pp_funcs = &si_dpm_funcs;
- break;
-#endif
-#ifdef CONFIG_DRM_AMDGPU_CIK
- case CHIP_BONAIRE:
- case CHIP_HAWAII:
- if (amdgpu_dpm == -1) {
- amd_pp->ip_funcs = &ci_dpm_ip_funcs;
- amd_pp->pp_funcs = &ci_dpm_funcs;
- } else {
- amd_pp->cgs_device = amdgpu_cgs_create_device(adev);
- amd_pp->ip_funcs = &pp_ip_funcs;
- amd_pp->pp_funcs = &pp_dpm_funcs;
- }
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- case CHIP_KAVERI:
- amd_pp->ip_funcs = &kv_dpm_ip_funcs;
- amd_pp->pp_funcs = &kv_dpm_funcs;
- break;
-#endif
- default:
- ret = -EINVAL;
- break;
- }
-
- if (adev->powerplay.ip_funcs->early_init)
- ret = adev->powerplay.ip_funcs->early_init(
- amd_pp->cgs_device ? amd_pp->cgs_device :
- amd_pp->pp_handle);
-
- return ret;
-}
-
-
-static int amdgpu_pp_late_init(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->late_init)
- ret = adev->powerplay.ip_funcs->late_init(
- adev->powerplay.pp_handle);
-
- return ret;
-}
-
-static int amdgpu_pp_sw_init(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->sw_init)
- ret = adev->powerplay.ip_funcs->sw_init(
- adev->powerplay.pp_handle);
-
- return ret;
-}
-
-static int amdgpu_pp_sw_fini(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->sw_fini)
- ret = adev->powerplay.ip_funcs->sw_fini(
- adev->powerplay.pp_handle);
- if (ret)
- return ret;
-
- return ret;
-}
-
-static int amdgpu_pp_hw_init(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
- amdgpu_ucode_init_bo(adev);
-
- if (adev->powerplay.ip_funcs->hw_init)
- ret = adev->powerplay.ip_funcs->hw_init(
- adev->powerplay.pp_handle);
-
- return ret;
-}
-
-static int amdgpu_pp_hw_fini(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->hw_fini)
- ret = adev->powerplay.ip_funcs->hw_fini(
- adev->powerplay.pp_handle);
-
- if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU)
- amdgpu_ucode_fini_bo(adev);
-
- return ret;
-}
-
-static void amdgpu_pp_late_fini(void *handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->late_fini)
- adev->powerplay.ip_funcs->late_fini(
- adev->powerplay.pp_handle);
-
- if (adev->powerplay.cgs_device)
- amdgpu_cgs_destroy_device(adev->powerplay.cgs_device);
-}
-
-static int amdgpu_pp_suspend(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->suspend)
- ret = adev->powerplay.ip_funcs->suspend(
- adev->powerplay.pp_handle);
- return ret;
-}
-
-static int amdgpu_pp_resume(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->resume)
- ret = adev->powerplay.ip_funcs->resume(
- adev->powerplay.pp_handle);
- return ret;
-}
-
-static int amdgpu_pp_set_clockgating_state(void *handle,
- enum amd_clockgating_state state)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->set_clockgating_state)
- ret = adev->powerplay.ip_funcs->set_clockgating_state(
- adev->powerplay.pp_handle, state);
- return ret;
-}
-
-static int amdgpu_pp_set_powergating_state(void *handle,
- enum amd_powergating_state state)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->set_powergating_state)
- ret = adev->powerplay.ip_funcs->set_powergating_state(
- adev->powerplay.pp_handle, state);
- return ret;
-}
-
-
-static bool amdgpu_pp_is_idle(void *handle)
-{
- bool ret = true;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->is_idle)
- ret = adev->powerplay.ip_funcs->is_idle(
- adev->powerplay.pp_handle);
- return ret;
-}
-
-static int amdgpu_pp_wait_for_idle(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->wait_for_idle)
- ret = adev->powerplay.ip_funcs->wait_for_idle(
- adev->powerplay.pp_handle);
- return ret;
-}
-
-static int amdgpu_pp_soft_reset(void *handle)
-{
- int ret = 0;
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->powerplay.ip_funcs->soft_reset)
- ret = adev->powerplay.ip_funcs->soft_reset(
- adev->powerplay.pp_handle);
- return ret;
-}
-
-static const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
- .name = "amdgpu_powerplay",
- .early_init = amdgpu_pp_early_init,
- .late_init = amdgpu_pp_late_init,
- .sw_init = amdgpu_pp_sw_init,
- .sw_fini = amdgpu_pp_sw_fini,
- .hw_init = amdgpu_pp_hw_init,
- .hw_fini = amdgpu_pp_hw_fini,
- .late_fini = amdgpu_pp_late_fini,
- .suspend = amdgpu_pp_suspend,
- .resume = amdgpu_pp_resume,
- .is_idle = amdgpu_pp_is_idle,
- .wait_for_idle = amdgpu_pp_wait_for_idle,
- .soft_reset = amdgpu_pp_soft_reset,
- .set_clockgating_state = amdgpu_pp_set_clockgating_state,
- .set_powergating_state = amdgpu_pp_set_powergating_state,
-};
-
-const struct amdgpu_ip_block_version amdgpu_pp_ip_block =
-{
- .type = AMD_IP_BLOCK_TYPE_SMC,
- .major = 1,
- .minor = 0,
- .rev = 0,
- .funcs = &amdgpu_pp_ip_funcs,
-};
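
The file removed above was a thin forwarding layer: it registered powerplay as an IP block and had every amd_ip_funcs hook NULL-check the corresponding powerplay callback before passing the pp_handle (or cgs_device) through. A minimal sketch of that optional-callback forwarding pattern, with illustrative names rather than the real amdgpu types:

/* Each wrapper hook forwards to an optional backend callback,
 * defaulting to success when the backend does not implement it.
 * pp_ops/pp_wrapper are illustrative stand-ins, not kernel types.
 */
struct pp_ops {
	int (*hw_init)(void *handle);	/* may be NULL */
	int (*hw_fini)(void *handle);	/* may be NULL */
};

struct pp_wrapper {
	const struct pp_ops *ops;
	void *handle;
};

static int pp_wrapper_hw_init(struct pp_wrapper *w)
{
	return w->ops->hw_init ? w->ops->hw_init(w->handle) : 0;
}
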
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index ae9c106979d7..4b584cb75bf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -26,9 +26,12 @@
#include <drm/drmP.h>
#include "amdgpu.h"
+#include "amdgpu_display.h"
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
+static const struct dma_buf_ops amdgpu_dmabuf_ops;
+
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@@ -102,59 +105,95 @@ amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
int ret;
ww_mutex_lock(&resv->lock, NULL);
- ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE, false,
- AMDGPU_GEM_DOMAIN_GTT, 0, sg, resv, 0, &bo);
- ww_mutex_unlock(&resv->lock);
+ ret = amdgpu_bo_create(adev, attach->dmabuf->size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_CPU, 0, ttm_bo_type_sg,
+ resv, &bo);
if (ret)
- return ERR_PTR(ret);
+ goto error;
- bo->prime_shared_count = 1;
+ bo->tbo.sg = sg;
+ bo->tbo.ttm->sg = sg;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
+ if (attach->dmabuf->ops != &amdgpu_dmabuf_ops)
+ bo->prime_shared_count = 1;
+
+ ww_mutex_unlock(&resv->lock);
return &bo->gem_base;
+
+error:
+ ww_mutex_unlock(&resv->lock);
+ return ERR_PTR(ret);
}
-int amdgpu_gem_prime_pin(struct drm_gem_object *obj)
+static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
+ struct device *target_dev,
+ struct dma_buf_attachment *attach)
{
+ struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
- long ret = 0;
-
- ret = amdgpu_bo_reserve(bo, false);
- if (unlikely(ret != 0))
- return ret;
-
- /*
- * Wait for all shared fences to complete before we switch to future
- * use of exclusive fence on this prime shared bo.
- */
- ret = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- if (unlikely(ret < 0)) {
- DRM_DEBUG_PRIME("Fence wait failed: %li\n", ret);
- amdgpu_bo_unreserve(bo);
- return ret;
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ long r;
+
+ r = drm_gem_map_attach(dma_buf, target_dev, attach);
+ if (r)
+ return r;
+
+ r = amdgpu_bo_reserve(bo, false);
+ if (unlikely(r != 0))
+ goto error_detach;
+
+ if (attach->dev->driver != adev->dev->driver) {
+ /*
+ * Wait for all shared fences to complete before we switch to future
+ * use of exclusive fence on this prime shared bo.
+ */
+ r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+ true, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (unlikely(r < 0)) {
+ DRM_DEBUG_PRIME("Fence wait failed: %li\n", r);
+ goto error_unreserve;
+ }
}
/* pin buffer into GTT */
- ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
- if (likely(ret == 0))
+ r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
+ if (r)
+ goto error_unreserve;
+
+ if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
+error_unreserve:
amdgpu_bo_unreserve(bo);
- return ret;
+
+error_detach:
+ if (r)
+ drm_gem_map_detach(dma_buf, attach);
+ return r;
}
-void amdgpu_gem_prime_unpin(struct drm_gem_object *obj)
+static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
+ struct dma_buf_attachment *attach)
{
+ struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int ret = 0;
ret = amdgpu_bo_reserve(bo, true);
if (unlikely(ret != 0))
- return;
+ goto error;
amdgpu_bo_unpin(bo);
- if (bo->prime_shared_count)
+ if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
amdgpu_bo_unreserve(bo);
+
+error:
+ drm_gem_map_detach(dma_buf, attach);
}
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
@@ -164,6 +203,50 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
return bo->tbo.resv;
}
+static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
+ enum dma_data_direction direction)
+{
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ struct ttm_operation_ctx ctx = { true, false };
+ u32 domain = amdgpu_display_framebuffer_domains(adev);
+ int ret;
+ bool reads = (direction == DMA_BIDIRECTIONAL ||
+ direction == DMA_FROM_DEVICE);
+
+ if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
+ return 0;
+
+ /* move to gtt */
+ ret = amdgpu_bo_reserve(bo, false);
+ if (unlikely(ret != 0))
+ return ret;
+
+ if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ }
+
+ amdgpu_bo_unreserve(bo);
+ return ret;
+}
+
+static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+ .attach = amdgpu_gem_map_attach,
+ .detach = amdgpu_gem_map_detach,
+ .map_dma_buf = drm_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = drm_gem_dmabuf_release,
+ .begin_cpu_access = amdgpu_gem_begin_cpu_access,
+ .map = drm_gem_dmabuf_kmap,
+ .map_atomic = drm_gem_dmabuf_kmap_atomic,
+ .unmap = drm_gem_dmabuf_kunmap,
+ .unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
+
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
@@ -176,7 +259,30 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(dev, gobj, flags);
- if (!IS_ERR(buf))
+ if (!IS_ERR(buf)) {
buf->file->f_mapping = dev->anon_inode->i_mapping;
+ buf->ops = &amdgpu_dmabuf_ops;
+ }
+
return buf;
}
+
+struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ struct drm_gem_object *obj;
+
+ if (dma_buf->ops == &amdgpu_dmabuf_ops) {
+ obj = dma_buf->priv;
+ if (obj->dev == dev) {
+ /*
+ * Importing a dmabuf exported from our own GEM object increases
+ * the refcount on the GEM object itself instead of the dmabuf's f_count.
+ */
+ drm_gem_object_get(obj);
+ return obj;
+ }
+ }
+
+ return drm_gem_prime_import(dev, dma_buf);
+}
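
The import change above hinges on an identity test: if the dma-buf's ops pointer is the driver's own table, the buffer was exported by this driver, and when the GEM object also belongs to the importing drm_device the driver can take another GEM reference instead of building a fresh sg-table import. Reduced to its core (my_dmabuf_ops is a stand-in for a driver-private dma_buf_ops table):

/* Fast path for re-importing our own exports: compare buf->ops to
 * detect a self-exported dma-buf, then reference the GEM object
 * directly instead of creating a second import of it.
 */
struct drm_gem_object *my_prime_import(struct drm_device *dev,
				       struct dma_buf *buf)
{
	if (buf->ops == &my_dmabuf_ops) {
		struct drm_gem_object *obj = buf->priv;

		if (obj->dev == dev) {
			drm_gem_object_get(obj);
			return obj;
		}
	}
	return drm_gem_prime_import(dev, buf);
}
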
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 2157d4509e84..19e71f4a8ac2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -51,29 +51,11 @@ static int psp_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
- psp->init_microcode = psp_v3_1_init_microcode;
- psp->bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv;
- psp->bootloader_load_sos = psp_v3_1_bootloader_load_sos;
- psp->prep_cmd_buf = psp_v3_1_prep_cmd_buf;
- psp->ring_init = psp_v3_1_ring_init;
- psp->ring_create = psp_v3_1_ring_create;
- psp->ring_stop = psp_v3_1_ring_stop;
- psp->ring_destroy = psp_v3_1_ring_destroy;
- psp->cmd_submit = psp_v3_1_cmd_submit;
- psp->compare_sram_data = psp_v3_1_compare_sram_data;
- psp->smu_reload_quirk = psp_v3_1_smu_reload_quirk;
- psp->mode1_reset = psp_v3_1_mode1_reset;
+ case CHIP_VEGA12:
+ psp_v3_1_set_psp_funcs(psp);
break;
case CHIP_RAVEN:
- psp->init_microcode = psp_v10_0_init_microcode;
- psp->prep_cmd_buf = psp_v10_0_prep_cmd_buf;
- psp->ring_init = psp_v10_0_ring_init;
- psp->ring_create = psp_v10_0_ring_create;
- psp->ring_stop = psp_v10_0_ring_stop;
- psp->ring_destroy = psp_v10_0_ring_destroy;
- psp->cmd_submit = psp_v10_0_cmd_submit;
- psp->compare_sram_data = psp_v10_0_compare_sram_data;
- psp->mode1_reset = psp_v10_0_mode1_reset;
+ psp_v10_0_set_psp_funcs(psp);
break;
default:
return -EINVAL;
@@ -81,6 +63,9 @@ static int psp_sw_init(void *handle)
psp->adev = adev;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
ret = psp_init_microcode(psp);
if (ret) {
DRM_ERROR("Failed to load psp firmware!\n");
@@ -94,6 +79,9 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
@@ -472,6 +460,9 @@ static int psp_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
+ if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+ return 0;
+
ret = psp_ring_stop(psp, PSP_RING_TYPE__KM);
if (ret) {
DRM_ERROR("PSP ring stop failed\n");
@@ -512,19 +503,8 @@ failed:
return ret;
}
-static bool psp_check_reset(void* handle)
+int psp_gpu_reset(struct amdgpu_device *adev)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
- if (adev->flags & AMD_IS_APU)
- return true;
-
- return false;
-}
-
-static int psp_reset(void* handle)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
return psp_mode1_reset(&adev->psp);
}
@@ -571,9 +551,9 @@ const struct amd_ip_funcs psp_ip_funcs = {
.suspend = psp_suspend,
.resume = psp_resume,
.is_idle = NULL,
- .check_soft_reset = psp_check_reset,
+ .check_soft_reset = NULL,
.wait_for_idle = NULL,
- .soft_reset = psp_reset,
+ .soft_reset = NULL,
.set_clockgating_state = psp_set_clockgating_state,
.set_powergating_state = psp_set_powergating_state,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index ce4654550416..129209686848 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -33,6 +33,8 @@
#define PSP_ASD_SHARED_MEM_SIZE 0x4000
#define PSP_1_MEG 0x100000
+struct psp_context;
+
enum psp_ring_type
{
PSP_RING_TYPE__INVALID = 0,
@@ -53,12 +55,8 @@ struct psp_ring
uint32_t ring_size;
};
-struct psp_context
+struct psp_funcs
{
- struct amdgpu_device *adev;
- struct psp_ring km_ring;
- struct psp_gfx_cmd_resp *cmd;
-
int (*init_microcode)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
@@ -77,6 +75,15 @@ struct psp_context
enum AMDGPU_UCODE_ID ucode_type);
bool (*smu_reload_quirk)(struct psp_context *psp);
int (*mode1_reset)(struct psp_context *psp);
+};
+
+struct psp_context
+{
+ struct amdgpu_device *adev;
+ struct psp_ring km_ring;
+ struct psp_gfx_cmd_resp *cmd;
+
+ const struct psp_funcs *funcs;
/* fence buffer */
struct amdgpu_bo *fw_pri_bo;
@@ -123,25 +130,25 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
-#define psp_prep_cmd_buf(ucode, type) (psp)->prep_cmd_buf((ucode), (type))
-#define psp_ring_init(psp, type) (psp)->ring_init((psp), (type))
-#define psp_ring_create(psp, type) (psp)->ring_create((psp), (type))
-#define psp_ring_stop(psp, type) (psp)->ring_stop((psp), (type))
-#define psp_ring_destroy(psp, type) ((psp)->ring_destroy((psp), (type)))
+#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
+#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
+#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
+#define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
#define psp_cmd_submit(psp, ucode, cmd_mc, fence_mc, index) \
- (psp)->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
+ (psp)->funcs->cmd_submit((psp), (ucode), (cmd_mc), (fence_mc), (index))
#define psp_compare_sram_data(psp, ucode, type) \
- (psp)->compare_sram_data((psp), (ucode), (type))
+ (psp)->funcs->compare_sram_data((psp), (ucode), (type))
#define psp_init_microcode(psp) \
- ((psp)->init_microcode ? (psp)->init_microcode((psp)) : 0)
+ ((psp)->funcs->init_microcode ? (psp)->funcs->init_microcode((psp)) : 0)
#define psp_bootloader_load_sysdrv(psp) \
- ((psp)->bootloader_load_sysdrv ? (psp)->bootloader_load_sysdrv((psp)) : 0)
+ ((psp)->funcs->bootloader_load_sysdrv ? (psp)->funcs->bootloader_load_sysdrv((psp)) : 0)
#define psp_bootloader_load_sos(psp) \
- ((psp)->bootloader_load_sos ? (psp)->bootloader_load_sos((psp)) : 0)
+ ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
#define psp_smu_reload_quirk(psp) \
- ((psp)->smu_reload_quirk ? (psp)->smu_reload_quirk((psp)) : false)
+ ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
#define psp_mode1_reset(psp) \
- ((psp)->mode1_reset ? (psp)->mode1_reset((psp)) : false)
+ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
extern const struct amd_ip_funcs psp_ip_funcs;
@@ -151,4 +158,6 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index,
extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
+int psp_gpu_reset(struct amdgpu_device *adev);
+
#endif
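
The header restructuring moves all per-generation callbacks out of psp_context into a shared const psp_funcs table, so psp_v3_1/psp_v10_0 install their hooks with a single set_psp_funcs() call, and the dispatch macros gain NULL checks so optional hooks fall back to a harmless default. The pattern in isolation, with illustrative names:

/* Const ops table + optional-hook dispatch. The my_psp_* names are
 * illustrative; the real tables live in the psp_v*_0 files.
 */
struct my_psp_ctx;	/* forward declaration, mirroring the psp.h change */

struct my_psp_funcs {
	int (*mode1_reset)(struct my_psp_ctx *ctx);	/* optional */
};

struct my_psp_ctx {
	const struct my_psp_funcs *funcs;
};

static inline int my_psp_mode1_reset(struct my_psp_ctx *ctx)
{
	return ctx->funcs->mode1_reset ? ctx->funcs->mode1_reset(ctx) : 0;
}
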
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 13044e66dcaf..d5f526f38e50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -360,6 +360,9 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
amdgpu_debugfs_ring_fini(ring);
+ dma_fence_put(ring->vmid_wait);
+ ring->vmid_wait = NULL;
+
ring->adev->rings[ring->idx] = NULL;
}
@@ -481,7 +484,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
result = 0;
if (*pos < 12) {
- early[0] = amdgpu_ring_get_rptr(ring);
+ early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask;
early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask;
early[2] = ring->wptr & ring->buf_mask;
for (i = *pos / 4; i < 3 && size; i++) {
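
The debugfs fix masks the read pointer with buf_mask for the same reason the write pointer already was: on some rings the hardware pointer is a free-running counter that can run past the ring size, and only the masked value is a valid buffer index. As a sketch, assuming a power-of-two ring size in dwords:

/* Map a free-running ring pointer to an index inside the buffer.
 * Valid only when the ring size is a power of two.
 */
static inline u32 ring_index(u64 ptr, u32 ring_size_dw)
{
	return ptr & (ring_size_dw - 1);	/* == ptr % ring_size_dw */
}
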
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 102dad3edf6a..1a5911882657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -26,6 +26,7 @@
#include <drm/amdgpu_drm.h>
#include <drm/gpu_scheduler.h>
+#include <drm/drm_print.h>
/* max number of rings */
#define AMDGPU_MAX_RINGS 18
@@ -35,8 +36,9 @@
#define AMDGPU_MAX_UVD_ENC_RINGS 2
/* some special values for the owner field */
-#define AMDGPU_FENCE_OWNER_UNDEFINED ((void*)0ul)
-#define AMDGPU_FENCE_OWNER_VM ((void*)1ul)
+#define AMDGPU_FENCE_OWNER_UNDEFINED ((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM ((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD ((void *)2ul)
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
@@ -128,7 +130,6 @@ struct amdgpu_ring_funcs {
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
void (*emit_hdp_flush)(struct amdgpu_ring *ring);
- void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
uint32_t gds_base, uint32_t gds_size,
uint32_t gws_base, uint32_t gws_size,
@@ -151,6 +152,8 @@ struct amdgpu_ring_funcs {
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
+ void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask);
void (*emit_tmz)(struct amdgpu_ring *ring, bool start);
/* priority functions */
void (*set_priority) (struct amdgpu_ring *ring,
@@ -195,6 +198,7 @@ struct amdgpu_ring {
u64 cond_exe_gpu_addr;
volatile u32 *cond_exe_cpu_addr;
unsigned vm_inv_eng;
+ struct dma_fence *vmid_wait;
bool has_compute_vm_bug;
atomic_t num_jobs[DRM_SCHED_PRIORITY_MAX];
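
AMDGPU_FENCE_OWNER_KFD extends the existing sentinel scheme: the owner values are small integers cast to void *, never dereferenced, only compared, which is enough for the sync code to classify a fence. A one-line illustration:

/* Sentinel owners are compared for identity, never dereferenced. */
static bool fence_owner_is_kfd(void *owner)
{
	return owner == AMDGPU_FENCE_OWNER_KFD;
}
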
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 3144400435b7..fb1667b35daa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -63,21 +63,27 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
INIT_LIST_HEAD(&sa_manager->flist[i]);
- r = amdgpu_bo_create(adev, size, align, true, domain,
- 0, NULL, NULL, 0, &sa_manager->bo);
+ r = amdgpu_bo_create_kernel(adev, size, align, domain, &sa_manager->bo,
+ &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
if (r) {
dev_err(adev->dev, "(%d) failed to allocate bo for manager\n", r);
return r;
}
+ memset(sa_manager->cpu_ptr, 0, sa_manager->size);
return r;
}
void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager)
+ struct amdgpu_sa_manager *sa_manager)
{
struct amdgpu_sa_bo *sa_bo, *tmp;
+ if (sa_manager->bo == NULL) {
+ dev_err(adev->dev, "no bo for sa manager\n");
+ return;
+ }
+
if (!list_empty(&sa_manager->olist)) {
sa_manager->hole = &sa_manager->olist,
amdgpu_sa_bo_try_free(sa_manager);
@@ -88,55 +94,9 @@ void amdgpu_sa_bo_manager_fini(struct amdgpu_device *adev,
list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) {
amdgpu_sa_bo_remove_locked(sa_bo);
}
- amdgpu_bo_unref(&sa_manager->bo);
- sa_manager->size = 0;
-}
-
-int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager)
-{
- int r;
-
- if (sa_manager->bo == NULL) {
- dev_err(adev->dev, "no bo for sa manager\n");
- return -EINVAL;
- }
- /* map the buffer */
- r = amdgpu_bo_reserve(sa_manager->bo, false);
- if (r) {
- dev_err(adev->dev, "(%d) failed to reserve manager bo\n", r);
- return r;
- }
- r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain, &sa_manager->gpu_addr);
- if (r) {
- amdgpu_bo_unreserve(sa_manager->bo);
- dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
- return r;
- }
- r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
- memset(sa_manager->cpu_ptr, 0, sa_manager->size);
- amdgpu_bo_unreserve(sa_manager->bo);
- return r;
-}
-
-int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
- struct amdgpu_sa_manager *sa_manager)
-{
- int r;
-
- if (sa_manager->bo == NULL) {
- dev_err(adev->dev, "no bo for sa manager\n");
- return -EINVAL;
- }
-
- r = amdgpu_bo_reserve(sa_manager->bo, true);
- if (!r) {
- amdgpu_bo_kunmap(sa_manager->bo);
- amdgpu_bo_unpin(sa_manager->bo);
- amdgpu_bo_unreserve(sa_manager->bo);
- }
- return r;
+ amdgpu_bo_free_kernel(&sa_manager->bo, &sa_manager->gpu_addr, &sa_manager->cpu_ptr);
+ sa_manager->size = 0;
}
static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
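
The start/suspend hooks could be dropped because amdgpu_bo_create_kernel() performs the old create + pin + kmap sequence in one call, and amdgpu_bo_free_kernel() tears all three down. The pairing, sketched around the manager fields used above:

/* Sketch: one-call setup/teardown of a pinned, CPU-mapped kernel BO,
 * replacing the separate create/pin/kmap steps and their inverses.
 */
static int sa_backing_init(struct amdgpu_device *adev,
			   struct amdgpu_sa_manager *mgr,
			   unsigned size, u32 align, u32 domain)
{
	int r = amdgpu_bo_create_kernel(adev, size, align, domain,
					&mgr->bo, &mgr->gpu_addr,
					&mgr->cpu_ptr);
	if (r)
		return r;

	memset(mgr->cpu_ptr, 0, size);	/* BO is already pinned and mapped */
	return 0;
}

static void sa_backing_fini(struct amdgpu_sa_manager *mgr)
{
	amdgpu_bo_free_kernel(&mgr->bo, &mgr->gpu_addr, &mgr->cpu_ptr);
}
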
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index df65c66dc956..2d6f5ec77a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -31,6 +31,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
struct amdgpu_sync_entry {
struct hlist_node node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
*/
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
- struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+ struct drm_sched_fence *s_fence;
+ struct amdgpu_amdkfd_fence *kfd_fence;
+
+ if (!f)
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
+ s_fence = to_drm_sched_fence(f);
if (s_fence)
return s_fence->owner;
+ kfd_fence = to_amdgpu_amdkfd_fence(f);
+ if (kfd_fence)
+ return AMDGPU_FENCE_OWNER_KFD;
+
return AMDGPU_FENCE_OWNER_UNDEFINED;
}
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
for (i = 0; i < flist->shared_count; ++i) {
f = rcu_dereference_protected(flist->shared[i],
reservation_object_held(resv));
+ /* We only want to trigger KFD eviction fences on
+ * evict or move jobs. Skip KFD fences otherwise.
+ */
+ fence_owner = amdgpu_sync_get_owner(f);
+ if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+ owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+ continue;
+
if (amdgpu_sync_same_dev(adev, f)) {
/* VM updates are only interesting
* for other VM updates and moves.
*/
- fence_owner = amdgpu_sync_get_owner(f);
if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
(fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
((owner == AMDGPU_FENCE_OWNER_VM) !=
@@ -305,6 +322,41 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit
return NULL;
}
+/**
+ * amdgpu_sync_clone - clone a sync object
+ *
+ * @source: sync object to clone
+ * @clone: pointer to destination sync object
+ *
+ * Adds references to all unsignaled fences in @source to @clone. Also
+ * removes signaled fences from @source while at it.
+ */
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
+{
+ struct amdgpu_sync_entry *e;
+ struct hlist_node *tmp;
+ struct dma_fence *f;
+ int i, r;
+
+ hash_for_each_safe(source->fences, i, tmp, e, node) {
+ f = e->fence;
+ if (!dma_fence_is_signaled(f)) {
+ r = amdgpu_sync_fence(NULL, clone, f, e->explicit);
+ if (r)
+ return r;
+ } else {
+ hash_del(&e->node);
+ dma_fence_put(f);
+ kmem_cache_free(amdgpu_sync_slab, e);
+ }
+ }
+
+ dma_fence_put(clone->last_vm_update);
+ clone->last_vm_update = dma_fence_get(source->last_vm_update);
+
+ return 0;
+}
+
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
struct amdgpu_sync_entry *e;
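
Two behaviors are added above: amdgpu_sync_get_owner() now tolerates a NULL fence and classifies KFD eviction fences via the new sentinel, and amdgpu_sync_resv() skips those fences for every job except owner-undefined (evict/move) work, so regular command submission cannot trigger a KFD process eviction. The skip rule as a predicate:

/* Only evict/move jobs (owner UNDEFINED) may wait on -- and thereby
 * trigger -- KFD eviction fences; everything else skips them.
 */
static bool skip_kfd_fence(void *fence_owner, void *job_owner)
{
	return fence_owner == AMDGPU_FENCE_OWNER_KFD &&
	       job_owner != AMDGPU_FENCE_OWNER_UNDEFINED;
}
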
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 7aba38d5c9df..10cf23a57f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -50,6 +50,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
struct amdgpu_ring *ring);
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit);
+int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
void amdgpu_sync_free(struct amdgpu_sync *sync);
int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index ed8c3739015b..2dbe87591f81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -42,7 +42,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
/* Number of tests =
* (Total GTT - IB pool - writeback page - ring buffers) / test size
*/
- n = adev->mc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
+ n = adev->gmc.gart_size - AMDGPU_IB_POOL_SIZE*64*1024;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
if (adev->rings[i])
n -= adev->rings[i]->ring_size;
@@ -59,9 +59,8 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
goto out_cleanup;
}
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM, 0,
- NULL, NULL, 0, &vram_obj);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, 0,
+ ttm_bo_type_kernel, NULL, &vram_obj);
if (r) {
DRM_ERROR("Failed to create VRAM object\n");
goto out_cleanup;
@@ -80,9 +79,9 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
void **vram_start, **vram_end;
struct dma_fence *fence = NULL;
- r = amdgpu_bo_create(adev, size, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
- NULL, 0, gtt_obj + i);
+ r = amdgpu_bo_create(adev, size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_GTT, 0,
+ ttm_bo_type_kernel, NULL, gtt_obj + i);
if (r) {
DRM_ERROR("Failed to create GTT object %d\n", i);
goto out_lclean;
@@ -142,10 +141,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
"0x%16llx/0x%16llx)\n",
i, *vram_start, gart_start,
(unsigned long long)
- (gart_addr - adev->mc.gart_start +
+ (gart_addr - adev->gmc.gart_start +
(void*)gart_start - gtt_map),
(unsigned long long)
- (vram_addr - adev->mc.vram_start +
+ (vram_addr - adev->gmc.vram_start +
(void*)gart_start - gtt_map));
amdgpu_bo_kunmap(vram_obj);
goto out_lclean_unpin;
@@ -187,10 +186,10 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
"0x%16llx/0x%16llx)\n",
i, *gart_start, vram_start,
(unsigned long long)
- (vram_addr - adev->mc.vram_start +
+ (vram_addr - adev->gmc.vram_start +
(void*)vram_start - vram_map),
(unsigned long long)
- (gart_addr - adev->mc.gart_start +
+ (gart_addr - adev->gmc.gart_start +
(void*)vram_start - vram_map));
amdgpu_bo_kunmap(gtt_obj[i]);
goto out_lclean_unpin;
@@ -200,7 +199,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
amdgpu_bo_kunmap(gtt_obj[i]);
DRM_INFO("Tested GTT->VRAM and VRAM->GTT copy for GTT offset 0x%llx\n",
- gart_addr - adev->mc.gart_start);
+ gart_addr - adev->gmc.gart_start);
continue;
out_lclean_unpin:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index cace7a93fc94..532263ab6e16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -86,7 +86,7 @@ TRACE_EVENT(amdgpu_iv,
__field(unsigned, vmid_src)
__field(uint64_t, timestamp)
__field(unsigned, timestamp_src)
- __field(unsigned, pas_id)
+ __field(unsigned, pasid)
__array(unsigned, src_data, 4)
),
TP_fast_assign(
@@ -97,16 +97,16 @@ TRACE_EVENT(amdgpu_iv,
__entry->vmid_src = iv->vmid_src;
__entry->timestamp = iv->timestamp;
__entry->timestamp_src = iv->timestamp_src;
- __entry->pas_id = iv->pas_id;
+ __entry->pasid = iv->pasid;
__entry->src_data[0] = iv->src_data[0];
__entry->src_data[1] = iv->src_data[1];
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
- TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pas_id:%u src_data: %08x %08x %08x %08x\n",
+ TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n",
__entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vmid,
- __entry->timestamp, __entry->pas_id,
+ __entry->timestamp, __entry->pasid,
__entry->src_data[0], __entry->src_data[1],
__entry->src_data[2], __entry->src_data[3])
);
@@ -217,7 +217,7 @@ TRACE_EVENT(amdgpu_vm_grab_id,
struct amdgpu_job *job),
TP_ARGS(vm, ring, job),
TP_STRUCT__entry(
- __field(struct amdgpu_vm *, vm)
+ __field(u32, pasid)
__field(u32, ring)
__field(u32, vmid)
__field(u32, vm_hub)
@@ -226,15 +226,15 @@ TRACE_EVENT(amdgpu_vm_grab_id,
),
TP_fast_assign(
- __entry->vm = vm;
+ __entry->pasid = vm->pasid;
__entry->ring = ring->idx;
__entry->vmid = job->vmid;
__entry->vm_hub = ring->funcs->vmhub,
__entry->pd_addr = job->vm_pd_addr;
__entry->needs_flush = job->vm_needs_flush;
),
- TP_printk("vm=%p, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
- __entry->vm, __entry->ring, __entry->vmid,
+ TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u",
+ __entry->pasid, __entry->ring, __entry->vmid,
__entry->vm_hub, __entry->pd_addr, __entry->needs_flush)
);
@@ -378,6 +378,28 @@ TRACE_EVENT(amdgpu_vm_flush,
__entry->vm_hub,__entry->pd_addr)
);
+DECLARE_EVENT_CLASS(amdgpu_pasid,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid),
+ TP_STRUCT__entry(
+ __field(unsigned, pasid)
+ ),
+ TP_fast_assign(
+ __entry->pasid = pasid;
+ ),
+ TP_printk("pasid=%u", __entry->pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_allocated,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
+DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed,
+ TP_PROTO(unsigned pasid),
+ TP_ARGS(pasid)
+);
+
TRACE_EVENT(amdgpu_bo_list_set,
TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo),
TP_ARGS(list, bo),
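
DECLARE_EVENT_CLASS defines the record layout and print format once; each DEFINE_EVENT then stamps out a named tracepoint sharing that class, which is lighter than two full TRACE_EVENTs. Call sites use the generated trace_* helpers; the surrounding function here is hypothetical:

/* The two tracepoints defined above are emitted like any other: */
static void example_pasid_lifecycle(unsigned int pasid)
{
	trace_amdgpu_pasid_allocated(pasid);	/* on allocation */
	/* ... pasid in use ... */
	trace_amdgpu_pasid_freed(pasid);	/* on release */
}
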
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e4bb435e614b..205da3ff9cd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -46,6 +46,7 @@
#include "amdgpu.h"
#include "amdgpu_object.h"
#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h"
#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -161,7 +162,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
break;
case TTM_PL_TT:
man->func = &amdgpu_gtt_mgr_func;
- man->gpu_offset = adev->mc.gart_start;
+ man->gpu_offset = adev->gmc.gart_start;
man->available_caching = TTM_PL_MASK_CACHING;
man->default_caching = TTM_PL_FLAG_CACHED;
man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
@@ -169,7 +170,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
case TTM_PL_VRAM:
/* "On-card" video ram */
man->func = &amdgpu_vram_mgr_func;
- man->gpu_offset = adev->mc.vram_start;
+ man->gpu_offset = adev->gmc.vram_start;
man->flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
@@ -203,6 +204,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
};
+ if (bo->type == ttm_bo_type_sg) {
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ return;
+ }
+
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
@@ -213,13 +220,11 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
- if (adev->mman.buffer_funcs &&
- adev->mman.buffer_funcs_ring &&
- adev->mman.buffer_funcs_ring->ready == false) {
+ if (!adev->mman.buffer_funcs_enabled) {
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
- } else if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ } else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
- unsigned fpfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
+ unsigned fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
struct drm_mm_node *node = bo->mem.mm_node;
unsigned long pages_left;
@@ -260,6 +265,13 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+ /*
+ * Don't verify access for KFD BOs. They don't have a GEM
+ * object associated with them.
+ */
+ if (abo->kfd_bo)
+ return 0;
+
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
return drm_vma_node_verify_access(&abo->gem_base.vma_node,
@@ -331,7 +343,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE);
- if (!ring->ready) {
+ if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -577,12 +589,9 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
amdgpu_move_null(bo, new_mem);
return 0;
}
- if (adev->mman.buffer_funcs == NULL ||
- adev->mman.buffer_funcs_ring == NULL ||
- !adev->mman.buffer_funcs_ring->ready) {
- /* use memcpy */
+
+ if (!adev->mman.buffer_funcs_enabled)
goto memcpy;
- }
if (old_mem->mem_type == TTM_PL_VRAM &&
new_mem->mem_type == TTM_PL_SYSTEM) {
@@ -621,6 +630,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
{
struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
struct amdgpu_device *adev = amdgpu_ttm_adev(bdev);
+ struct drm_mm_node *mm_node = mem->mm_node;
mem->bus.addr = NULL;
mem->bus.offset = 0;
@@ -638,9 +648,18 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
case TTM_PL_VRAM:
mem->bus.offset = mem->start << PAGE_SHIFT;
/* check if it's visible */
- if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
+ if ((mem->bus.offset + mem->bus.size) > adev->gmc.visible_vram_size)
return -EINVAL;
- mem->bus.base = adev->mc.aper_base;
+ /* Only physically contiguous buffers apply: in a contiguous
+ * buffer, the size of the first mm_node matches the total
+ * number of pages in the ttm_mem_reg.
+ */
+ if (adev->mman.aper_base_kaddr &&
+ (mm_node->size == mem->num_pages))
+ mem->bus.addr = (u8 *)adev->mman.aper_base_kaddr +
+ mem->bus.offset;
+
+ mem->bus.base = adev->gmc.aper_base;
mem->bus.is_iomem = true;
break;
default:
@@ -674,7 +693,6 @@ struct amdgpu_ttm_gup_task_list {
struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
- struct amdgpu_device *adev;
u64 offset;
uint64_t userptr;
struct mm_struct *usermm;
@@ -832,6 +850,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
struct ttm_mem_reg *bo_mem)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void*)ttm;
uint64_t flags;
int r = 0;
@@ -858,9 +877,9 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm,
return 0;
}
- flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
+ flags = amdgpu_ttm_tt_pte_flags(adev, ttm, bo_mem);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
- r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
+ r = amdgpu_gart_bind(adev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r)
@@ -891,7 +910,7 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
placement.num_busy_placement = 1;
placement.busy_placement = &placements;
placements.fpfn = 0;
- placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+ placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT;
placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
TTM_PL_FLAG_TT;
@@ -937,6 +956,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
@@ -947,7 +967,7 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
return 0;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
- r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
+ r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
if (r)
DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
gtt->ttm.ttm.num_pages, gtt->offset);
@@ -968,22 +988,20 @@ static struct ttm_backend_func amdgpu_backend_func = {
.destroy = &amdgpu_ttm_backend_destroy,
};
-static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_bo_device *bdev,
- unsigned long size, uint32_t page_flags,
- struct page *dummy_read_page)
+static struct ttm_tt *amdgpu_ttm_tt_create(struct ttm_buffer_object *bo,
+ uint32_t page_flags)
{
struct amdgpu_device *adev;
struct amdgpu_ttm_tt *gtt;
- adev = amdgpu_ttm_adev(bdev);
+ adev = amdgpu_ttm_adev(bo->bdev);
gtt = kzalloc(sizeof(struct amdgpu_ttm_tt), GFP_KERNEL);
if (gtt == NULL) {
return NULL;
}
gtt->ttm.ttm.func = &amdgpu_backend_func;
- gtt->adev = adev;
- if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags, dummy_read_page)) {
+ if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags)) {
kfree(gtt);
return NULL;
}
@@ -997,9 +1015,6 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
struct amdgpu_ttm_tt *gtt = (void *)ttm;
bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
- if (ttm->state != tt_unpopulated)
- return 0;
-
if (gtt && gtt->userptr) {
ttm->sg = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!ttm->sg)
@@ -1012,13 +1027,14 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm,
if (slave && ttm->sg) {
drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
- gtt->ttm.dma_address, ttm->num_pages);
+ gtt->ttm.dma_address,
+ ttm->num_pages);
ttm->state = tt_unbound;
return 0;
}
#ifdef CONFIG_SWIOTLB
- if (swiotlb_nr_tbl()) {
+ if (adev->need_swiotlb && swiotlb_nr_tbl()) {
return ttm_dma_populate(&gtt->ttm, adev->dev, ctx);
}
#endif
@@ -1045,7 +1061,7 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
adev = amdgpu_ttm_adev(ttm->bdev);
#ifdef CONFIG_SWIOTLB
- if (swiotlb_nr_tbl()) {
+ if (adev->need_swiotlb && swiotlb_nr_tbl()) {
ttm_dma_unpopulate(&gtt->ttm, adev->dev);
return;
}
@@ -1170,6 +1186,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
{
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
+ struct reservation_object_list *flist;
+ struct dma_fence *f;
+ int i;
+
+ /* If bo is a KFD BO, check whether it belongs to the current process.
+ * If so, return false: a KFD process needs all of its BOs resident
+ * in order to run successfully.
+ */
+ flist = reservation_object_get_list(bo->resv);
+ if (flist) {
+ for (i = 0; i < flist->shared_count; ++i) {
+ f = rcu_dereference_protected(flist->shared[i],
+ reservation_object_held(bo->resv));
+ if (amdkfd_fence_check_mm(f, current->mm))
+ return false;
+ }
+ }
switch (bo->mem.mem_type) {
case TTM_PL_TT:
@@ -1212,7 +1245,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
pos = (nodes->start << PAGE_SHIFT) + offset;
- while (len && pos < adev->mc.mc_vram_size) {
+ while (len && pos < adev->gmc.mc_vram_size) {
uint64_t aligned_pos = pos & ~(uint64_t)3;
uint32_t bytes = 4 - (pos & 3);
uint32_t shift = (pos & 3) * 8;
@@ -1298,7 +1331,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
struct ttm_operation_ctx ctx = { false, false };
int r = 0;
int i;
- u64 vram_size = adev->mc.visible_vram_size;
+ u64 vram_size = adev->gmc.visible_vram_size;
u64 offset = adev->fw_vram_usage.start_offset;
u64 size = adev->fw_vram_usage.size;
struct amdgpu_bo *bo;
@@ -1309,11 +1342,12 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
if (adev->fw_vram_usage.size > 0 &&
adev->fw_vram_usage.size <= vram_size) {
- r = amdgpu_bo_create(adev, adev->fw_vram_usage.size,
- PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0,
- &adev->fw_vram_usage.reserved_bo);
+ r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
+ AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
+ ttm_bo_type_kernel, NULL,
+ &adev->fw_vram_usage.reserved_bo);
if (r)
goto error_create;
@@ -1387,8 +1421,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
adev->mman.initialized = true;
+
+ /* We opt to avoid OOM on system page allocations */
+ adev->mman.bdev.no_retry = true;
+
r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM,
- adev->mc.real_vram_size >> PAGE_SHIFT);
+ adev->gmc.real_vram_size >> PAGE_SHIFT);
if (r) {
DRM_ERROR("Failed initializing VRAM heap.\n");
return r;
@@ -1397,11 +1435,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Reduce size of CPU-visible VRAM if requested */
vis_vram_limit = (u64)amdgpu_vis_vram_limit * 1024 * 1024;
if (amdgpu_vis_vram_limit > 0 &&
- vis_vram_limit <= adev->mc.visible_vram_size)
- adev->mc.visible_vram_size = vis_vram_limit;
+ vis_vram_limit <= adev->gmc.visible_vram_size)
+ adev->gmc.visible_vram_size = vis_vram_limit;
/* Change the size here instead of the init above so only lpfn is affected */
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
+#ifdef CONFIG_64BIT
+ adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
+ adev->gmc.visible_vram_size);
+#endif
/*
*The reserved vram for firmware must be pinned to the specified
@@ -1412,21 +1454,21 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}
- r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
+ r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
NULL, NULL);
if (r)
return r;
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
- (unsigned) (adev->mc.real_vram_size / (1024 * 1024)));
+ (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
if (amdgpu_gtt_size == -1) {
struct sysinfo si;
si_meminfo(&si);
gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- adev->mc.mc_vram_size),
+ adev->gmc.mc_vram_size),
((uint64_t)si.totalram * si.mem_unit * 3/4));
}
else
@@ -1494,6 +1536,9 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
amdgpu_ttm_debugfs_fini(adev);
amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
amdgpu_ttm_fw_reserve_vram_fini(adev);
+ if (adev->mman.aper_base_kaddr)
+ iounmap(adev->mman.aper_base_kaddr);
+ adev->mman.aper_base_kaddr = NULL;
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM);
ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT);
@@ -1509,18 +1554,30 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
DRM_INFO("amdgpu: ttm finalized\n");
}
-/* this should only be called at bootup or when userspace
- * isn't running */
-void amdgpu_ttm_set_active_vram_size(struct amdgpu_device *adev, u64 size)
+/**
+ * amdgpu_ttm_set_buffer_funcs_status - enable/disable use of buffer functions
+ *
+ * @adev: amdgpu_device pointer
+ * @enable: true when we can use buffer functions.
+ *
+ * Enable/disable use of buffer functions during suspend/resume. This should
+ * only be called at bootup or when userspace isn't running.
+ */
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
- struct ttm_mem_type_manager *man;
+ struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
+ uint64_t size;
- if (!adev->mman.initialized)
+ if (!adev->mman.initialized || adev->in_gpu_reset)
return;
- man = &adev->mman.bdev.man[TTM_PL_VRAM];
/* this just adjusts TTM's idea of the VRAM size, which sets lpfn to the correct value */
+ if (enable)
+ size = adev->gmc.real_vram_size;
+ else
+ size = adev->gmc.visible_vram_size;
man->size = size >> PAGE_SHIFT;
+ adev->mman.buffer_funcs_enabled = enable;
}
int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma)
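
Replacing set_active_vram_size() with an explicit enable flag makes the policy legible: with the SDMA buffer functions available, TTM may allocate from the full VRAM heap; without them, moves fall back to CPU memcpy, which can only reach the CPU-visible aperture, so the heap is clamped to visible VRAM. The size selection in isolation, as a sketch:

/* Heap size TTM may hand out, depending on GPU copy availability. */
static u64 vram_heap_size(struct amdgpu_device *adev, bool funcs_enabled)
{
	return funcs_enabled ? adev->gmc.real_vram_size
			     : adev->gmc.visible_vram_size;
}
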
@@ -1559,7 +1616,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
BUG_ON(adev->mman.buffer_funcs->copy_max_bytes <
AMDGPU_GTT_MAX_TRANSFER_SIZE * 8);
- *addr = adev->mc.gart_start;
+ *addr = adev->gmc.gart_start;
*addr += (u64)window * AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE;
@@ -1619,6 +1676,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
unsigned i;
int r;
+ if (direct_submit && !ring->ready) {
+ DRM_ERROR("Trying to move memory with ring turned off.\n");
+ return -EINVAL;
+ }
+
max_bytes = adev->mman.buffer_funcs->copy_max_bytes;
num_loops = DIV_ROUND_UP(byte_count, max_bytes);
num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw;
@@ -1677,13 +1739,12 @@ error_free:
}
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint64_t src_data,
+ uint32_t src_data,
struct reservation_object *resv,
struct dma_fence **fence)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- uint32_t max_bytes = 8 *
- adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
+ uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct drm_mm_node *mm_node;
@@ -1693,7 +1754,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
struct amdgpu_job *job;
int r;
- if (!ring->ready) {
+ if (!adev->mman.buffer_funcs_enabled) {
DRM_ERROR("Trying to clear memory with ring turned off.\n");
return -EINVAL;
}
@@ -1714,9 +1775,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
num_pages -= mm_node->size;
++mm_node;
}
-
- /* num of dwords for each SDMA_OP_PTEPDE cmd */
- num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+ num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
/* for IB padding */
num_dw += 64;
@@ -1741,16 +1800,12 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t byte_count = mm_node->size << PAGE_SHIFT;
uint64_t dst_addr;
- WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
-
dst_addr = amdgpu_mm_node_addr(&bo->tbo, mm_node, &bo->tbo.mem);
while (byte_count) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
- amdgpu_vm_set_pte_pde(adev, &job->ibs[0],
- dst_addr, 0,
- cur_size_in_bytes >> 3, 0,
- src_data);
+ amdgpu_emit_fill_buffer(adev, &job->ibs[0], src_data,
+ dst_addr, cur_size_in_bytes);
dst_addr += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
@@ -1811,14 +1866,14 @@ static ssize_t amdgpu_ttm_vram_read(struct file *f, char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
- if (*pos >= adev->mc.mc_vram_size)
+ if (*pos >= adev->gmc.mc_vram_size)
return -ENXIO;
while (size) {
unsigned long flags;
uint32_t value;
- if (*pos >= adev->mc.mc_vram_size)
+ if (*pos >= adev->gmc.mc_vram_size)
return result;
spin_lock_irqsave(&adev->mmio_idx_lock, flags);
@@ -1850,14 +1905,14 @@ static ssize_t amdgpu_ttm_vram_write(struct file *f, const char __user *buf,
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
- if (*pos >= adev->mc.mc_vram_size)
+ if (*pos >= adev->gmc.mc_vram_size)
return -ENXIO;
while (size) {
unsigned long flags;
uint32_t value;
- if (*pos >= adev->mc.mc_vram_size)
+ if (*pos >= adev->gmc.mc_vram_size)
return result;
r = get_user(value, (uint32_t *)buf);
@@ -1935,38 +1990,98 @@ static const struct file_operations amdgpu_ttm_gtt_fops = {
#endif
-static ssize_t amdgpu_iova_to_phys_read(struct file *f, char __user *buf,
- size_t size, loff_t *pos)
+static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
+ size_t size, loff_t *pos)
{
struct amdgpu_device *adev = file_inode(f)->i_private;
- int r;
- uint64_t phys;
struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
- // always return 8 bytes
- if (size != 8)
- return -EINVAL;
+ dom = iommu_get_domain_for_dev(adev->dev);
- // only accept page addresses
- if (*pos & 0xFFF)
- return -EINVAL;
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = bytes < size ? bytes : size;
+
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
+
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap(p);
+ r = copy_to_user(buf, ptr + off, bytes);
+ kunmap(p);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
+}
+
+static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
+ size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = file_inode(f)->i_private;
+ struct iommu_domain *dom;
+ ssize_t result = 0;
+ int r;
dom = iommu_get_domain_for_dev(adev->dev);
- if (dom)
- phys = iommu_iova_to_phys(dom, *pos);
- else
- phys = *pos;
- r = copy_to_user(buf, &phys, 8);
- if (r)
- return -EFAULT;
+ while (size) {
+ phys_addr_t addr = *pos & PAGE_MASK;
+ loff_t off = *pos & ~PAGE_MASK;
+ size_t bytes = PAGE_SIZE - off;
+ unsigned long pfn;
+ struct page *p;
+ void *ptr;
+
+ bytes = bytes < size ? bytes : size;
+
+ addr = dom ? iommu_iova_to_phys(dom, addr) : addr;
- return 8;
+ pfn = addr >> PAGE_SHIFT;
+ if (!pfn_valid(pfn))
+ return -EPERM;
+
+ p = pfn_to_page(pfn);
+ if (p->mapping != adev->mman.bdev.dev_mapping)
+ return -EPERM;
+
+ ptr = kmap(p);
+ r = copy_from_user(ptr + off, buf, bytes);
+ kunmap(p);
+ if (r)
+ return -EFAULT;
+
+ size -= bytes;
+ *pos += bytes;
+ result += bytes;
+ }
+
+ return result;
}
-static const struct file_operations amdgpu_ttm_iova_fops = {
+static const struct file_operations amdgpu_ttm_iomem_fops = {
.owner = THIS_MODULE,
- .read = amdgpu_iova_to_phys_read,
+ .read = amdgpu_iomem_read,
+ .write = amdgpu_iomem_write,
.llseek = default_llseek
};
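
The rewritten debugfs file changes contract: instead of translating a single IOVA to a physical address per fixed 8-byte read, it exposes the memory behind the IOVA directly, walking it page by page, translating through the IOMMU domain when one exists, and refusing pages not owned by this device's mapping. The per-iteration address handling, factored out as a sketch:

/* Split pos into page + offset, translate IOVA -> phys when an IOMMU
 * domain is present, and clamp the copy to the end of the page.
 */
static size_t iomem_chunk(struct iommu_domain *dom, loff_t pos,
			  size_t size, phys_addr_t *phys)
{
	phys_addr_t addr = pos & PAGE_MASK;
	loff_t off = pos & ~PAGE_MASK;
	size_t bytes = min_t(size_t, PAGE_SIZE - off, size);

	*phys = (dom ? iommu_iova_to_phys(dom, addr) : addr) + off;
	return bytes;
}
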
@@ -1979,7 +2094,7 @@ static const struct {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
{ "amdgpu_gtt", &amdgpu_ttm_gtt_fops, TTM_PL_TT },
#endif
- { "amdgpu_iova", &amdgpu_ttm_iova_fops, TTM_PL_SYSTEM },
+ { "amdgpu_iomem", &amdgpu_ttm_iomem_fops, TTM_PL_SYSTEM },
};
#endif
@@ -2001,16 +2116,16 @@ static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev)
if (IS_ERR(ent))
return PTR_ERR(ent);
if (ttm_debugfs_entries[count].domain == TTM_PL_VRAM)
- i_size_write(ent->d_inode, adev->mc.mc_vram_size);
+ i_size_write(ent->d_inode, adev->gmc.mc_vram_size);
else if (ttm_debugfs_entries[count].domain == TTM_PL_TT)
- i_size_write(ent->d_inode, adev->mc.gart_size);
+ i_size_write(ent->d_inode, adev->gmc.gart_size);
adev->mman.debugfs_entries[count] = ent;
}
count = ARRAY_SIZE(amdgpu_ttm_debugfs_list);
#ifdef CONFIG_SWIOTLB
- if (!swiotlb_nr_tbl())
+ if (!(adev->need_swiotlb && swiotlb_nr_tbl()))
--count;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 167856f6080f..6ea7de863041 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -44,6 +44,7 @@ struct amdgpu_mman {
struct ttm_bo_device bdev;
bool mem_global_referenced;
bool initialized;
+ void __iomem *aper_base_kaddr;
#if defined(CONFIG_DEBUG_FS)
struct dentry *debugfs_entries[8];
@@ -52,6 +53,7 @@ struct amdgpu_mman {
/* buffer handling */
const struct amdgpu_buffer_funcs *buffer_funcs;
struct amdgpu_ring *buffer_funcs_ring;
+ bool buffer_funcs_enabled;
struct mutex gtt_window_lock;
/* Scheduler entity for buffer moves */
@@ -74,6 +76,11 @@ int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
+int amdgpu_ttm_init(struct amdgpu_device *adev);
+void amdgpu_ttm_fini(struct amdgpu_device *adev);
+void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,
+ bool enable);
+
int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
uint64_t dst_offset, uint32_t byte_count,
struct reservation_object *resv,
@@ -86,7 +93,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
struct reservation_object *resv,
struct dma_fence **f);
int amdgpu_fill_buffer(struct amdgpu_bo *bo,
- uint64_t src_data,
+ uint32_t src_data,
struct reservation_object *resv,
struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index 474f88fbafce..5916cc25e28b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -271,12 +271,13 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type)
return AMDGPU_FW_LOAD_SMU;
case CHIP_VEGA10:
case CHIP_RAVEN:
+ case CHIP_VEGA12:
if (!load_type)
return AMDGPU_FW_LOAD_DIRECT;
else
return AMDGPU_FW_LOAD_PSP;
default:
- DRM_ERROR("Unknow firmware load type\n");
+ DRM_ERROR("Unknown firmware load type\n");
}
return AMDGPU_FW_LOAD_DIRECT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index b2eae86bf906..627542b22ae4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -68,6 +68,7 @@
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_uvd.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_uvd.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_uvd.bin"
#define mmUVD_GPCOM_VCPU_DATA0_VEGA10 (0x03c4 + 0x7e00)
#define mmUVD_GPCOM_VCPU_DATA1_VEGA10 (0x03c5 + 0x7e00)
@@ -110,6 +111,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
@@ -161,11 +163,14 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
+ case CHIP_POLARIS12:
+ fw_name = FIRMWARE_POLARIS12;
+ break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
- case CHIP_POLARIS12:
- fw_name = FIRMWARE_POLARIS12;
+ case CHIP_VEGA12:
+ fw_name = FIRMWARE_VEGA12;
break;
default:
return -EINVAL;
@@ -299,12 +304,15 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&adev->uvd.idle_work);
- for (i = 0; i < adev->uvd.max_handles; ++i)
- if (atomic_read(&adev->uvd.handles[i]))
- break;
+ /* only valid for physical mode */
+ if (adev->asic_type < CHIP_POLARIS10) {
+ for (i = 0; i < adev->uvd.max_handles; ++i)
+ if (atomic_read(&adev->uvd.handles[i]))
+ break;
- if (i == AMDGPU_MAX_UVD_HANDLES)
- return 0;
+ if (i == adev->uvd.max_handles)
+ return 0;
+ }
size = amdgpu_bo_size(adev->uvd.vcpu_bo);
ptr = adev->uvd.cpu_addr;
@@ -952,37 +960,28 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
bool direct, struct dma_fence **fence)
{
- struct ttm_operation_ctx ctx = { true, false };
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- struct list_head head;
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- struct dma_fence *f = NULL;
- struct amdgpu_device *adev = ring->adev;
- uint64_t addr;
uint32_t data[4];
- int i, r;
-
- memset(&tv, 0, sizeof(tv));
- tv.bo = &bo->tbo;
-
- INIT_LIST_HEAD(&head);
- list_add(&tv.head, &head);
+ uint64_t addr;
+ long r;
+ int i;
- r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
- if (r)
- return r;
+ amdgpu_bo_kunmap(bo);
+ amdgpu_bo_unpin(bo);
if (!ring->adev->uvd.address_64_bit) {
+ struct ttm_operation_ctx ctx = { true, false };
+
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ goto err;
}
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- goto err;
-
r = amdgpu_job_alloc_with_ib(adev, 64, &job);
if (r)
goto err;
@@ -1014,6 +1013,14 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
+ r = reservation_object_wait_timeout_rcu(bo->tbo.resv,
+ true, false,
+ msecs_to_jiffies(10));
+ if (r == 0)
+ r = -ETIMEDOUT;
+ if (r < 0)
+ goto err_free;
+
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
if (r)
@@ -1021,17 +1028,23 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
amdgpu_job_free(job);
} else {
+ r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+ AMDGPU_FENCE_OWNER_UNDEFINED, false);
+ if (r)
+ goto err_free;
+
r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
}
- ttm_eu_fence_buffer_objects(&ticket, &head, f);
+ amdgpu_bo_fence(bo, f, false);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
if (fence)
*fence = dma_fence_get(f);
- amdgpu_bo_unref(&bo);
dma_fence_put(f);
return 0;
@@ -1040,7 +1053,8 @@ err_free:
amdgpu_job_free(job);
err:
- ttm_eu_backoff_reservation(&ticket, &head);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
@@ -1051,31 +1065,16 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = NULL;
uint32_t *msg;
int r, i;
- r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, &bo);
+ r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, (void **)&msg);
if (r)
return r;
- r = amdgpu_bo_reserve(bo, false);
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, (void **)&msg);
- if (r) {
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
- return r;
- }
-
/* stitch together an UVD create msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000000);
@@ -1091,9 +1090,6 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = 11; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
-
return amdgpu_uvd_send_msg(ring, bo, true, fence);
}
@@ -1101,31 +1097,16 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
bool direct, struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = NULL;
uint32_t *msg;
int r, i;
- r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, &bo);
+ r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, (void **)&msg);
if (r)
return r;
- r = amdgpu_bo_reserve(bo, false);
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, (void **)&msg);
- if (r) {
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
- return r;
- }
-
/* stitch together an UVD destroy msg */
msg[0] = cpu_to_le32(0x00000de4);
msg[1] = cpu_to_le32(0x00000002);
@@ -1134,9 +1115,6 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = 4; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
-
return amdgpu_uvd_send_msg(ring, bo, direct, fence);
}
@@ -1146,9 +1124,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, uvd.idle_work.work);
unsigned fences = amdgpu_fence_count_emitted(&adev->uvd.ring);
- if (amdgpu_sriov_vf(adev))
- return;
-
if (fences == 0) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, false);
@@ -1168,11 +1143,12 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
+ bool set_clocks;
if (amdgpu_sriov_vf(adev))
return;
+ set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
if (set_clocks) {
if (adev->pm.dpm_enabled) {
amdgpu_dpm_enable_uvd(adev, true);
@@ -1188,7 +1164,8 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
}
/**
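The net effect of the amdgpu_uvd.c changes above is that amdgpu_uvd_send_msg() no longer does its own TTM reservation dance: callers allocate the message buffer with amdgpu_bo_create_reserved(), which hands the BO back already reserved, pinned and CPU-mapped, and send_msg() takes over that reservation, dropping it on both the success and the error path. A minimal sketch of the resulting caller pattern (message contents elided):

	struct amdgpu_bo *bo = NULL;
	uint32_t *msg;
	int r;

	/* BO comes back reserved, pinned and kmapped into *msg */
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000de4);
	/* ... fill in the rest of the UVD message ... */

	/* send_msg() kunmaps, unpins, fences, unreserves and unrefs bo */
	return amdgpu_uvd_send_msg(ring, bo, true, fence);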
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index d274ae535530..a33804bd3314 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -55,6 +55,7 @@
#define FIRMWARE_POLARIS12 "amdgpu/polaris12_vce.bin"
#define FIRMWARE_VEGA10 "amdgpu/vega10_vce.bin"
+#define FIRMWARE_VEGA12 "amdgpu/vega12_vce.bin"
#ifdef CONFIG_DRM_AMDGPU_CIK
MODULE_FIRMWARE(FIRMWARE_BONAIRE);
@@ -72,6 +73,7 @@ MODULE_FIRMWARE(FIRMWARE_POLARIS11);
MODULE_FIRMWARE(FIRMWARE_POLARIS12);
MODULE_FIRMWARE(FIRMWARE_VEGA10);
+MODULE_FIRMWARE(FIRMWARE_VEGA12);
static void amdgpu_vce_idle_work_handler(struct work_struct *work);
@@ -127,11 +129,14 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
case CHIP_POLARIS11:
fw_name = FIRMWARE_POLARIS11;
break;
+ case CHIP_POLARIS12:
+ fw_name = FIRMWARE_POLARIS12;
+ break;
case CHIP_VEGA10:
fw_name = FIRMWARE_VEGA10;
break;
- case CHIP_POLARIS12:
- fw_name = FIRMWARE_POLARIS12;
+ case CHIP_VEGA12:
+ fw_name = FIRMWARE_VEGA12;
break;
default:
@@ -300,9 +305,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
container_of(work, struct amdgpu_device, vce.idle_work.work);
unsigned i, count = 0;
- if (amdgpu_sriov_vf(adev))
- return;
-
for (i = 0; i < adev->vce.num_rings; i++)
count += amdgpu_fence_count_emitted(&adev->vce.ring[i]);
@@ -362,7 +364,8 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
*/
void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
{
- schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
+ if (!amdgpu_sriov_vf(ring->adev))
+ schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
}
/**
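The SR-IOV handling of the idle work is restructured the same way for UVD above and VCE here: instead of letting the handler run and return early on a virtual function, a VF never schedules or cancels the work at all, since the host manages the clocks. A simplified sketch of the begin_use/end_use pairing (the dpm_enabled/powergating branches of begin_use are elided):

void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (amdgpu_sriov_vf(adev))
		return;		/* the host controls UVD clocks */

	/* if no idle work was pending, the block had already been
	 * idled and its clocks must be raised again */
	if (!cancel_delayed_work_sync(&adev->uvd.idle_work))
		amdgpu_dpm_enable_uvd(adev, true);
}

void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
{
	if (!amdgpu_sriov_vf(ring->adev))
		schedule_delayed_work(&ring->adev->uvd.idle_work,
				      UVD_IDLE_TIMEOUT);
}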
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 0fd378ae92c3..71781267ee4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -30,6 +30,8 @@
#define AMDGPU_VCE_HARVEST_VCE0 (1 << 0)
#define AMDGPU_VCE_HARVEST_VCE1 (1 << 1)
+#define AMDGPU_VCE_FW_53_45 ((53 << 24) | (45 << 16))
+
struct amdgpu_vce {
struct amdgpu_bo *vcpu_bo;
uint64_t gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 837962118dbc..58e495330b38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -270,34 +270,17 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
-static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
- bool direct, struct dma_fence **fence)
+static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
+ struct amdgpu_bo *bo, bool direct,
+ struct dma_fence **fence)
{
- struct ttm_operation_ctx ctx = { true, false };
- struct ttm_validate_buffer tv;
- struct ww_acquire_ctx ticket;
- struct list_head head;
+ struct amdgpu_device *adev = ring->adev;
+ struct dma_fence *f = NULL;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
- struct dma_fence *f = NULL;
- struct amdgpu_device *adev = ring->adev;
uint64_t addr;
int i, r;
- memset(&tv, 0, sizeof(tv));
- tv.bo = &bo->tbo;
-
- INIT_LIST_HEAD(&head);
- list_add(&tv.head, &head);
-
- r = ttm_eu_reserve_buffers(&ticket, &head, true, NULL);
- if (r)
- return r;
-
- r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
- if (r)
- goto err;
-
r = amdgpu_job_alloc_with_ib(adev, 64, &job);
if (r)
goto err;
@@ -330,11 +313,12 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *b
goto err_free;
}
- ttm_eu_fence_buffer_objects(&ticket, &head, f);
+ amdgpu_bo_fence(bo, f, false);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
if (fence)
*fence = dma_fence_get(f);
- amdgpu_bo_unref(&bo);
dma_fence_put(f);
return 0;
@@ -343,7 +327,8 @@ err_free:
amdgpu_job_free(job);
err:
- ttm_eu_backoff_reservation(&ticket, &head);
+ amdgpu_bo_unreserve(bo);
+ amdgpu_bo_unref(&bo);
return r;
}
@@ -351,31 +336,16 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = NULL;
uint32_t *msg;
int r, i;
- r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, &bo);
+ r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, (void **)&msg);
if (r)
return r;
- r = amdgpu_bo_reserve(bo, false);
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, (void **)&msg);
- if (r) {
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
- return r;
- }
-
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000038);
msg[2] = cpu_to_le32(0x00000001);
@@ -393,9 +363,6 @@ static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = 14; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
-
return amdgpu_vcn_dec_send_msg(ring, bo, true, fence);
}
@@ -403,31 +370,16 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
bool direct, struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_bo *bo;
+ struct amdgpu_bo *bo = NULL;
uint32_t *msg;
int r, i;
- r = amdgpu_bo_create(adev, 1024, PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
- AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, 0, &bo);
+ r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &bo, NULL, (void **)&msg);
if (r)
return r;
- r = amdgpu_bo_reserve(bo, false);
- if (r) {
- amdgpu_bo_unref(&bo);
- return r;
- }
-
- r = amdgpu_bo_kmap(bo, (void **)&msg);
- if (r) {
- amdgpu_bo_unreserve(bo);
- amdgpu_bo_unref(&bo);
- return r;
- }
-
msg[0] = cpu_to_le32(0x00000028);
msg[1] = cpu_to_le32(0x00000018);
msg[2] = cpu_to_le32(0x00000000);
@@ -437,9 +389,6 @@ static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = 6; i < 1024; ++i)
msg[i] = cpu_to_le32(0x0);
- amdgpu_bo_kunmap(bo);
- amdgpu_bo_unreserve(bo);
-
return amdgpu_vcn_dec_send_msg(ring, bo, direct, fence);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index e7dfb7b44b4b..21adb1b6e5cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,7 +22,21 @@
*/
#include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */
+#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
+{
+ uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;
+
+ addr -= AMDGPU_VA_RESERVED_SIZE;
+
+ if (addr >= AMDGPU_VA_HOLE_START)
+ addr |= AMDGPU_VA_HOLE_END;
+
+ return addr;
+}
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
{
@@ -55,14 +69,14 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) {
/*
* amdgpu_map_static_csa should be called during amdgpu_vm_init
- * it maps virtual address "AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE"
- * to this VM, and each command submission of GFX should use this virtual
- * address within META_DATA init package to support SRIOV gfx preemption.
+ * it maps the virtual address returned by amdgpu_csa_vaddr() into this VM;
+ * each GFX command submission should use this virtual address within its
+ * META_DATA init package to support SRIOV gfx preemption.
*/
-
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_va **bo_va)
{
+ uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK;
struct ww_acquire_ctx ticket;
struct list_head list;
struct amdgpu_bo_list_entry pd;
@@ -90,7 +104,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return -ENOMEM;
}
- r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, AMDGPU_CSA_VADDR,
+ r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr,
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r);
@@ -99,7 +113,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return r;
}
- r = amdgpu_vm_bo_map(adev, *bo_va, AMDGPU_CSA_VADDR, 0, AMDGPU_CSA_SIZE,
+ r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE,
AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
AMDGPU_PTE_EXECUTABLE);
@@ -125,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
{
- signed long r;
+ signed long r, cnt = 0;
unsigned long flags;
- uint32_t val, seq;
+ uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *ring = &kiq->ring;
@@ -141,18 +155,39 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- if (r < 1) {
- DRM_ERROR("wait for kiq fence error: %ld\n", r);
- return ~0;
+
+	/* don't wait any longer in the GPU reset case, because that can
+	 * block the gpu_recover() routine forever, e.g. when this
+	 * virt_kiq_rreg is triggered from TTM and
+	 * ttm_bo_lock_delayed_workqueue() never returns while we keep
+	 * waiting here, which causes gpu_recover() to hang.
+	 *
+	 * also don't wait when called from IRQ context
+	 */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_read;
+
+	might_sleep();
+
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
}
- val = adev->wb.wb[adev->virt.reg_val_offs];
- return val;
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ return adev->wb.wb[adev->virt.reg_val_offs];
+
+failed_kiq_read:
+ pr_err("failed to read reg:%x\n", reg);
+ return ~0;
}
void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
- signed long r;
+ signed long r, cnt = 0;
unsigned long flags;
uint32_t seq;
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -168,8 +203,34 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
spin_unlock_irqrestore(&kiq->ring_lock, flags);
r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
- if (r < 1)
- DRM_ERROR("wait for kiq fence error: %ld\n", r);
+
+	/* don't wait any longer in the GPU reset case, because that can
+	 * block the gpu_recover() routine forever, e.g. when this
+	 * virt_kiq_wreg is triggered from TTM and
+	 * ttm_bo_lock_delayed_workqueue() never returns while we keep
+	 * waiting here, which causes gpu_recover() to hang.
+	 *
+	 * also don't wait when called from IRQ context
+	 */
+ if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+ goto failed_kiq_write;
+
+	might_sleep();
+
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_kiq_write:
+ pr_err("failed to write reg:%x\n", reg);
}
/**
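The KIQ register accessors above now bound their waits instead of polling for up to 100 seconds: one 5 ms poll, an immediate bail-out when a GPU reset is in flight or the caller runs in IRQ context (either case must not sleep or block gpu_recover()), then at most MAX_KIQ_REG_TRY msleep-and-repoll rounds, roughly 20 * (5 ms + 5 ms) = 200 ms in the worst case. The retry skeleton shared by the read and write paths:

	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);

	/* never block a reset, never sleep in IRQ context */
	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
		goto failed;

	might_sleep();
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
	}
	if (cnt > MAX_KIQ_REG_TRY)
		goto failed;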
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 6a83425aa9ed..880ac113a3a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -251,8 +251,7 @@ struct amdgpu_virt {
uint32_t gim_feature;
};
-#define AMDGPU_CSA_SIZE (8 * 1024)
-#define AMDGPU_CSA_VADDR (AMDGPU_VA_RESERVED_SIZE - AMDGPU_CSA_SIZE)
+#define AMDGPU_CSA_SIZE (8 * 1024)
#define amdgpu_sriov_enabled(adev) \
((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV)
@@ -279,6 +278,8 @@ static inline bool is_virtual_machine(void)
}
struct amdgpu_vm;
+
+uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev);
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
int amdgpu_allocate_static_csa(struct amdgpu_device *adev);
int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
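With AMDGPU_CSA_VADDR gone, the CSA lives in the top AMDGPU_VA_RESERVED_SIZE bytes of the per-device VA range (shrunk from 8 MiB to 1 MiB in the amdgpu_vm.h hunk below). A worked example of amdgpu_csa_vaddr(), assuming a 48-bit VA space (max_pfn << AMDGPU_GPU_PAGE_SHIFT == 2^48) and the hole constants from amdgpu_vm.h:

	uint64_t addr = 1ULL << 48;		/* top of the VA space  */

	addr -= AMDGPU_VA_RESERVED_SIZE;	/* 2^48 - 1 MiB         */

	/* 2^48 - 1 MiB lies above the hole start (2^47), so fold it
	 * into canonical form by setting the upper hole bits */
	if (addr >= AMDGPU_VA_HOLE_START)	/* 0x0000800000000000ULL */
		addr |= AMDGPU_VA_HOLE_END;	/* 0xffff800000000000ULL */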
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5afbc5e714d0..da55a78d7380 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -32,6 +32,7 @@
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
/*
* GPUVM
@@ -75,7 +76,8 @@ struct amdgpu_pte_update_params {
/* indirect buffer to fill with commands */
struct amdgpu_ib *ib;
/* Function which actually does the update */
- void (*func)(struct amdgpu_pte_update_params *params, uint64_t pe,
+ void (*func)(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags);
/* The next two are used during VM update by CPU
@@ -257,6 +259,104 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
}
/**
+ * amdgpu_vm_clear_bo - initially clear the PDs/PTs
+ *
+ * @adev: amdgpu_device pointer
+ * @vm: VM the BO belongs to
+ * @bo: BO to clear
+ * @level: level this BO is at
+ * @pte_support_ats: initialize the entries for ATS instead of clearing them
+ *
+ * Root PD needs to be reserved when calling this.
+ */
+static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm, struct amdgpu_bo *bo,
+ unsigned level, bool pte_support_ats)
+{
+ struct ttm_operation_ctx ctx = { true, false };
+ struct dma_fence *fence = NULL;
+ unsigned entries, ats_entries;
+ struct amdgpu_ring *ring;
+ struct amdgpu_job *job;
+ uint64_t addr;
+ int r;
+
+ addr = amdgpu_bo_gpu_offset(bo);
+ entries = amdgpu_bo_size(bo) / 8;
+
+ if (pte_support_ats) {
+ if (level == adev->vm_manager.root_level) {
+ ats_entries = amdgpu_vm_level_shift(adev, level);
+ ats_entries += AMDGPU_GPU_PAGE_SHIFT;
+ ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
+ ats_entries = min(ats_entries, entries);
+ entries -= ats_entries;
+ } else {
+ ats_entries = entries;
+ entries = 0;
+ }
+ } else {
+ ats_entries = 0;
+ }
+
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
+ r = reservation_object_reserve_shared(bo->tbo.resv);
+ if (r)
+ return r;
+
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (r)
+ goto error;
+
+ r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ if (r)
+ goto error;
+
+ if (ats_entries) {
+ uint64_t ats_value;
+
+ ats_value = AMDGPU_PTE_DEFAULT_ATC;
+ if (level != AMDGPU_VM_PTB)
+ ats_value |= AMDGPU_PDE_PTE;
+
+ amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+ ats_entries, 0, ats_value);
+ addr += ats_entries * 8;
+ }
+
+ if (entries)
+ amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
+ entries, 0, 0);
+
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+ WARN_ON(job->ibs[0].length_dw > 64);
+ r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
+ AMDGPU_FENCE_OWNER_UNDEFINED, false);
+ if (r)
+ goto error_free;
+
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+ if (r)
+ goto error_free;
+
+ amdgpu_bo_fence(bo, fence, true);
+ dma_fence_put(fence);
+
+ if (bo->shadow)
+ return amdgpu_vm_clear_bo(adev, vm, bo->shadow,
+ level, pte_support_ats);
+
+ return 0;
+
+error_free:
+ amdgpu_job_free(job);
+
+error:
+ return r;
+}
+
+/**
* amdgpu_vm_alloc_levels - allocate the PD/PT levels
*
* @adev: amdgpu_device pointer
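amdgpu_vm_clear_bo() replaces the old AMDGPU_GEM_CREATE_VRAM_CLEARED/init_value mechanism: freshly allocated PDs/PTs are now initialized by an explicit SDMA job, which also lets the ATS split be computed per level. A worked example of the ats_entries arithmetic, assuming the 48-bit, 9-bits-per-level layout used on Vega/Raven (so each root PD entry covers 2^39 bytes):

	/* root PD: level_shift = 27, so
	 *   ats_entries = AMDGPU_VA_HOLE_START >> (27 + 12)
	 *               = 2^47 >> 39 = 256
	 * of the 512 root entries; the low half (below the hole) is
	 * written as AMDGPU_PTE_DEFAULT_ATC | AMDGPU_PDE_PTE and the
	 * remaining 256 entries are cleared to zero */
	ats_entries = amdgpu_vm_level_shift(adev, level);
	ats_entries += AMDGPU_GPU_PAGE_SHIFT;
	ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
	ats_entries = min(ats_entries, entries);
	entries -= ats_entries;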
@@ -270,13 +370,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_vm_pt *parent,
uint64_t saddr, uint64_t eaddr,
- unsigned level)
+ unsigned level, bool ats)
{
unsigned shift = amdgpu_vm_level_shift(adev, level);
unsigned pt_idx, from, to;
- int r;
u64 flags;
- uint64_t init_value = 0;
+ int r;
if (!parent->entries) {
unsigned num_entries = amdgpu_vm_num_entries(adev, level);
@@ -299,21 +398,13 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
saddr = saddr & ((1 << shift) - 1);
eaddr = eaddr & ((1 << shift) - 1);
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW);
- if (vm->pte_support_ats) {
- init_value = AMDGPU_PTE_DEFAULT_ATC;
- if (level != AMDGPU_VM_PTB)
- init_value |= AMDGPU_PDE_PTE;
-
- }
-
/* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
struct reservation_object *resv = vm->root.base.bo->tbo.resv;
@@ -323,16 +414,23 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
if (!entry->base.bo) {
r = amdgpu_bo_create(adev,
amdgpu_vm_bo_size(adev, level),
- AMDGPU_GPU_PAGE_SIZE, true,
- AMDGPU_GEM_DOMAIN_VRAM,
- flags,
- NULL, resv, init_value, &pt);
+ AMDGPU_GPU_PAGE_SIZE,
+ AMDGPU_GEM_DOMAIN_VRAM, flags,
+ ttm_bo_type_kernel, resv, &pt);
if (r)
return r;
+ r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
+ if (r) {
+ amdgpu_bo_unref(&pt->shadow);
+ amdgpu_bo_unref(&pt);
+ return r;
+ }
+
if (vm->use_cpu_for_update) {
r = amdgpu_bo_kmap(pt, NULL);
if (r) {
+ amdgpu_bo_unref(&pt->shadow);
amdgpu_bo_unref(&pt);
return r;
}
@@ -356,7 +454,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
((1 << shift) - 1);
r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
- sub_eaddr, level);
+ sub_eaddr, level, ats);
if (r)
return r;
}
@@ -379,26 +477,29 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
uint64_t saddr, uint64_t size)
{
- uint64_t last_pfn;
uint64_t eaddr;
+ bool ats = false;
/* validate the parameters */
if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
return -EINVAL;
eaddr = saddr + size - 1;
- last_pfn = eaddr / AMDGPU_GPU_PAGE_SIZE;
- if (last_pfn >= adev->vm_manager.max_pfn) {
- dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
- last_pfn, adev->vm_manager.max_pfn);
- return -EINVAL;
- }
+
+ if (vm->pte_support_ats)
+ ats = saddr < AMDGPU_VA_HOLE_START;
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
+ if (eaddr >= adev->vm_manager.max_pfn) {
+ dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
+ eaddr, adev->vm_manager.max_pfn);
+ return -EINVAL;
+ }
+
return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
- adev->vm_manager.root_level);
+ adev->vm_manager.root_level, ats);
}
/**
@@ -465,7 +566,7 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
- return (adev->mc.real_vram_size == adev->mc.visible_vram_size);
+ return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
}
/**
@@ -491,14 +592,24 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
id->oa_base != job->oa_base ||
id->oa_size != job->oa_size);
bool vm_flush_needed = job->vm_needs_flush;
+ bool pasid_mapping_needed = id->pasid != job->pasid ||
+ !id->pasid_mapping ||
+ !dma_fence_is_signaled(id->pasid_mapping);
+ struct dma_fence *fence = NULL;
unsigned patch_offset = 0;
int r;
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
vm_flush_needed = true;
+ pasid_mapping_needed = true;
}
+ gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+ vm_flush_needed &= !!ring->funcs->emit_vm_flush;
+ pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
+ ring->funcs->emit_wreg;
+
if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
return 0;
@@ -508,23 +619,36 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
if (need_pipe_sync)
amdgpu_ring_emit_pipeline_sync(ring);
- if (ring->funcs->emit_vm_flush && vm_flush_needed) {
- struct dma_fence *fence;
-
+ if (vm_flush_needed) {
trace_amdgpu_vm_flush(ring, job->vmid, job->vm_pd_addr);
amdgpu_ring_emit_vm_flush(ring, job->vmid, job->vm_pd_addr);
+ }
+
+ if (pasid_mapping_needed)
+ amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
+ if (vm_flush_needed || pasid_mapping_needed) {
r = amdgpu_fence_emit(ring, &fence);
if (r)
return r;
+ }
+ if (vm_flush_needed) {
mutex_lock(&id_mgr->lock);
dma_fence_put(id->last_flush);
- id->last_flush = fence;
- id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
+ id->last_flush = dma_fence_get(fence);
+ id->current_gpu_reset_count =
+ atomic_read(&adev->gpu_reset_counter);
mutex_unlock(&id_mgr->lock);
}
+ if (pasid_mapping_needed) {
+ id->pasid = job->pasid;
+ dma_fence_put(id->pasid_mapping);
+ id->pasid_mapping = dma_fence_get(fence);
+ }
+ dma_fence_put(fence);
+
if (ring->funcs->emit_gds_switch && gds_switch_needed) {
id->gds_base = job->gds_base;
id->gds_size = job->gds_size;
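Note the fence lifecycle in the rewritten amdgpu_vm_flush(): a single fence is emitted for both the VM flush and the PASID mapping, each cached pointer takes its own reference via dma_fence_get(), and the local reference is dropped at the end. In shorthand (locking and error handling elided):

	r = amdgpu_fence_emit(ring, &fence);	  /* local reference */
	id->last_flush = dma_fence_get(fence);	  /* +1, VMID cache  */
	id->pasid_mapping = dma_fence_get(fence); /* +1, PASID cache */
	dma_fence_put(fence);			  /* drop local ref  */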
@@ -578,6 +702,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
* amdgpu_vm_do_set_ptes - helper to call the right asic function
*
* @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
@@ -588,10 +713,12 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
* to setup the page table using the DMA.
*/
static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
{
+ pe += amdgpu_bo_gpu_offset(bo);
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
if (count < 3) {
@@ -608,6 +735,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
* amdgpu_vm_do_copy_ptes - copy the PTEs from the GART
*
* @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
@@ -617,13 +745,14 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params,
* Traces the parameters and calls the DMA function to copy the PTEs.
*/
static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
{
uint64_t src = (params->src + (addr >> 12) * 8);
-
+ pe += amdgpu_bo_gpu_offset(bo);
trace_amdgpu_vm_copy_ptes(pe, src, count);
amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count);
@@ -657,6 +786,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
* amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU
*
* @params: see amdgpu_pte_update_params definition
+ * @bo: PD/PT to update
* @pe: kmap addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
@@ -666,6 +796,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
* Write count number of PT/PD entries directly.
*/
static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
+ struct amdgpu_bo *bo,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
@@ -673,14 +804,16 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
unsigned int i;
uint64_t value;
+ pe += (unsigned long)amdgpu_bo_kptr(bo);
+
trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags);
for (i = 0; i < count; i++) {
value = params->pages_addr ?
amdgpu_vm_map_gart(params->pages_addr, addr) :
addr;
- amdgpu_gart_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
- i, value, flags);
+ amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe,
+ i, value, flags);
addr += incr;
}
}
@@ -714,8 +847,7 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
struct amdgpu_vm_pt *parent,
struct amdgpu_vm_pt *entry)
{
- struct amdgpu_bo *bo = entry->base.bo, *shadow = NULL, *pbo;
- uint64_t pd_addr, shadow_addr = 0;
+ struct amdgpu_bo *bo = parent->base.bo, *pbo;
uint64_t pde, pt, flags;
unsigned level;
@@ -723,29 +855,17 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
if (entry->huge)
return;
- if (vm->use_cpu_for_update) {
- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
- } else {
- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
- shadow = parent->base.bo->shadow;
- if (shadow)
- shadow_addr = amdgpu_bo_gpu_offset(shadow);
- }
-
- for (level = 0, pbo = parent->base.bo->parent; pbo; ++level)
+ for (level = 0, pbo = bo->parent; pbo; ++level)
pbo = pbo->parent;
level += params->adev->vm_manager.root_level;
- pt = amdgpu_bo_gpu_offset(bo);
+ pt = amdgpu_bo_gpu_offset(entry->base.bo);
flags = AMDGPU_PTE_VALID;
- amdgpu_gart_get_vm_pde(params->adev, level, &pt, &flags);
- if (shadow) {
- pde = shadow_addr + (entry - parent->entries) * 8;
- params->func(params, pde, pt, 1, 0, flags);
- }
-
- pde = pd_addr + (entry - parent->entries) * 8;
- params->func(params, pde, pt, 1, 0, flags);
+ amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
+ pde = (entry - parent->entries) * 8;
+ if (bo->shadow)
+ params->func(params, bo->shadow, pde, pt, 1, 0, flags);
+ params->func(params, bo, pde, pt, 1, 0, flags);
}
/*
@@ -856,7 +976,7 @@ restart:
if (vm->use_cpu_for_update) {
/* Flush HDP */
mb();
- amdgpu_gart_flush_gpu_tlb(adev, 0);
+ amdgpu_asic_flush_hdp(adev, NULL);
} else if (params.ib->length_dw == 0) {
amdgpu_job_free(job);
} else {
@@ -870,11 +990,6 @@ restart:
amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
AMDGPU_FENCE_OWNER_VM, false);
- if (root->shadow)
- amdgpu_sync_resv(adev, &job->sync,
- root->shadow->tbo.resv,
- AMDGPU_FENCE_OWNER_VM, false);
-
WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_VM, &fence);
@@ -946,7 +1061,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
unsigned nptes, uint64_t dst,
uint64_t flags)
{
- uint64_t pd_addr, pde;
+ uint64_t pde;
/* In the case of a mixed PT the PDE must point to it*/
if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
@@ -967,21 +1082,12 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
}
entry->huge = true;
- amdgpu_gart_get_vm_pde(p->adev, AMDGPU_VM_PDB0,
- &dst, &flags);
+ amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
- if (p->func == amdgpu_vm_cpu_set_ptes) {
- pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
- } else {
- if (parent->base.bo->shadow) {
- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
- pde = pd_addr + (entry - parent->entries) * 8;
- p->func(p, pde, dst, 1, 0, flags);
- }
- pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
- }
- pde = pd_addr + (entry - parent->entries) * 8;
- p->func(p, pde, dst, 1, 0, flags);
+ pde = (entry - parent->entries) * 8;
+ if (parent->base.bo->shadow)
+ p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
+ p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
}
/**
@@ -1007,7 +1113,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t addr, pe_start;
struct amdgpu_bo *pt;
unsigned nptes;
- bool use_cpu_update = (params->func == amdgpu_vm_cpu_set_ptes);
/* walk over the address space and update the page tables */
for (addr = start; addr < end; addr += nptes,
@@ -1030,20 +1135,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
continue;
pt = entry->base.bo;
- if (use_cpu_update) {
- pe_start = (unsigned long)amdgpu_bo_kptr(pt);
- } else {
- if (pt->shadow) {
- pe_start = amdgpu_bo_gpu_offset(pt->shadow);
- pe_start += (addr & mask) * 8;
- params->func(params, pe_start, dst, nptes,
- AMDGPU_GPU_PAGE_SIZE, flags);
- }
- pe_start = amdgpu_bo_gpu_offset(pt);
- }
-
- pe_start += (addr & mask) * 8;
- params->func(params, pe_start, dst, nptes,
+ pe_start = (addr & mask) * 8;
+ if (pt->shadow)
+ params->func(params, pt->shadow, pe_start, dst, nptes,
+ AMDGPU_GPU_PAGE_SIZE, flags);
+ params->func(params, pt, pe_start, dst, nptes,
AMDGPU_GPU_PAGE_SIZE, flags);
}
@@ -1204,11 +1300,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
} else {
/* set page commands needed */
- ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
+ ndw += ncmds * 10;
/* extra commands for begin/end fragments */
- ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
- * adev->vm_manager.fragment_size;
+ ndw += 2 * 10 * adev->vm_manager.fragment_size;
params.func = amdgpu_vm_do_set_ptes;
}
@@ -1457,7 +1552,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
if (vm->use_cpu_for_update) {
/* Flush HDP */
mb();
- amdgpu_gart_flush_gpu_tlb(adev, 0);
+ amdgpu_asic_flush_hdp(adev, NULL);
}
spin_lock(&vm->status_lock);
@@ -1485,7 +1580,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
spin_lock_irqsave(&adev->vm_manager.prt_lock, flags);
enable = !!atomic_read(&adev->vm_manager.num_prt_users);
- adev->gart.gart_funcs->set_prt(adev, enable);
+ adev->gmc.gmc_funcs->set_prt(adev, enable);
spin_unlock_irqrestore(&adev->vm_manager.prt_lock, flags);
}
@@ -1494,7 +1589,7 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
*/
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
- if (!adev->gart.gart_funcs->set_prt)
+ if (!adev->gmc.gmc_funcs->set_prt)
return;
if (atomic_inc_return(&adev->vm_manager.num_prt_users) == 1)
@@ -1529,7 +1624,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
{
struct amdgpu_prt_cb *cb;
- if (!adev->gart.gart_funcs->set_prt)
+ if (!adev->gmc.gmc_funcs->set_prt)
return;
cb = kmalloc(sizeof(struct amdgpu_prt_cb), GFP_KERNEL);
@@ -1623,16 +1718,16 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct dma_fence **fence)
{
struct amdgpu_bo_va_mapping *mapping;
+ uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
int r;
- uint64_t init_pte_value = 0;
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- if (vm->pte_support_ats)
+ if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
@@ -2262,11 +2357,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT(adev) * 8);
- uint64_t init_pde_value = 0, flags;
unsigned ring_instance;
struct amdgpu_ring *ring;
struct drm_sched_rq *rq;
unsigned long size;
+ uint64_t flags;
int r, i;
vm->va = RB_ROOT_CACHED;
@@ -2295,33 +2390,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_COMPUTE);
- if (adev->asic_type == CHIP_RAVEN) {
+ if (adev->asic_type == CHIP_RAVEN)
vm->pte_support_ats = true;
- init_pde_value = AMDGPU_PTE_DEFAULT_ATC
- | AMDGPU_PDE_PTE;
-
- }
- } else
+ } else {
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
+ }
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
- AMDGPU_GEM_CREATE_VRAM_CLEARED;
+ flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
- flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_SHADOW);
+ flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
- r = amdgpu_bo_create(adev, size, align, true, AMDGPU_GEM_DOMAIN_VRAM,
- flags, NULL, NULL, init_pde_value,
- &vm->root.base.bo);
+ r = amdgpu_bo_create(adev, size, align, AMDGPU_GEM_DOMAIN_VRAM, flags,
+ ttm_bo_type_kernel, NULL, &vm->root.base.bo);
if (r)
goto error_free_sched_entity;
@@ -2329,6 +2418,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
if (r)
goto error_free_root;
+ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+ adev->vm_manager.root_level,
+ vm->pte_support_ats);
+ if (r)
+ goto error_unreserve;
+
vm->root.base.vm = vm;
list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
list_add_tail(&vm->root.base.vm_status, &vm->evicted);
@@ -2352,6 +2447,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
return 0;
+error_unreserve:
+ amdgpu_bo_unreserve(vm->root.base.bo);
+
error_free_root:
amdgpu_bo_unref(&vm->root.base.bo->shadow);
amdgpu_bo_unref(&vm->root.base.bo);
@@ -2364,6 +2462,73 @@ error_free_sched_entity:
}
/**
+ * amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
+ *
+ * This only works on GFX VMs that don't have any BOs added and no
+ * page tables allocated yet.
+ *
+ * Changes the following VM parameters:
+ * - use_cpu_for_update
+ * - pte_support_ats
+ * - pasid (old PASID is released, because compute manages its own PASIDs)
+ *
+ * Reinitializes the page directory to reflect the changed ATS
+ * setting. May leave behind an unused shadow BO for the page
+ * directory when switching from SDMA updates to CPU updates.
+ *
+ * Returns 0 for success, -errno for errors.
+ */
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+ bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
+ int r;
+
+ r = amdgpu_bo_reserve(vm->root.base.bo, true);
+ if (r)
+ return r;
+
+ /* Sanity checks */
+ if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
+ r = -EINVAL;
+ goto error;
+ }
+
+ /* Check if PD needs to be reinitialized and do it before
+ * changing any other state, in case it fails.
+ */
+ if (pte_support_ats != vm->pte_support_ats) {
+ r = amdgpu_vm_clear_bo(adev, vm, vm->root.base.bo,
+ adev->vm_manager.root_level,
+ pte_support_ats);
+ if (r)
+ goto error;
+ }
+
+ /* Update VM state */
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+ vm->pte_support_ats = pte_support_ats;
+ DRM_DEBUG_DRIVER("VM update mode is %s\n",
+ vm->use_cpu_for_update ? "CPU" : "SDMA");
+ WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
+ "CPU update of VM recommended only for large BAR system\n");
+
+ if (vm->pasid) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+ idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+ spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+ vm->pasid = 0;
+ }
+
+error:
+ amdgpu_bo_unreserve(vm->root.base.bo);
+ return r;
+}
+
+/**
* amdgpu_vm_free_levels - free PD/PT levels
*
* @adev: amdgpu device structure
@@ -2405,11 +2570,13 @@ static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
struct amdgpu_bo_va_mapping *mapping, *tmp;
- bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+ bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt;
struct amdgpu_bo *root;
u64 fault;
int i, r;
+ amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
/* Clear pending page faults from IH when the VM is destroyed */
while (kfifo_get(&vm->faults, &fault))
amdgpu_ih_clear_fault(adev, fault);
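A recurring pattern in the amdgpu_vm.c diff above: the update callbacks now take the PD/PT BO plus a byte offset instead of a fully resolved address, and each backend translates the offset itself. That is what allows shadow BOs to be handled by simply invoking the callback twice rather than duplicating the address math at every call site. Schematically:

	/* call site: entry offset within the PD, in bytes */
	pde = (entry - parent->entries) * 8;
	if (bo->shadow)
		params->func(params, bo->shadow, pde, pt, 1, 0, flags);
	params->func(params, bo, pde, pt, 1, 0, flags);

	/* SDMA backends resolve against the BO's GPU address ... */
	pe += amdgpu_bo_gpu_offset(bo);
	/* ... the CPU backend against its kernel mapping */
	pe += (unsigned long)amdgpu_bo_kptr(bo);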
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 21a80f1bb2b9..30f080364c97 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -28,6 +28,7 @@
#include <linux/kfifo.h>
#include <linux/rbtree.h>
#include <drm/gpu_scheduler.h>
+#include <drm/drm_file.h>
#include "amdgpu_sync.h"
#include "amdgpu_ring.h"
@@ -99,7 +100,7 @@ struct amdgpu_bo_list_entry;
#define AMDGPU_MMHUB 1
/* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE (8ULL << 20)
+#define AMDGPU_VA_RESERVED_SIZE (1ULL << 20)
/* VA hole for 48bit addresses on Vega10 */
#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL
@@ -206,6 +207,15 @@ struct amdgpu_vm {
/* Limit non-retry fault storms */
unsigned int fault_credit;
+
+ /* Points to the KFD process VM info */
+ struct amdkfd_process_info *process_info;
+
+ /* List node in amdkfd_process_info.vm_list_head */
+ struct list_head vm_list_node;
+
+ /* Valid while the PD is reserved or fenced */
+ uint64_t pd_phys_addr;
};
struct amdgpu_vm_manager {
@@ -250,6 +260,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid);
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 4acca92f6a52..9aca653bec07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -89,11 +89,11 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
uint64_t start = node->start << PAGE_SHIFT;
uint64_t end = (node->size + node->start) << PAGE_SHIFT;
- if (start >= adev->mc.visible_vram_size)
+ if (start >= adev->gmc.visible_vram_size)
return 0;
- return (end > adev->mc.visible_vram_size ?
- adev->mc.visible_vram_size : end) - start;
+ return (end > adev->gmc.visible_vram_size ?
+ adev->gmc.visible_vram_size : end) - start;
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 2af26d2da127..d702fb8e3427 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -34,7 +34,7 @@
#include <linux/backlight.h>
#include "bif/bif_4_1_d.h"
-static u8
+u8
amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
{
u8 backlight_level;
@@ -48,7 +48,7 @@ amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev)
return backlight_level;
}
-static void
+void
amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
u8 backlight_level)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
index 2bdec40515ce..f77cbdef679e 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.h
@@ -25,6 +25,11 @@
#define __ATOMBIOS_ENCODER_H__
u8
+amdgpu_atombios_encoder_get_backlight_level_from_reg(struct amdgpu_device *adev);
+void
+amdgpu_atombios_encoder_set_backlight_level_to_reg(struct amdgpu_device *adev,
+ u8 backlight_level);
+u8
amdgpu_atombios_encoder_get_backlight_level(struct amdgpu_encoder *amdgpu_encoder);
void
amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encoder,
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index a0943aa8d1d3..47ef3e6e7178 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -65,6 +65,8 @@ MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
#define VOLTAGE_VID_OFFSET_SCALE1 625
#define VOLTAGE_VID_OFFSET_SCALE2 100
+static const struct amd_pm_funcs ci_dpm_funcs;
+
static const struct ci_pt_defaults defaults_hawaii_xt =
{
1, 0xF, 0xFD, 0x19, 5, 0x14, 0, 0xB0000,
@@ -905,7 +907,7 @@ static bool ci_dpm_vblank_too_short(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
- u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
+ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 300;
/* disable mclk switching if the refresh is >120Hz, even if the
* blanking period would allow it
@@ -2954,7 +2956,7 @@ static int ci_calculate_mclk_params(struct amdgpu_device *adev,
mpll_ad_func_cntl &= ~MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK;
mpll_ad_func_cntl |= (mpll_param.post_div << MPLL_AD_FUNC_CNTL__YCLK_POST_DIV__SHIFT);
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
mpll_dq_func_cntl &= ~(MPLL_DQ_FUNC_CNTL__YCLK_SEL_MASK |
MPLL_AD_FUNC_CNTL__YCLK_POST_DIV_MASK);
mpll_dq_func_cntl |= (mpll_param.yclk_sel << MPLL_DQ_FUNC_CNTL__YCLK_SEL__SHIFT) |
@@ -3077,7 +3079,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
(memory_clock <= pi->mclk_strobe_mode_threshold))
memory_level->StrobeEnable = 1;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
memory_level->StrobeRatio =
ci_get_mclk_frequency_ratio(memory_clock, memory_level->StrobeEnable);
if (pi->mclk_edc_enable_threshold &&
@@ -3695,40 +3697,6 @@ static int ci_find_boot_level(struct ci_single_dpm_table *table,
return ret;
}
-static void ci_save_default_power_profile(struct amdgpu_device *adev)
-{
- struct ci_power_info *pi = ci_get_pi(adev);
- struct SMU7_Discrete_GraphicsLevel *levels =
- pi->smc_state_table.GraphicsLevel;
- uint32_t min_level = 0;
-
- pi->default_gfx_power_profile.activity_threshold =
- be16_to_cpu(levels[0].ActivityLevel);
- pi->default_gfx_power_profile.up_hyst = levels[0].UpH;
- pi->default_gfx_power_profile.down_hyst = levels[0].DownH;
- pi->default_gfx_power_profile.type = AMD_PP_GFX_PROFILE;
-
- pi->default_compute_power_profile = pi->default_gfx_power_profile;
- pi->default_compute_power_profile.type = AMD_PP_COMPUTE_PROFILE;
-
- /* Optimize compute power profile: Use only highest
- * 2 power levels (if more than 2 are available), Hysteresis:
- * 0ms up, 5ms down
- */
- if (pi->smc_state_table.GraphicsDpmLevelCount > 2)
- min_level = pi->smc_state_table.GraphicsDpmLevelCount - 2;
- else if (pi->smc_state_table.GraphicsDpmLevelCount == 2)
- min_level = 1;
- pi->default_compute_power_profile.min_sclk =
- be32_to_cpu(levels[min_level].SclkFrequency);
-
- pi->default_compute_power_profile.up_hyst = 0;
- pi->default_compute_power_profile.down_hyst = 5;
-
- pi->gfx_power_profile = pi->default_gfx_power_profile;
- pi->compute_power_profile = pi->default_compute_power_profile;
-}
-
static int ci_init_smc_table(struct amdgpu_device *adev)
{
struct ci_power_info *pi = ci_get_pi(adev);
@@ -3752,7 +3720,7 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
table->SystemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
table->SystemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
if (ulv->supported) {
@@ -3874,8 +3842,6 @@ static int ci_init_smc_table(struct amdgpu_device *adev)
if (ret)
return ret;
- ci_save_default_power_profile(adev);
-
return 0;
}
@@ -4549,12 +4515,12 @@ static int ci_set_mc_special_registers(struct amdgpu_device *adev,
for (k = 0; k < table->num_entries; k++) {
table->mc_reg_table_entry[k].mc_data[j] =
(temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
- if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
}
j++;
- if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
if (j >= SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;
table->mc_reg_address[j].s1 = mmMC_PMG_AUTO_CMD;
@@ -6277,6 +6243,8 @@ static int ci_dpm_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->powerplay.pp_funcs = &ci_dpm_funcs;
+ adev->powerplay.pp_handle = adev;
ci_dpm_set_irq_funcs(adev);
return 0;
@@ -6639,9 +6607,10 @@ static int ci_dpm_force_clock_level(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct ci_power_info *pi = ci_get_pi(adev);
- if (adev->pm.dpm.forced_level & (AMD_DPM_FORCED_LEVEL_AUTO |
- AMD_DPM_FORCED_LEVEL_LOW |
- AMD_DPM_FORCED_LEVEL_HIGH))
+ if (adev->pm.dpm.forced_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+ return -EINVAL;
+
+ if (mask == 0)
return -EINVAL;
switch (type) {
@@ -6662,15 +6631,15 @@ static int ci_dpm_force_clock_level(void *handle,
case PP_PCIE:
{
uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
- uint32_t level = 0;
- while (tmp >>= 1)
- level++;
-
- if (!pi->pcie_dpm_key_disabled)
- amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+ if (!pi->pcie_dpm_key_disabled) {
+ if (fls(tmp) != ffs(tmp))
+ amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_PCIeDPM_UnForceLevel);
+ else
+ amdgpu_ci_send_msg_to_smc_with_parameter(adev,
PPSMC_MSG_PCIeDPM_ForceLevel,
- level);
+ fls(tmp) - 1);
+ }
break;
}
default:
@@ -6752,222 +6721,6 @@ static int ci_dpm_set_mclk_od(void *handle, uint32_t value)
return 0;
}
-static int ci_dpm_get_power_profile_state(void *handle,
- struct amd_pp_profile *query)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ci_power_info *pi = ci_get_pi(adev);
-
- if (!pi || !query)
- return -EINVAL;
-
- if (query->type == AMD_PP_GFX_PROFILE)
- memcpy(query, &pi->gfx_power_profile,
- sizeof(struct amd_pp_profile));
- else if (query->type == AMD_PP_COMPUTE_PROFILE)
- memcpy(query, &pi->compute_power_profile,
- sizeof(struct amd_pp_profile));
- else
- return -EINVAL;
-
- return 0;
-}
-
-static int ci_populate_requested_graphic_levels(struct amdgpu_device *adev,
- struct amd_pp_profile *request)
-{
- struct ci_power_info *pi = ci_get_pi(adev);
- struct ci_dpm_table *dpm_table = &(pi->dpm_table);
- struct SMU7_Discrete_GraphicsLevel *levels =
- pi->smc_state_table.GraphicsLevel;
- uint32_t array = pi->dpm_table_start +
- offsetof(SMU7_Discrete_DpmTable, GraphicsLevel);
- uint32_t array_size = sizeof(struct SMU7_Discrete_GraphicsLevel) *
- SMU7_MAX_LEVELS_GRAPHICS;
- uint32_t i;
-
- for (i = 0; i < dpm_table->sclk_table.count; i++) {
- levels[i].ActivityLevel =
- cpu_to_be16(request->activity_threshold);
- levels[i].EnabledForActivity = 1;
- levels[i].UpH = request->up_hyst;
- levels[i].DownH = request->down_hyst;
- }
-
- return amdgpu_ci_copy_bytes_to_smc(adev, array, (uint8_t *)levels,
- array_size, pi->sram_end);
-}
-
-static void ci_find_min_clock_masks(struct amdgpu_device *adev,
- uint32_t *sclk_mask, uint32_t *mclk_mask,
- uint32_t min_sclk, uint32_t min_mclk)
-{
- struct ci_power_info *pi = ci_get_pi(adev);
- struct ci_dpm_table *dpm_table = &(pi->dpm_table);
- uint32_t i;
-
- for (i = 0; i < dpm_table->sclk_table.count; i++) {
- if (dpm_table->sclk_table.dpm_levels[i].enabled &&
- dpm_table->sclk_table.dpm_levels[i].value >= min_sclk)
- *sclk_mask |= 1 << i;
- }
-
- for (i = 0; i < dpm_table->mclk_table.count; i++) {
- if (dpm_table->mclk_table.dpm_levels[i].enabled &&
- dpm_table->mclk_table.dpm_levels[i].value >= min_mclk)
- *mclk_mask |= 1 << i;
- }
-}
-
-static int ci_set_power_profile_state(struct amdgpu_device *adev,
- struct amd_pp_profile *request)
-{
- struct ci_power_info *pi = ci_get_pi(adev);
- int tmp_result, result = 0;
- uint32_t sclk_mask = 0, mclk_mask = 0;
-
- tmp_result = ci_freeze_sclk_mclk_dpm(adev);
- if (tmp_result) {
- DRM_ERROR("Failed to freeze SCLK MCLK DPM!");
- result = tmp_result;
- }
-
- tmp_result = ci_populate_requested_graphic_levels(adev,
- request);
- if (tmp_result) {
- DRM_ERROR("Failed to populate requested graphic levels!");
- result = tmp_result;
- }
-
- tmp_result = ci_unfreeze_sclk_mclk_dpm(adev);
- if (tmp_result) {
- DRM_ERROR("Failed to unfreeze SCLK MCLK DPM!");
- result = tmp_result;
- }
-
- ci_find_min_clock_masks(adev, &sclk_mask, &mclk_mask,
- request->min_sclk, request->min_mclk);
-
- if (sclk_mask) {
- if (!pi->sclk_dpm_key_disabled)
- amdgpu_ci_send_msg_to_smc_with_parameter(
- adev,
- PPSMC_MSG_SCLKDPM_SetEnabledMask,
- pi->dpm_level_enable_mask.
- sclk_dpm_enable_mask &
- sclk_mask);
- }
-
- if (mclk_mask) {
- if (!pi->mclk_dpm_key_disabled)
- amdgpu_ci_send_msg_to_smc_with_parameter(
- adev,
- PPSMC_MSG_MCLKDPM_SetEnabledMask,
- pi->dpm_level_enable_mask.
- mclk_dpm_enable_mask &
- mclk_mask);
- }
-
-
- return result;
-}
-
-static int ci_dpm_set_power_profile_state(void *handle,
- struct amd_pp_profile *request)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ci_power_info *pi = ci_get_pi(adev);
- int ret = -1;
-
- if (!pi || !request)
- return -EINVAL;
-
- if (adev->pm.dpm.forced_level !=
- AMD_DPM_FORCED_LEVEL_AUTO)
- return -EINVAL;
-
- if (request->min_sclk ||
- request->min_mclk ||
- request->activity_threshold ||
- request->up_hyst ||
- request->down_hyst) {
- if (request->type == AMD_PP_GFX_PROFILE)
- memcpy(&pi->gfx_power_profile, request,
- sizeof(struct amd_pp_profile));
- else if (request->type == AMD_PP_COMPUTE_PROFILE)
- memcpy(&pi->compute_power_profile, request,
- sizeof(struct amd_pp_profile));
- else
- return -EINVAL;
-
- if (request->type == pi->current_power_profile)
- ret = ci_set_power_profile_state(
- adev,
- request);
- } else {
- /* set power profile if it exists */
- switch (request->type) {
- case AMD_PP_GFX_PROFILE:
- ret = ci_set_power_profile_state(
- adev,
- &pi->gfx_power_profile);
- break;
- case AMD_PP_COMPUTE_PROFILE:
- ret = ci_set_power_profile_state(
- adev,
- &pi->compute_power_profile);
- break;
- default:
- return -EINVAL;
- }
- }
-
- if (!ret)
- pi->current_power_profile = request->type;
-
- return 0;
-}
-
-static int ci_dpm_reset_power_profile_state(void *handle,
- struct amd_pp_profile *request)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ci_power_info *pi = ci_get_pi(adev);
-
- if (!pi || !request)
- return -EINVAL;
-
- if (request->type == AMD_PP_GFX_PROFILE) {
- pi->gfx_power_profile = pi->default_gfx_power_profile;
- return ci_dpm_set_power_profile_state(adev,
- &pi->gfx_power_profile);
- } else if (request->type == AMD_PP_COMPUTE_PROFILE) {
- pi->compute_power_profile =
- pi->default_compute_power_profile;
- return ci_dpm_set_power_profile_state(adev,
- &pi->compute_power_profile);
- } else
- return -EINVAL;
-}
-
-static int ci_dpm_switch_power_profile(void *handle,
- enum amd_pp_profile_type type)
-{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- struct ci_power_info *pi = ci_get_pi(adev);
- struct amd_pp_profile request = {0};
-
- if (!pi)
- return -EINVAL;
-
- if (pi->current_power_profile != type) {
- request.type = type;
- return ci_dpm_set_power_profile_state(adev, &request);
- }
-
- return 0;
-}
-
static int ci_dpm_read_sensor(void *handle, int idx,
void *value, int *size)
{
@@ -7011,7 +6764,7 @@ static int ci_dpm_read_sensor(void *handle, int idx,
}
}
-const struct amd_ip_funcs ci_dpm_ip_funcs = {
+static const struct amd_ip_funcs ci_dpm_ip_funcs = {
.name = "ci_dpm",
.early_init = ci_dpm_early_init,
.late_init = ci_dpm_late_init,
@@ -7028,8 +6781,16 @@ const struct amd_ip_funcs ci_dpm_ip_funcs = {
.set_powergating_state = ci_dpm_set_powergating_state,
};
-const struct amd_pm_funcs ci_dpm_funcs = {
- .get_temperature = &ci_dpm_get_temp,
+const struct amdgpu_ip_block_version ci_smu_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SMC,
+ .major = 7,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &ci_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs ci_dpm_funcs = {
.pre_set_power_state = &ci_dpm_pre_set_power_state,
.set_power_state = &ci_dpm_set_power_state,
.post_set_power_state = &ci_dpm_post_set_power_state,
@@ -7053,10 +6814,6 @@ const struct amd_pm_funcs ci_dpm_funcs = {
.set_mclk_od = ci_dpm_set_mclk_od,
.check_state_equal = ci_check_state_equal,
.get_vce_clock_state = amdgpu_get_vce_clock_state,
- .get_power_profile_state = ci_dpm_get_power_profile_state,
- .set_power_profile_state = ci_dpm_set_power_profile_state,
- .reset_power_profile_state = ci_dpm_reset_power_profile_state,
- .switch_power_profile = ci_dpm_switch_power_profile,
.read_sensor = ci_dpm_read_sensor,
};
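The reworked PP_PCIE branch of ci_dpm_force_clock_level() uses fls()/ffs() (both 1-based) to tell a single requested level from a range: for a one-bit mask the highest and lowest set bits coincide and that level is forced, otherwise PCIe DPM is unforced. For example, with hypothetical masks:

	/* tmp = 0x4: ffs = fls = 3   -> force PCIe DPM to level 2  */
	/* tmp = 0x6: ffs = 2, fls = 3 -> multiple levels, unforce  */
	if (fls(tmp) != ffs(tmp))
		amdgpu_ci_send_msg_to_smc(adev,
					  PPSMC_MSG_PCIeDPM_UnForceLevel);
	else
		amdgpu_ci_send_msg_to_smc_with_parameter(adev,
					  PPSMC_MSG_PCIeDPM_ForceLevel,
					  fls(tmp) - 1);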
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index 84cbc9c45f4d..91be2996ae7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -295,13 +295,6 @@ struct ci_power_info {
bool fan_is_controlled_by_smc;
u32 t_min;
u32 fan_ctrl_default_mode;
-
- /* power profile */
- struct amd_pp_profile gfx_power_profile;
- struct amd_pp_profile compute_power_profile;
- struct amd_pp_profile default_gfx_power_profile;
- struct amd_pp_profile default_compute_power_profile;
- enum amd_pp_profile_type current_power_profile;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 8e59e65efd44..0df22030e713 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -67,7 +67,6 @@
#include "amdgpu_dm.h"
#include "amdgpu_amdkfd.h"
-#include "amdgpu_powerplay.h"
#include "dce_virtual.h"
/*
@@ -1715,6 +1714,27 @@ static void cik_detect_hw_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
+static void cik_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void cik_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@@ -1726,6 +1746,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.set_uvd_clocks = &cik_set_uvd_clocks,
.set_vce_clocks = &cik_set_vce_clocks,
.get_config_memsize = &cik_get_config_memsize,
+ .flush_hdp = &cik_flush_hdp,
+ .invalidate_hdp = &cik_invalidate_hdp,
};
static int cik_common_early_init(void *handle)
@@ -1864,10 +1886,6 @@ static int cik_common_early_init(void *handle)
return -EINVAL;
}
- adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
- amdgpu_device_get_pcie_info(adev);
-
return 0;
}
@@ -1977,7 +1995,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ if (amdgpu_dpm == -1)
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -1995,7 +2016,10 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ if (amdgpu_dpm == -1)
+ amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
+ else
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -2013,7 +2037,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -2032,7 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block);
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &kv_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
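
Two things stand out in the cik.c hunks: the amdgpu_dpm module parameter now selects the SMU implementation (-1, the "auto" default, keeps the legacy ci_dpm/kv_dpm code on these parts; anything else uses the shared powerplay stack), and HDP flush/invalidate become per-ASIC callbacks that emit into a ring's command stream when one is available and fall back to an immediate MMIO write plus readback otherwise. A hedged sketch of the caller side, assuming the usual amdgpu_asic_* dispatch macros in amdgpu.h:

static void example_hdp_barriers(struct amdgpu_device *adev,
				 struct amdgpu_ring *ring)
{
	/* CPU wrote data the GPU will read: drain HDP writes first */
	amdgpu_asic_flush_hdp(adev, ring);

	/* GPU wrote data the CPU will read: invalidate the HDP cache */
	amdgpu_asic_invalidate_hdp(adev, ring);
}
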
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h
index c4989f51ecef..e49c6f15a0a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik.h
@@ -24,6 +24,8 @@
#ifndef __CIK_H__
#define __CIK_H__
+#define CIK_FLUSH_GPU_TLB_NUM_WREG 3
+
void cik_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int cik_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
index c7b4349f6319..2a086610f74d 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/cik_dpm.h
@@ -24,8 +24,7 @@
#ifndef __CIK_DPM_H__
#define __CIK_DPM_H__
-extern const struct amd_ip_funcs ci_dpm_ip_funcs;
-extern const struct amd_ip_funcs kv_dpm_ip_funcs;
-extern const struct amd_pm_funcs ci_dpm_funcs;
-extern const struct amd_pm_funcs kv_dpm_funcs;
+extern const struct amdgpu_ip_block_version ci_smu_ip_block;
+extern const struct amdgpu_ip_block_version kv_smu_ip_block;
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index d5a05c19708f..44d10c2172f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -111,7 +111,7 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
cik_ih_disable_interrupts(adev);
/* setup interrupt control */
- WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -281,7 +281,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
- entry->pas_id = (dw[2] >> 16) & 0xffff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
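
The pas_id -> pasid rename is mechanical, but it is worth pinning down the data structure being decoded: each CIK IH ring entry is 16 bytes (hence the rptr += 16), and the fields extracted above unpack as follows. The dw[0] and dw[3] contents are inferred from the surrounding driver, not visible in this excerpt:

/* One CIK IH vector, four dwords:
 *   dw[1] bits  0..27: src_data[0]
 *   dw[2] bits  0..7 : ring_id
 *   dw[2] bits  8..15: vmid
 *   dw[2] bits 16..31: pasid   (was pas_id)
 *   dw[0]: src_id, dw[3]: reserved   -- assumed, not shown here
 */
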
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 6e8278e689b1..f48ea0dad875 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -261,13 +261,6 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
-static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 1);
-}
-
/**
* cik_sdma_ring_emit_fence - emit a fence on the DMA ring
*
@@ -317,7 +310,7 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev)
if ((adev->mman.buffer_funcs_ring == sdma0) ||
(adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -517,7 +510,7 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -885,18 +878,7 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(0) |
SDMA_POLL_REG_MEM_EXTRA_FUNC(0)); /* always */
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- if (vmid < 8) {
- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- } else {
- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
- }
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* flush TLB */
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
@@ -906,6 +888,14 @@ static void cik_sdma_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
+static void cik_sdma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
bool enable)
{
@@ -1279,9 +1269,9 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.set_wptr = cik_sdma_ring_set_wptr,
.emit_frame_size =
6 + /* cik_sdma_ring_emit_hdp_flush */
- 3 + /* cik_sdma_ring_emit_hdp_invalidate */
+ 3 + /* hdp invalidate */
6 + /* cik_sdma_ring_emit_pipeline_sync */
- 12 + /* cik_sdma_ring_emit_vm_flush */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* cik_sdma_ring_emit_vm_flush */
9 + 9 + 9, /* cik_sdma_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 4, /* cik_sdma_ring_emit_ib */
.emit_ib = cik_sdma_ring_emit_ib,
@@ -1289,11 +1279,11 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
- .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
.test_ring = cik_sdma_ring_test_ring,
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
.pad_ib = cik_sdma_ring_pad_ib,
+ .emit_wreg = cik_sdma_ring_emit_wreg,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1391,9 +1381,6 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
.copy_pte = cik_sdma_vm_copy_pte,
.write_pte = cik_sdma_vm_write_pte,
-
- .set_max_nums_pte_pde = 0x1fffff >> 3,
- .set_pte_pde_num_dw = 10,
.set_pte_pde = cik_sdma_vm_set_pte_pde,
};
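
The new frame-size budget can be checked against packet sizes visible in this very diff: cik_sdma_ring_emit_wreg() costs 3 dwords (SRBM_WRITE header, register, value) and the POLL_REG_MEM wait kept at the end of cik_sdma_ring_emit_vm_flush() costs 6, so with CIK_FLUSH_GPU_TLB_NUM_WREG defined as 3 in cik.h:

/* cik_sdma VM-flush reservation:
 *   CIK_FLUSH_GPU_TLB_NUM_WREG * 3 + 6  =  3 * 3 + 6  =  15 dwords
 * (previously a hardcoded 12).  This is an upper bound -- the GMC code
 * may emit fewer register writes per flush -- sized so the ring never
 * runs out of reserved space mid-flush.  CP rings do the same math
 * with 5-dword WRITE_DATA packets, e.g. gfx_v7 below reserves
 *   CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7  =  22 dwords. */
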
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index f576e9cbbc61..960c29e17da6 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -111,7 +111,7 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
cz_ih_disable_interrupts(adev);
/* setup interrupt control */
- WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
- entry->pas_id = (dw[2] >> 16) & 0xffff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index f34bc68aadfb..452f88ea46a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -190,66 +190,6 @@ static void dce_v10_0_audio_endpt_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
}
-static bool dce_v10_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
- if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
- CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
- return true;
- else
- return false;
-}
-
-static bool dce_v10_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
- u32 pos1, pos2;
-
- pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
- pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- if (pos1 != pos2)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v10_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v10_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
- unsigned i = 100;
-
- if (crtc >= adev->mode_info.num_crtc)
- return;
-
- if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
- return;
-
- /* depending on when we hit vblank, we may be close to active; if so,
- * wait for another frame.
- */
- while (dce_v10_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v10_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-
- while (!dce_v10_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v10_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-}
-
static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
if (crtc >= adev->mode_info.num_crtc)
@@ -1205,7 +1145,7 @@ static void dce_v10_0_bandwidth_update(struct amdgpu_device *adev)
u32 num_heads = 0, lb_size;
int i;
- amdgpu_update_display_priority(adev);
+ amdgpu_display_update_priority(adev);
for (i = 0; i < adev->mode_info.num_crtc; i++) {
if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2517,9 +2457,9 @@ static const struct drm_crtc_funcs dce_v10_0_crtc_funcs = {
.cursor_set2 = dce_v10_0_crtc_cursor_set2,
.cursor_move = dce_v10_0_crtc_cursor_move,
.gamma_set = dce_v10_0_crtc_gamma_set,
- .set_config = amdgpu_crtc_set_config,
+ .set_config = amdgpu_display_crtc_set_config,
.destroy = dce_v10_0_crtc_destroy,
- .page_flip_target = amdgpu_crtc_page_flip_target,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
};
static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2537,7 +2477,8 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v10_0_vga_enable(crtc, false);
/* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_crtc_vblank_on(crtc);
@@ -2676,7 +2617,7 @@ static bool dce_v10_0_crtc_mode_fixup(struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
return false;
}
- if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
return false;
if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
return false;
@@ -2824,9 +2765,9 @@ static int dce_v10_0_sw_init(void *handle)
adev->ddev->mode_config.preferred_depth = 24;
adev->ddev->mode_config.prefer_shadow = 1;
- adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+ adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
- r = amdgpu_modeset_create_props(adev);
+ r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
@@ -2841,7 +2782,7 @@ static int dce_v10_0_sw_init(void *handle)
}
if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_print_display_setup(adev->ddev);
+ amdgpu_display_print_display_setup(adev->ddev);
else
return -EINVAL;
@@ -2921,6 +2862,11 @@ static int dce_v10_0_hw_fini(void *handle)
static int dce_v10_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v10_0_hw_fini(handle);
}
@@ -2929,6 +2875,9 @@ static int dce_v10_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v10_0_hw_init(handle);
/* turn on the BL */
@@ -3249,7 +3198,7 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev,
{
unsigned crtc = entry->src_id - 1;
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev, crtc);
switch (entry->src_data[0]) {
case 0: /* vblank */
@@ -3601,7 +3550,6 @@ static void dce_v10_0_encoder_add(struct amdgpu_device *adev,
static const struct amdgpu_display_funcs dce_v10_0_display_funcs = {
.bandwidth_update = &dce_v10_0_bandwidth_update,
.vblank_get_counter = &dce_v10_0_vblank_get_counter,
- .vblank_wait = &dce_v10_0_vblank_wait,
.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
.hpd_sense = &dce_v10_0_hpd_sense,
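
Besides the amdgpu_display_* renames, two changes repeat verbatim across all four DCE variants in this series: the MMIO-polling vblank wait (and its .vblank_wait hook) is deleted, and suspend/resume now saves and restores the panel backlight level around hw_fini()/hw_init(). If a caller still needs to wait out a vblank, the DRM core helper covers it; a minimal sketch, assuming vblank interrupts are enabled for the crtc:

#include <drm/drm_vblank.h>

/* Sleep until the next vblank IRQ instead of spinning on
 * CRTC_STATUS/CRTC_STATUS_POSITION reads as the deleted code did. */
static void example_wait_one_vblank(struct drm_crtc *crtc)
{
	drm_crtc_wait_one_vblank(crtc);
}
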
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 26378bd6aba4..a7c1c584a191 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -207,66 +207,6 @@ static void dce_v11_0_audio_endpt_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
}
-static bool dce_v11_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
- if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
- CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
- return true;
- else
- return false;
-}
-
-static bool dce_v11_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
- u32 pos1, pos2;
-
- pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
- pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- if (pos1 != pos2)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v11_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v11_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
- unsigned i = 100;
-
- if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
- return;
-
- if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
- return;
-
- /* depending on when we hit vblank, we may be close to active; if so,
- * wait for another frame.
- */
- while (dce_v11_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v11_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-
- while (!dce_v11_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v11_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-}
-
static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
if (crtc < 0 || crtc >= adev->mode_info.num_crtc)
@@ -1229,7 +1169,7 @@ static void dce_v11_0_bandwidth_update(struct amdgpu_device *adev)
u32 num_heads = 0, lb_size;
int i;
- amdgpu_update_display_priority(adev);
+ amdgpu_display_update_priority(adev);
for (i = 0; i < adev->mode_info.num_crtc; i++) {
if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2592,9 +2532,9 @@ static const struct drm_crtc_funcs dce_v11_0_crtc_funcs = {
.cursor_set2 = dce_v11_0_crtc_cursor_set2,
.cursor_move = dce_v11_0_crtc_cursor_move,
.gamma_set = dce_v11_0_crtc_gamma_set,
- .set_config = amdgpu_crtc_set_config,
+ .set_config = amdgpu_display_crtc_set_config,
.destroy = dce_v11_0_crtc_destroy,
- .page_flip_target = amdgpu_crtc_page_flip_target,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
};
static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2612,7 +2552,8 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v11_0_vga_enable(crtc, false);
/* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_crtc_vblank_on(crtc);
@@ -2779,7 +2720,7 @@ static bool dce_v11_0_crtc_mode_fixup(struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
return false;
}
- if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
return false;
if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
return false;
@@ -2939,9 +2880,9 @@ static int dce_v11_0_sw_init(void *handle)
adev->ddev->mode_config.preferred_depth = 24;
adev->ddev->mode_config.prefer_shadow = 1;
- adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+ adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
- r = amdgpu_modeset_create_props(adev);
+ r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
@@ -2957,7 +2898,7 @@ static int dce_v11_0_sw_init(void *handle)
}
if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_print_display_setup(adev->ddev);
+ amdgpu_display_print_display_setup(adev->ddev);
else
return -EINVAL;
@@ -3047,6 +2988,11 @@ static int dce_v11_0_hw_fini(void *handle)
static int dce_v11_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v11_0_hw_fini(handle);
}
@@ -3055,6 +3001,9 @@ static int dce_v11_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v11_0_hw_init(handle);
/* turn on the BL */
@@ -3368,7 +3317,8 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev,
{
unsigned crtc = entry->src_id - 1;
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
switch (entry->src_data[0]) {
case 0: /* vblank */
@@ -3725,7 +3675,6 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
static const struct amdgpu_display_funcs dce_v11_0_display_funcs = {
.bandwidth_update = &dce_v11_0_bandwidth_update,
.vblank_get_counter = &dce_v11_0_vblank_get_counter,
- .vblank_wait = &dce_v11_0_vblank_wait,
.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
.hpd_sense = &dce_v11_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index bd2c4f727df6..9f67b7fd3487 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -142,64 +142,6 @@ static void dce_v6_0_audio_endpt_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
}
-static bool dce_v6_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
- if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) & CRTC_STATUS__CRTC_V_BLANK_MASK)
- return true;
- else
- return false;
-}
-
-static bool dce_v6_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
- u32 pos1, pos2;
-
- pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
- pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- if (pos1 != pos2)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v6_0_wait_for_vblank - vblank wait asic callback.
- *
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v6_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
- unsigned i = 100;
-
- if (crtc >= adev->mode_info.num_crtc)
- return;
-
- if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
- return;
-
- /* depending on when we hit vblank, we may be close to active; if so,
- * wait for another frame.
- */
- while (dce_v6_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v6_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-
- while (!dce_v6_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v6_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-}
-
static u32 dce_v6_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
if (crtc >= adev->mode_info.num_crtc)
@@ -1108,7 +1050,7 @@ static void dce_v6_0_bandwidth_update(struct amdgpu_device *adev)
if (!adev->mode_info.mode_config_initialized)
return;
- amdgpu_update_display_priority(adev);
+ amdgpu_display_update_priority(adev);
for (i = 0; i < adev->mode_info.num_crtc; i++) {
if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2407,9 +2349,9 @@ static const struct drm_crtc_funcs dce_v6_0_crtc_funcs = {
.cursor_set2 = dce_v6_0_crtc_cursor_set2,
.cursor_move = dce_v6_0_crtc_cursor_move,
.gamma_set = dce_v6_0_crtc_gamma_set,
- .set_config = amdgpu_crtc_set_config,
+ .set_config = amdgpu_display_crtc_set_config,
.destroy = dce_v6_0_crtc_destroy,
- .page_flip_target = amdgpu_crtc_page_flip_target,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
};
static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2425,7 +2367,8 @@ static void dce_v6_0_crtc_dpms(struct drm_crtc *crtc, int mode)
amdgpu_atombios_crtc_enable(crtc, ATOM_ENABLE);
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
/* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_crtc_vblank_on(crtc);
@@ -2562,7 +2505,7 @@ static bool dce_v6_0_crtc_mode_fixup(struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
return false;
}
- if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
return false;
if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
return false;
@@ -2693,9 +2636,9 @@ static int dce_v6_0_sw_init(void *handle)
adev->ddev->mode_config.max_height = 16384;
adev->ddev->mode_config.preferred_depth = 24;
adev->ddev->mode_config.prefer_shadow = 1;
- adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+ adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
- r = amdgpu_modeset_create_props(adev);
+ r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
@@ -2711,7 +2654,7 @@ static int dce_v6_0_sw_init(void *handle)
ret = amdgpu_atombios_get_connector_info_from_object_table(adev);
if (ret)
- amdgpu_print_display_setup(adev->ddev);
+ amdgpu_display_print_display_setup(adev->ddev);
else
return -EINVAL;
@@ -2787,6 +2730,11 @@ static int dce_v6_0_hw_fini(void *handle)
static int dce_v6_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v6_0_hw_fini(handle);
}
@@ -2795,6 +2743,9 @@ static int dce_v6_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v6_0_hw_init(handle);
/* turn on the BL */
@@ -2966,7 +2917,8 @@ static int dce_v6_0_crtc_irq(struct amdgpu_device *adev,
{
unsigned crtc = entry->src_id - 1;
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
switch (entry->src_data[0]) {
case 0: /* vblank */
@@ -3093,7 +3045,7 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev,
tmp |= DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK;
WREG32(mmDC_HPD1_INT_CONTROL + hpd_offsets[hpd], tmp);
schedule_work(&adev->hotplug_work);
- DRM_INFO("IH: HPD%d\n", hpd + 1);
+ DRM_DEBUG("IH: HPD%d\n", hpd + 1);
}
return 0;
@@ -3407,7 +3359,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev,
static const struct amdgpu_display_funcs dce_v6_0_display_funcs = {
.bandwidth_update = &dce_v6_0_bandwidth_update,
.vblank_get_counter = &dce_v6_0_vblank_get_counter,
- .vblank_wait = &dce_v6_0_vblank_wait,
.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
.hpd_sense = &dce_v6_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index c008dc030687..f55422cbd77a 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -140,66 +140,6 @@ static void dce_v8_0_audio_endpt_wreg(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->audio_endpt_idx_lock, flags);
}
-static bool dce_v8_0_is_in_vblank(struct amdgpu_device *adev, int crtc)
-{
- if (RREG32(mmCRTC_STATUS + crtc_offsets[crtc]) &
- CRTC_V_BLANK_START_END__CRTC_V_BLANK_START_MASK)
- return true;
- else
- return false;
-}
-
-static bool dce_v8_0_is_counter_moving(struct amdgpu_device *adev, int crtc)
-{
- u32 pos1, pos2;
-
- pos1 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
- pos2 = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]);
-
- if (pos1 != pos2)
- return true;
- else
- return false;
-}
-
-/**
- * dce_v8_0_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_v8_0_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
- unsigned i = 100;
-
- if (crtc >= adev->mode_info.num_crtc)
- return;
-
- if (!(RREG32(mmCRTC_CONTROL + crtc_offsets[crtc]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK))
- return;
-
- /* depending on when we hit vblank, we may be close to active; if so,
- * wait for another frame.
- */
- while (dce_v8_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v8_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-
- while (!dce_v8_0_is_in_vblank(adev, crtc)) {
- if (i++ == 100) {
- i = 0;
- if (!dce_v8_0_is_counter_moving(adev, crtc))
- break;
- }
- }
-}
-
static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
if (crtc >= adev->mode_info.num_crtc)
@@ -1144,7 +1084,7 @@ static void dce_v8_0_bandwidth_update(struct amdgpu_device *adev)
u32 num_heads = 0, lb_size;
int i;
- amdgpu_update_display_priority(adev);
+ amdgpu_display_update_priority(adev);
for (i = 0; i < adev->mode_info.num_crtc; i++) {
if (adev->mode_info.crtcs[i]->base.enabled)
@@ -2421,9 +2361,9 @@ static const struct drm_crtc_funcs dce_v8_0_crtc_funcs = {
.cursor_set2 = dce_v8_0_crtc_cursor_set2,
.cursor_move = dce_v8_0_crtc_cursor_move,
.gamma_set = dce_v8_0_crtc_gamma_set,
- .set_config = amdgpu_crtc_set_config,
+ .set_config = amdgpu_display_crtc_set_config,
.destroy = dce_v8_0_crtc_destroy,
- .page_flip_target = amdgpu_crtc_page_flip_target,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
};
static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -2441,7 +2381,8 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
amdgpu_atombios_crtc_blank(crtc, ATOM_DISABLE);
dce_v8_0_vga_enable(crtc, false);
/* Make sure VBLANK and PFLIP interrupts are still enabled */
- type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
drm_crtc_vblank_on(crtc);
@@ -2587,7 +2528,7 @@ static bool dce_v8_0_crtc_mode_fixup(struct drm_crtc *crtc,
amdgpu_crtc->connector = NULL;
return false;
}
- if (!amdgpu_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
+ if (!amdgpu_display_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode))
return false;
if (amdgpu_atombios_crtc_prepare_pll(crtc, adjusted_mode))
return false;
@@ -2724,9 +2665,9 @@ static int dce_v8_0_sw_init(void *handle)
adev->ddev->mode_config.preferred_depth = 24;
adev->ddev->mode_config.prefer_shadow = 1;
- adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+ adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
- r = amdgpu_modeset_create_props(adev);
+ r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
@@ -2741,7 +2682,7 @@ static int dce_v8_0_sw_init(void *handle)
}
if (amdgpu_atombios_get_connector_info_from_object_table(adev))
- amdgpu_print_display_setup(adev->ddev);
+ amdgpu_display_print_display_setup(adev->ddev);
else
return -EINVAL;
@@ -2819,6 +2760,11 @@ static int dce_v8_0_hw_fini(void *handle)
static int dce_v8_0_suspend(void *handle)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->mode_info.bl_level =
+ amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
+
return dce_v8_0_hw_fini(handle);
}
@@ -2827,6 +2773,9 @@ static int dce_v8_0_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
int ret;
+ amdgpu_atombios_encoder_set_backlight_level_to_reg(adev,
+ adev->mode_info.bl_level);
+
ret = dce_v8_0_hw_init(handle);
/* turn on the BL */
@@ -3063,7 +3012,8 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev,
{
unsigned crtc = entry->src_id - 1;
uint32_t disp_int = RREG32(interrupt_status_offsets[crtc].reg);
- unsigned irq_type = amdgpu_crtc_idx_to_irq_type(adev, crtc);
+ unsigned int irq_type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ crtc);
switch (entry->src_data[0]) {
case 0: /* vblank */
@@ -3491,7 +3441,6 @@ static void dce_v8_0_encoder_add(struct amdgpu_device *adev,
static const struct amdgpu_display_funcs dce_v8_0_display_funcs = {
.bandwidth_update = &dce_v8_0_bandwidth_update,
.vblank_get_counter = &dce_v8_0_vblank_get_counter,
- .vblank_wait = &dce_v8_0_vblank_wait,
.backlight_set_level = &amdgpu_atombios_encoder_set_backlight_level,
.backlight_get_level = &amdgpu_atombios_encoder_get_backlight_level,
.hpd_sense = &dce_v8_0_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 120dd3b26fc2..b51f05dc9582 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -48,19 +48,6 @@ static void dce_virtual_set_crtc_vblank_interrupt_state(struct amdgpu_device *ad
int crtc,
enum amdgpu_interrupt_state state);
-/**
- * dce_virtual_vblank_wait - vblank wait asic callback.
- *
- * @adev: amdgpu_device pointer
- * @crtc: crtc to wait for vblank on
- *
- * Wait for vblank on the requested crtc (evergreen+).
- */
-static void dce_virtual_vblank_wait(struct amdgpu_device *adev, int crtc)
-{
- return;
-}
-
static u32 dce_virtual_vblank_get_counter(struct amdgpu_device *adev, int crtc)
{
return 0;
@@ -130,9 +117,9 @@ static const struct drm_crtc_funcs dce_virtual_crtc_funcs = {
.cursor_set2 = NULL,
.cursor_move = NULL,
.gamma_set = dce_virtual_crtc_gamma_set,
- .set_config = amdgpu_crtc_set_config,
+ .set_config = amdgpu_display_crtc_set_config,
.destroy = dce_virtual_crtc_destroy,
- .page_flip_target = amdgpu_crtc_page_flip_target,
+ .page_flip_target = amdgpu_display_crtc_page_flip_target,
};
static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -149,7 +136,8 @@ static void dce_virtual_crtc_dpms(struct drm_crtc *crtc, int mode)
case DRM_MODE_DPMS_ON:
amdgpu_crtc->enabled = true;
/* Make sure VBLANK interrupts are still enabled */
- type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
+ type = amdgpu_display_crtc_idx_to_irq_type(adev,
+ amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
drm_crtc_vblank_on(crtc);
break;
@@ -406,9 +394,9 @@ static int dce_virtual_sw_init(void *handle)
adev->ddev->mode_config.preferred_depth = 24;
adev->ddev->mode_config.prefer_shadow = 1;
- adev->ddev->mode_config.fb_base = adev->mc.aper_base;
+ adev->ddev->mode_config.fb_base = adev->gmc.aper_base;
- r = amdgpu_modeset_create_props(adev);
+ r = amdgpu_display_modeset_create_props(adev);
if (r)
return r;
@@ -485,6 +473,7 @@ static int dce_virtual_hw_init(void *handle)
/* no DCE */
break;
case CHIP_VEGA10:
+ case CHIP_VEGA12:
break;
default:
DRM_ERROR("Virtual display unsupported ASIC type: 0x%X\n", adev->asic_type);
@@ -653,7 +642,6 @@ static int dce_virtual_connector_encoder_init(struct amdgpu_device *adev,
static const struct amdgpu_display_funcs dce_virtual_display_funcs = {
.bandwidth_update = &dce_virtual_bandwidth_update,
.vblank_get_counter = &dce_virtual_vblank_get_counter,
- .vblank_wait = &dce_virtual_vblank_wait,
.backlight_set_level = NULL,
.backlight_get_level = NULL,
.hpd_sense = &dce_virtual_hpd_sense,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
index c0c4bfdcdb14..d72c25c1b987 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.h
+++ b/drivers/gpu/drm/amd/amdgpu/emu_soc.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2015 Advanced Micro Devices, Inc.
+ * Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,15 +19,15 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: AMD
- *
*/
+#include "amdgpu.h"
+#include "soc15.h"
-#ifndef __AMDGPU_POWERPLAY_H__
-#define __AMDGPU_POWERPLAY_H__
-
-#include "amd_shared.h"
+#include "soc15_common.h"
+#include "soc15_hw_ip.h"
-extern const struct amdgpu_ip_block_version amdgpu_pp_ip_block;
+int emu_soc_asic_init(struct amdgpu_device *adev)
+{
+ return 0;
+}
-#endif /* __AMDGPU_POWERPLAY_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 9870d83b68c1..0fff5b8cd318 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -38,6 +38,7 @@
#include "dce/dce_6_0_sh_mask.h"
#include "gca/gfx_7_2_enum.h"
#include "si_enums.h"
+#include "si.h"
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -1808,17 +1809,6 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
-static void gfx_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- /* flush hdp cache */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
- WRITE_DATA_DST_SEL(0)));
- amdgpu_ring_write(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0x1);
-}
-
static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
@@ -1826,24 +1816,6 @@ static void gfx_v6_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
EVENT_INDEX(0));
}
-/**
- * gfx_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
- *
- * @adev: amdgpu_device pointer
- * @ridx: amdgpu ring index
- *
- * Emits an hdp invalidate on the cp.
- */
-static void gfx_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
- WRITE_DATA_DST_SEL(0)));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0x1);
-}
-
static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
u64 seq, unsigned flags)
{
@@ -2358,25 +2330,7 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
{
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
- /* write new base address */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
- WRITE_DATA_DST_SEL(0)));
- if (vmid < 8) {
- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid ));
- } else {
- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
- }
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* bits 0-15 are the VM contexts0-15 */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
- WRITE_DATA_DST_SEL(0)));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for the invalidate to complete */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -2401,6 +2355,18 @@ static void gfx_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
}
}
+static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev)
{
@@ -3511,23 +3477,21 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_gfx = {
.get_wptr = gfx_v6_0_ring_get_wptr,
.set_wptr = gfx_v6_0_ring_set_wptr_gfx,
.emit_frame_size =
- 5 + /* gfx_v6_0_ring_emit_hdp_flush */
- 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
+ 5 + 5 + /* hdp flush / invalidate */
14 + 14 + 14 + /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
7 + 4 + /* gfx_v6_0_ring_emit_pipeline_sync */
- 17 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v6_0_ring_emit_vm_flush */
3 + 2, /* gfx_v6_ring_emit_cntxcntl including vgt flush */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
- .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v6_0_ring_test_ring,
.test_ib = gfx_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.emit_cntxcntl = gfx_v6_ring_emit_cntxcntl,
+ .emit_wreg = gfx_v6_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
@@ -3538,21 +3502,19 @@ static const struct amdgpu_ring_funcs gfx_v6_0_ring_funcs_compute = {
.get_wptr = gfx_v6_0_ring_get_wptr,
.set_wptr = gfx_v6_0_ring_set_wptr_compute,
.emit_frame_size =
- 5 + /* gfx_v6_0_ring_emit_hdp_flush */
- 5 + /* gfx_v6_0_ring_emit_hdp_invalidate */
+ 5 + 5 + /* hdp flush / invalidate */
7 + /* gfx_v6_0_ring_emit_pipeline_sync */
- 17 + /* gfx_v6_0_ring_emit_vm_flush */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v6_0_ring_emit_vm_flush */
14 + 14 + 14, /* gfx_v6_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 6, /* gfx_v6_0_ring_emit_ib */
.emit_ib = gfx_v6_0_ring_emit_ib,
.emit_fence = gfx_v6_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v6_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v6_0_ring_emit_vm_flush,
- .emit_hdp_flush = gfx_v6_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v6_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v6_0_ring_test_ring,
.test_ib = gfx_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .emit_wreg = gfx_v6_0_ring_emit_wreg,
};
static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev)
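
All the emit_wreg hooks added in this series are variants of "write one register from within the command stream"; generic code reaches them through a ring-funcs dispatch, which is what lets the open-coded register writes be replaced by amdgpu_gmc_emit_flush_gpu_tlb(). The macro is assumed to follow the usual amdgpu_ring.h pattern (the header is not part of this excerpt):

/* Hedged sketch of the dispatch, modelled on amdgpu_ring.h: */
#define amdgpu_ring_emit_wreg(r, reg, val) \
	((r)->funcs->emit_wreg((r), (reg), (val)))
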
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index a066c5eda135..e13d9d83767b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1946,7 +1946,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
if (i == 0)
sh_mem_base = 0;
else
- sh_mem_base = adev->mc.shared_aperture_start >> 48;
+ sh_mem_base = adev->gmc.shared_aperture_start >> 48;
cik_srbm_select(adev, 0, 0, 0, i);
/* CP and shaders */
WREG32(mmSH_MEM_CONFIG, sh_mem_cfg);
@@ -2147,26 +2147,6 @@ static void gfx_v7_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
EVENT_INDEX(0));
}
-
-/**
- * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
- *
- * @adev: amdgpu_device pointer
- * @ridx: amdgpu ring index
- *
- * Emits an hdp invalidate on the cp.
- */
-static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0) |
- WR_CONFIRM));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1);
-}
-
/**
* gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
*
@@ -3243,26 +3223,7 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
{
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
- WRITE_DATA_DST_SEL(0)));
- if (vmid < 8) {
- amdgpu_ring_write(ring,
- (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- } else {
- amdgpu_ring_write(ring,
- (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
- }
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* bits 0-15 are the VM contexts0-15 */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0)));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for the invalidate to complete */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -3289,6 +3250,19 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
}
}
+static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
+ WRITE_DATA_DST_SEL(0)));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, val);
+}
+
/*
* RLC
* The RLC is a multi-purpose microengine that handles a
@@ -4384,34 +4358,8 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
case CHIP_KAVERI:
adev->gfx.config.max_shader_engines = 1;
adev->gfx.config.max_tile_pipes = 4;
- if ((adev->pdev->device == 0x1304) ||
- (adev->pdev->device == 0x1305) ||
- (adev->pdev->device == 0x130C) ||
- (adev->pdev->device == 0x130F) ||
- (adev->pdev->device == 0x1310) ||
- (adev->pdev->device == 0x1311) ||
- (adev->pdev->device == 0x131C)) {
- adev->gfx.config.max_cu_per_sh = 8;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1309) ||
- (adev->pdev->device == 0x130A) ||
- (adev->pdev->device == 0x130D) ||
- (adev->pdev->device == 0x1313) ||
- (adev->pdev->device == 0x131D)) {
- adev->gfx.config.max_cu_per_sh = 6;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1306) ||
- (adev->pdev->device == 0x1307) ||
- (adev->pdev->device == 0x130B) ||
- (adev->pdev->device == 0x130E) ||
- (adev->pdev->device == 0x1315) ||
- (adev->pdev->device == 0x131B)) {
- adev->gfx.config.max_cu_per_sh = 4;
- adev->gfx.config.max_backends_per_se = 1;
- } else {
- adev->gfx.config.max_cu_per_sh = 3;
- adev->gfx.config.max_backends_per_se = 1;
- }
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_backends_per_se = 2;
adev->gfx.config.max_sh_per_se = 1;
adev->gfx.config.max_texture_channel_caches = 4;
adev->gfx.config.max_gprs = 256;
@@ -5115,10 +5063,10 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.emit_frame_size =
20 + /* gfx_v7_0_ring_emit_gds_switch */
7 + /* gfx_v7_0_ring_emit_hdp_flush */
- 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp invalidate */
12 + 12 + 12 + /* gfx_v7_0_ring_emit_fence_gfx x3 for user fence, vm fence */
7 + 4 + /* gfx_v7_0_ring_emit_pipeline_sync */
- 17 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + 6 + /* gfx_v7_0_ring_emit_vm_flush */
3 + 4, /* gfx_v7_ring_emit_cntxcntl including vgt flush*/
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_gfx */
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
@@ -5127,12 +5075,12 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
+ .emit_wreg = gfx_v7_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5146,9 +5094,9 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.emit_frame_size =
20 + /* gfx_v7_0_ring_emit_gds_switch */
7 + /* gfx_v7_0_ring_emit_hdp_flush */
- 5 + /* gfx_v7_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp invalidate */
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
- 17 + /* gfx_v7_0_ring_emit_vm_flush */
+ CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
@@ -5157,11 +5105,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_wreg = gfx_v7_0_ring_emit_wreg,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
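
In gfx_v6, gfx_v7, gfx_v8 and cik_sdma the same deleted sequence -- write the VMID's VM_CONTEXT*_PAGE_TABLE_BASE_ADDR, then poke VM_INVALIDATE_REQUEST -- collapses into a single amdgpu_gmc_emit_flush_gpu_tlb() call routed through the new per-ring emit_wreg hook. A reconstruction of what that call is expected to do on CIK, pieced together from the deleted code above (the real implementation would live in gmc_v7_0.c, outside this excerpt):

static uint64_t example_gmc_v7_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
						  unsigned int vmid,
						  uint64_t pd_addr)
{
	uint32_t reg;

	/* point this VMID's context at the new page directory */
	if (vmid < 8)
		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
	else
		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
	amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);

	/* bits 0-15 of VM_INVALIDATE_REQUEST select contexts 0-15 */
	amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);

	return pd_addr;
}
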
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 4e694ae9f308..27943e57681c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3796,7 +3796,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32(mmSH_MEM_CONFIG, tmp);
- tmp = adev->mc.shared_aperture_start >> 48;
+ tmp = adev->gmc.shared_aperture_start >> 48;
WREG32(mmSH_MEM_BASES, tmp);
}
@@ -4847,6 +4847,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
/* reset MQD to a clean status */
if (adev->gfx.mec.mqd_backup[mqd_idx])
memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
+ /* reset ring buffer */
+ ring->wptr = 0;
+ amdgpu_ring_clear_ring(ring);
} else {
amdgpu_ring_clear_ring(ring);
}
@@ -4921,13 +4924,6 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
/* Test KCQs */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
- if (adev->in_gpu_reset) {
- /* move reset ring buffer to here to workaround
- * compute ring test failed
- */
- ring->wptr = 0;
- amdgpu_ring_clear_ring(ring);
- }
ring->ready = true;
r = amdgpu_ring_test_ring(ring);
if (r)
@@ -6230,19 +6226,6 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
EVENT_INDEX(0));
}
-
-static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0) |
- WR_CONFIRM));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1);
-
-}
-
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
@@ -6332,28 +6315,7 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
{
int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
- WRITE_DATA_DST_SEL(0)) |
- WR_CONFIRM);
- if (vmid < 8) {
- amdgpu_ring_write(ring,
- (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- } else {
- amdgpu_ring_write(ring,
- (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
- }
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* bits 0-15 are the VM contexts0-15 */
- /* invalidate the cache */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0)));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for the invalidate to complete */
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -6617,8 +6579,22 @@ static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val)
{
+ uint32_t cmd;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = 1 << 16; /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
+
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
+ amdgpu_ring_write(ring, cmd);
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, val);
@@ -6871,7 +6847,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_frame_size = /* maximum 215dw if count 16 IBs in */
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
- 19 + /* VM_FLUSH */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
@@ -6893,7 +6869,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -6902,6 +6877,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
+ .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -6915,9 +6891,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.emit_frame_size =
20 + /* gfx_v8_0_ring_emit_gds_switch */
7 + /* gfx_v8_0_ring_emit_hdp_flush */
- 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp_invalidate */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
- 17 + /* gfx_v8_0_ring_emit_vm_flush */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
@@ -6926,12 +6902,12 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.set_priority = gfx_v8_0_ring_set_priority_compute,
+ .emit_wreg = gfx_v8_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
@@ -6945,7 +6921,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
.emit_frame_size =
20 + /* gfx_v8_0_ring_emit_gds_switch */
7 + /* gfx_v8_0_ring_emit_hdp_flush */
- 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp_invalidate */
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
17 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
@@ -7151,12 +7127,12 @@ static void gfx_v8_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
} ce_payload = {};
if (ring->adev->virt.chained_ib_support) {
- ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
- offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
+ ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+ offsetof(struct vi_gfx_meta_data_chained_ib, ce_payload);
cnt_ce = (sizeof(ce_payload.chained) >> 2) + 4 - 2;
} else {
- ce_payload_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096 +
- offsetof(struct vi_gfx_meta_data, ce_payload);
+ ce_payload_addr = amdgpu_csa_vaddr(ring->adev) +
+ offsetof(struct vi_gfx_meta_data, ce_payload);
cnt_ce = (sizeof(ce_payload.regular) >> 2) + 4 - 2;
}
@@ -7179,7 +7155,7 @@ static void gfx_v8_0_ring_emit_de_meta(struct amdgpu_ring *ring)
struct vi_de_ib_state_chained_ib chained;
} de_payload = {};
- csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
gds_addr = csa_addr + 4096;
if (ring->adev->virt.chained_ib_support) {
de_payload.chained.gds_backup_addrlo = lower_32_bits(gds_addr);
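
Two gfx_v8 details worth noting: gfx_v8_0_ring_emit_wreg() now varies the WRITE_DATA header per ring type (PFP engine select for GFX rings, the bare no-increment encoding the KIQ firmware expects, and a plain confirmed write for compute), and the CE/DE metadata address comes from amdgpu_csa_vaddr() instead of a hardcoded offset below AMDGPU_VA_RESERVED_SIZE. The helper is not part of this excerpt; a hedged reconstruction, modelled on amdgpu_virt.c of this era, which places the CSA in the reserved area at the top of the VM address space:

uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
{
	uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT;

	return addr - AMDGPU_VA_RESERVED_SIZE;
}
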
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index c06479615e8a..1ae3de1094f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -57,6 +57,13 @@ MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
+MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
+MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
+MODULE_FIRMWARE("amdgpu/vega12_me.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
+MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
+MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
+
MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
@@ -144,7 +151,42 @@ static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};
+static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
+};
+
+static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
+{
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000)
+};
+
#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
+#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -168,6 +210,14 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_gc_9_0_vg10,
ARRAY_SIZE(golden_settings_gc_9_0_vg10));
break;
+ case CHIP_VEGA12:
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_2_1,
+ ARRAY_SIZE(golden_settings_gc_9_2_1));
+ soc15_program_register_sequence(adev,
+ golden_settings_gc_9_2_1_vg12,
+ ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_gc_9_1,
@@ -271,58 +321,65 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
- struct amdgpu_device *adev = ring->adev;
- struct amdgpu_ib ib;
- struct dma_fence *f = NULL;
- uint32_t scratch;
- uint32_t tmp = 0;
- long r;
-
- r = amdgpu_gfx_scratch_get(adev, &scratch);
- if (r) {
- DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
- return r;
- }
- WREG32(scratch, 0xCAFEDEAD);
- memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(adev, NULL, 256, &ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
- goto err1;
- }
- ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
- ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
- ib.ptr[2] = 0xDEADBEEF;
- ib.length_dw = 3;
-
- r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
- if (r)
- goto err2;
-
- r = dma_fence_wait_timeout(f, false, timeout);
- if (r == 0) {
- DRM_ERROR("amdgpu: IB test timed out.\n");
- r = -ETIMEDOUT;
- goto err2;
- } else if (r < 0) {
- DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
- goto err2;
- }
- tmp = RREG32(scratch);
- if (tmp == 0xDEADBEEF) {
- DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
- r = 0;
- } else {
- DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
- scratch, tmp);
- r = -EINVAL;
- }
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_ib ib;
+ struct dma_fence *f = NULL;
+
+ unsigned index;
+ uint64_t gpu_addr;
+ uint32_t tmp;
+ long r;
+
+ r = amdgpu_device_wb_get(adev, &index);
+ if (r) {
+ dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
+ return r;
+ }
+
+ gpu_addr = adev->wb.gpu_addr + (index * 4);
+ adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
+ memset(&ib, 0, sizeof(ib));
+ r = amdgpu_ib_get(adev, NULL, 16, &ib);
+ if (r) {
+ DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
+ goto err1;
+ }
+ ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
+ ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
+ ib.ptr[2] = lower_32_bits(gpu_addr);
+ ib.ptr[3] = upper_32_bits(gpu_addr);
+ ib.ptr[4] = 0xDEADBEEF;
+ ib.length_dw = 5;
+
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+ if (r)
+ goto err2;
+
+ r = dma_fence_wait_timeout(f, false, timeout);
+ if (r == 0) {
+ DRM_ERROR("amdgpu: IB test timed out.\n");
+ r = -ETIMEDOUT;
+ goto err2;
+ } else if (r < 0) {
+ DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+ goto err2;
+ }
+
+ tmp = adev->wb.wb[index];
+ if (tmp == 0xDEADBEEF) {
+ DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
+ r = 0;
+ } else {
+ DRM_ERROR("ib test on ring %d failed\n", ring->idx);
+ r = -EINVAL;
+ }
+
err2:
- amdgpu_ib_free(adev, &ib, NULL);
- dma_fence_put(f);
+ amdgpu_ib_free(adev, &ib, NULL);
+ dma_fence_put(f);
err1:
- amdgpu_gfx_scratch_free(adev, scratch);
- return r;
+ amdgpu_device_wb_free(adev, index);
+ return r;
}
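The test above now proves the ring end-to-end through a write-back (WB) slot rather than a scratch register. A minimal sketch of that round-trip, using only the helpers visible in this hunk (IB submission elided):

	static int wb_roundtrip_sketch(struct amdgpu_device *adev)
	{
		unsigned index;
		uint64_t gpu_addr;
		int r;

		r = amdgpu_device_wb_get(adev, &index);	/* reserve one 32-bit slot */
		if (r)
			return r;
		gpu_addr = adev->wb.gpu_addr + (index * 4);	/* GPU-visible address */
		adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);	/* seed the sentinel */
		/* ... schedule an IB that WRITE_DATAs 0xDEADBEEF to gpu_addr ... */
		r = (adev->wb.wb[index] == 0xDEADBEEF) ? 0 : -EINVAL;
		amdgpu_device_wb_free(adev, index);	/* release the slot */
		return r;
	}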
@@ -362,6 +419,9 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA10:
chip_name = "vega10";
break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -961,6 +1021,15 @@ static void gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
break;
+ case CHIP_VEGA12:
+ adev->gfx.config.max_hw_contexts = 8;
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
+ gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
+ DRM_INFO("fix gfx.config for vega12\n");
+ break;
case CHIP_RAVEN:
adev->gfx.config.max_hw_contexts = 8;
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
@@ -1242,6 +1311,7 @@ static int gfx_v9_0_sw_init(void *handle)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
adev->gfx.mec.num_mec = 2;
break;
@@ -1254,23 +1324,23 @@ static int gfx_v9_0_sw_init(void *handle)
adev->gfx.mec.num_queue_per_pipe = 8;
/* KIQ event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
if (r)
return r;
/* EOP Event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
if (r)
return r;
/* Privileged reg */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 184,
&adev->gfx.priv_reg_irq);
if (r)
return r;
/* Privileged inst */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 185,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, 185,
&adev->gfx.priv_inst_irq);
if (r)
return r;
@@ -1539,7 +1609,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
WREG32_SOC15(GC, 0, mmSH_MEM_CONFIG, tmp);
- tmp = adev->mc.shared_aperture_start >> 48;
+ tmp = adev->gmc.shared_aperture_start >> 48;
WREG32_SOC15(GC, 0, mmSH_MEM_BASES, tmp);
}
}
@@ -2954,7 +3024,13 @@ static int gfx_v9_0_hw_fini(void *handle)
gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]);
if (amdgpu_sriov_vf(adev)) {
- pr_debug("For SRIOV client, shouldn't do anything.\n");
+ gfx_v9_0_cp_gfx_enable(adev, false);
+ /* Polling must be disabled for SRIOV when the hw is finished;
+ * otherwise the CPC engine may keep fetching the WB address,
+ * which is already invalid once the sw is finished, and trigger
+ * a DMAR read error on the hypervisor side.
+ */
+ WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
return 0;
}
gfx_v9_0_cp_enable(adev, false);
@@ -3469,6 +3545,7 @@ static int gfx_v9_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
gfx_v9_0_update_gfx_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -3585,14 +3662,6 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
ref_and_mask, ref_and_mask, 0x20);
}
-static void gfx_v9_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- gfx_v9_0_write_data_to_reg(ring, 0, true,
- SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
-}
-
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib,
unsigned vmid, bool ctx_switch)
@@ -3686,32 +3755,10 @@ static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
-
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
-
- gfx_v9_0_write_data_to_reg(ring, usepfp, true,
- hub->ctx0_ptb_addr_lo32 + (2 * vmid),
- lower_32_bits(pd_addr));
-
- gfx_v9_0_write_data_to_reg(ring, usepfp, true,
- hub->ctx0_ptb_addr_hi32 + (2 * vmid),
- upper_32_bits(pd_addr));
-
- gfx_v9_0_write_data_to_reg(ring, usepfp, true,
- hub->vm_inv_eng0_req + eng, req);
-
- /* wait for the invalidate to complete */
- gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack +
- eng, 0, 1 << vmid, 1 << vmid, 0x20);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* compute doesn't have PFP */
- if (usepfp) {
+ if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
/* sync PFP to ME, otherwise we might get invalid PFP reads */
amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
amdgpu_ring_write(ring, 0x0);
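The hub programming deleted above is assumed to move into the shared amdgpu_gmc_emit_flush_gpu_tlb() helper. A sketch of what that helper is expected to emit through the new emit_wreg/emit_reg_wait ring hooks (register names are taken from the deleted lines; routing get_invalidate_req() through gmc_funcs is an assumption):

	static uint64_t flush_gpu_tlb_sketch(struct amdgpu_ring *ring,
					     unsigned vmid, uint64_t pd_addr)
	{
		struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
		unsigned eng = ring->vm_inv_eng;
		/* assumed hook; the old code used gart_funcs->get_invalidate_req() */
		uint32_t req = ring->adev->gmc.gmc_funcs->get_invalidate_req(vmid);

		/* program the page-directory base for this VMID */
		amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
				      lower_32_bits(pd_addr));
		amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
				      upper_32_bits(pd_addr));
		/* kick the invalidation engine, then wait for this VMID's ack bit */
		amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
		amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
					  1 << vmid, 1 << vmid);
		return pd_addr;
	}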
@@ -3735,6 +3782,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
return wptr;
}
+static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+ bool acquire)
+{
+ struct amdgpu_device *adev = ring->adev;
+ int pipe_num, tmp, reg;
+ int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+ pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+ /* the first ME has only 2 entries, GFX and HP3D */
+ if (ring->me > 0)
+ pipe_num -= 2;
+
+ reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
+ tmp = RREG32(reg);
+ tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+ WREG32(reg, tmp);
+}
+
+static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ bool acquire)
+{
+ int i, pipe;
+ bool reserve;
+ struct amdgpu_ring *iring;
+
+ mutex_lock(&adev->gfx.pipe_reserve_mutex);
+ pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+ if (acquire)
+ set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+ else
+ clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+ if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
+ /* Clear all reservations - everyone reacquires all resources */
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+ gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+ true);
+
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+ gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+ true);
+ } else {
+ /* Lower all pipes without a current reservation */
+ for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+ iring = &adev->gfx.gfx_ring[i];
+ pipe = amdgpu_gfx_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
+ reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+ gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+ }
+
+ for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+ iring = &adev->gfx.compute_ring[i];
+ pipe = amdgpu_gfx_queue_to_bit(adev,
+ iring->me,
+ iring->pipe,
+ 0);
+ reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+ gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+ }
+ }
+
+ mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ bool acquire)
+{
+ uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+ uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+ WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+ WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+ enum drm_sched_priority priority)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
+
+ if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+ return;
+
+ gfx_v9_0_hqd_set_priority(adev, ring, acquire);
+ gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
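Illustrative pairing of the two halves above (DRM scheduler priorities per the drm_sched_priority enum; the caller shown is hypothetical):

	/* promote: raise HQD priority, throttle pipes without a reservation */
	gfx_v9_0_ring_set_priority_compute(ring, DRM_SCHED_PRIORITY_HIGH_HW);
	/* ... latency-sensitive compute work runs here ... */
	/* demote: drop HQD priority and release the pipe reservation */
	gfx_v9_0_ring_set_priority_compute(ring, DRM_SCHED_PRIORITY_NORMAL);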
static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
@@ -3788,7 +3934,7 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
int cnt;
cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
- csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
@@ -3806,7 +3952,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
uint64_t csa_addr, gds_addr;
int cnt;
- csa_addr = AMDGPU_VA_RESERVED_SIZE - 2 * 4096;
+ csa_addr = amdgpu_csa_vaddr(ring->adev);
gds_addr = csa_addr + 4096;
de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
@@ -3904,15 +4050,34 @@ static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
}
static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
- uint32_t val)
+ uint32_t val)
{
+ uint32_t cmd = 0;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_GFX:
+ cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
+ break;
+ case AMDGPU_RING_TYPE_KIQ:
+ cmd = (1 << 16); /* no inc addr */
+ break;
+ default:
+ cmd = WR_CONFIRM;
+ break;
+ }
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
+ amdgpu_ring_write(ring, cmd);
amdgpu_ring_write(ring, reg);
amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, val);
}
+static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
+}
+
static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -4199,7 +4364,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
- 24 + /* VM_FLUSH */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* VM_FLUSH */
8 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
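Worked accounting for the VM_FLUSH terms above (each emit_wreg is the 5-dword WRITE_DATA sequence shown later in this file, each emit_reg_wait a 7-dword WAIT_REG_MEM; reading the trailing 2 as the PFP_SYNC_ME packet is an inference from gfx_v9_0_ring_emit_vm_flush):

	/* illustrative only:
	 *   vm_flush dwords = SOC15_FLUSH_GPU_TLB_NUM_WREG     * 5  (WRITE_DATA)
	 *                   + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7  (WAIT_REG_MEM)
	 *                   + 2                                     (PFP_SYNC_ME)
	 */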
@@ -4221,7 +4388,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -4231,6 +4397,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
.emit_tmz = gfx_v9_0_ring_emit_tmz,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
@@ -4245,9 +4413,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_frame_size =
20 + /* gfx_v9_0_ring_emit_gds_switch */
7 + /* gfx_v9_0_ring_emit_hdp_flush */
- 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
- 24 + /* gfx_v9_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4256,11 +4426,13 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v9_0_ring_test_ring,
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .set_priority = gfx_v9_0_ring_set_priority_compute,
+ .emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
@@ -4275,9 +4447,11 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.emit_frame_size =
20 + /* gfx_v9_0_ring_emit_gds_switch */
7 + /* gfx_v9_0_ring_emit_hdp_flush */
- 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */
+ 5 + /* hdp invalidate */
7 + /* gfx_v9_0_ring_emit_pipeline_sync */
- 24 + /* gfx_v9_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+ 2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
@@ -4288,6 +4462,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_rreg = gfx_v9_0_ring_emit_rreg,
.emit_wreg = gfx_v9_0_ring_emit_wreg,
+ .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
};
static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -4342,6 +4517,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
{
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 56f5fe4e2fee..acfbd2d749cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -40,7 +40,7 @@ static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
uint64_t value;
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
- value = adev->gart.table_addr - adev->mc.vram_start
+ value = adev->gart.table_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /*valid bit*/
@@ -57,14 +57,14 @@ static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
gfxhub_v1_0_init_gart_pt_regs(adev);
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->mc.gart_start >> 12));
+ (u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->mc.gart_start >> 44));
+ (u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->mc.gart_end >> 12));
+ (u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->mc.gart_end >> 44));
+ (u32)(adev->gmc.gart_end >> 44));
}
static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -78,12 +78,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
/* Program the system aperture low logical page number. */
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->mc.vram_start >> 18);
+ adev->gmc.vram_start >> 18);
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->mc.vram_end >> 18);
+ adev->gmc.vram_end >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->mc.vram_start
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start
+ adev->vm_manager.vram_base_offset;
WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
@@ -92,9 +92,9 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
/* Program "protection fault". */
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(GC, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page.addr >> 44));
+ (u32)((u64)adev->dummy_page_addr >> 44));
WREG32_FIELD15(GC, 0, VM_L2_PROTECTION_FAULT_CNTL2,
ACTIVE_PAGE_MIGRATION_PTE_READ_RETRY, 1);
@@ -143,7 +143,7 @@ static void gfxhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
WREG32_SOC15(GC, 0, mmVM_L2_CNTL2, tmp);
tmp = mmVM_L2_CNTL3_DEFAULT;
- if (adev->mc.translate_further) {
+ if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -195,7 +195,7 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
num_level = adev->vm_manager.num_level;
block_size = adev->vm_manager.block_size;
- if (adev->mc.translate_further)
+ if (adev->gmc.translate_further)
num_level -= 1;
else
block_size -= 9;
@@ -257,9 +257,9 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
* SRIOV driver need to program them
*/
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_BASE,
- adev->mc.vram_start >> 24);
+ adev->gmc.vram_start >> 24);
WREG32_SOC15(GC, 0, mmMC_VM_FB_LOCATION_TOP,
- adev->mc.vram_end >> 24);
+ adev->gmc.vram_end >> 24);
}
/* GART Enable. */
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index 8e28270d1ea9..5617cf62c566 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -22,6 +22,7 @@
*/
#include <linux/firmware.h>
#include <drm/drmP.h>
+#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "gmc_v6_0.h"
#include "amdgpu_ucode.h"
@@ -36,7 +37,7 @@
#include "dce/dce_6_0_sh_mask.h"
#include "si_enums.h"
-static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v6_0_wait_for_idle(void *handle);
@@ -136,19 +137,19 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev)
snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
else
snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
- err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+ err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->mc.fw);
+ err = amdgpu_ucode_validate(adev->gmc.fw);
out:
if (err) {
dev_err(adev->dev,
"si_mc: Failed to load firmware \"%s\"\n",
fw_name);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
}
return err;
}
@@ -161,20 +162,20 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
int i, regs_size, ucode_size;
const struct mc_firmware_header_v1_0 *hdr;
- if (!adev->mc.fw)
+ if (!adev->gmc.fw)
return -EINVAL;
- hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
- adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
new_io_mc_regs = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
new_fw_data = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
running = RREG32(mmMC_SEQ_SUP_CNTL) & MC_SEQ_SUP_CNTL__RUN_MASK;
@@ -217,12 +218,12 @@ static int gmc_v6_0_mc_load_microcode(struct amdgpu_device *adev)
}
static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc)
+ struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->mc, base);
+ amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
}
@@ -259,9 +260,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
}
/* Update configuration */
WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->mc.vram_start >> 12);
+ adev->gmc.vram_start >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->mc.vram_end >> 12);
+ adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
adev->vram_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
@@ -319,56 +320,69 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
numchan = 16;
break;
}
- adev->mc.vram_width = numchan * chansize;
+ adev->gmc.vram_width = numchan * chansize;
/* size in MB on si */
- adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
}
- adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
- adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
- adev->mc.visible_vram_size = adev->mc.aper_size;
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_HAINAN: /* no MM engines */
default:
- adev->mc.gart_size = 256ULL << 20;
+ adev->gmc.gart_size = 256ULL << 20;
break;
case CHIP_VERDE: /* UVD, VCE do not support GPUVM */
case CHIP_TAHITI: /* UVD, VCE do not support GPUVM */
case CHIP_PITCAIRN: /* UVD, VCE do not support GPUVM */
case CHIP_OLAND: /* UVD, VCE do not support GPUVM */
- adev->mc.gart_size = 1024ULL << 20;
+ adev->gmc.gart_size = 1024ULL << 20;
break;
}
} else {
- adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
- gmc_v6_0_vram_gtt_location(adev, &adev->mc);
+ gmc_v6_0_vram_gtt_location(adev, &adev->gmc);
return 0;
}
-static void gmc_v6_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
{
- WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
-static int gmc_v6_0_gart_set_pte_pde(struct amdgpu_device *adev,
- void *cpu_pt_addr,
- uint32_t gpu_page_idx,
- uint64_t addr,
- uint64_t flags)
+static uint64_t gmc_v6_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ /* write new base address */
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8);
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts 0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
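A worked instance of the register selection above (the same split recurs in the gmc_v7/gmc_v8 versions later in this series): VMIDs 0-7 index the VM_CONTEXT0 bank, VMIDs 8-15 the VM_CONTEXT8 bank, and the invalidate mask is simply the VMID's bit:

	/* e.g. vmid = 10:
	 *   reg  = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (10 - 8)
	 *   mask = 1 << 10   (bit 10 of VM_INVALIDATE_REQUEST)
	 */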
+static int gmc_v6_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -432,9 +446,9 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
- if (enable && !adev->mc.prt_warning) {
+ if (enable && !adev->gmc.prt_warning) {
dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
- adev->mc.prt_warning = true;
+ adev->gmc.prt_warning = true;
}
tmp = RREG32(mmVM_PRT_CNTL);
@@ -454,7 +468,8 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable)
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
- uint32_t high = adev->vm_manager.max_pfn;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -514,11 +529,11 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
(field << VM_L2_CNTL3__BANK_SELECT__SHIFT) |
(field << VM_L2_CNTL3__L2_CACHE_BIGK_FRAGMENT_SIZE__SHIFT));
/* setup context0 */
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
WREG32(mmVM_CONTEXT0_CNTL,
VM_CONTEXT0_CNTL__ENABLE_CONTEXT_MASK |
@@ -548,7 +563,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
/* enable context1-15 */
WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT1_CNTL2, 4);
WREG32(mmVM_CONTEXT1_CNTL,
VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
@@ -560,9 +575,9 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v6_0_set_fault_enable_default(adev, true);
- gmc_v6_0_gart_flush_gpu_tlb(adev, 0);
+ gmc_v6_0_flush_gpu_tlb(adev, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->mc.gart_size >> 20),
+ (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr);
adev->gart.ready = true;
return 0;
@@ -794,7 +809,7 @@ static int gmc_v6_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gmc_v6_0_set_gart_funcs(adev);
+ gmc_v6_0_set_gmc_funcs(adev);
gmc_v6_0_set_irq_funcs(adev);
return 0;
@@ -805,7 +820,7 @@ static int gmc_v6_0_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
@@ -817,26 +832,26 @@ static int gmc_v6_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->flags & AMD_IS_APU) {
- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
tmp &= MC_SEQ_MISC0__MT__MASK;
- adev->mc.vram_type = gmc_v6_0_convert_vram_type(tmp);
+ adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
if (r)
return r;
amdgpu_vm_adjust_size(adev, 64, 9, 1, 40);
- adev->mc.mc_mask = 0xffffffffffULL;
+ adev->gmc.mc_mask = 0xffffffffffULL;
- adev->mc.stolen_size = 256 * 1024;
+ adev->gmc.stolen_size = 256 * 1024;
adev->need_dma32 = false;
dma_bits = adev->need_dma32 ? 32 : 40;
@@ -851,6 +866,7 @@ static int gmc_v6_0_sw_init(void *handle)
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
dev_warn(adev->dev, "amdgpu: No coherent DMA available.\n");
}
+ adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
r = gmc_v6_0_init_microcode(adev);
if (r) {
@@ -900,8 +916,8 @@ static int gmc_v6_0_sw_fini(void *handle)
amdgpu_vm_manager_fini(adev);
gmc_v6_0_gart_fini(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
return 0;
}
@@ -932,7 +948,7 @@ static int gmc_v6_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v6_0_gart_disable(adev);
return 0;
@@ -1127,9 +1143,10 @@ static const struct amd_ip_funcs gmc_v6_0_ip_funcs = {
.set_powergating_state = gmc_v6_0_set_powergating_state,
};
-static const struct amdgpu_gart_funcs gmc_v6_0_gart_funcs = {
- .flush_gpu_tlb = gmc_v6_0_gart_flush_gpu_tlb,
- .set_pte_pde = gmc_v6_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v6_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v6_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v6_0_emit_flush_gpu_tlb,
+ .set_pte_pde = gmc_v6_0_set_pte_pde,
.set_prt = gmc_v6_0_set_prt,
.get_vm_pde = gmc_v6_0_get_vm_pde,
.get_vm_pte_flags = gmc_v6_0_get_vm_pte_flags
@@ -1140,16 +1157,16 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = {
.process = gmc_v6_0_process_interrupt,
};
-static void gmc_v6_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gart.gart_funcs == NULL)
- adev->gart.gart_funcs = &gmc_v6_0_gart_funcs;
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs;
}
static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->mc.vm_fault.num_types = 1;
- adev->mc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v6_0_irq_funcs;
}
const struct amdgpu_ip_block_version gmc_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 86e9d682c59e..80054f36e487 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -22,6 +22,7 @@
*/
#include <linux/firmware.h>
#include <drm/drmP.h>
+#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "cikd.h"
#include "cik.h"
@@ -42,7 +43,7 @@
#include "amdgpu_atombios.h"
-static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v7_0_wait_for_idle(void *handle);
@@ -151,16 +152,16 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev)
else
snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
- err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+ err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->mc.fw);
+ err = amdgpu_ucode_validate(adev->gmc.fw);
out:
if (err) {
pr_err("cik_mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
}
return err;
}
@@ -181,19 +182,19 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
u32 running;
int i, ucode_size, regs_size;
- if (!adev->mc.fw)
+ if (!adev->gmc.fw)
return -EINVAL;
- hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
- adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
io_mc_regs = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
fw_data = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
@@ -235,12 +236,12 @@ static int gmc_v7_0_mc_load_microcode(struct amdgpu_device *adev)
}
static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc)
+ struct amdgpu_gmc *mc)
{
u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->mc, base);
+ amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
}
@@ -283,9 +284,9 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
}
/* Update configuration */
WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->mc.vram_start >> 12);
+ adev->gmc.vram_start >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->mc.vram_end >> 12);
+ adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
adev->vram_scratch.gpu_addr >> 12);
WREG32(mmMC_VM_AGP_BASE, 0);
@@ -318,8 +319,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
{
int r;
- adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
- if (!adev->mc.vram_width) {
+ adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
u32 tmp;
int chansize, numchan;
@@ -361,38 +362,38 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
numchan = 16;
break;
}
- adev->mc.vram_width = numchan * chansize;
+ adev->gmc.vram_width = numchan * chansize;
}
/* size in MB on si */
- adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
}
- adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
- adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
if (adev->flags & AMD_IS_APU) {
- adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
- adev->mc.aper_size = adev->mc.real_vram_size;
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
/* In case the PCI BAR is larger than the actual amount of vram */
- adev->mc.visible_vram_size = adev->mc.aper_size;
- if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
- adev->mc.visible_vram_size = adev->mc.real_vram_size;
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_TOPAZ: /* no MM engines */
default:
- adev->mc.gart_size = 256ULL << 20;
+ adev->gmc.gart_size = 256ULL << 20;
break;
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_BONAIRE: /* UVD, VCE do not support GPUVM */
@@ -400,15 +401,15 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
case CHIP_KAVERI: /* UVD, VCE do not support GPUVM */
case CHIP_KABINI: /* UVD, VCE do not support GPUVM */
case CHIP_MULLINS: /* UVD, VCE do not support GPUVM */
- adev->mc.gart_size = 1024ULL << 20;
+ adev->gmc.gart_size = 1024ULL << 20;
break;
#endif
}
} else {
- adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
- gmc_v7_0_vram_gtt_location(adev, &adev->mc);
+ gmc_v7_0_vram_gtt_location(adev, &adev->gmc);
return 0;
}
@@ -421,25 +422,44 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
*/
/**
- * gmc_v7_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v7_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
* Flush the TLB for the requested page table (CIK).
*/
-static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid)
{
- /* flush hdp cache */
- WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
+static uint64_t gmc_v7_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts 0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v7_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
+
/**
- * gmc_v7_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v7_0_set_pte_pde - update the page tables using MMIO
*
* @adev: amdgpu_device pointer
* @cpu_pt_addr: cpu address of the page table
@@ -449,11 +469,9 @@ static void gmc_v7_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
*
* Update the page tables using the CPU.
*/
-static int gmc_v7_0_gart_set_pte_pde(struct amdgpu_device *adev,
- void *cpu_pt_addr,
- uint32_t gpu_page_idx,
- uint64_t addr,
- uint64_t flags)
+static int gmc_v7_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -523,9 +541,9 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
{
uint32_t tmp;
- if (enable && !adev->mc.prt_warning) {
+ if (enable && !adev->gmc.prt_warning) {
dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
- adev->mc.prt_warning = true;
+ adev->gmc.prt_warning = true;
}
tmp = RREG32(mmVM_PRT_CNTL);
@@ -547,7 +565,8 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable)
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
- uint32_t high = adev->vm_manager.max_pfn;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -621,11 +640,11 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, L2_CACHE_BIGK_FRAGMENT_SIZE, field);
WREG32(mmVM_L2_CNTL3, tmp);
/* setup context0 */
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
tmp = RREG32(mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -655,7 +674,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
/* enable context1-15 */
WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT1_CNTL2, 4);
tmp = RREG32(mmVM_CONTEXT1_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -674,9 +693,9 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
WREG32(mmCHUB_CONTROL, tmp);
}
- gmc_v7_0_gart_flush_gpu_tlb(adev, 0);
+ gmc_v7_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->mc.gart_size >> 20),
+ (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr);
adev->gart.ready = true;
return 0;
@@ -749,21 +768,21 @@ static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
*
* Print human readable fault information (CIK).
*/
-static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+static void gmc_v7_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+ u32 addr, u32 mc_client, unsigned pasid)
{
- u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+ u32 mc_id;
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
- protections, vmid, addr,
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ protections, vmid, pasid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
"write" : "read", block, mc_client, mc_id);
@@ -921,16 +940,16 @@ static int gmc_v7_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gmc_v7_0_set_gart_funcs(adev);
+ gmc_v7_0_set_gmc_funcs(adev);
gmc_v7_0_set_irq_funcs(adev);
- adev->mc.shared_aperture_start = 0x2000000000000000ULL;
- adev->mc.shared_aperture_end =
- adev->mc.shared_aperture_start + (4ULL << 30) - 1;
- adev->mc.private_aperture_start =
- adev->mc.shared_aperture_end + 1;
- adev->mc.private_aperture_end =
- adev->mc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start =
+ adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
return 0;
}
@@ -940,7 +959,7 @@ static int gmc_v7_0_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
@@ -952,18 +971,18 @@ static int gmc_v7_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->flags & AMD_IS_APU) {
- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp = RREG32(mmMC_SEQ_MISC0);
tmp &= MC_SEQ_MISC0__MT__MASK;
- adev->mc.vram_type = gmc_v7_0_convert_vram_type(tmp);
+ adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
if (r)
return r;
@@ -977,9 +996,9 @@ static int gmc_v7_0_sw_init(void *handle)
* This is the max address of the GPU's
* internal address space.
*/
- adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->mc.stolen_size = 256 * 1024;
+ adev->gmc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
@@ -999,6 +1018,7 @@ static int gmc_v7_0_sw_init(void *handle)
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
pr_warn("amdgpu: No coherent DMA available\n");
}
+ adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
r = gmc_v7_0_init_microcode(adev);
if (r) {
@@ -1049,8 +1069,8 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_vm_manager_fini(adev);
gmc_v7_0_gart_fini(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
return 0;
}
@@ -1083,7 +1103,7 @@ static int gmc_v7_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v7_0_gart_disable(adev);
return 0;
@@ -1257,7 +1277,8 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client);
+ gmc_v7_0_vm_decode_fault(adev, status, addr, mc_client,
+ entry->pasid);
}
return 0;
@@ -1306,9 +1327,11 @@ static const struct amd_ip_funcs gmc_v7_0_ip_funcs = {
.set_powergating_state = gmc_v7_0_set_powergating_state,
};
-static const struct amdgpu_gart_funcs gmc_v7_0_gart_funcs = {
- .flush_gpu_tlb = gmc_v7_0_gart_flush_gpu_tlb,
- .set_pte_pde = gmc_v7_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v7_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v7_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v7_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v7_0_emit_pasid_mapping,
+ .set_pte_pde = gmc_v7_0_set_pte_pde,
.set_prt = gmc_v7_0_set_prt,
.get_vm_pte_flags = gmc_v7_0_get_vm_pte_flags,
.get_vm_pde = gmc_v7_0_get_vm_pde
@@ -1319,16 +1342,16 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = {
.process = gmc_v7_0_process_interrupt,
};
-static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gart.gart_funcs == NULL)
- adev->gart.gart_funcs = &gmc_v7_0_gart_funcs;
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs;
}
static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->mc.vm_fault.num_types = 1;
- adev->mc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v7_0_irq_funcs;
}
const struct amdgpu_ip_block_version gmc_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9a813d834f1a..d71d4cb68f9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -22,6 +22,7 @@
*/
#include <linux/firmware.h>
#include <drm/drmP.h>
+#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "gmc_v8_0.h"
#include "amdgpu_ucode.h"
@@ -44,7 +45,7 @@
#include "amdgpu_atombios.h"
-static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev);
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static int gmc_v8_0_wait_for_idle(void *handle);
@@ -235,16 +236,16 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mc.bin", chip_name);
- err = request_firmware(&adev->mc.fw, fw_name, adev->dev);
+ err = request_firmware(&adev->gmc.fw, fw_name, adev->dev);
if (err)
goto out;
- err = amdgpu_ucode_validate(adev->mc.fw);
+ err = amdgpu_ucode_validate(adev->gmc.fw);
out:
if (err) {
pr_err("mc: Failed to load firmware \"%s\"\n", fw_name);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
}
return err;
}
@@ -273,19 +274,19 @@ static int gmc_v8_0_tonga_mc_load_microcode(struct amdgpu_device *adev)
if (amdgpu_sriov_bios(adev))
return 0;
- if (!adev->mc.fw)
+ if (!adev->gmc.fw)
return -EINVAL;
- hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
- adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
io_mc_regs = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
fw_data = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
running = REG_GET_FIELD(RREG32(mmMC_SEQ_SUP_CNTL), MC_SEQ_SUP_CNTL, RUN);
@@ -349,19 +350,19 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
if (vbios_version == 0)
return 0;
- if (!adev->mc.fw)
+ if (!adev->gmc.fw)
return -EINVAL;
- hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
+ hdr = (const struct mc_firmware_header_v1_0 *)adev->gmc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
- adev->mc.fw_version = le32_to_cpu(hdr->header.ucode_version);
+ adev->gmc.fw_version = le32_to_cpu(hdr->header.ucode_version);
regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
io_mc_regs = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
fw_data = (const __le32 *)
- (adev->mc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+ (adev->gmc.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
data = RREG32(mmMC_SEQ_MISC0);
data &= ~(0x40);
@@ -397,7 +398,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
}
static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc)
+ struct amdgpu_gmc *mc)
{
u64 base = 0;
@@ -405,7 +406,7 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev,
base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF;
base <<= 24;
- amdgpu_device_vram_location(adev, &adev->mc, base);
+ amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
}
@@ -448,18 +449,18 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
}
/* Update configuration */
WREG32(mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->mc.vram_start >> 12);
+ adev->gmc.vram_start >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->mc.vram_end >> 12);
+ adev->gmc.vram_end >> 12);
WREG32(mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
adev->vram_scratch.gpu_addr >> 12);
if (amdgpu_sriov_vf(adev)) {
- tmp = ((adev->mc.vram_end >> 24) & 0xFFFF) << 16;
- tmp |= ((adev->mc.vram_start >> 24) & 0xFFFF);
+ tmp = ((adev->gmc.vram_end >> 24) & 0xFFFF) << 16;
+ tmp |= ((adev->gmc.vram_start >> 24) & 0xFFFF);
WREG32(mmMC_VM_FB_LOCATION, tmp);
/* XXX double check these! */
- WREG32(mmHDP_NONSURFACE_BASE, (adev->mc.vram_start >> 8));
+ WREG32(mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8));
WREG32(mmHDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
WREG32(mmHDP_NONSURFACE_SIZE, 0x3FFFFFFF);
}
@@ -494,8 +495,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
{
int r;
- adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
- if (!adev->mc.vram_width) {
+ adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
u32 tmp;
int chansize, numchan;
@@ -537,31 +538,31 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
numchan = 16;
break;
}
- adev->mc.vram_width = numchan * chansize;
+ adev->gmc.vram_width = numchan * chansize;
}
/* size in MB on si */
- adev->mc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
- adev->mc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
+ adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
}
- adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
- adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
#ifdef CONFIG_X86_64
if (adev->flags & AMD_IS_APU) {
- adev->mc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
- adev->mc.aper_size = adev->mc.real_vram_size;
+ adev->gmc.aper_base = ((u64)RREG32(mmMC_VM_FB_OFFSET)) << 22;
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
}
#endif
/* In case the PCI BAR is larger than the actual amount of vram */
- adev->mc.visible_vram_size = adev->mc.aper_size;
- if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
- adev->mc.visible_vram_size = adev->mc.real_vram_size;
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
@@ -570,20 +571,20 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
case CHIP_POLARIS10: /* all engines support GPUVM */
case CHIP_POLARIS12: /* all engines support GPUVM */
default:
- adev->mc.gart_size = 256ULL << 20;
+ adev->gmc.gart_size = 256ULL << 20;
break;
case CHIP_TONGA: /* UVD, VCE do not support GPUVM */
case CHIP_FIJI: /* UVD, VCE do not support GPUVM */
case CHIP_CARRIZO: /* UVD, VCE do not support GPUVM, DCE SG support */
case CHIP_STONEY: /* UVD does not support GPUVM, DCE SG support */
- adev->mc.gart_size = 1024ULL << 20;
+ adev->gmc.gart_size = 1024ULL << 20;
break;
}
} else {
- adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
- gmc_v8_0_vram_gtt_location(adev, &adev->mc);
+ gmc_v8_0_vram_gtt_location(adev, &adev->gmc);
return 0;
}
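For reference, the override path above is exercised by booting with amdgpu.gart_size=512 on the kernel command line, which yields (u64)512 << 20 = 512 MiB of GART address space in place of the per-ASIC defaults.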
@@ -596,25 +597,45 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
*/
/**
- * gmc_v8_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v8_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
* Flush the TLB for the requested page table (VI).
*/
-static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
{
- /* flush hdp cache */
- WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
-
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
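The HDP cache flush formerly folded into this path is dropped here (and in the gmc_v9 counterpart below) because HDP coherency is now requested through the nbio hdp_flush callback, which as of this patch can also be emitted into a ring; TLB invalidation and HDP flushing become independent operations.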
+static uint64_t gmc_v8_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ uint32_t reg;
+
+ if (vmid < 8)
+ reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
+ else
+ reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
+ amdgpu_ring_emit_wreg(ring, reg, pd_addr >> 12);
+
+ /* bits 0-15 are the VM contexts0-15 */
+ amdgpu_ring_emit_wreg(ring, mmVM_INVALIDATE_REQUEST, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v8_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ amdgpu_ring_emit_wreg(ring, mmIH_VMID_0_LUT + vmid, pasid);
+}
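The two hooks added above are reached through the new amdgpu_gmc_funcs table rather than called directly. A minimal caller sketch using only the function pointers wired up at the end of this file (ring, vmid, pasid and pd_addr are illustrative):

	struct amdgpu_device *adev = ring->adev;

	/* program the VMID's page directory and invalidate its TLB entries */
	pd_addr = adev->gmc.gmc_funcs->emit_flush_gpu_tlb(ring, vmid, pd_addr);
	/* publish the process address space id used when decoding VM faults */
	adev->gmc.gmc_funcs->emit_pasid_mapping(ring, vmid, pasid);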
+
/**
- * gmc_v8_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v8_0_set_pte_pde - update the page tables using MMIO
*
* @adev: amdgpu_device pointer
* @cpu_pt_addr: cpu address of the page table
@@ -624,11 +645,9 @@ static void gmc_v8_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
*
* Update the page tables using the CPU.
*/
-static int gmc_v8_0_gart_set_pte_pde(struct amdgpu_device *adev,
- void *cpu_pt_addr,
- uint32_t gpu_page_idx,
- uint64_t addr,
- uint64_t flags)
+static int gmc_v8_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -722,9 +741,9 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
{
u32 tmp;
- if (enable && !adev->mc.prt_warning) {
+ if (enable && !adev->gmc.prt_warning) {
dev_warn(adev->dev, "Disabling VM faults because of PRT request!\n");
- adev->mc.prt_warning = true;
+ adev->gmc.prt_warning = true;
}
tmp = RREG32(mmVM_PRT_CNTL);
@@ -746,7 +765,8 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable)
if (enable) {
uint32_t low = AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT;
- uint32_t high = adev->vm_manager.max_pfn;
+ uint32_t high = adev->vm_manager.max_pfn -
+ (AMDGPU_VA_RESERVED_SIZE >> AMDGPU_GPU_PAGE_SHIFT);
WREG32(mmVM_PRT_APERTURE0_LOW_ADDR, low);
WREG32(mmVM_PRT_APERTURE1_LOW_ADDR, low);
@@ -836,11 +856,11 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL4, VMC_TAP_CONTEXT1_PTE_REQUEST_SNOOP, 0);
WREG32(mmVM_L2_CNTL4, tmp);
/* setup context0 */
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->mc.gart_start >> 12);
- WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->mc.gart_end >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12);
+ WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12);
WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12);
WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT0_CNTL2, 0);
tmp = RREG32(mmVM_CONTEXT0_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1);
@@ -870,7 +890,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
/* enable context1-15 */
WREG32(mmVM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32(mmVM_CONTEXT1_CNTL2, 4);
tmp = RREG32(mmVM_CONTEXT1_CNTL);
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
@@ -890,9 +910,9 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
else
gmc_v8_0_set_fault_enable_default(adev, true);
- gmc_v8_0_gart_flush_gpu_tlb(adev, 0);
+ gmc_v8_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->mc.gart_size >> 20),
+ (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr);
adev->gart.ready = true;
return 0;
@@ -965,21 +985,21 @@ static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
*
* Print human-readable fault information (VI).
*/
-static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev,
- u32 status, u32 addr, u32 mc_client)
+static void gmc_v8_0_vm_decode_fault(struct amdgpu_device *adev, u32 status,
+ u32 addr, u32 mc_client, unsigned pasid)
{
- u32 mc_id;
u32 vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
u32 protections = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
PROTECTIONS);
char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
+ u32 mc_id;
mc_id = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_ID);
- dev_err(adev->dev, "VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
- protections, vmid, addr,
+ dev_err(adev->dev, "VM fault (0x%02x, vmid %d, pasid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
+ protections, vmid, pasid, addr,
REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
MEMORY_CLIENT_RW) ?
"write" : "read", block, mc_client, mc_id);
@@ -1011,16 +1031,16 @@ static int gmc_v8_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gmc_v8_0_set_gart_funcs(adev);
+ gmc_v8_0_set_gmc_funcs(adev);
gmc_v8_0_set_irq_funcs(adev);
- adev->mc.shared_aperture_start = 0x2000000000000000ULL;
- adev->mc.shared_aperture_end =
- adev->mc.shared_aperture_start + (4ULL << 30) - 1;
- adev->mc.private_aperture_start =
- adev->mc.shared_aperture_end + 1;
- adev->mc.private_aperture_end =
- adev->mc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start =
+ adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
return 0;
}
@@ -1030,7 +1050,7 @@ static int gmc_v8_0_late_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (amdgpu_vm_fault_stop != AMDGPU_VM_FAULT_STOP_ALWAYS)
- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
else
return 0;
}
@@ -1044,7 +1064,7 @@ static int gmc_v8_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->flags & AMD_IS_APU) {
- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+ adev->gmc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
} else {
u32 tmp;
@@ -1053,14 +1073,14 @@ static int gmc_v8_0_sw_init(void *handle)
else
tmp = RREG32(mmMC_SEQ_MISC0);
tmp &= MC_SEQ_MISC0__MT__MASK;
- adev->mc.vram_type = gmc_v8_0_convert_vram_type(tmp);
+ adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp);
}
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault);
if (r)
return r;
@@ -1074,9 +1094,9 @@ static int gmc_v8_0_sw_init(void *handle)
* This is the max address of the GPU's
* internal address space.
*/
- adev->mc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
+ adev->gmc.mc_mask = 0xffffffffffULL; /* 40 bit MC */
- adev->mc.stolen_size = 256 * 1024;
+ adev->gmc.stolen_size = 256 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 40-bits.
@@ -1096,6 +1116,7 @@ static int gmc_v8_0_sw_init(void *handle)
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
pr_warn("amdgpu: No coherent DMA available\n");
}
+ adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
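In other words, SWIOTLB bounce buffering is enabled only when some I/O memory lies beyond the device's direct reach, i.e. when the highest I/O address exceeds 1 << dma_bits bytes (2^40 for the 40-bit PCIe DMA mask set above).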
r = gmc_v8_0_init_microcode(adev);
if (r) {
@@ -1146,8 +1167,8 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_vm_manager_fini(adev);
gmc_v8_0_gart_fini(adev);
amdgpu_bo_fini(adev);
- release_firmware(adev->mc.fw);
- adev->mc.fw = NULL;
+ release_firmware(adev->gmc.fw);
+ adev->gmc.fw = NULL;
return 0;
}
@@ -1188,7 +1209,7 @@ static int gmc_v8_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v8_0_gart_disable(adev);
return 0;
@@ -1268,10 +1289,10 @@ static bool gmc_v8_0_check_soft_reset(void *handle)
SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
}
if (srbm_soft_reset) {
- adev->mc.srbm_soft_reset = srbm_soft_reset;
+ adev->gmc.srbm_soft_reset = srbm_soft_reset;
return true;
} else {
- adev->mc.srbm_soft_reset = 0;
+ adev->gmc.srbm_soft_reset = 0;
return false;
}
}
@@ -1280,7 +1301,7 @@ static int gmc_v8_0_pre_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->mc.srbm_soft_reset)
+ if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_stop(adev);
@@ -1296,9 +1317,9 @@ static int gmc_v8_0_soft_reset(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 srbm_soft_reset;
- if (!adev->mc.srbm_soft_reset)
+ if (!adev->gmc.srbm_soft_reset)
return 0;
- srbm_soft_reset = adev->mc.srbm_soft_reset;
+ srbm_soft_reset = adev->gmc.srbm_soft_reset;
if (srbm_soft_reset) {
u32 tmp;
@@ -1326,7 +1347,7 @@ static int gmc_v8_0_post_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- if (!adev->mc.srbm_soft_reset)
+ if (!adev->gmc.srbm_soft_reset)
return 0;
gmc_v8_0_mc_resume(adev);
@@ -1407,7 +1428,8 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
addr);
dev_err(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
status);
- gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client);
+ gmc_v8_0_vm_decode_fault(adev, status, addr, mc_client,
+ entry->pasid);
}
return 0;
@@ -1639,9 +1661,11 @@ static const struct amd_ip_funcs gmc_v8_0_ip_funcs = {
.get_clockgating_state = gmc_v8_0_get_clockgating_state,
};
-static const struct amdgpu_gart_funcs gmc_v8_0_gart_funcs = {
- .flush_gpu_tlb = gmc_v8_0_gart_flush_gpu_tlb,
- .set_pte_pde = gmc_v8_0_gart_set_pte_pde,
+static const struct amdgpu_gmc_funcs gmc_v8_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v8_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v8_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v8_0_emit_pasid_mapping,
+ .set_pte_pde = gmc_v8_0_set_pte_pde,
.set_prt = gmc_v8_0_set_prt,
.get_vm_pte_flags = gmc_v8_0_get_vm_pte_flags,
.get_vm_pde = gmc_v8_0_get_vm_pde
@@ -1652,16 +1676,16 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = {
.process = gmc_v8_0_process_interrupt,
};
-static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gart.gart_funcs == NULL)
- adev->gart.gart_funcs = &gmc_v8_0_gart_funcs;
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs;
}
static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->mc.vm_fault.num_types = 1;
- adev->mc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v8_0_irq_funcs;
}
const struct amdgpu_ip_block_version gmc_v8_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 2719937e09d6..e687363900bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -21,6 +21,7 @@
*
*/
#include <linux/firmware.h>
+#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "gmc_v9_0.h"
#include "amdgpu_atomfirmware.h"
@@ -33,6 +34,7 @@
#include "vega10_enum.h"
#include "mmhub/mmhub_1_0_offset.h"
#include "athub/athub_1_0_offset.h"
+#include "oss/osssys_4_0_offset.h"
#include "soc15.h"
#include "soc15_common.h"
@@ -262,10 +264,10 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
if (printk_ratelimit()) {
dev_err(adev->dev,
- "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pas_id:%u)\n",
+ "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u)\n",
entry->vmid_src ? "mmhub" : "gfxhub",
entry->src_id, entry->ring_id, entry->vmid,
- entry->pas_id);
+ entry->pasid);
dev_err(adev->dev, " at page 0x%016llx from %d\n",
addr, entry->client_id);
if (!amdgpu_sriov_vf(adev))
@@ -284,8 +286,8 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
{
- adev->mc.vm_fault.num_types = 1;
- adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
+ adev->gmc.vm_fault.num_types = 1;
+ adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
}
static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
@@ -315,24 +317,21 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid)
*/
/**
- * gmc_v9_0_gart_flush_gpu_tlb - gart tlb flush callback
+ * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback
*
* @adev: amdgpu_device pointer
* @vmid: vm instance to flush
*
* Flush the TLB for the requested page table.
*/
-static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
uint32_t vmid)
{
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned i, j;
- /* flush hdp cache */
- adev->nbio_funcs->hdp_flush(adev);
-
- spin_lock(&adev->mc.invalidate_lock);
+ spin_lock(&adev->gmc.invalidate_lock);
for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
struct amdgpu_vmhub *hub = &adev->vmhub[i];
@@ -365,11 +364,52 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
- spin_unlock(&adev->mc.invalidate_lock);
+ spin_unlock(&adev->gmc.invalidate_lock);
+}
+
+static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
+ uint32_t req = gmc_v9_0_get_invalidate_req(vmid);
+ uint64_t flags = AMDGPU_PTE_VALID;
+ unsigned eng = ring->vm_inv_eng;
+
+ amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags);
+ pd_addr |= flags;
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid),
+ lower_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + (2 * vmid),
+ upper_32_bits(pd_addr));
+
+ amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
+
+ /* wait for the invalidate to complete */
+ amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
+ 1 << vmid, 1 << vmid);
+
+ return pd_addr;
+}
+
+static void gmc_v9_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid,
+ unsigned pasid)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t reg;
+
+ if (ring->funcs->vmhub == AMDGPU_GFXHUB)
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid;
+ else
+ reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid;
+
+ amdgpu_ring_emit_wreg(ring, reg, pasid);
}
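The reg_wait emitted in gmc_v9_0_emit_flush_gpu_tlb() above stalls the ring until the hub sets the per-VMID bit in its ack register. A CPU-side equivalent of that wait, as a sketch (the helper name and the untimed busy-wait are illustrative; the MMIO path in gmc_v9_0_flush_gpu_tlb() polls the same register under a timeout):

	static void wait_inv_ack(struct amdgpu_device *adev,
				 struct amdgpu_vmhub *hub,
				 unsigned eng, unsigned vmid)
	{
		/* the ack register carries one completion bit per VMID */
		while (!(RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng) & (1u << vmid)))
			cpu_relax();
	}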
/**
- * gmc_v9_0_gart_set_pte_pde - update the page tables using MMIO
+ * gmc_v9_0_set_pte_pde - update the page tables using MMIO
*
* @adev: amdgpu_device pointer
* @cpu_pt_addr: cpu address of the page table
@@ -379,11 +419,9 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
*
* Update the page tables using the CPU.
*/
-static int gmc_v9_0_gart_set_pte_pde(struct amdgpu_device *adev,
- void *cpu_pt_addr,
- uint32_t gpu_page_idx,
- uint64_t addr,
- uint64_t flags)
+static int gmc_v9_0_set_pte_pde(struct amdgpu_device *adev, void *cpu_pt_addr,
+ uint32_t gpu_page_idx, uint64_t addr,
+ uint64_t flags)
{
void __iomem *ptr = (void *)cpu_pt_addr;
uint64_t value;
@@ -474,10 +512,10 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
{
if (!(*flags & AMDGPU_PDE_PTE))
*addr = adev->vm_manager.vram_base_offset + *addr -
- adev->mc.vram_start;
+ adev->gmc.vram_start;
BUG_ON(*addr & 0xFFFF00000000003FULL);
- if (!adev->mc.translate_further)
+ if (!adev->gmc.translate_further)
return;
if (level == AMDGPU_VM_PDB1) {
@@ -493,34 +531,35 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
}
}
-static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
- .flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
- .set_pte_pde = gmc_v9_0_gart_set_pte_pde,
- .get_invalidate_req = gmc_v9_0_get_invalidate_req,
+static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = {
+ .flush_gpu_tlb = gmc_v9_0_flush_gpu_tlb,
+ .emit_flush_gpu_tlb = gmc_v9_0_emit_flush_gpu_tlb,
+ .emit_pasid_mapping = gmc_v9_0_emit_pasid_mapping,
+ .set_pte_pde = gmc_v9_0_set_pte_pde,
.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
.get_vm_pde = gmc_v9_0_get_vm_pde
};
-static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
+static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev)
{
- if (adev->gart.gart_funcs == NULL)
- adev->gart.gart_funcs = &gmc_v9_0_gart_funcs;
+ if (adev->gmc.gmc_funcs == NULL)
+ adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs;
}
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- gmc_v9_0_set_gart_funcs(adev);
+ gmc_v9_0_set_gmc_funcs(adev);
gmc_v9_0_set_irq_funcs(adev);
- adev->mc.shared_aperture_start = 0x2000000000000000ULL;
- adev->mc.shared_aperture_end =
- adev->mc.shared_aperture_start + (4ULL << 30) - 1;
- adev->mc.private_aperture_start =
- adev->mc.shared_aperture_end + 1;
- adev->mc.private_aperture_end =
- adev->mc.private_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
+ adev->gmc.shared_aperture_end =
+ adev->gmc.shared_aperture_start + (4ULL << 30) - 1;
+ adev->gmc.private_aperture_start =
+ adev->gmc.shared_aperture_end + 1;
+ adev->gmc.private_aperture_end =
+ adev->gmc.private_aperture_start + (4ULL << 30) - 1;
return 0;
}
@@ -634,7 +673,7 @@ static int gmc_v9_0_late_init(void *handle)
for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
BUG_ON(vm_inv_eng[i] > 16);
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
r = gmc_v9_0_ecc_available(adev);
if (r == 1) {
DRM_INFO("ECC is active.\n");
@@ -646,16 +685,16 @@ static int gmc_v9_0_late_init(void *handle)
}
}
- return amdgpu_irq_get(adev, &adev->mc.vm_fault, 0);
+ return amdgpu_irq_get(adev, &adev->gmc.vm_fault, 0);
}
static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev,
- struct amdgpu_mc *mc)
+ struct amdgpu_gmc *mc)
{
u64 base = 0;
if (!amdgpu_sriov_vf(adev))
base = mmhub_v1_0_get_fb_location(adev);
- amdgpu_device_vram_location(adev, &adev->mc, base);
+ amdgpu_device_vram_location(adev, &adev->gmc, base);
amdgpu_device_gart_location(adev, mc);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU)
@@ -679,10 +718,14 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
int chansize, numchan;
int r;
- adev->mc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
- if (!adev->mc.vram_width) {
+ if (amdgpu_emu_mode != 1)
+ adev->gmc.vram_width = amdgpu_atomfirmware_get_vram_width(adev);
+ if (!adev->gmc.vram_width) {
/* hbm memory channel size */
- chansize = 128;
+ if (adev->flags & AMD_IS_APU)
+ chansize = 64;
+ else
+ chansize = 128;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_AON0_DramBaseAddress0);
tmp &= DF_CS_AON0_DramBaseAddress0__IntLvNumChan_MASK;
@@ -717,43 +760,50 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
numchan = 2;
break;
}
- adev->mc.vram_width = numchan * chansize;
+ adev->gmc.vram_width = numchan * chansize;
}
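As a worked example: a Vega-class board whose DF reports 16 interleaved HBM2 channels gives vram_width = 16 * 128 = 2048 bits, while a hypothetical APU with two 64-bit DDR4 channels gives 2 * 64 = 128 bits.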
/* size in MB on si */
- adev->mc.mc_vram_size =
+ adev->gmc.mc_vram_size =
adev->nbio_funcs->get_memsize(adev) * 1024ULL * 1024ULL;
- adev->mc.real_vram_size = adev->mc.mc_vram_size;
+ adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
if (!(adev->flags & AMD_IS_APU)) {
r = amdgpu_device_resize_fb_bar(adev);
if (r)
return r;
}
- adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
- adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
+ adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
+ adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+#ifdef CONFIG_X86_64
+ if (adev->flags & AMD_IS_APU) {
+ adev->gmc.aper_base = gfxhub_v1_0_get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
/* In case the PCI BAR is larger than the actual amount of vram */
- adev->mc.visible_vram_size = adev->mc.aper_size;
- if (adev->mc.visible_vram_size > adev->mc.real_vram_size)
- adev->mc.visible_vram_size = adev->mc.real_vram_size;
+ adev->gmc.visible_vram_size = adev->gmc.aper_size;
+ if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
+ adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1) {
switch (adev->asic_type) {
case CHIP_VEGA10: /* all engines support GPUVM */
+ case CHIP_VEGA12: /* all engines support GPUVM */
default:
- adev->mc.gart_size = 256ULL << 20;
+ adev->gmc.gart_size = 512ULL << 20;
break;
case CHIP_RAVEN: /* DCE SG support */
- adev->mc.gart_size = 1024ULL << 20;
+ adev->gmc.gart_size = 1024ULL << 20;
break;
}
} else {
- adev->mc.gart_size = (u64)amdgpu_gart_size << 20;
+ adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
- gmc_v9_0_vram_gtt_location(adev, &adev->mc);
+ gmc_v9_0_vram_gtt_location(adev, &adev->gmc);
return 0;
}
@@ -785,23 +835,22 @@ static int gmc_v9_0_sw_init(void *handle)
gfxhub_v1_0_init(adev);
mmhub_v1_0_init(adev);
- spin_lock_init(&adev->mc.invalidate_lock);
+ spin_lock_init(&adev->gmc.invalidate_lock);
+ adev->gmc.vram_type = amdgpu_atomfirmware_get_vram_type(adev);
switch (adev->asic_type) {
case CHIP_RAVEN:
- adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
if (adev->rev_id == 0x0 || adev->rev_id == 0x1) {
amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48);
} else {
/* vm_size is 128TB + 512GB for legacy 3-level page support */
amdgpu_vm_adjust_size(adev, 128 * 1024 + 512, 9, 2, 48);
- adev->mc.translate_further =
+ adev->gmc.translate_further =
adev->vm_manager.num_level > 1;
}
break;
case CHIP_VEGA10:
- /* XXX Don't know how to get VRAM type yet. */
- adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+ case CHIP_VEGA12:
/*
* To fulfill 4-level page support,
* vm size is 256TB (48bit), maximum size of Vega10,
@@ -814,10 +863,10 @@ static int gmc_v9_0_sw_init(void *handle)
}
/* This interrupt is VMC page fault.*/
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VMC, 0,
- &adev->mc.vm_fault);
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UTCL2, 0,
- &adev->mc.vm_fault);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, 0,
+ &adev->gmc.vm_fault);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, 0,
+ &adev->gmc.vm_fault);
if (r)
return r;
@@ -826,13 +875,13 @@ static int gmc_v9_0_sw_init(void *handle)
* This is the max address of the GPU's
* internal address space.
*/
- adev->mc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
+ adev->gmc.mc_mask = 0xffffffffffffULL; /* 48 bit MC */
/*
* It needs to reserve 8M stolen memory for vega10
* TODO: Figure out how to avoid that...
*/
- adev->mc.stolen_size = 8 * 1024 * 1024;
+ adev->gmc.stolen_size = 8 * 1024 * 1024;
/* set DMA mask + need_dma32 flags.
* PCIE - can handle 44-bits.
@@ -852,6 +901,7 @@ static int gmc_v9_0_sw_init(void *handle)
pci_set_consistent_dma_mask(adev->pdev, DMA_BIT_MASK(32));
printk(KERN_WARNING "amdgpu: No coherent DMA available.\n");
}
+ adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
r = gmc_v9_0_mc_init(adev);
if (r)
@@ -917,6 +967,8 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_athub_1_0_0,
ARRAY_SIZE(golden_settings_athub_1_0_0));
break;
+ case CHIP_VEGA12:
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_athub_1_0_0,
@@ -973,7 +1025,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp);
/* After HDP is initialized, flush HDP.*/
- adev->nbio_funcs->hdp_flush(adev);
+ adev->nbio_funcs->hdp_flush(adev, NULL);
if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
value = false;
@@ -982,10 +1034,10 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
gfxhub_v1_0_set_fault_enable_default(adev, value);
mmhub_v1_0_set_fault_enable_default(adev, value);
- gmc_v9_0_gart_flush_gpu_tlb(adev, 0);
+ gmc_v9_0_flush_gpu_tlb(adev, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
- (unsigned)(adev->mc.gart_size >> 20),
+ (unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)adev->gart.table_addr);
adev->gart.ready = true;
return 0;
@@ -1036,7 +1088,7 @@ static int gmc_v9_0_hw_fini(void *handle)
return 0;
}
- amdgpu_irq_put(adev, &adev->mc.vm_fault, 0);
+ amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0);
gmc_v9_0_gart_disable(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index c4e4be3dd31d..842c4b677b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -111,7 +111,7 @@ static int iceland_ih_irq_init(struct amdgpu_device *adev)
iceland_ih_disable_interrupts(adev);
/* setup interrupt control */
- WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -260,7 +260,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev,
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
- entry->pas_id = (dw[2] >> 16) & 0xffff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index d9e9e52a0def..26ba984ab2b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -42,6 +42,8 @@
#define KV_MINIMUM_ENGINE_CLOCK 800
#define SMC_RAM_END 0x40000
+static const struct amd_pm_funcs kv_dpm_funcs;
+
static void kv_dpm_set_irq_funcs(struct amdgpu_device *adev);
static int kv_enable_nb_dpm(struct amdgpu_device *adev,
bool enable);
@@ -2960,6 +2962,8 @@ static int kv_dpm_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->powerplay.pp_funcs = &kv_dpm_funcs;
+ adev->powerplay.pp_handle = adev;
kv_dpm_set_irq_funcs(adev);
return 0;
@@ -3301,7 +3305,7 @@ static int kv_dpm_read_sensor(void *handle, int idx,
}
}
-const struct amd_ip_funcs kv_dpm_ip_funcs = {
+static const struct amd_ip_funcs kv_dpm_ip_funcs = {
.name = "kv_dpm",
.early_init = kv_dpm_early_init,
.late_init = kv_dpm_late_init,
@@ -3318,8 +3322,16 @@ const struct amd_ip_funcs kv_dpm_ip_funcs = {
.set_powergating_state = kv_dpm_set_powergating_state,
};
-const struct amd_pm_funcs kv_dpm_funcs = {
- .get_temperature = &kv_dpm_get_temp,
+const struct amdgpu_ip_block_version kv_smu_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SMC,
+ .major = 1,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &kv_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs kv_dpm_funcs = {
.pre_set_power_state = &kv_dpm_pre_set_power_state,
.set_power_state = &kv_dpm_set_power_state,
.post_set_power_state = &kv_dpm_post_set_power_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index ffd5b7ee49c4..43f925773b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -50,7 +50,7 @@ static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev)
uint64_t value;
BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL));
- value = adev->gart.table_addr - adev->mc.vram_start +
+ value = adev->gart.table_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
value &= 0x0000FFFFFFFFF000ULL;
value |= 0x1; /* valid bit */
@@ -67,14 +67,14 @@ static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev)
mmhub_v1_0_init_gart_pt_regs(adev);
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
- (u32)(adev->mc.gart_start >> 12));
+ (u32)(adev->gmc.gart_start >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32,
- (u32)(adev->mc.gart_start >> 44));
+ (u32)(adev->gmc.gart_start >> 44));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32,
- (u32)(adev->mc.gart_end >> 12));
+ (u32)(adev->gmc.gart_end >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32,
- (u32)(adev->mc.gart_end >> 44));
+ (u32)(adev->gmc.gart_end >> 44));
}
static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
@@ -89,12 +89,12 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
/* Program the system aperture low logical page number. */
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR,
- adev->mc.vram_start >> 18);
+ adev->gmc.vram_start >> 18);
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR,
- adev->mc.vram_end >> 18);
+ adev->gmc.vram_end >> 18);
/* Set default page address. */
- value = adev->vram_scratch.gpu_addr - adev->mc.vram_start +
+ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start +
adev->vm_manager.vram_base_offset;
WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_DEFAULT_ADDR_LSB,
(u32)(value >> 12));
@@ -103,9 +103,9 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev)
/* Program "protection fault". */
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_LO32,
- (u32)(adev->dummy_page.addr >> 12));
+ (u32)(adev->dummy_page_addr >> 12));
WREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_DEFAULT_ADDR_HI32,
- (u32)((u64)adev->dummy_page.addr >> 44));
+ (u32)((u64)adev->dummy_page_addr >> 44));
tmp = RREG32_SOC15(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL2);
tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL2,
@@ -155,7 +155,7 @@ static void mmhub_v1_0_init_cache_regs(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL2, INVALIDATE_L2_CACHE, 1);
WREG32_SOC15(MMHUB, 0, mmVM_L2_CNTL2, tmp);
- if (adev->mc.translate_further) {
+ if (adev->gmc.translate_further) {
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3, BANK_SELECT, 12);
tmp = REG_SET_FIELD(tmp, VM_L2_CNTL3,
L2_CACHE_BIGK_FRAGMENT_SIZE, 9);
@@ -207,7 +207,7 @@ static void mmhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
num_level = adev->vm_manager.num_level;
block_size = adev->vm_manager.block_size;
- if (adev->mc.translate_further)
+ if (adev->gmc.translate_further)
num_level -= 1;
else
block_size -= 9;
@@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = {
{0x11, 0x6a684},
{0x19, 0xea68e},
{0x29, 0xa69e},
- {0x2b, 0x34a6c0},
- {0x61, 0x83a707},
- {0xe6, 0x8a7a4},
- {0xf0, 0x1a7b8},
- {0xf3, 0xfa7cc},
- {0x104, 0x17a7dd},
- {0x11d, 0xa7dc},
- {0x11f, 0x12a7f5},
- {0x133, 0xa808},
- {0x135, 0x12a810},
- {0x149, 0x7a82c}
+ {0x2b, 0x0010a6c0},
+ {0x3d, 0x83a707},
+ {0xc2, 0x8a7a4},
+ {0xcc, 0x1a7b8},
+ {0xcf, 0xfa7cc},
+ {0xe0, 0x17a7dd},
+ {0xf9, 0xa7dc},
+ {0xfb, 0x12a7f5},
+ {0x10f, 0xa808},
+ {0x111, 0x12a810},
+ {0x125, 0x7a82c}
};
#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
-#define PCTL0_RENG_EXEC_END_PTR 0x151
+#define PCTL0_RENG_EXEC_END_PTR 0x12d
#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640
#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
@@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return;
+ /****************** pctl0 **********************/
pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
- pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
- pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
/* Light sleep must be disabled before writing to pctl0 registers */
pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
pctl0_data[i].data);
}
- /* Set the reng execute end ptr for pctl0 */
- pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
- PCTL0_RENG_EXECUTE,
- RENG_EXECUTE_END_PTR,
- PCTL0_RENG_EXEC_END_PTR);
- WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+ /* Re-enable light sleep */
+ pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+ WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+
+ /****************** pctl1 **********************/
+ pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
+ pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
/* Light sleep must be disabled before writing to pctl1 registers */
pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)
pctl1_data[i].data);
}
+ /* Re-enable light sleep */
+ pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+ WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+
+ mmhub_v1_0_power_gating_write_save_ranges(adev);
+
+ /* Set the reng execute end ptr for pctl0 */
+ pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+ PCTL0_RENG_EXECUTE,
+ RENG_EXECUTE_END_PTR,
+ PCTL0_RENG_EXEC_END_PTR);
+ WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
/* Set the reng execute end ptr for pctl1 */
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_END_PTR,
PCTL1_RENG_EXEC_END_PTR);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-
- mmhub_v1_0_power_gating_write_save_ranges(adev);
-
- /* Re-enable light sleep */
- pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
- pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
- WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
}
void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
@@ -466,6 +471,9 @@ void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
RENG_EXECUTE_ON_REG_UPDATE, 1);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
+ if (adev->powerplay.pp_funcs->set_mmhub_powergating_by_smu)
+ amdgpu_dpm_set_mmhub_powergating_by_smu(adev);
+
} else {
pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
PCTL0_RENG_EXECUTE,
@@ -494,9 +502,9 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
* SRIOV driver need to program them
*/
WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE,
- adev->mc.vram_start >> 24);
+ adev->gmc.vram_start >> 24);
WREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP,
- adev->mc.vram_end >> 24);
+ adev->gmc.vram_end >> 24);
}
/* GART Enable. */
@@ -725,6 +733,7 @@ int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev,
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
mmhub_v1_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 271452d3999a..493348672475 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -33,56 +33,34 @@
static void xgpu_ai_mailbox_send_ack(struct amdgpu_device *adev)
{
- u32 reg;
- int timeout = AI_MAILBOX_TIMEDOUT;
- u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_ACK, 1);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL), reg);
-
- /*Wait for RCV_MSG_VALID to be 0*/
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- while (reg & mask) {
- if (timeout <= 0) {
- pr_err("RCV_MSG_VALID is not cleared\n");
- break;
- }
- mdelay(1);
- timeout -=1;
-
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- }
+ WREG8(AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE, 2);
}
static void xgpu_ai_mailbox_set_valid(struct amdgpu_device *adev, bool val)
{
- u32 reg;
+ WREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE, val ? 1 : 0);
+}
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- reg = REG_SET_FIELD(reg, BIF_BX_PF0_MAILBOX_CONTROL,
- TRN_MSG_VALID, val ? 1 : 0);
- WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL),
- reg);
+/*
+ * this peek_msg may *only* be called from the IRQ routine, because in
+ * that context the RCV_MSG_VALID field of BIF_BX_PF0_MAILBOX_CONTROL
+ * has already been set to 1 by the host.
+ *
+ * if called outside the IRQ routine, peek_msg is not guaranteed to
+ * return the correct value, since RCV_DW0 is only meaningful while
+ * RCV_MSG_VALID is set by the host.
+ */
+static enum idh_event xgpu_ai_mailbox_peek_msg(struct amdgpu_device *adev)
+{
+ return RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
+ mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
}
+
static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
enum idh_event event)
{
u32 reg;
- u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, RCV_MSG_VALID);
-
- if (event != IDH_FLR_NOTIFICATION_CMPL) {
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- if (!(reg & mask))
- return -ENOENT;
- }
reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW0));
@@ -94,54 +72,67 @@ static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
return 0;
}
+static uint8_t xgpu_ai_peek_ack(struct amdgpu_device *adev)
+{
+ return RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE) & 2;
+}
+
static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
{
- int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
- u32 mask = REG_FIELD_MASK(BIF_BX_PF0_MAILBOX_CONTROL, TRN_MSG_ACK);
- u32 reg;
+ int timeout = AI_MAILBOX_POLL_ACK_TIMEDOUT;
+ u8 reg;
+
+ do {
+ reg = RREG8(AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE);
+ if (reg & 2)
+ return 0;
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- while (!(reg & mask)) {
- if (timeout <= 0) {
- pr_err("Doesn't get ack from pf.\n");
- r = -ETIME;
- break;
- }
mdelay(5);
timeout -= 5;
+ } while (timeout > 1);
- reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
- mmBIF_BX_PF0_MAILBOX_CONTROL));
- }
+ pr_err("Doesn't get TRN_MSG_ACK from pf in %d msec\n", AI_MAILBOX_POLL_ACK_TIMEDOUT);
- return r;
+ return -ETIME;
}
static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
{
- int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
-
- r = xgpu_ai_mailbox_rcv_msg(adev, event);
- while (r) {
- if (timeout <= 0) {
- pr_err("Doesn't get msg:%d from pf.\n", event);
- r = -ETIME;
- break;
- }
- mdelay(5);
- timeout -= 5;
+ int r, timeout = AI_MAILBOX_POLL_MSG_TIMEDOUT;
+ do {
r = xgpu_ai_mailbox_rcv_msg(adev, event);
- }
+ if (!r)
+ return 0;
- return r;
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+ pr_err("Doesn't get msg:%d from pf, error=%d\n", event, r);
+
+ return -ETIME;
}
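Note the asymmetry between the two loops: xgpu_ai_poll_ack() keeps busy-waiting with mdelay(), presumably because it can be called where sleeping is not allowed, while xgpu_ai_poll_msg() now sleeps with msleep() in 10 ms steps; both give up with -ETIME once the remaining budget falls to 1 ms or less.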
static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
enum idh_request req, u32 data1, u32 data2, u32 data3) {
u32 reg;
int r;
+ uint8_t trn;
+
+ /* IMPORTANT:
+ * clear TRN_MSG_VALID to clear the host's RCV_MSG_VALID; with the
+ * host's RCV_MSG_VALID cleared, hw automatically clears the host's
+ * RCV_MSG_ACK, which in turn clears the VF's TRN_MSG_ACK. Otherwise
+ * the xgpu_ai_poll_ack() below would return immediately.
+ */
+ do {
+ xgpu_ai_mailbox_set_valid(adev, false);
+ trn = xgpu_ai_peek_ack(adev);
+ if (trn) {
+ pr_err("trn=%x ACK should not assert! wait again !\n", trn);
+ msleep(1);
+ }
+ } while (trn);
reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
@@ -245,15 +236,36 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
{
struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
-
- /* wait until RCV_MSG become 3 */
- if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
- pr_err("failed to recieve FLR_CMPL\n");
- return;
- }
-
- /* Trigger recovery due to world switch failure */
- amdgpu_device_gpu_recover(adev, NULL, false);
+ int timeout = AI_MAILBOX_POLL_FLR_TIMEDOUT;
+ int locked;
+
+ /* block amdgpu_device_gpu_recover() till the FLR COMPLETE message is
+ * received, otherwise the mailbox message will be ruined/reset by
+ * the VF FLR.
+ *
+ * we can unlock the lock_reset to allow "amdgpu_job_timedout" to run
+ * gpu_recover() after FLR_NOTIFICATION_CMPL is received, which means
+ * the host side has finished this VF's FLR.
+ */
+ locked = mutex_trylock(&adev->lock_reset);
+ if (locked)
+ adev->in_gpu_reset = 1;
+
+ do {
+ if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
+ goto flr_done;
+
+ msleep(10);
+ timeout -= 10;
+ } while (timeout > 1);
+
+flr_done:
+ if (locked)
+ mutex_unlock(&adev->lock_reset);
+
+ /* Trigger recovery for world switch failure if no TDR */
+ if (amdgpu_lockup_timeout == 0)
+ amdgpu_device_gpu_recover(adev, NULL, true);
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
@@ -274,24 +286,22 @@ static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- int r;
-
- /* trigger gpu-reset by hypervisor only if TDR disbaled */
- if (!amdgpu_gpu_recovery) {
- /* see what event we get */
- r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
-
- /* sometimes the interrupt is delayed to inject to VM, so under such case
- * the IDH_FLR_NOTIFICATION is overwritten by VF FLR from GIM side, thus
- * above recieve message could be failed, we should schedule the flr_work
- * anyway
+ enum idh_event event = xgpu_ai_mailbox_peek_msg(adev);
+
+ switch (event) {
+ case IDH_FLR_NOTIFICATION:
+ if (amdgpu_sriov_runtime(adev))
+ schedule_work(&adev->virt.flr_work);
+ break;
+ /* READY_TO_ACCESS_GPU is fetched by the kernel polling thread, so the
+ * IRQ handler can safely ignore it here since that polling thread will
+ * handle it; other messages, such as FLR complete, are not handled here.
*/
- if (r) {
- DRM_ERROR("FLR_NOTIFICATION is missed\n");
- xgpu_ai_mailbox_send_ack(adev);
- }
-
- schedule_work(&adev->virt.flr_work);
+ case IDH_CLR_MSG_BUF:
+ case IDH_FLR_NOTIFICATION_CMPL:
+ case IDH_READY_TO_ACCESS_GPU:
+ default:
+ break;
}
return 0;
@@ -319,11 +329,11 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
{
int r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 135, &adev->virt.rcv_irq);
if (r)
return r;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_BIF, 138, &adev->virt.ack_irq);
if (r) {
amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index 67e78576a9eb..b4a9ceea334b 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -24,7 +24,9 @@
#ifndef __MXGPU_AI_H__
#define __MXGPU_AI_H__
-#define AI_MAILBOX_TIMEDOUT 12000
+#define AI_MAILBOX_POLL_ACK_TIMEDOUT 500
+#define AI_MAILBOX_POLL_MSG_TIMEDOUT 12000
+#define AI_MAILBOX_POLL_FLR_TIMEDOUT 500
enum idh_request {
IDH_REQ_GPU_INIT_ACCESS = 1,
@@ -51,4 +53,7 @@ int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
+#define AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4
+#define AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL) * 4 + 1
+
#endif
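The byte offsets above follow from the register map: SOC15_REG_OFFSET() yields a dword index, so multiplying by 4 gives the byte address of BIF_BX_PF0_MAILBOX_CONTROL. The layout assumed by the new RREG8/WREG8 accessors is that byte 0 holds the VF transmit-side bits (TRN_MSG_VALID/TRN_MSG_ACK) and byte 1 the receive-side bits (RCV_MSG_VALID/RCV_MSG_ACK). A sketch of the math:

	u32 dw  = SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_CONTROL);
	u32 trn = dw * 4;	/* AI_MAIBOX_CONTROL_TRN_OFFSET_BYTE */
	u32 rcv = dw * 4 + 1;	/* AI_MAIBOX_CONTROL_RCV_OFFSET_BYTE */

Single-byte accesses let one side of the mailbox be updated without a read-modify-write race against the other half of the 32-bit control register.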
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index d4da663d5eb0..6f9c54978cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -53,9 +53,16 @@ static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
}
-static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev)
+static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(NBIO, 0,
+ mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL,
+ 0);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0);
}
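Callers now pick between the two flush flavors simply by passing a ring (nbio_v7_0 below gets the same treatment). For instance, init-time paths with no ring available keep the immediate MMIO write, while a ring backend can fold the flush into its command stream:

	adev->nbio_funcs->hdp_flush(adev, NULL);	/* immediate MMIO write */
	adev->nbio_funcs->hdp_flush(adev, ring);	/* emitted via ring->funcs->emit_wreg */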
static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev)
@@ -126,7 +133,7 @@ static void nbio_v6_1_ih_control(struct amdgpu_device *adev)
u32 interrupt_cntl;
/* setup interrupt control */
- WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -213,12 +220,12 @@ static u32 nbio_v6_1_get_hdp_flush_done_offset(struct amdgpu_device *adev)
static u32 nbio_v6_1_get_pcie_index_offset(struct amdgpu_device *adev)
{
- return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX);
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2);
}
static u32 nbio_v6_1_get_pcie_data_offset(struct amdgpu_device *adev)
{
- return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA);
+ return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2);
}
static const struct nbio_hdp_flush_reg nbio_v6_1_hdp_flush_reg = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
index 17a9131a4598..df34dc79d444 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
@@ -53,9 +53,14 @@ static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable)
WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0);
}
-static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev)
+static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
- WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
}
static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
@@ -203,7 +208,7 @@ static void nbio_v7_0_ih_control(struct amdgpu_device *adev)
u32 interrupt_cntl;
/* setup interrupt control */
- WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 5a9fe24697f9..8873d833a7f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -87,7 +87,7 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
return 0;
}
-int psp_v10_0_init_microcode(struct psp_context *psp)
+static int psp_v10_0_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
@@ -133,7 +133,8 @@ out:
return err;
}
-int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd)
+static int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd)
{
int ret;
uint64_t fw_mem_mc_addr = ucode->mc_addr;
@@ -152,7 +153,8 @@ int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cm
return ret;
}
-int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v10_0_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
@@ -177,7 +179,8 @@ int psp_v10_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
return 0;
}
-int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v10_0_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
unsigned int psp_ring_reg = 0;
@@ -208,7 +211,8 @@ int psp_v10_0_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
return ret;
}
-int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v10_0_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
@@ -231,7 +235,8 @@ int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
return ret;
}
-int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v10_0_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring = &psp->km_ring;
@@ -248,10 +253,10 @@ int psp_v10_0_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type
return ret;
}
-int psp_v10_0_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
+static int psp_v10_0_cmd_submit(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+ int index)
{
unsigned int psp_write_ptr_reg = 0;
struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
@@ -298,9 +303,9 @@ int psp_v10_0_cmd_submit(struct psp_context *psp,
static int
psp_v10_0_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
+ unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+ unsigned int *sram_data_reg_offset,
+ enum AMDGPU_UCODE_ID ucode_id)
{
int ret = 0;
@@ -383,9 +388,9 @@ psp_v10_0_sram_map(struct amdgpu_device *adev,
return ret;
}
-bool psp_v10_0_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
+static bool psp_v10_0_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type)
{
int err = 0;
unsigned int fw_sram_reg_val = 0;
@@ -419,8 +424,25 @@ bool psp_v10_0_compare_sram_data(struct psp_context *psp,
}
-int psp_v10_0_mode1_reset(struct psp_context *psp)
+static int psp_v10_0_mode1_reset(struct psp_context *psp)
{
DRM_INFO("psp mode 1 reset not supported now! \n");
return -EINVAL;
}
+
+static const struct psp_funcs psp_v10_0_funcs = {
+ .init_microcode = psp_v10_0_init_microcode,
+ .prep_cmd_buf = psp_v10_0_prep_cmd_buf,
+ .ring_init = psp_v10_0_ring_init,
+ .ring_create = psp_v10_0_ring_create,
+ .ring_stop = psp_v10_0_ring_stop,
+ .ring_destroy = psp_v10_0_ring_destroy,
+ .cmd_submit = psp_v10_0_cmd_submit,
+ .compare_sram_data = psp_v10_0_compare_sram_data,
+ .mode1_reset = psp_v10_0_mode1_reset,
+};
+
+void psp_v10_0_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v10_0_funcs;
+}
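With the entry points now static, the only symbol the common PSP code needs from this file is psp_v10_0_set_psp_funcs(). A hypothetical selection sketch (the dispatch site and ASIC cases are illustrative, not the actual code):

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		psp_v10_0_set_psp_funcs(&adev->psp);
		break;
	default:
		psp_v3_1_set_psp_funcs(&adev->psp);
		break;
	}

Every subsequent operation then goes through the psp->funcs table.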
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
index 451e8308303f..20c2a94859d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.h
@@ -27,24 +27,6 @@
#include "amdgpu_psp.h"
-extern int psp_v10_0_init_microcode(struct psp_context *psp);
-extern int psp_v10_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
- struct psp_gfx_cmd_resp *cmd);
-extern int psp_v10_0_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v10_0_ring_create(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v10_0_ring_stop(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v10_0_ring_destroy(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v10_0_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index);
-extern bool psp_v10_0_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type);
+void psp_v10_0_set_psp_funcs(struct psp_context *psp);
-extern int psp_v10_0_mode1_reset(struct psp_context *psp);
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 19bd1934e63d..196e75def1f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -39,6 +39,8 @@
MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega12_asd.bin");
#define smnMP1_FIRMWARE_FLAGS 0x3010028
@@ -93,7 +95,7 @@ psp_v3_1_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *t
return 0;
}
-int psp_v3_1_init_microcode(struct psp_context *psp)
+static int psp_v3_1_init_microcode(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
const char *chip_name;
@@ -107,6 +109,9 @@ int psp_v3_1_init_microcode(struct psp_context *psp)
case CHIP_VEGA10:
chip_name = "vega10";
break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
default: BUG();
}
@@ -161,7 +166,7 @@ out:
return err;
}
-int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
+static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
{
int ret;
uint32_t psp_gfxdrv_command_reg = 0;
@@ -202,7 +207,7 @@ int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp)
return ret;
}
-int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
+static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
{
int ret;
unsigned int psp_gfxdrv_command_reg = 0;
@@ -243,7 +248,8 @@ int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
return ret;
}
-int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd)
+static int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
+ struct psp_gfx_cmd_resp *cmd)
{
int ret;
uint64_t fw_mem_mc_addr = ucode->mc_addr;
@@ -262,7 +268,8 @@ int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd
return ret;
}
-int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v3_1_ring_init(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
@@ -287,7 +294,8 @@ int psp_v3_1_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
return 0;
}
-int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v3_1_ring_create(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
unsigned int psp_ring_reg = 0;
@@ -318,7 +326,8 @@ int psp_v3_1_ring_create(struct psp_context *psp, enum psp_ring_type ring_type)
return ret;
}
-int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v3_1_ring_stop(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring;
@@ -341,7 +350,8 @@ int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type)
return ret;
}
-int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
+static int psp_v3_1_ring_destroy(struct psp_context *psp,
+ enum psp_ring_type ring_type)
{
int ret = 0;
struct psp_ring *ring = &psp->km_ring;
@@ -358,10 +368,10 @@ int psp_v3_1_ring_destroy(struct psp_context *psp, enum psp_ring_type ring_type)
return ret;
}
-int psp_v3_1_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index)
+static int psp_v3_1_cmd_submit(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
+ int index)
{
unsigned int psp_write_ptr_reg = 0;
struct psp_gfx_rb_frame * write_frame = psp->km_ring.ring_mem;
@@ -410,9 +420,9 @@ int psp_v3_1_cmd_submit(struct psp_context *psp,
static int
psp_v3_1_sram_map(struct amdgpu_device *adev,
- unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
- unsigned int *sram_data_reg_offset,
- enum AMDGPU_UCODE_ID ucode_id)
+ unsigned int *sram_offset, unsigned int *sram_addr_reg_offset,
+ unsigned int *sram_data_reg_offset,
+ enum AMDGPU_UCODE_ID ucode_id)
{
int ret = 0;
@@ -495,9 +505,9 @@ psp_v3_1_sram_map(struct amdgpu_device *adev,
return ret;
}
-bool psp_v3_1_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type)
+static bool psp_v3_1_compare_sram_data(struct psp_context *psp,
+ struct amdgpu_firmware_info *ucode,
+ enum AMDGPU_UCODE_ID ucode_type)
{
int err = 0;
unsigned int fw_sram_reg_val = 0;
@@ -530,7 +540,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
return true;
}
-bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
+static bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
uint32_t reg;
@@ -541,7 +551,7 @@ bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
}
-int psp_v3_1_mode1_reset(struct psp_context *psp)
+static int psp_v3_1_mode1_reset(struct psp_context *psp)
{
int ret;
uint32_t offset;
@@ -574,3 +584,23 @@ int psp_v3_1_mode1_reset(struct psp_context *psp)
return 0;
}
+
+static const struct psp_funcs psp_v3_1_funcs = {
+ .init_microcode = psp_v3_1_init_microcode,
+ .bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
+ .bootloader_load_sos = psp_v3_1_bootloader_load_sos,
+ .prep_cmd_buf = psp_v3_1_prep_cmd_buf,
+ .ring_init = psp_v3_1_ring_init,
+ .ring_create = psp_v3_1_ring_create,
+ .ring_stop = psp_v3_1_ring_stop,
+ .ring_destroy = psp_v3_1_ring_destroy,
+ .cmd_submit = psp_v3_1_cmd_submit,
+ .compare_sram_data = psp_v3_1_compare_sram_data,
+ .smu_reload_quirk = psp_v3_1_smu_reload_quirk,
+ .mode1_reset = psp_v3_1_mode1_reset,
+};
+
+void psp_v3_1_set_psp_funcs(struct psp_context *psp)
+{
+ psp->funcs = &psp_v3_1_funcs;
+}
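
The psp_v3_1.c conversion above is the standard kernel ops-table pattern: every version-specific entry point becomes static, and the file's only remaining export is a setter that installs a const vtable. A minimal sketch of the consumer side, assuming a dispatch wrapper of this shape (the wrapper name and the -EINVAL policy are illustrative, not taken from the patch):

/* illustrative sketch: dispatch through the installed table so callers
 * never need to know whether PSP v3.1 or v10.0 is behind it */
static int psp_ring_init_sketch(struct psp_context *psp,
				enum psp_ring_type ring_type)
{
	if (!psp->funcs || !psp->funcs->ring_init)
		return -EINVAL;		/* no backend installed */
	return psp->funcs->ring_init(psp, ring_type);
}

The same setter shape (psp_v10_0_set_psp_funcs above, psp_v3_1_set_psp_funcs here) keeps each header down to a single declaration per IP version.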
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
index b05dbada7751..e411e31ba452 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.h
@@ -32,26 +32,6 @@ enum { PSP_BINARY_ALIGNMENT = 64 };
enum { PSP_BOOTLOADER_1_MEG_ALIGNMENT = 0x100000 };
enum { PSP_BOOTLOADER_8_MEM_ALIGNMENT = 0x800000 };
-extern int psp_v3_1_init_microcode(struct psp_context *psp);
-extern int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp);
-extern int psp_v3_1_bootloader_load_sos(struct psp_context *psp);
-extern int psp_v3_1_prep_cmd_buf(struct amdgpu_firmware_info *ucode,
- struct psp_gfx_cmd_resp *cmd);
-extern int psp_v3_1_ring_init(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v3_1_ring_create(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v3_1_ring_stop(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v3_1_ring_destroy(struct psp_context *psp,
- enum psp_ring_type ring_type);
-extern int psp_v3_1_cmd_submit(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr,
- int index);
-extern bool psp_v3_1_compare_sram_data(struct psp_context *psp,
- struct amdgpu_firmware_info *ucode,
- enum AMDGPU_UCODE_ID ucode_type);
-extern bool psp_v3_1_smu_reload_quirk(struct psp_context *psp);
-extern int psp_v3_1_mode1_reset(struct psp_context *psp);
+void psp_v3_1_set_psp_funcs(struct psp_context *psp);
+
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index d4787ad4d346..6452101c7aab 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -289,13 +289,6 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
-static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 1);
-}
/**
* sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
*
@@ -346,7 +339,7 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
if ((adev->mman.buffer_funcs_ring == sdma0) ||
(adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -491,7 +484,7 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -861,20 +854,7 @@ static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- if (vmid < 8) {
- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- } else {
- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
- }
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* flush TLB */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for flush */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
@@ -888,6 +868,15 @@ static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v2_4_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
static int sdma_v2_4_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1203,9 +1192,9 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.set_wptr = sdma_v2_4_ring_set_wptr,
.emit_frame_size =
6 + /* sdma_v2_4_ring_emit_hdp_flush */
- 3 + /* sdma_v2_4_ring_emit_hdp_invalidate */
+ 3 + /* hdp invalidate */
6 + /* sdma_v2_4_ring_emit_pipeline_sync */
- 12 + /* sdma_v2_4_ring_emit_vm_flush */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v2_4_ring_emit_vm_flush */
10 + 10 + 10, /* sdma_v2_4_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v2_4_ring_emit_ib */
.emit_ib = sdma_v2_4_ring_emit_ib,
@@ -1213,11 +1202,11 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
- .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
.test_ring = sdma_v2_4_ring_test_ring,
.test_ib = sdma_v2_4_ring_test_ib,
.insert_nop = sdma_v2_4_ring_insert_nop,
.pad_ib = sdma_v2_4_ring_pad_ib,
+ .emit_wreg = sdma_v2_4_ring_emit_wreg,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1316,9 +1305,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
.copy_pte = sdma_v2_4_vm_copy_pte,
.write_pte = sdma_v2_4_vm_write_pte,
-
- .set_max_nums_pte_pde = 0x1fffff >> 3,
- .set_pte_pde_num_dw = 10,
.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
};
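
Dropping the dedicated emit_hdp_invalidate hook in favour of the generic emit_wreg callback means HDP maintenance no longer needs its own ring entry point: an HDP invalidate is just one more register write, emitted as the same three-dword SRBM_WRITE packet, which is why the frame-size comment collapses to a flat "3 + /* hdp invalidate */". A sketch of the resulting caller, modelled on the si_invalidate_hdp helper added later in this diff (the VI-side name here is an assumption):

/* sketch: invalidate HDP directly over MMIO when there is no ring (or the
 * ring cannot emit register writes), otherwise via the generic callback */
static void vi_invalidate_hdp_sketch(struct amdgpu_device *adev,
				     struct amdgpu_ring *ring)
{
	if (!ring || !ring->funcs->emit_wreg) {
		WREG32(mmHDP_DEBUG0, 1);
		RREG32(mmHDP_DEBUG0);	/* read back so the write has landed */
	} else {
		amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
	}
}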
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 521978c40537..ecaef084dab1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -460,14 +460,6 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
-static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, mmHDP_DEBUG0);
- amdgpu_ring_write(ring, 1);
-}
-
/**
* sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
*
@@ -518,7 +510,7 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev)
if ((adev->mman.buffer_funcs_ring == sdma0) ||
(adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
@@ -719,14 +711,17 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI + sdma_offsets[i],
upper_32_bits(wptr_gpu_addr));
wptr_poll_cntl = RREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i]);
- if (ring->use_pollmem)
+ if (ring->use_pollmem) {
+ /* wptr polling is not fast enough, directly clean the wptr register */
+ WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
ENABLE, 1);
- else
+ } else {
wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
SDMA0_GFX_RB_WPTR_POLL_CNTL,
ENABLE, 0);
+ }
WREG32(mmSDMA0_GFX_RB_WPTR_POLL_CNTL + sdma_offsets[i], wptr_poll_cntl);
/* enable DMA RB */
@@ -758,7 +753,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -1127,20 +1122,7 @@ static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- if (vmid < 8) {
- amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- } else {
- amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8));
- }
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* flush TLB */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for flush */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
@@ -1154,6 +1136,15 @@ static void sdma_v3_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v3_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
+
static int sdma_v3_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1637,9 +1628,9 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.set_wptr = sdma_v3_0_ring_set_wptr,
.emit_frame_size =
6 + /* sdma_v3_0_ring_emit_hdp_flush */
- 3 + /* sdma_v3_0_ring_emit_hdp_invalidate */
+ 3 + /* hdp invalidate */
6 + /* sdma_v3_0_ring_emit_pipeline_sync */
- 12 + /* sdma_v3_0_ring_emit_vm_flush */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v3_0_ring_emit_vm_flush */
10 + 10 + 10, /* sdma_v3_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v3_0_ring_emit_ib */
.emit_ib = sdma_v3_0_ring_emit_ib,
@@ -1647,11 +1638,11 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
.test_ring = sdma_v3_0_ring_test_ring,
.test_ib = sdma_v3_0_ring_test_ib,
.insert_nop = sdma_v3_0_ring_insert_nop,
.pad_ib = sdma_v3_0_ring_pad_ib,
+ .emit_wreg = sdma_v3_0_ring_emit_wreg,
};
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1750,10 +1741,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
.copy_pte = sdma_v3_0_vm_copy_pte,
.write_pte = sdma_v3_0_vm_write_pte,
-
- /* not 0x3fffff due to HW limitation */
- .set_max_nums_pte_pde = 0x3fffe0 >> 3,
- .set_pte_pde_num_dw = 10,
.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index e92fb372bc99..2a8184082cd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -31,8 +31,6 @@
#include "sdma0/sdma0_4_0_sh_mask.h"
#include "sdma1/sdma1_4_0_offset.h"
#include "sdma1/sdma1_4_0_sh_mask.h"
-#include "mmhub/mmhub_1_0_offset.h"
-#include "mmhub/mmhub_1_0_sh_mask.h"
#include "hdp/hdp_4_0_offset.h"
#include "sdma0/sdma0_4_1_default.h"
@@ -42,6 +40,8 @@
MODULE_FIRMWARE("amdgpu/vega10_sdma.bin");
MODULE_FIRMWARE("amdgpu/vega10_sdma1.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma.bin");
+MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin");
MODULE_FIRMWARE("amdgpu/raven_sdma.bin");
#define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L
@@ -86,6 +86,13 @@ static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
};
+static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
+ SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
+};
+
static const struct soc15_reg_golden golden_settings_sdma_4_1[] =
{
SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
@@ -124,6 +131,14 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev)
golden_settings_sdma_vg10,
ARRAY_SIZE(golden_settings_sdma_vg10));
break;
+ case CHIP_VEGA12:
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_4,
+ ARRAY_SIZE(golden_settings_sdma_4));
+ soc15_program_register_sequence(adev,
+ golden_settings_sdma_vg12,
+ ARRAY_SIZE(golden_settings_sdma_vg12));
+ break;
case CHIP_RAVEN:
soc15_program_register_sequence(adev,
golden_settings_sdma_4_1,
@@ -164,6 +179,9 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev)
case CHIP_VEGA10:
chip_name = "vega10";
break;
+ case CHIP_VEGA12:
+ chip_name = "vega12";
+ break;
case CHIP_RAVEN:
chip_name = "raven";
break;
@@ -238,31 +256,27 @@ static uint64_t sdma_v4_0_ring_get_rptr(struct amdgpu_ring *ring)
static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u64 *wptr = NULL;
- uint64_t local_wptr = 0;
+ u64 wptr;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
- wptr = ((u64 *)&adev->wb.wb[ring->wptr_offs]);
- DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", *wptr);
- *wptr = (*wptr) >> 2;
- DRM_DEBUG("wptr/doorbell after shift == 0x%016llx\n", *wptr);
+ wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
+ DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
} else {
u32 lowbit, highbit;
int me = (ring == &adev->sdma.instance[0].ring) ? 0 : 1;
- wptr = &local_wptr;
lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR)) >> 2;
highbit = RREG32(sdma_v4_0_get_reg_offset(adev, me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2;
DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n",
me, highbit, lowbit);
- *wptr = highbit;
- *wptr = (*wptr) << 32;
- *wptr |= lowbit;
+ wptr = highbit;
+ wptr = wptr << 32;
+ wptr |= lowbit;
}
- return *wptr;
+ return wptr >> 2;
}
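
The get_wptr rewrite above is a correctness fix, not just a cleanup: the old code shifted the shared writeback slot in place (*wptr = (*wptr) >> 2), so a concurrent doorbell update from the hardware could be lost, and a re-read of the slot would shift an already-shifted value. The new code takes one READ_ONCE() snapshot and keeps all arithmetic on a local copy; reduced to its essentials the pattern is:

/* sketch: snapshot a GPU-written slot exactly once, never write it back */
static u64 snapshot_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));

	return wptr >> 2;	/* hardware stores the value pre-shifted */
}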
/**
@@ -375,16 +389,6 @@ static void sdma_v4_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
-static void sdma_v4_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE));
- amdgpu_ring_write(ring, 1);
-}
-
/**
* sdma_v4_0_ring_emit_fence - emit a fence on the DMA ring
*
@@ -440,7 +444,7 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
if ((adev->mman.buffer_funcs_ring == sdma0) ||
(adev->mman.buffer_funcs_ring == sdma1))
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
@@ -682,7 +686,7 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
@@ -1135,38 +1139,28 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
-
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
- SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2);
- amdgpu_ring_write(ring, upper_32_bits(pd_addr));
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
+}
- /* flush TLB */
+static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
- amdgpu_ring_write(ring, hub->vm_inv_eng0_req + eng);
- amdgpu_ring_write(ring, req);
+ amdgpu_ring_write(ring, reg);
+ amdgpu_ring_write(ring, val);
+}
- /* wait for flush */
+static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
+ amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 1 << vmid); /* reference */
- amdgpu_ring_write(ring, 1 << vmid); /* mask */
+ amdgpu_ring_write(ring, val); /* reference */
+ amdgpu_ring_write(ring, mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));
}
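
With emit_wreg and emit_reg_wait available, the whole open-coded sequence deleted above folds into amdgpu_gmc_emit_flush_gpu_tlb(). Reconstructed from the removed lines, the common helper has to perform roughly the following (the amdgpu_ring_emit_reg_wait wrapper and the helper's exact home in the GMC code are assumptions; the pd_addr translation through amdgpu_gart_get_vm_pde is elided):

/* sketch, mirroring the deleted sequence: program the page-table base,
 * kick the invalidation engine, then poll for the per-VMID ack bit --
 * everything expressed through the two new ring callbacks */
static void gmc_flush_gpu_tlb_sketch(struct amdgpu_ring *ring,
				     unsigned vmid, uint64_t pd_addr)
{
	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
	unsigned eng = ring->vm_inv_eng;

	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
			      lower_32_bits(pd_addr));
	amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + vmid * 2,
			      upper_32_bits(pd_addr));
	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req);
	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng,
				  1 << vmid, 1 << vmid);	/* ref, mask */
}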
@@ -1196,13 +1190,13 @@ static int sdma_v4_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA0, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA0, 224,
&adev->sdma.trap_irq);
if (r)
return r;
/* SDMA trap event */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_SDMA1, 224,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_SDMA1, 224,
&adev->sdma.trap_irq);
if (r)
return r;
@@ -1357,7 +1351,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
{
DRM_DEBUG("IH: SDMA trap\n");
switch (entry->client_id) {
- case AMDGPU_IH_CLIENTID_SDMA0:
+ case SOC15_IH_CLIENTID_SDMA0:
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[0].ring);
@@ -1373,7 +1367,7 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
break;
}
break;
- case AMDGPU_IH_CLIENTID_SDMA1:
+ case SOC15_IH_CLIENTID_SDMA1:
switch (entry->ring_id) {
case 0:
amdgpu_fence_process(&adev->sdma.instance[1].ring);
@@ -1423,7 +1417,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
if (def != data)
WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->sdma.num_instances > 1) {
def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
data &= ~(SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1451,7 +1445,7 @@ static void sdma_v4_0_update_medium_grain_clock_gating(
if (def != data)
WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CLK_CTRL), data);
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->sdma.num_instances > 1) {
def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_CLK_CTRL));
data |= (SDMA1_CLK_CTRL__SOFT_OVERRIDE7_MASK |
SDMA1_CLK_CTRL__SOFT_OVERRIDE6_MASK |
@@ -1482,7 +1476,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
/* 1-not override: enable sdma1 mem light sleep */
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->sdma.num_instances > 1) {
def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
data |= SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
@@ -1496,7 +1490,7 @@ static void sdma_v4_0_update_medium_grain_light_sleep(
WREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_POWER_CNTL), data);
/* 0-override:disable sdma1 mem light sleep */
- if (adev->asic_type == CHIP_VEGA10) {
+ if (adev->sdma.num_instances > 1) {
def = data = RREG32(SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_POWER_CNTL));
data &= ~SDMA1_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
if (def != data)
@@ -1515,6 +1509,7 @@ static int sdma_v4_0_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
sdma_v4_0_update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
@@ -1592,9 +1587,11 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.set_wptr = sdma_v4_0_ring_set_wptr,
.emit_frame_size =
6 + /* sdma_v4_0_ring_emit_hdp_flush */
- 3 + /* sdma_v4_0_ring_emit_hdp_invalidate */
+ 3 + /* hdp invalidate */
6 + /* sdma_v4_0_ring_emit_pipeline_sync */
- 18 + /* sdma_v4_0_ring_emit_vm_flush */
+ /* sdma_v4_0_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */
.emit_ib = sdma_v4_0_ring_emit_ib,
@@ -1602,11 +1599,12 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
.emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v4_0_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = sdma_v4_0_ring_emit_hdp_invalidate,
.test_ring = sdma_v4_0_ring_test_ring,
.test_ib = sdma_v4_0_ring_test_ib,
.insert_nop = sdma_v4_0_ring_insert_nop,
.pad_ib = sdma_v4_0_ring_pad_ib,
+ .emit_wreg = sdma_v4_0_ring_emit_wreg,
+ .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
};
static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1641,7 +1639,7 @@ static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
*
- * Copy GPU buffers using the DMA engine (VEGA10).
+ * Copy GPU buffers using the DMA engine (VEGA10/12).
* Used by the amdgpu ttm implementation to move pages if
* registered as the asic copy callback.
*/
@@ -1668,7 +1666,7 @@ static void sdma_v4_0_emit_copy_buffer(struct amdgpu_ib *ib,
* @dst_offset: dst GPU address
* @byte_count: number of bytes to xfer
*
- * Fill GPU buffers using the DMA engine (VEGA10).
+ * Fill GPU buffers using the DMA engine (VEGA10/12).
*/
static void sdma_v4_0_emit_fill_buffer(struct amdgpu_ib *ib,
uint32_t src_data,
@@ -1705,9 +1703,6 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
.copy_pte = sdma_v4_0_vm_copy_pte,
.write_pte = sdma_v4_0_vm_write_pte,
-
- .set_max_nums_pte_pde = 0x400000 >> 3,
- .set_pte_pde_num_dw = 10,
.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index 543101d5a5ed..b154667a8fd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -31,7 +31,8 @@
#include "amdgpu_uvd.h"
#include "amdgpu_vce.h"
#include "atom.h"
-#include "amdgpu_powerplay.h"
+#include "amd_pcie.h"
+#include "si_dpm.h"
#include "sid.h"
#include "si_ih.h"
#include "gfx_v6_0.h"
@@ -1230,6 +1231,27 @@ static void si_detect_hw_virtualization(struct amdgpu_device *adev)
adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
}
+static void si_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void si_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
static const struct amdgpu_asic_funcs si_asic_funcs =
{
.read_disabled_bios = &si_read_disabled_bios,
@@ -1241,6 +1263,8 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
.set_uvd_clocks = &si_set_uvd_clocks,
.set_vce_clocks = NULL,
.get_config_memsize = &si_get_config_memsize,
+ .flush_hdp = &si_flush_hdp,
+ .invalidate_hdp = &si_invalidate_hdp,
};
static uint32_t si_get_rev_id(struct amdgpu_device *adev)
@@ -1461,8 +1485,8 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
{
struct pci_dev *root = adev->pdev->bus->self;
int bridge_pos, gpu_pos;
- u32 speed_cntl, mask, current_data_rate;
- int ret, i;
+ u32 speed_cntl, current_data_rate;
+ int i;
u16 tmp16;
if (pci_is_root_bus(adev->pdev->bus))
@@ -1474,23 +1498,20 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
if (adev->flags & AMD_IS_APU)
return;
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (ret != 0)
- return;
-
- if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
+ if (!(adev->pm.pcie_gen_mask & (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)))
return;
speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
LC_CURRENT_DATA_RATE_SHIFT;
- if (mask & DRM_PCIE_SPEED_80) {
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate == 2) {
DRM_INFO("PCIE gen 3 link speeds already enabled\n");
return;
}
DRM_INFO("enabling PCIE gen 3 link speeds, disable with amdgpu.pcie_gen2=0\n");
- } else if (mask & DRM_PCIE_SPEED_50) {
+ } else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) {
if (current_data_rate == 1) {
DRM_INFO("PCIE gen 2 link speeds already enabled\n");
return;
@@ -1506,7 +1527,7 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
if (!gpu_pos)
return;
- if (mask & DRM_PCIE_SPEED_80) {
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) {
if (current_data_rate != 2) {
u16 bridge_cfg, gpu_cfg;
u16 bridge_cfg2, gpu_cfg2;
@@ -1589,9 +1610,9 @@ static void si_pcie_gen3_enable(struct amdgpu_device *adev)
pci_read_config_word(adev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
tmp16 &= ~0xf;
- if (mask & DRM_PCIE_SPEED_80)
+ if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
tmp16 |= 3;
- else if (mask & DRM_PCIE_SPEED_50)
+ else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
tmp16 |= 2;
else
tmp16 |= 1;
@@ -1962,7 +1983,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
@@ -1976,7 +1997,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
else
@@ -1990,7 +2011,7 @@ int si_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &si_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block);
amdgpu_device_ip_block_add(adev, &si_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &si_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block);
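
Throughout si.c (and si_dpm.c below) the per-call drm_pcie_get_speed_cap_mask() query is replaced by adev->pm.pcie_gen_mask, a mask cached once during device init; the removal of the amdgpu_device_get_pcie_info() call from soc15_common_early_init() further down is the other half of that move. Capability checks then reduce to mask tests, e.g. (helper name illustrative):

/* sketch: link-speed checks against the cached CAIL_* mask from amd_pcie.h */
static bool pcie_supports_gen3_sketch(struct amdgpu_device *adev)
{
	return !!(adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
}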
diff --git a/drivers/gpu/drm/amd/amdgpu/si.h b/drivers/gpu/drm/amd/amdgpu/si.h
index 589225080c24..06ed7212a0d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.h
+++ b/drivers/gpu/drm/amd/amdgpu/si.h
@@ -24,6 +24,8 @@
#ifndef __SI_H__
#define __SI_H__
+#define SI_FLUSH_GPU_TLB_NUM_WREG 2
+
void si_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int si_set_ip_blocks(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 9a29c1399091..b75d901ba3c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -24,6 +24,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#include "si.h"
#include "sid.h"
const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
@@ -74,20 +75,6 @@ static void si_dma_ring_emit_ib(struct amdgpu_ring *ring,
}
-static void si_dma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL));
- amdgpu_ring_write(ring, 1);
-}
-
-static void si_dma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (HDP_DEBUG0));
- amdgpu_ring_write(ring, 1);
-}
-
/**
* si_dma_ring_emit_fence - emit a fence on the DMA ring
*
@@ -134,7 +121,7 @@ static void si_dma_stop(struct amdgpu_device *adev)
WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl);
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, false);
ring->ready = false;
}
}
@@ -197,7 +184,7 @@ static int si_dma_start(struct amdgpu_device *adev)
}
if (adev->mman.buffer_funcs_ring == ring)
- amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
}
return 0;
@@ -475,17 +462,7 @@ static void si_dma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- if (vmid < 8)
- amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid));
- else
- amdgpu_ring_write(ring, (0xf << 16) | (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + (vmid - 8)));
- amdgpu_ring_write(ring, pd_addr >> 12);
-
- /* bits 0-7 are the VM contexts0-7 */
- amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
- amdgpu_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST));
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
/* wait for invalidate to complete */
amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_POLL_REG_MEM, 0, 0, 0, 0));
@@ -496,6 +473,14 @@ static void si_dma_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, (0 << 28) | 0x20); /* func(always) | poll interval */
}
+static void si_dma_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0, 0));
+ amdgpu_ring_write(ring, (0xf << 16) | reg);
+ amdgpu_ring_write(ring, val);
+}
+
static int si_dma_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -772,22 +757,20 @@ static const struct amdgpu_ring_funcs si_dma_ring_funcs = {
.get_wptr = si_dma_ring_get_wptr,
.set_wptr = si_dma_ring_set_wptr,
.emit_frame_size =
- 3 + /* si_dma_ring_emit_hdp_flush */
- 3 + /* si_dma_ring_emit_hdp_invalidate */
+ 3 + 3 + /* hdp flush / invalidate */
6 + /* si_dma_ring_emit_pipeline_sync */
- 12 + /* si_dma_ring_emit_vm_flush */
+ SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* si_dma_ring_emit_vm_flush */
9 + 9 + 9, /* si_dma_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 7 + 3, /* si_dma_ring_emit_ib */
.emit_ib = si_dma_ring_emit_ib,
.emit_fence = si_dma_ring_emit_fence,
.emit_pipeline_sync = si_dma_ring_emit_pipeline_sync,
.emit_vm_flush = si_dma_ring_emit_vm_flush,
- .emit_hdp_flush = si_dma_ring_emit_hdp_flush,
- .emit_hdp_invalidate = si_dma_ring_emit_hdp_invalidate,
.test_ring = si_dma_ring_test_ring,
.test_ib = si_dma_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = si_dma_ring_pad_ib,
+ .emit_wreg = si_dma_ring_emit_wreg,
};
static void si_dma_set_ring_funcs(struct amdgpu_device *adev)
@@ -891,9 +874,6 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
.copy_pte = si_dma_vm_copy_pte,
.write_pte = si_dma_vm_write_pte,
-
- .set_max_nums_pte_pde = 0xffff8 >> 3,
- .set_pte_pde_num_dw = 9,
.set_pte_pde = si_dma_vm_set_pte_pde,
};
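
The new frame-size expression can be checked against the constant it replaces: SI_FLUSH_GPU_TLB_NUM_WREG is 2 (si.h above), each si_dma_ring_emit_wreg costs 3 dwords (packet header, register, value), and the invalidate-complete poll costs 6, so:

/* worked example for the SI DMA vm_flush dword budget */
/* SI_FLUSH_GPU_TLB_NUM_WREG * 3 + 6  ==  2 * 3 + 6  ==  12,
 * identical to the hard-coded 12 it replaces */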
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
index ce675a7f179a..672eaffac0a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c
@@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "amdgpu_dpm.h"
#include "amdgpu_atombios.h"
+#include "amd_pcie.h"
#include "sid.h"
#include "r600_dpm.h"
#include "si_dpm.h"
@@ -66,6 +67,8 @@ MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
+static const struct amd_pm_funcs si_dpm_funcs;
+
union power_info {
struct _ATOM_POWERPLAY_INFO info;
struct _ATOM_POWERPLAY_INFO_V2 info_2;
@@ -3064,7 +3067,7 @@ static bool si_dpm_vblank_too_short(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 vblank_time = amdgpu_dpm_get_vblank_time(adev);
/* we never hit the non-gddr5 limit so disable it */
- u32 switch_limit = adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0;
+ u32 switch_limit = adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5 ? 450 : 0;
if (vblank_time < switch_limit)
return true;
@@ -3331,29 +3334,6 @@ static void btc_apply_voltage_delta_rules(struct amdgpu_device *adev,
}
}
-static enum amdgpu_pcie_gen r600_get_pcie_gen_support(struct amdgpu_device *adev,
- u32 sys_mask,
- enum amdgpu_pcie_gen asic_gen,
- enum amdgpu_pcie_gen default_gen)
-{
- switch (asic_gen) {
- case AMDGPU_PCIE_GEN1:
- return AMDGPU_PCIE_GEN1;
- case AMDGPU_PCIE_GEN2:
- return AMDGPU_PCIE_GEN2;
- case AMDGPU_PCIE_GEN3:
- return AMDGPU_PCIE_GEN3;
- default:
- if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
- return AMDGPU_PCIE_GEN3;
- else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
- return AMDGPU_PCIE_GEN2;
- else
- return AMDGPU_PCIE_GEN1;
- }
- return AMDGPU_PCIE_GEN1;
-}
-
static void r600_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u)
{
@@ -4350,7 +4330,7 @@ static u8 si_get_strobe_mode_settings(struct amdgpu_device *adev, u32 mclk)
if (mclk <= pi->mclk_strobe_mode_threshold)
strobe_mode = true;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
result = si_get_mclk_frequency_ratio(mclk, strobe_mode);
else
result = si_get_ddr3_mclk_frequency_ratio(mclk);
@@ -4937,7 +4917,7 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
table->initialState.levels[0].strobeMode =
si_get_strobe_mode_settings(adev,
initial_state->performance_levels[0].mclk);
@@ -5028,10 +5008,11 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
table->ACPIState.levels[0].vddc.index,
&table->ACPIState.levels[0].std_vddc);
}
- table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(adev,
- si_pi->sys_pcie_mask,
- si_pi->boot_pcie_gen,
- AMDGPU_PCIE_GEN1);
+ table->ACPIState.levels[0].gen2PCIE =
+ (u8)amdgpu_get_pcie_gen_support(adev,
+ si_pi->sys_pcie_mask,
+ si_pi->boot_pcie_gen,
+ AMDGPU_PCIE_GEN1);
if (si_pi->vddc_phase_shed_control)
si_populate_phase_shedding_value(adev,
@@ -5208,7 +5189,7 @@ static int si_init_smc_table(struct amdgpu_device *adev)
if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_STEPVDDC)
table->systemFlags |= PPSMC_SYSTEMFLAG_STEPVDDC;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
table->systemFlags |= PPSMC_SYSTEMFLAG_GDDR5;
if (adev->pm.dpm.platform_caps & ATOM_PP_PLATFORM_CAP_REVERT_GPIO5_POLARITY)
@@ -5385,7 +5366,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev,
mpll_ad_func_cntl &= ~YCLK_POST_DIV_MASK;
mpll_ad_func_cntl |= YCLK_POST_DIV(mpll_param.post_div);
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
mpll_dq_func_cntl &= ~(YCLK_SEL_MASK | YCLK_POST_DIV_MASK);
mpll_dq_func_cntl |= YCLK_SEL(mpll_param.yclk_sel) |
YCLK_POST_DIV(mpll_param.post_div);
@@ -5397,7 +5378,7 @@ static int si_populate_mclk_value(struct amdgpu_device *adev,
u32 tmp;
u32 reference_clock = adev->clock.mpll.reference_freq;
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5)
freq_nom = memory_clock * 4;
else
freq_nom = memory_clock * 2;
@@ -5489,7 +5470,7 @@ static int si_convert_power_level_to_smc(struct amdgpu_device *adev,
level->mcFlags |= SISLANDS_SMC_MC_PG_EN;
}
- if (adev->mc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
if (pl->mclk > pi->mclk_edc_enable_threshold)
level->mcFlags |= SISLANDS_SMC_MC_EDC_RD_FLAG;
@@ -5860,12 +5841,12 @@ static int si_set_mc_special_registers(struct amdgpu_device *adev,
table->mc_reg_table_entry[k].mc_data[j] =
(temp_reg & 0xffff0000) |
(table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff);
- if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
+ if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5)
table->mc_reg_table_entry[k].mc_data[j] |= 0x100;
}
j++;
- if (adev->mc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
+ if (adev->gmc.vram_type != AMDGPU_VRAM_TYPE_GDDR5) {
if (j >= SMC_SISLANDS_MC_REGISTER_ARRAY_SIZE)
return -EINVAL;
table->mc_reg_address[j].s1 = MC_PMG_AUTO_CMD;
@@ -7168,10 +7149,10 @@ static void si_parse_pplib_clock_info(struct amdgpu_device *adev,
pl->vddc = le16_to_cpu(clock_info->si.usVDDC);
pl->vddci = le16_to_cpu(clock_info->si.usVDDCI);
pl->flags = le32_to_cpu(clock_info->si.ulFlags);
- pl->pcie_gen = r600_get_pcie_gen_support(adev,
- si_pi->sys_pcie_mask,
- si_pi->boot_pcie_gen,
- clock_info->si.ucPCIEGen);
+ pl->pcie_gen = amdgpu_get_pcie_gen_support(adev,
+ si_pi->sys_pcie_mask,
+ si_pi->boot_pcie_gen,
+ clock_info->si.ucPCIEGen);
/* patch up vddc if necessary */
ret = si_get_leakage_voltage_from_leakage_index(adev, pl->vddc,
@@ -7326,7 +7307,6 @@ static int si_dpm_init(struct amdgpu_device *adev)
struct si_power_info *si_pi;
struct atom_clock_dividers dividers;
int ret;
- u32 mask;
si_pi = kzalloc(sizeof(struct si_power_info), GFP_KERNEL);
if (si_pi == NULL)
@@ -7336,11 +7316,9 @@ static int si_dpm_init(struct amdgpu_device *adev)
eg_pi = &ni_pi->eg;
pi = &eg_pi->rv7xx;
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (ret)
- si_pi->sys_pcie_mask = 0;
- else
- si_pi->sys_pcie_mask = mask;
+ si_pi->sys_pcie_mask =
+ (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+ CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
si_pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
si_pi->boot_pcie_gen = si_get_current_pcie_speed(adev);
@@ -7938,6 +7916,8 @@ static int si_dpm_early_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ adev->powerplay.pp_funcs = &si_dpm_funcs;
+ adev->powerplay.pp_handle = adev;
si_dpm_set_irq_funcs(adev);
return 0;
}
@@ -8038,7 +8018,7 @@ static int si_dpm_read_sensor(void *handle, int idx,
}
}
-const struct amd_ip_funcs si_dpm_ip_funcs = {
+static const struct amd_ip_funcs si_dpm_ip_funcs = {
.name = "si_dpm",
.early_init = si_dpm_early_init,
.late_init = si_dpm_late_init,
@@ -8055,8 +8035,16 @@ const struct amd_ip_funcs si_dpm_ip_funcs = {
.set_powergating_state = si_dpm_set_powergating_state,
};
-const struct amd_pm_funcs si_dpm_funcs = {
- .get_temperature = &si_dpm_get_temp,
+const struct amdgpu_ip_block_version si_smu_ip_block =
+{
+ .type = AMD_IP_BLOCK_TYPE_SMC,
+ .major = 6,
+ .minor = 0,
+ .rev = 0,
+ .funcs = &si_dpm_ip_funcs,
+};
+
+static const struct amd_pm_funcs si_dpm_funcs = {
.pre_set_power_state = &si_dpm_pre_set_power_state,
.set_power_state = &si_dpm_set_power_state,
.post_set_power_state = &si_dpm_post_set_power_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.h b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
index 9fe343de3477..6b7d292b919f 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.h
@@ -245,8 +245,7 @@ enum si_display_gap
SI_PM_DISPLAY_GAP_IGNORE = 3,
};
-extern const struct amd_ip_funcs si_dpm_ip_funcs;
-extern const struct amd_pm_funcs si_dpm_funcs;
+extern const struct amdgpu_ip_block_version si_smu_ip_block;
struct ni_leakage_coeffients
{
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index a04a033f57de..51cf8a30f6c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -57,7 +57,6 @@
#include "uvd_v7_0.h"
#include "vce_v4_0.h"
#include "vcn_v1_0.h"
-#include "amdgpu_powerplay.h"
#include "dce_virtual.h"
#include "mxgpu_ai.h"
@@ -417,12 +416,7 @@ static int soc15_asic_reset(struct amdgpu_device *adev)
pci_save_state(adev->pdev);
- for (i = 0; i < AMDGPU_MAX_IP_NUM; i++) {
- if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP){
- adev->ip_blocks[i].version->funcs->soft_reset((void *)adev);
- break;
- }
- }
+ psp_gpu_reset(adev);
pci_restore_state(adev->pdev);
@@ -514,6 +508,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
/* Set IP register base before any HW register access */
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
vega10_reg_base_init(adev);
break;
@@ -533,13 +528,13 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
amdgpu_device_ip_block_add(adev, &vega10_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
- if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1)
- amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
+ amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block);
if (!amdgpu_sriov_vf(adev))
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -558,7 +553,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block);
amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block);
amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -583,6 +578,21 @@ static uint32_t soc15_get_rev_id(struct amdgpu_device *adev)
return adev->nbio_funcs->get_rev_id(adev);
}
+static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ adev->nbio_funcs->hdp_flush(adev, ring);
+}
+
+static void soc15_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg)
+ WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+ else
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+}
+
static const struct amdgpu_asic_funcs soc15_asic_funcs =
{
.read_disabled_bios = &soc15_read_disabled_bios,
@@ -594,11 +604,12 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
.set_uvd_clocks = &soc15_set_uvd_clocks,
.set_vce_clocks = &soc15_set_vce_clocks,
.get_config_memsize = &soc15_get_config_memsize,
+ .flush_hdp = &soc15_flush_hdp,
+ .invalidate_hdp = &soc15_invalidate_hdp,
};
static int soc15_common_early_init(void *handle)
{
- bool psp_enabled = false;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->smc_rreg = NULL;
@@ -616,10 +627,6 @@ static int soc15_common_early_init(void *handle)
adev->asic_funcs = &soc15_asic_funcs;
- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP) &&
- (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_PSP)))
- psp_enabled = true;
-
adev->rev_id = soc15_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
@@ -646,6 +653,28 @@ static int soc15_common_early_init(void *handle)
adev->pg_flags = 0;
adev->external_rev_id = 0x1;
break;
+ case CHIP_VEGA12:
+ adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_3D_CGCG |
+ AMD_CG_SUPPORT_GFX_3D_CGLS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_MGCG |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_HDP_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_ROM_MGCG |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG;
+ adev->pg_flags = 0;
+ adev->external_rev_id = adev->rev_id + 0x14;
+ break;
case CHIP_RAVEN:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_MGLS |
@@ -680,10 +709,6 @@ static int soc15_common_early_init(void *handle)
xgpu_ai_mailbox_set_irq_funcs(adev);
}
- adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
- amdgpu_device_get_pcie_info(adev);
-
return 0;
}
@@ -882,6 +907,7 @@ static int soc15_common_set_clockgating_state(void *handle,
switch (adev->asic_type) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
adev->nbio_funcs->update_medium_grain_clock_gating(adev,
state == AMD_CG_STATE_GATE ? true : false);
adev->nbio_funcs->update_medium_grain_light_sleep(adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h
index 26b3feac5d06..f70da8a29f86 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.h
@@ -27,6 +27,9 @@
#include "nbio_v6_1.h"
#include "nbio_v7_0.h"
+#define SOC15_FLUSH_GPU_TLB_NUM_WREG 4
+#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1
+
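
These two constants let every SOC15 ring derive its vm_flush budget from its own packet costs instead of a magic number. For the sdma_v4_0 ring an SRBM write is 3 dwords and a POLL_REGMEM is 6, so the expression in sdma_v4_0_ring_funcs reproduces the old value exactly:

/* worked example for the sdma_v4_0 ring:
 *   SOC15_FLUSH_GPU_TLB_NUM_WREG * 3      == 4 * 3 == 12  (register writes)
 * + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6  == 1 * 6 ==  6  (ack poll)
 *                                   total == 18 dwords, as hard-coded before */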
extern const struct amd_ip_funcs soc15_common_ip_funcs;
struct soc15_reg_golden {
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index 5995ffc183de..52853d8a8fdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -107,7 +107,7 @@ static int tonga_ih_irq_init(struct amdgpu_device *adev)
tonga_ih_disable_interrupts(adev);
/* setup interrupt control */
- WREG32(mmINTERRUPT_CNTL2, adev->dummy_page.addr >> 8);
+ WREG32(mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8);
interrupt_cntl = RREG32(mmINTERRUPT_CNTL);
/* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi
* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN
@@ -271,7 +271,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev,
entry->src_data[0] = dw[1] & 0xfffffff;
entry->ring_id = dw[2] & 0xff;
entry->vmid = (dw[2] >> 8) & 0xff;
- entry->pas_id = (dw[2] >> 16) & 0xffff;
+ entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 8ab10c220910..948bb9437757 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -464,32 +464,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
}
/**
- * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp flush.
- */
-static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * uvd_v4_2_ring_hdp_invalidate - emit an hdp invalidate
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp invalidate.
- */
-static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
- amdgpu_ring_write(ring, 1);
-}
-
-/**
* uvd_v4_2_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -765,14 +739,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.set_wptr = uvd_v4_2_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
- 2 + /* uvd_v4_2_ring_emit_hdp_flush */
- 2 + /* uvd_v4_2_ring_emit_hdp_invalidate */
14, /* uvd_v4_2_ring_emit_fence x1 no user fence */
.emit_ib_size = 4, /* uvd_v4_2_ring_emit_ib */
.emit_ib = uvd_v4_2_ring_emit_ib,
.emit_fence = uvd_v4_2_ring_emit_fence,
- .emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
- .emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
.test_ring = uvd_v4_2_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index c1fe30cdba32..6445d55e7d5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -479,32 +479,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
}
/**
- * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp flush.
- */
-static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * uvd_v5_0_ring_hdp_invalidate - emit an hdp invalidate
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp invalidate.
- */
-static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
- amdgpu_ring_write(ring, 1);
-}
-
-/**
* uvd_v5_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -873,14 +847,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.set_wptr = uvd_v5_0_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
- 2 + /* uvd_v5_0_ring_emit_hdp_flush */
- 2 + /* uvd_v5_0_ring_emit_hdp_invalidate */
14, /* uvd_v5_0_ring_emit_fence x1 no user fence */
.emit_ib_size = 6, /* uvd_v5_0_ring_emit_ib */
.emit_ib = uvd_v5_0_ring_emit_ib,
.emit_fence = uvd_v5_0_ring_emit_fence,
- .emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v5_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index b2bfedaf57f1..f26f515db2fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -964,32 +964,6 @@ static void uvd_v6_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
- * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp flush.
- */
-static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * uvd_v6_0_ring_hdp_invalidate - emit an hdp invalidate
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp invalidate.
- */
-static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
- amdgpu_ring_write(ring, 1);
-}
-
-/**
* uvd_v6_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1072,29 +1046,21 @@ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
-static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+static void uvd_v6_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
{
- uint32_t reg;
-
- if (vmid < 8)
- reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vmid;
- else
- reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8;
-
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
- amdgpu_ring_write(ring, pd_addr >> 12);
+ amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
amdgpu_ring_write(ring, 0x8);
+}
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
- amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
- amdgpu_ring_write(ring, 1 << vmid);
- amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
- amdgpu_ring_write(ring, 0x8);
+static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vmid, uint64_t pd_addr)
+{
+ amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
@@ -1140,7 +1106,7 @@ static void uvd_v6_0_enc_ring_insert_end(struct amdgpu_ring *ring)
}
static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
amdgpu_ring_write(ring, HEVC_ENC_CMD_UPDATE_PTB);
amdgpu_ring_write(ring, vmid);
@@ -1562,21 +1528,19 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
.set_wptr = uvd_v6_0_ring_set_wptr,
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_frame_size =
- 2 + /* uvd_v6_0_ring_emit_hdp_flush */
- 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */
+ 6 + 6 + /* hdp flush / invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
14, /* uvd_v6_0_ring_emit_fence x1 no user fence */
.emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
.emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence,
- .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v6_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
@@ -1588,24 +1552,22 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
.get_wptr = uvd_v6_0_ring_get_wptr,
.set_wptr = uvd_v6_0_ring_set_wptr,
.emit_frame_size =
- 2 + /* uvd_v6_0_ring_emit_hdp_flush */
- 2 + /* uvd_v6_0_ring_emit_hdp_invalidate */
+ 6 + 6 + /* hdp flush / invalidate */
10 + /* uvd_v6_0_ring_emit_pipeline_sync */
- 20 + /* uvd_v6_0_ring_emit_vm_flush */
+ VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 + /* uvd_v6_0_ring_emit_vm_flush */
14 + 14, /* uvd_v6_0_ring_emit_fence x2 vm fence */
.emit_ib_size = 8, /* uvd_v6_0_ring_emit_ib */
.emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence,
.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
- .emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v6_0_ring_emit_wreg,
};
static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
@@ -1618,7 +1580,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_enc_ring_vm_funcs = {
.set_wptr = uvd_v6_0_enc_ring_set_wptr,
.emit_frame_size =
4 + /* uvd_v6_0_enc_ring_emit_pipeline_sync */
- 6 + /* uvd_v6_0_enc_ring_emit_vm_flush */
+ 5 + /* uvd_v6_0_enc_ring_emit_vm_flush */
5 + 5 + /* uvd_v6_0_enc_ring_emit_fence x2 vm fence */
1, /* uvd_v6_0_enc_ring_insert_end */
.emit_ib_size = 5, /* uvd_v6_0_enc_ring_emit_ib */
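The uvd_v6_0 hunks above drop the engine-specific emit_hdp_flush/emit_hdp_invalidate callbacks in favour of a single generic emit_wreg callback; the HDP flush logic itself moves into the ASIC code (see the vi.c changes further down, which add vi_flush_hdp()/vi_invalidate_hdp()). A minimal sketch of the resulting dispatch, assuming amdgpu_ring_emit_wreg() is a thin wrapper over ring->funcs->emit_wreg (the wrapper's definition is not part of the hunks shown here):

/* Sketch: how an HDP flush reaches uvd_v6_0_ring_emit_wreg() after this
 * change; mirrors the vi_flush_hdp() added later in this diff. */
static void sketch_flush_hdp(struct amdgpu_device *adev,
                             struct amdgpu_ring *ring)
{
        if (!ring || !ring->funcs->emit_wreg) {
                /* No suitable ring: write the register via MMIO and read it
                 * back so the flush has taken effect before returning. */
                WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
                RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
        } else {
                /* Otherwise queue the write on the ring; for UVD this lands
                 * in uvd_v6_0_ring_emit_wreg() above. */
                amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
        }
}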
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 6b95f4f344b5..eddc57f3b72a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -25,6 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_uvd.h"
+#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"
@@ -389,13 +390,13 @@ static int uvd_v7_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* UVD TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, 124, &adev->uvd.irq);
if (r)
return r;
/* UVD ENC TRAP */
for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UVD, i + 119, &adev->uvd.irq);
if (r)
return r;
}
@@ -1135,37 +1136,6 @@ static void uvd_v7_0_enc_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
}
/**
- * uvd_v7_0_ring_emit_hdp_flush - emit an hdp flush
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp flush.
- */
-static void uvd_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(NBIF, 0,
- mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0));
- amdgpu_ring_write(ring, 0);
-}
-
-/**
- * uvd_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp invalidate.
- */
-static void uvd_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
- amdgpu_ring_write(ring, 1);
-}
-
-/**
* uvd_v7_0_ring_test_ring - register write test
*
* @ring: amdgpu_ring pointer
@@ -1255,33 +1225,33 @@ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
-static void uvd_v7_0_vm_reg_write(struct amdgpu_ring *ring,
- uint32_t data0, uint32_t data1)
+static void uvd_v7_0_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
{
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
- amdgpu_ring_write(ring, data0);
+ amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
- amdgpu_ring_write(ring, data1);
+ amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
amdgpu_ring_write(ring, 8);
}
-static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
- uint32_t data0, uint32_t data1, uint32_t mask)
+static void uvd_v7_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
{
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
- amdgpu_ring_write(ring, data0);
+ amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
- amdgpu_ring_write(ring, data1);
+ amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
amdgpu_ring_write(ring, mask);
@@ -1294,37 +1264,15 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
uint32_t data0, data1, mask;
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
-
- data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
- data1 = upper_32_bits(pd_addr);
- uvd_v7_0_vm_reg_write(ring, data0, data1);
-
- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
- data1 = lower_32_bits(pd_addr);
- uvd_v7_0_vm_reg_write(ring, data0, data1);
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
+ /* wait for reg writes */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
- uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
-
- /* flush TLB */
- data0 = (hub->vm_inv_eng0_req + eng) << 2;
- data1 = req;
- uvd_v7_0_vm_reg_write(ring, data0, data1);
-
- /* wait for flush */
- data0 = (hub->vm_inv_eng0_ack + eng) << 2;
- data1 = 1 << vmid;
- mask = 1 << vmid;
- uvd_v7_0_vm_reg_wait(ring, data0, data1, mask);
+ uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);
}
static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
@@ -1342,40 +1290,34 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, HEVC_ENC_CMD_END);
}
+static void uvd_v7_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, 0xffffffff);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+ /* wait for reg writes */
+ uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
- /* flush TLB */
+static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
- amdgpu_ring_write(ring, req);
-
- /* wait for flush */
- amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WAIT);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
- amdgpu_ring_write(ring, 1 << vmid);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
}
#if 0
@@ -1712,22 +1654,23 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {
.get_wptr = uvd_v7_0_ring_get_wptr,
.set_wptr = uvd_v7_0_ring_set_wptr,
.emit_frame_size =
- 2 + /* uvd_v7_0_ring_emit_hdp_flush */
- 2 + /* uvd_v7_0_ring_emit_hdp_invalidate */
- 34 + /* uvd_v7_0_ring_emit_vm_flush */
+ 6 + 6 + /* hdp flush / invalidate */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* uvd_v7_0_ring_emit_vm_flush */
14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */
.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */
.emit_ib = uvd_v7_0_ring_emit_ib,
.emit_fence = uvd_v7_0_ring_emit_fence,
.emit_vm_flush = uvd_v7_0_ring_emit_vm_flush,
- .emit_hdp_flush = uvd_v7_0_ring_emit_hdp_flush,
- .emit_hdp_invalidate = uvd_v7_0_ring_emit_hdp_invalidate,
.test_ring = uvd_v7_0_ring_test_ring,
.test_ib = amdgpu_uvd_ring_test_ib,
.insert_nop = uvd_v7_0_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v7_0_ring_emit_wreg,
+ .emit_reg_wait = uvd_v7_0_ring_emit_reg_wait,
};
static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
@@ -1740,7 +1683,10 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.get_wptr = uvd_v7_0_enc_ring_get_wptr,
.set_wptr = uvd_v7_0_enc_ring_set_wptr,
.emit_frame_size =
- 17 + /* uvd_v7_0_enc_ring_emit_vm_flush */
+ 3 + 3 + /* hdp flush / invalidate */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* uvd_v7_0_enc_ring_emit_vm_flush */
5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */
1, /* uvd_v7_0_enc_ring_insert_end */
.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */
@@ -1754,6 +1700,8 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_uvd_ring_begin_use,
.end_use = amdgpu_uvd_ring_end_use,
+ .emit_wreg = uvd_v7_0_enc_ring_emit_wreg,
+ .emit_reg_wait = uvd_v7_0_enc_ring_emit_reg_wait,
};
static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev)
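The reworked emit_frame_size terms above can be cross-checked against the packet emitters in this file. A worked dword count, read from the hunks shown (uvd_v7_0_ring_emit_reg_wait() is cut off at the hunk boundary, so its 8-dword total is inferred from the * 8 multiplier plus the visible GP_SCRATCH8 pair):

/*
 * Dword accounting for uvd_v7_0_ring_vm_funcs.emit_frame_size:
 *
 *   uvd_v7_0_ring_emit_wreg():     3 x (PACKET0 header + payload) = 6 dwords
 *   uvd_v7_0_ring_emit_reg_wait(): 4 x (PACKET0 header + payload) = 8 dwords
 *
 *   6 + 6                                  HDP flush + invalidate, one wreg each
 *   SOC15_FLUSH_GPU_TLB_NUM_WREG * 6       register writes in the GMC TLB flush
 *   SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8   register waits in the GMC TLB flush
 *   8                                      trailing wait in uvd_v7_0_ring_emit_vm_flush()
 *   14 + 14                                uvd_v7_0_ring_emit_fence x2 (vm fence)
 */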
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index a5355eb689f1..428d1928e44e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -844,7 +844,7 @@ static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
}
static void vce_v3_0_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
amdgpu_ring_write(ring, VCE_CMD_UPDATE_PTB);
amdgpu_ring_write(ring, vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 7cf2eef68cf2..73fd48d6c756 100755..100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -28,6 +28,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vce.h"
+#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
#include "mmsch_v1_0.h"
@@ -419,7 +420,7 @@ static int vce_v4_0_sw_init(void *handle)
unsigned size;
int r, i;
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCE0, 167, &adev->vce.irq);
if (r)
return r;
@@ -964,40 +965,33 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, VCE_CMD_END);
}
+static void vce_v4_0_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
+ uint32_t val, uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
- amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
- amdgpu_ring_write(ring, (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, 0xffffffff);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+ /* wait for reg writes */
+ vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
- /* flush TLB */
+static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
- amdgpu_ring_write(ring, req);
-
- /* wait for flush */
- amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
- amdgpu_ring_write(ring, 1 << vmid);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
}
static int vce_v4_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1069,7 +1063,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.set_wptr = vce_v4_0_ring_set_wptr,
.parse_cs = amdgpu_vce_ring_parse_cs_vm,
.emit_frame_size =
- 17 + /* vce_v4_0_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vce_v4_0_emit_vm_flush */
5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */
1, /* vce_v4_0_ring_insert_end */
.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */
@@ -1083,6 +1079,8 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vce_ring_begin_use,
.end_use = amdgpu_vce_ring_end_use,
+ .emit_wreg = vce_v4_0_emit_wreg,
+ .emit_reg_wait = vce_v4_0_emit_reg_wait,
};
static void vce_v4_0_set_ring_funcs(struct amdgpu_device *adev)
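After this series uvd_v7_0, vce_v4_0 and vcn_v1_0 (below) share the same two-step vm_flush: amdgpu_gmc_emit_flush_gpu_tlb() emits the page-directory writes and TLB invalidate through the ring's emit_wreg/emit_reg_wait callbacks, and the engine only appends a wait for the low PD address to land. A sketch of that shared shape, with sketch_emit_reg_wait() standing in for the per-engine helper (here modelled on vce_v4_0_emit_reg_wait() above):

static void sketch_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
                                 uint32_t val, uint32_t mask)
{
        /* VCE flavour: command word, then register, mask and value. */
        amdgpu_ring_write(ring, VCE_CMD_REG_WAIT);
        amdgpu_ring_write(ring, reg << 2);
        amdgpu_ring_write(ring, mask);
        amdgpu_ring_write(ring, val);
}

static void sketch_emit_vm_flush(struct amdgpu_ring *ring,
                                 unsigned int vmid, uint64_t pd_addr)
{
        struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];

        /* Emits the PD base writes and the TLB invalidate via the ring
         * callbacks and returns the patched page-directory address. */
        pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);

        /* Wait until the VM hub has latched the low half of the address. */
        sketch_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
                             lower_32_bits(pd_addr), 0xffffffff);
}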
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index b99e15c43e45..8c132673bc79 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -25,6 +25,7 @@
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_vcn.h"
+#include "soc15.h"
#include "soc15d.h"
#include "soc15_common.h"
@@ -74,13 +75,13 @@ static int vcn_v1_0_sw_init(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* VCN DEC TRAP */
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, 124, &adev->vcn.irq);
if (r)
return r;
/* VCN ENC TRAP */
for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
- r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_VCN, i + 119,
+ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + 119,
&adev->vcn.irq);
if (r)
return r;
@@ -809,21 +810,6 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64
}
/**
- * vcn_v1_0_dec_ring_emit_hdp_invalidate - emit an hdp invalidate
- *
- * @ring: amdgpu_ring pointer
- *
- * Emits an hdp invalidate.
- */
-static void vcn_v1_0_dec_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 0));
- amdgpu_ring_write(ring, 1);
-}
-
-/**
* vcn_v1_0_dec_ring_emit_ib - execute indirect buffer
*
* @ring: amdgpu_ring pointer
@@ -852,33 +838,18 @@ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
-static void vcn_v1_0_dec_vm_reg_write(struct amdgpu_ring *ring,
- uint32_t data0, uint32_t data1)
+static void vcn_v1_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
{
struct amdgpu_device *adev = ring->adev;
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
- amdgpu_ring_write(ring, data0);
+ amdgpu_ring_write(ring, reg << 2);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
- amdgpu_ring_write(ring, data1);
- amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
- amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
-}
-
-static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
- uint32_t data0, uint32_t data1, uint32_t mask)
-{
- struct amdgpu_device *adev = ring->adev;
-
- amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
- amdgpu_ring_write(ring, data0);
- amdgpu_ring_write(ring,
- PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
- amdgpu_ring_write(ring, data1);
+ amdgpu_ring_write(ring, val);
amdgpu_ring_write(ring,
PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GP_SCRATCH8), 0));
amdgpu_ring_write(ring, mask);
@@ -888,40 +859,34 @@ static void vcn_v1_0_dec_vm_reg_wait(struct amdgpu_ring *ring,
}
static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vmid, uint64_t pd_addr)
+ unsigned vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
uint32_t data0, data1, mask;
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
-
- data0 = (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2;
- data1 = upper_32_bits(pd_addr);
- vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
-
- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
- data1 = lower_32_bits(pd_addr);
- vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
- data0 = (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2;
+ /* wait for register write */
+ data0 = hub->ctx0_ptb_addr_lo32 + vmid * 2;
data1 = lower_32_bits(pd_addr);
mask = 0xffffffff;
- vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
-
- /* flush TLB */
- data0 = (hub->vm_inv_eng0_req + eng) << 2;
- data1 = req;
- vcn_v1_0_dec_vm_reg_write(ring, data0, data1);
-
- /* wait for flush */
- data0 = (hub->vm_inv_eng0_ack + eng) << 2;
- data1 = 1 << vmid;
- mask = 1 << vmid;
- vcn_v1_0_dec_vm_reg_wait(ring, data0, data1, mask);
+ vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);
+}
+
+static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0), 0));
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1), 0));
+ amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD), 0));
+ amdgpu_ring_write(ring, VCN_DEC_CMD_WRITE_REG << 1);
}
/**
@@ -1020,43 +985,34 @@ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, ib->length_dw);
}
+static void vcn_v1_0_enc_ring_emit_reg_wait(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val,
+ uint32_t mask)
+{
+ amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, mask);
+ amdgpu_ring_write(ring, val);
+}
+
static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned int vmid, uint64_t pd_addr)
+ unsigned int vmid, uint64_t pd_addr)
{
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
- uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vmid);
- uint64_t flags = AMDGPU_PTE_VALID;
- unsigned eng = ring->vm_inv_eng;
-
- amdgpu_gart_get_vm_pde(ring->adev, -1, &pd_addr, &flags);
- pd_addr |= flags;
- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring,
- (hub->ctx0_ptb_addr_hi32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, upper_32_bits(pd_addr));
-
- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring,
- (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+ pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
- amdgpu_ring_write(ring,
- (hub->ctx0_ptb_addr_lo32 + vmid * 2) << 2);
- amdgpu_ring_write(ring, 0xffffffff);
- amdgpu_ring_write(ring, lower_32_bits(pd_addr));
+ /* wait for reg writes */
+ vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,
+ lower_32_bits(pd_addr), 0xffffffff);
+}
- /* flush TLB */
+static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring,
+ uint32_t reg, uint32_t val)
+{
amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WRITE);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_req + eng) << 2);
- amdgpu_ring_write(ring, req);
-
- /* wait for flush */
- amdgpu_ring_write(ring, VCN_ENC_CMD_REG_WAIT);
- amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2);
- amdgpu_ring_write(ring, 1 << vmid);
- amdgpu_ring_write(ring, 1 << vmid);
+ amdgpu_ring_write(ring, reg << 2);
+ amdgpu_ring_write(ring, val);
}
static int vcn_v1_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1133,15 +1089,16 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.get_wptr = vcn_v1_0_dec_ring_get_wptr,
.set_wptr = vcn_v1_0_dec_ring_set_wptr,
.emit_frame_size =
- 2 + /* vcn_v1_0_dec_ring_emit_hdp_invalidate */
- 34 + /* vcn_v1_0_dec_ring_emit_vm_flush */
+ 6 + 6 + /* hdp invalidate / flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
+ 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */
14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */
6,
.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */
.emit_ib = vcn_v1_0_dec_ring_emit_ib,
.emit_fence = vcn_v1_0_dec_ring_emit_fence,
.emit_vm_flush = vcn_v1_0_dec_ring_emit_vm_flush,
- .emit_hdp_invalidate = vcn_v1_0_dec_ring_emit_hdp_invalidate,
.test_ring = amdgpu_vcn_dec_ring_test_ring,
.test_ib = amdgpu_vcn_dec_ring_test_ib,
.insert_nop = vcn_v1_0_ring_insert_nop,
@@ -1150,6 +1107,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vcn_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v1_0_dec_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_dec_ring_emit_reg_wait,
};
static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
@@ -1162,7 +1121,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.get_wptr = vcn_v1_0_enc_ring_get_wptr,
.set_wptr = vcn_v1_0_enc_ring_set_wptr,
.emit_frame_size =
- 17 + /* vcn_v1_0_enc_ring_emit_vm_flush */
+ SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
+ SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
+ 4 + /* vcn_v1_0_enc_ring_emit_vm_flush */
5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */
1, /* vcn_v1_0_enc_ring_insert_end */
.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */
@@ -1176,6 +1137,8 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.begin_use = amdgpu_vcn_ring_begin_use,
.end_use = amdgpu_vcn_ring_end_use,
+ .emit_wreg = vcn_v1_0_enc_ring_emit_wreg,
+ .emit_reg_wait = vcn_v1_0_enc_ring_emit_reg_wait,
};
static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev)
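The .emit_wreg/.emit_reg_wait entries registered above are reached through ring-level wrappers rather than called directly; the vi.c hunk below already uses amdgpu_ring_emit_wreg(). The wrapper definitions are not part of this diff; a sketch of their assumed shape:

/* Assumed dispatch wrappers (illustrative, not from this diff): */
#define amdgpu_ring_emit_wreg(r, reg, v) \
        ((r)->funcs->emit_wreg((r), (reg), (v)))
#define amdgpu_ring_emit_reg_wait(r, reg, v, m) \
        ((r)->funcs->emit_reg_wait((r), (reg), (v), (m)))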
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ee14d78be2a9..5ae5ed2e62d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -245,8 +245,8 @@ static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
* some faults get cleared.
*/
switch (dw0 & 0xff) {
- case AMDGPU_IH_CLIENTID_VMC:
- case AMDGPU_IH_CLIENTID_UTCL2:
+ case SOC15_IH_CLIENTID_VMC:
+ case SOC15_IH_CLIENTID_UTCL2:
break;
default:
/* Not a VM fault */
@@ -333,7 +333,7 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev,
entry->vmid_src = (dw[0] >> 31);
entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
entry->timestamp_src = dw[2] >> 31;
- entry->pas_id = dw[3] & 0xffff;
+ entry->pasid = dw[3] & 0xffff;
entry->pasid_src = dw[3] >> 31;
entry->src_data[0] = dw[4];
entry->src_data[1] = dw[5];
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
index b7bdd04793d6..4c45db7f1157 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
@@ -24,7 +24,8 @@
#include "soc15.h"
#include "soc15_common.h"
-#include "soc15ip.h"
+#include "soc15_hw_ip.h"
+#include "vega10_ip_offset.h"
int vega10_reg_base_init(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 1e3e05a11f7a..126f1276d347 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -71,7 +71,6 @@
#include "uvd_v5_0.h"
#include "uvd_v6_0.h"
#include "vce_v3_0.h"
-#include "amdgpu_powerplay.h"
#if defined(CONFIG_DRM_AMD_ACP)
#include "amdgpu_acp.h"
#endif
@@ -856,6 +855,27 @@ static uint32_t vi_get_rev_id(struct amdgpu_device *adev)
>> PCIE_EFUSE4__STRAP_BIF_ATI_REV_ID__SHIFT;
}
+static void vi_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ RREG32(mmHDP_MEM_COHERENCY_FLUSH_CNTL);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 1);
+ }
+}
+
+static void vi_invalidate_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+{
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32(mmHDP_DEBUG0, 1);
+ RREG32(mmHDP_DEBUG0);
+ } else {
+ amdgpu_ring_emit_wreg(ring, mmHDP_DEBUG0, 1);
+ }
+}
+
static const struct amdgpu_asic_funcs vi_asic_funcs =
{
.read_disabled_bios = &vi_read_disabled_bios,
@@ -867,6 +887,8 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
.set_uvd_clocks = &vi_set_uvd_clocks,
.set_vce_clocks = &vi_set_vce_clocks,
.get_config_memsize = &vi_get_config_memsize,
+ .flush_hdp = &vi_flush_hdp,
+ .invalidate_hdp = &vi_invalidate_hdp,
};
#define CZ_REV_BRISTOL(rev) \
@@ -874,7 +896,6 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
static int vi_common_early_init(void *handle)
{
- bool smc_enabled = false;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->flags & AMD_IS_APU) {
@@ -895,10 +916,6 @@ static int vi_common_early_init(void *handle)
adev->asic_funcs = &vi_asic_funcs;
- if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SMC) &&
- (amdgpu_ip_block_mask & (1 << AMD_IP_BLOCK_TYPE_SMC)))
- smc_enabled = true;
-
adev->rev_id = vi_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
@@ -1074,11 +1091,6 @@ static int vi_common_early_init(void *handle)
xgpu_vi_mailbox_set_irq_funcs(adev);
}
- /* vi use smc load by default */
- adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
-
- amdgpu_device_get_pcie_info(adev);
-
return 0;
}
@@ -1493,7 +1505,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block);
amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
@@ -1503,7 +1515,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -1523,7 +1535,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -1545,7 +1557,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block);
amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -1563,7 +1575,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@@ -1584,7 +1596,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &vi_common_ip_block);
amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block);
amdgpu_device_ip_block_add(adev, &cz_ih_ip_block);
- amdgpu_device_ip_block_add(adev, &amdgpu_pp_ip_block);
+ amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h
index 575d7aed5d32..0429fe332269 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.h
+++ b/drivers/gpu/drm/amd/amdgpu/vi.h
@@ -24,6 +24,8 @@
#ifndef __VI_H__
#define __VI_H__
+#define VI_FLUSH_GPU_TLB_NUM_WREG 3
+
void vi_srbm_select(struct amdgpu_device *adev,
u32 me, u32 pipe, u32 queue, u32 vmid);
int vi_set_ip_blocks(struct amdgpu_device *adev);
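VI_FLUSH_GPU_TLB_NUM_WREG bounds how many emit_wreg calls a VI TLB flush may issue, which is what the uvd_v6_0 budget VI_FLUSH_GPU_TLB_NUM_WREG * 6 + 8 above depends on: each UVD v6 register write costs 6 dwords (three PACKET0 header/payload pairs in uvd_v6_0_ring_emit_wreg()), and the remaining 8 dwords cover the invalidate wait that uvd_v6_0_ring_emit_vm_flush() still emits itself (its tail is cut off by the hunk boundary, so that figure is read from the budget comment):

/*
 * Budget check for uvd_v6_0_ring_vm_funcs.emit_frame_size:
 *
 *   VI_FLUSH_GPU_TLB_NUM_WREG (3) * 6   worst-case wregs from the VI TLB flush
 *   + 8                                 wait sequence kept in emit_vm_flush
 */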