summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-05 01:49:32 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-05 01:49:32 +0300
commitf377ea88b862bf7151be96d276f4cb740f8e1c41 (patch)
tree6205913431c012e285316281b6221a20d4a92128 /drivers/gpu/drm/amd/amdkfd
parent51e771c0d25b43d0f12b2c7c01939942becbbe28 (diff)
parent73bf1b7be7aab60d7c651402441dd0b0b4991098 (diff)
downloadlinux-f377ea88b862bf7151be96d276f4cb740f8e1c41.tar.xz
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie: "This is the main pull request for the drm for 4.3. Nouveau is probably the biggest amount of changes in here, since it missed 4.2. Highlights below, along with the usual bunch of fixes. All stuff outside drm should have applicable acks. Highlights: - new drivers: freescale dcu kms driver - core: more atomic fixes disable some dri1 interfaces on kms drivers drop fb panic handling, this was just getting more broken, as more locking was required. new core fbdev Kconfig support - instead of each driver enable/disabling it struct_mutex cleanups - panel: more new panels cleanup Kconfig - i915: Skylake support enabled by default legacy modesetting using atomic infrastructure Skylake fixes GEN9 workarounds - amdgpu: Fiji support CGS support for amdgpu Initial GPU scheduler - off by default Lots of bug fixes and optimisations. - radeon: DP fixes misc fixes - amdkfd: Add Carrizo support for amdkfd using amdgpu. - nouveau: long pending cleanup to complete driver, fully bisectable which makes it larger, perfmon work more reclocking improvements maxwell displayport fixes - vmwgfx: new DX device support, supports OpenGL 3.3 screen targets support - mgag200: G200eW support G200e new revision support - msm: dragonboard 410c support, msm8x94 support, msm8x74v1 support yuv format support dma plane support mdp5 rotation initial hdcp - sti: atomic support - exynos: lots of cleanups atomic modesetting/pageflipping support render node support - tegra: tegra210 support (dc, dsi, dp/hdmi) dpms with atomic modesetting support - atmel: support for 3 more atmel SoCs new input formats, PRIME support. - dwhdmi: preparing to add audio support - rockchip: yuv plane support" * 'drm-next' of git://people.freedesktop.org/~airlied/linux: (1369 commits) drm/amdgpu: rename gmc_v8_0_init_compute_vmid drm/amdgpu: fix vce3 instance handling drm/amdgpu: remove ib test for the second VCE Ring drm/amdgpu: properly enable VM fault interrupts drm/amdgpu: fix warning in scheduler drm/amdgpu: fix buffer placement under memory pressure drm/amdgpu/cz: fix cz_dpm_update_low_memory_pstate logic drm/amdgpu: fix typo in dce11 watermark setup drm/amdgpu: fix typo in dce10 watermark setup drm/amdgpu: use top down allocation for non-CPU accessible vram drm/amdgpu: be explicit about cpu vram access for driver BOs (v2) drm/amdgpu: set MEC doorbell range for Fiji drm/amdgpu: implement burst NOP for SDMA drm/amdgpu: add insert_nop ring func and default implementation drm/amdgpu: add amdgpu_get_sdma_instance helper function drm/amdgpu: add AMDGPU_MAX_SDMA_INSTANCES drm/amdgpu: add burst_nop flag for sdma drm/amdgpu: add count field for the SDMA NOP packet v2 drm/amdgpu: use PT for VM sync on unmap drm/amdgpu: make wait_event uninterruptible in push_job ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Makefile3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_regs.h11
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c103
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c249
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c99
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h398
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.h1
14 files changed, 875 insertions, 39 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index 8dfac37ff327..e13c67c8d2c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,6 +4,6 @@
config HSA_AMD
tristate "HSA kernel driver for AMD GPU devices"
- depends on DRM_RADEON && AMD_IOMMU_V2 && X86_64
+ depends on (DRM_RADEON || DRM_AMDGPU) && AMD_IOMMU_V2 && X86_64
help
Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 28551153ec6d..7fc9b0f444cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -2,7 +2,8 @@
# Makefile for Heterogenous System Architecture support for AMD GPU devices
#
-ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/amd/include/ \
+ -Idrivers/gpu/drm/amd/include/asic_reg
amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 183be5b8414f..48769d12dd7b 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -65,17 +65,6 @@
#define AQL_ENABLE 1
-#define SDMA_RB_VMID(x) (x << 24)
-#define SDMA_RB_ENABLE (1 << 0)
-#define SDMA_RB_SIZE(x) ((x) << 1) /* log2 */
-#define SDMA_RPTR_WRITEBACK_ENABLE (1 << 12)
-#define SDMA_RPTR_WRITEBACK_TIMER(x) ((x) << 16) /* log2 */
-#define SDMA_OFFSET(x) (x << 0)
-#define SDMA_DB_ENABLE (1 << 28)
-#define SDMA_ATC (1 << 0)
-#define SDMA_VA_PTR32 (1 << 4)
-#define SDMA_VA_SHARED_BASE(x) (x << 8)
-
#define GRBM_GFX_INDEX 0x30800
#define ATC_VMID_PASID_MAPPING_VALID (1U << 31)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index c991973019d0..c6a1b4cc6458 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -31,7 +31,7 @@
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 75312c82969f..3f95f7cb4019 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -80,7 +80,12 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x1318, &kaveri_device_info }, /* Kaveri */
{ 0x131B, &kaveri_device_info }, /* Kaveri */
{ 0x131C, &kaveri_device_info }, /* Kaveri */
- { 0x131D, &kaveri_device_info } /* Kaveri */
+ { 0x131D, &kaveri_device_info }, /* Kaveri */
+ { 0x9870, &carrizo_device_info }, /* Carrizo */
+ { 0x9874, &carrizo_device_info }, /* Carrizo */
+ { 0x9875, &carrizo_device_info }, /* Carrizo */
+ { 0x9876, &carrizo_device_info }, /* Carrizo */
+ { 0x9877, &carrizo_device_info } /* Carrizo */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 9ce8a20a7aff..c6f435aa803f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -23,6 +23,7 @@
#include "kfd_device_queue_manager.h"
#include "cik_regs.h"
+#include "oss/oss_2_4_sh_mask.h"
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -135,13 +136,16 @@ static int register_process_cik(struct device_queue_manager *dqm,
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
- uint32_t value = SDMA_ATC;
+ uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
if (q->process->is_32bit_user_mode)
- value |= SDMA_VA_PTR32 | get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+ get_sh_mem_bases_32(qpd_to_pdd(qpd));
else
- value |= SDMA_VA_SHARED_BASE(get_sh_mem_bases_nybble_64(
- qpd_to_pdd(qpd)));
+ value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+
q->properties.sdma_vm_addr = value;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 4c15212a3899..7e9cae9d349b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -22,6 +22,10 @@
*/
#include "kfd_device_queue_manager.h"
+#include "gca/gfx_8_0_enum.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+#include "oss/oss_3_0_sh_mask.h"
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -37,14 +41,40 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops)
{
- pr_warn("amdkfd: VI DQM is not currently supported\n");
-
ops->set_cache_memory_policy = set_cache_memory_policy_vi;
ops->register_process = register_process_vi;
ops->initialize = initialize_cpsch_vi;
ops->init_sdma_vm = init_sdma_vm;
}
+static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
+{
+ /* In 64-bit mode, we can only control the top 3 bits of the LDS,
+ * scratch and GPUVM apertures.
+ * The hardware fills in the remaining 59 bits according to the
+ * following pattern:
+ * LDS: X0000000'00000000 - X0000001'00000000 (4GB)
+ * Scratch: X0000001'00000000 - X0000002'00000000 (4GB)
+ * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB)
+ *
+ * (where X/Y is the configurable nybble with the low-bit 0)
+ *
+ * LDS and scratch will have the same top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
+ * GPUVM can have a different top nybble programmed in the
+ * top 3 bits of SH_MEM_BASES.SHARED_BASE.
+ * We don't bother to support different top nybbles
+ * for LDS/Scratch and GPUVM.
+ */
+
+ BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+ top_address_nybble == 0);
+
+ return top_address_nybble << 12 |
+ (top_address_nybble << 12) <<
+ SH_MEM_BASES__SHARED_BASE__SHIFT;
+}
+
static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
enum cache_policy default_policy,
@@ -52,18 +82,83 @@ static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- return false;
+ uint32_t default_mtype;
+ uint32_t ape1_mtype;
+
+ default_mtype = (default_policy == cache_policy_coherent) ?
+ MTYPE_CC :
+ MTYPE_NC;
+
+ ape1_mtype = (alternate_policy == cache_policy_coherent) ?
+ MTYPE_CC :
+ MTYPE_NC;
+
+ qpd->sh_mem_config = (qpd->sh_mem_config &
+ SH_MEM_CONFIG__ADDRESS_MODE_MASK) |
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+ default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+ ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+ SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+ return true;
}
static int register_process_vi(struct device_queue_manager *dqm,
struct qcm_process_device *qpd)
{
- return -1;
+ struct kfd_process_device *pdd;
+ unsigned int temp;
+
+ BUG_ON(!dqm || !qpd);
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if sh_mem_config register already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
+ MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
+ MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT |
+ SH_MEM_CONFIG__PRIVATE_ATC_MASK;
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ if (qpd->pqm->process->is_32bit_user_mode) {
+ temp = get_sh_mem_bases_32(pdd);
+ qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT;
+ qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 <<
+ SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+ } else {
+ temp = get_sh_mem_bases_nybble_64(pdd);
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
+ SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+ }
+
+ pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
+
+ return 0;
}
static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
struct qcm_process_device *qpd)
{
+ uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
+
+ if (q->process->is_32bit_user_mode)
+ value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
+ get_sh_mem_bases_32(qpd_to_pdd(qpd));
+ else
+ value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
+ SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
+
+ q->properties.sdma_vm_addr = value;
}
static int initialize_cpsch_vi(struct device_queue_manager *dqm)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 35b987574633..2b655103ba79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -33,7 +33,7 @@
#include <linux/time.h>
#include "kfd_priv.h"
#include <linux/mm.h>
-#include <uapi/asm-generic/mman-common.h>
+#include <linux/mman.h>
#include <asm/processor.h>
/*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 434979428fc0..d83de985e88c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -27,6 +27,7 @@
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "cik_structs.h"
+#include "oss/oss_2_4_sh_mask.h"
static inline struct cik_mqd *get_mqd(void *mqd)
{
@@ -214,17 +215,20 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
BUG_ON(!mm || !mqd || !q);
m = get_sdma_mqd(mqd);
- m->sdma_rlc_rb_cntl =
- SDMA_RB_SIZE((ffs(q->queue_size / sizeof(unsigned int)))) |
- SDMA_RB_VMID(q->vmid) |
- SDMA_RPTR_WRITEBACK_ENABLE |
- SDMA_RPTR_WRITEBACK_TIMER(6);
+ m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
+ SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
m->sdma_rlc_rb_base = lower_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_base_hi = upper_32_bits(q->queue_address >> 8);
m->sdma_rlc_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->sdma_rlc_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->sdma_rlc_doorbell = SDMA_OFFSET(q->doorbell_off) | SDMA_DB_ENABLE;
+ m->sdma_rlc_doorbell = q->doorbell_off <<
+ SDMA0_RLC0_DOORBELL__OFFSET__SHIFT |
+ 1 << SDMA0_RLC0_DOORBELL__ENABLE__SHIFT;
+
m->sdma_rlc_virtual_addr = q->sdma_vm_addr;
m->sdma_engine_id = q->sdma_engine_id;
@@ -234,7 +238,9 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
if (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0) {
- m->sdma_rlc_rb_cntl |= SDMA_RB_ENABLE;
+ m->sdma_rlc_rb_cntl |=
+ 1 << SDMA0_RLC0_RB_CNTL__RB_ENABLE__SHIFT;
+
q->is_active = true;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index b3a7e3ba1e38..fa32c32fa1c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -22,12 +22,255 @@
*/
#include <linux/printk.h>
+#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_mqd_manager.h"
+#include "vi_structs.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "gca/gfx_8_0_enum.h"
+
+#define CP_MQD_CONTROL__PRIV_STATE__SHIFT 0x8
+
+static inline struct vi_mqd *get_mqd(void *mqd)
+{
+ return (struct vi_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct vi_mqd *m;
+
+ retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd),
+ mqd_mem_obj);
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, sizeof(struct vi_mqd));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT |
+ MTYPE_UC << CP_MQD_CONTROL__MTYPE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ m->cp_hqd_eop_rptr = 1 << CP_HQD_EOP_RPTR__INIT_FETCHER__SHIFT;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL)
+ m->cp_hqd_iq_rptr = 1;
+
+ *mqd = m;
+ if (gart_addr != NULL)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t __user *wptr)
+{
+ return mm->dev->kfd2kgd->hqd_load
+ (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+}
+
+static int __update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q, unsigned int mtype,
+ unsigned int atc_bit)
+{
+ struct vi_mqd *m;
+
+ BUG_ON(!mm || !q || !mqd);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
+ atc_bit << CP_HQD_PQ_CONTROL__PQ_ATC__SHIFT |
+ mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
+ m->cp_hqd_pq_control |=
+ ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
+ pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
+ mtype << CP_HQD_EOP_CONTROL__MTYPE__SHIFT;
+
+ m->cp_hqd_ib_control = atc_bit << CP_HQD_IB_CONTROL__IB_ATC__SHIFT |
+ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+ mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
+
+ m->cp_hqd_eop_control |=
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = atc_bit << CP_HQD_IQ_TIMER__IQ_ATC__SHIFT |
+ mtype << CP_HQD_IQ_TIMER__MTYPE__SHIFT;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
+ }
+
+ m->cp_hqd_active = 0;
+ q->is_active = false;
+ if (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0) {
+ m->cp_hqd_active = 1;
+ q->is_active = true;
+ }
+
+ return 0;
+}
+
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ return __update_mqd(mm, mqd, q, MTYPE_CC, 1);
+}
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy
+ (mm->dev->kgd, type, timeout,
+ pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ BUG_ON(!mm || !mqd);
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct vi_mqd *m;
+ int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct vi_mqd *m;
+ int retval = __update_mqd(mm, mqd, q, MTYPE_UC, 0);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(mqd);
+ m->cp_hqd_vmid = q->vmid;
+ return retval;
+}
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
- struct kfd_dev *dev)
+ struct kfd_dev *dev)
{
- pr_warn("amdkfd: VI MQD is not currently supported\n");
- return NULL;
+ struct mqd_manager *mqd;
+
+ BUG_ON(!dev);
+ BUG_ON(type >= KFD_MQD_TYPE_MAX);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 99b6d28a11c3..90f391434fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -27,6 +27,7 @@
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
@@ -55,6 +56,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
bool *over_subscription)
{
unsigned int process_count, queue_count;
+ unsigned int map_queue_size;
BUG_ON(!pm || !rlib_size || !over_subscription);
@@ -69,9 +71,13 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
pr_debug("kfd: over subscribed runlist\n");
}
+ map_queue_size =
+ (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
+ sizeof(struct pm4_mes_map_queues) :
+ sizeof(struct pm4_map_queues);
/* calculate run list ib allocation size */
*rlib_size = process_count * sizeof(struct pm4_map_process) +
- queue_count * sizeof(struct pm4_map_queues);
+ queue_count * map_queue_size;
/*
* Increase the allocation size in case we need a chained run list
@@ -176,6 +182,71 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
+static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ BUG_ON(!pm || !buffer || !q);
+
+ pr_debug("kfd: In func %s\n", __func__);
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_map_queues));
+
+ packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ pr_err("kfd: in %s queue type %d\n", __func__,
+ q->properties.type);
+ BUG();
+ break;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
@@ -292,8 +363,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- kq->queue, qpd->is_debug);
+ if (pm->dqm->dev->device_info->asic_family ==
+ CHIP_CARRIZO)
+ retval = pm_create_map_queue_vi(pm,
+ &rl_buffer[rl_wptr],
+ kq->queue,
+ qpd->is_debug);
+ else
+ retval = pm_create_map_queue(pm,
+ &rl_buffer[rl_wptr],
+ kq->queue,
+ qpd->is_debug);
if (retval != 0)
return retval;
@@ -309,8 +389,17 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm, &rl_buffer[rl_wptr],
- q, qpd->is_debug);
+ if (pm->dqm->dev->device_info->asic_family ==
+ CHIP_CARRIZO)
+ retval = pm_create_map_queue_vi(pm,
+ &rl_buffer[rl_wptr],
+ q,
+ qpd->is_debug);
+ else
+ retval = pm_create_map_queue(pm,
+ &rl_buffer[rl_wptr],
+ q,
+ qpd->is_debug);
if (retval != 0)
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
new file mode 100644
index 000000000000..08c721922812
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+ uint32_t count : 14;/* < number of DWORDs - 1 in the
+ information body. */
+ uint32_t type : 2; /* < packet identifier.
+ It should be 3 for type 3 packets */
+ };
+ uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+ queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t vmid_mask:16;
+ uint32_t unmap_latency:8;
+ uint32_t reserved1:5;
+ enum mes_set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t queue_mask_lo;
+ uint32_t queue_mask_hi;
+ uint32_t gws_mask_lo;
+ uint32_t gws_mask_hi;
+
+ union {
+ struct {
+ uint32_t oac_mask:16;
+ uint32_t reserved2:16;
+ } bitfields7;
+ uint32_t ordinal7;
+ };
+
+ union {
+ struct {
+ uint32_t gds_heap_base:6;
+ uint32_t reserved3:5;
+ uint32_t gds_heap_size:6;
+ uint32_t reserved4:15;
+ } bitfields8;
+ uint32_t ordinal8;
+ };
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:2;
+ uint32_t ib_base_lo:30;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t ib_base_hi:16;
+ uint32_t reserved2:16;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t ib_size:20;
+ uint32_t chain:1;
+ uint32_t offload_polling:1;
+ uint32_t reserved3:1;
+ uint32_t valid:1;
+ uint32_t reserved4:8;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
+ } bitfields2;
+ uint32_t ordinal2;
+};
+
+ union {
+ struct {
+ uint32_t page_table_base:28;
+ uint32_t reserved2:4;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t sh_mem_bases;
+ uint32_t sh_mem_ape1_base;
+ uint32_t sh_mem_ape1_limit;
+ uint32_t sh_mem_config;
+ uint32_t gds_addr_lo;
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:6;
+ uint32_t reserved3:2;
+ uint32_t num_oac:4;
+ uint32_t reserved4:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields10;
+ uint32_t ordinal10;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_vi_enum {
+ queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_vi_enum {
+ queue_type__mes_map_queues__normal_compute_vi = 0,
+ queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+ queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_vi_enum {
+ alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_vi_enum {
+ engine_sel__mes_map_queues__compute_vi = 0,
+ engine_sel__mes_map_queues__sdma0_vi = 2,
+ engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:4;
+ enum mes_map_queues_queue_sel_vi_enum queue_sel:2;
+ uint32_t reserved2:15;
+ enum mes_map_queues_queue_type_vi_enum queue_type:3;
+ enum mes_map_queues_alloc_format_vi_enum alloc_format:2;
+ enum mes_map_queues_engine_sel_vi_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved3:1;
+ uint32_t check_disable:1;
+ uint32_t doorbell_offset:21;
+ uint32_t reserved4:3;
+ uint32_t queue:6;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t mqd_addr_lo;
+ uint32_t mqd_addr_hi;
+ uint32_t wptr_addr_lo;
+ uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+ interrupt_sel__mes_query_status__completion_status = 0,
+ interrupt_sel__mes_query_status__process_status = 1,
+ interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+ command__mes_query_status__interrupt_only = 0,
+ command__mes_query_status__fence_only_immediate = 1,
+ command__mes_query_status__fence_only_after_write_ack = 2,
+ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+ engine_sel__mes_query_status__compute = 0,
+ engine_sel__mes_query_status__sdma0_queue = 2,
+ engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t context_id:28;
+ enum mes_query_status_interrupt_sel_enum
+ interrupt_sel:2;
+ enum mes_query_status_command_enum command:2;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved2:2;
+ uint32_t doorbell_offset:21;
+ uint32_t reserved3:2;
+ enum mes_query_status_engine_sel_enum engine_sel:3;
+ uint32_t reserved4:4;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data_lo;
+ uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+ action__mes_unmap_queues__preempt_queues = 0,
+ action__mes_unmap_queues__reset_queues = 1,
+ action__mes_unmap_queues__disable_process_queues = 2,
+ action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+ engine_sel__mes_unmap_queues__compute = 0,
+ engine_sel__mes_unmap_queues__sdma0 = 2,
+ engine_sel__mes_unmap_queues__sdmal = 3
+};
+
+struct PM4_MES_UNMAP_QUEUES {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ enum mes_unmap_queues_action_enum action:2;
+ uint32_t reserved1:2;
+ enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:20;
+ enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved3:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved4:2;
+ uint32_t doorbell_offset0:21;
+ uint32_t reserved5:9;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved6:2;
+ uint32_t doorbell_offset1:21;
+ uint32_t reserved7:9;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+ union {
+ struct {
+ uint32_t reserved8:2;
+ uint32_t doorbell_offset2:21;
+ uint32_t reserved9:9;
+ } bitfields5;
+ uint32_t ordinal5;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ uint32_t doorbell_offset3:21;
+ uint32_t reserved11:9;
+ } bitfields6;
+ uint32_t ordinal6;
+ };
+};
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index c25728bc388a..74909e72a009 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1186,6 +1186,11 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* TODO: Retrieve max engine clock values from KGD
*/
+ if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
+ dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
+ pr_info("amdkfd: adding doorbell packet type capability\n");
+ }
+
res = 0;
err:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 989624b3cd14..c3ddb9b95ff8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -40,6 +40,7 @@
#define HSA_CAP_WATCH_POINTS_TOTALBITS_MASK 0x00000f00
#define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT 8
#define HSA_CAP_RESERVED 0xfffff000
+#define HSA_CAP_DOORBELL_PACKET_TYPE 0x00001000
struct kfd_node_properties {
uint32_t cpu_cores_count;