summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2024-09-16 18:05:31 +0300
committerDave Airlie <airlied@redhat.com>2024-09-16 18:06:10 +0300
commit26df39de93bb8763bb3088db6c76eb98d4cd7213 (patch)
tree834991400838b64bcd6583fdc00deacf477c715f
parentbf05aeac230e390a5aee4bd3dc978b0c4d7e745f (diff)
parent0c8c5bdd7eaf291b6f727e98506fb68acee3a4cc (diff)
downloadlinux-26df39de93bb8763bb3088db6c76eb98d4cd7213.tar.xz
Merge tag 'amd-drm-next-6.12-2024-09-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.12-2024-09-13: amdgpu: - GPUVM sync fixes - kdoc fixes - Misc spelling mistakes - Add some raven GFXOFF quirks - Use clamp helper - DC fixes - JPEG fixes - Process isolation fix - Queue reset fix - W=1 cleanup - SMU14 fixes - JPEG fixes amdkfd: - Fetch cacheline info from IP discovery - Queue reset fix - RAS fix - Document SVM events - CRIU fixes - Race fix in dma-buf handling drm: - dma-buf fd race fixes Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20240913134139.2861073-1-alexander.deucher@amd.com
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Makefile18
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c99
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c76
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c63
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c93
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mes_v11_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c64
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c1
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c45
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_svm.c22
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c13
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c79
-rw-r--r--drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c20
-rw-r--r--drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h1
-rw-r--r--drivers/gpu/drm/amd/include/atomfirmware.h4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h11
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c12
-rw-r--r--drivers/gpu/drm/drm_prime.c84
-rw-r--r--include/drm/drm_prime.h3
-rw-r--r--include/uapi/linux/kfd_ioctl.h100
50 files changed, 678 insertions, 366 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 38408e4e158e..c7b18c52825d 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -39,23 +39,7 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \
-I$(FULL_AMD_PATH)/amdkfd
-subdir-ccflags-y := -Wextra
-subdir-ccflags-y += -Wunused
-subdir-ccflags-y += -Wmissing-prototypes
-subdir-ccflags-y += -Wmissing-declarations
-subdir-ccflags-y += -Wmissing-include-dirs
-subdir-ccflags-y += -Wold-style-definition
-subdir-ccflags-y += -Wmissing-format-attribute
-# Need this to avoid recursive variable evaluation issues
-cond-flags := $(call cc-option, -Wunused-but-set-variable) \
- $(call cc-option, -Wunused-const-variable) \
- $(call cc-option, -Wstringop-truncation) \
- $(call cc-option, -Wpacked-not-aligned)
-subdir-ccflags-y += $(cond-flags)
-subdir-ccflags-y += -Wno-unused-parameter
-subdir-ccflags-y += -Wno-type-limits
-subdir-ccflags-y += -Wno-sign-compare
-subdir-ccflags-y += -Wno-missing-field-initializers
+# Locally disable W=1 warnings enabled in drm subsystem Makefile
subdir-ccflags-y += -Wno-override-init
subdir-ccflags-$(CONFIG_DRM_AMDGPU_WERROR) += -Werror
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e095572458cd..dcd59040c449 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -237,6 +237,7 @@ extern int sched_policy;
extern bool debug_evictions;
extern bool no_system_mem_limit;
extern int halt_if_hws_hang;
+extern uint amdgpu_svm_default_granularity;
#else
static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
static const bool __maybe_unused debug_evictions; /* = false */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 73b2b401b450..9435af2e6bdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -20,7 +20,6 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <linux/module.h>
-#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include "amdgpu.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 6d5fd371d5ce..4afef5b46c7d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,7 +25,6 @@
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
-#include <linux/fdtable.h>
#include <drm/ttm/ttm_tt.h>
#include <drm/drm_exec.h>
@@ -818,18 +817,13 @@ static int kfd_mem_export_dmabuf(struct kgd_mem *mem)
if (!mem->dmabuf) {
struct amdgpu_device *bo_adev;
struct dma_buf *dmabuf;
- int r, fd;
bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
- r = drm_gem_prime_handle_to_fd(&bo_adev->ddev, bo_adev->kfd.client.file,
+ dmabuf = drm_gem_prime_handle_to_dmabuf(&bo_adev->ddev, bo_adev->kfd.client.file,
mem->gem_handle,
mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ?
- DRM_RDWR : 0, &fd);
- if (r)
- return r;
- dmabuf = dma_buf_get(fd);
- close_fd(fd);
- if (WARN_ON_ONCE(IS_ERR(dmabuf)))
+ DRM_RDWR : 0);
+ if (IS_ERR(dmabuf))
return PTR_ERR(dmabuf);
mem->dmabuf = dmabuf;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 8dee7c62c801..86cff30d5c4e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -169,6 +169,16 @@ uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu;
char *amdgpu_virtual_display;
bool enforce_isolation;
+
+/* Specifies the default granularity for SVM, used in buffer
+ * migration and restoration of backing memory when handling
+ * recoverable page faults.
+ *
+ * The value is given as log(numPages(buffer)); for a 2 MiB
+ * buffer it computes to be 9
+ */
+uint amdgpu_svm_default_granularity = 9;
+
/*
* OverDrive(bit 14) disabled by default
* GFX DCS(bit 19) disabled by default
@@ -321,6 +331,13 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
+ * DOC: svm_default_granularity (uint)
+ * Used in buffer migration and handling of recoverable page faults
+ */
+MODULE_PARM_DESC(svm_default_granularity, "SVM's default granularity in log(2^Pages), default 9 = 2^9 = 2 MiB");
+module_param_named(svm_default_granularity, amdgpu_svm_default_granularity, uint, 0644);
+
+/**
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index b779d47a546a..83e54697f0ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -1397,14 +1397,23 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev,
static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- long timeout = msecs_to_jiffies(1000);
- struct dma_fence *f = NULL;
+ struct drm_gpu_scheduler *sched = &ring->sched;
+ struct drm_sched_entity entity;
+ struct dma_fence *f;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
int i, r;
- r = amdgpu_job_alloc_with_ib(adev, NULL, NULL,
- 64, AMDGPU_IB_POOL_DIRECT,
+ /* Initialize the scheduler entity */
+ r = drm_sched_entity_init(&entity, DRM_SCHED_PRIORITY_NORMAL,
+ &sched, 1, NULL);
+ if (r) {
+ dev_err(adev->dev, "Failed setting up GFX kernel entity.\n");
+ goto err;
+ }
+
+ r = amdgpu_job_alloc_with_ib(ring->adev, &entity, NULL,
+ 64, 0,
&job);
if (r)
goto err;
@@ -1416,24 +1425,18 @@ static int amdgpu_gfx_run_cleaner_shader_job(struct amdgpu_ring *ring)
ib->ptr[i] = ring->funcs->nop;
ib->length_dw = ring->funcs->align_mask + 1;
- r = amdgpu_job_submit_direct(job, ring, &f);
- if (r)
- goto err_free;
+ f = amdgpu_job_submit(job);
- r = dma_fence_wait_timeout(f, false, timeout);
- if (r == 0)
- r = -ETIMEDOUT;
- else if (r > 0)
- r = 0;
+ r = dma_fence_wait(f, false);
+ if (r)
+ goto err;
- amdgpu_ib_free(adev, ib, f);
dma_fence_put(f);
+ /* Clean up the scheduler entity */
+ drm_sched_entity_destroy(&entity);
return 0;
-err_free:
- amdgpu_job_free(job);
- amdgpu_ib_free(adev, ib, f);
err:
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index f7d5d4f08a53..10b61ff63802 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -832,6 +832,7 @@ int amdgpu_mes_reset_hw_queue_mmio(struct amdgpu_device *adev, int queue_type,
struct mes_reset_queue_input queue_input;
int r;
+ queue_input.queue_type = queue_type;
queue_input.use_mmio = true;
queue_input.me_id = me_id;
queue_input.pipe_id = pipe_id;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
index 0bb2466d539a..675aa138ea11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pll.c
@@ -94,7 +94,7 @@ static void amdgpu_pll_get_fb_ref_div(struct amdgpu_device *adev, unsigned int n
ref_div_max = min(128 / post_div, ref_div_max);
/* get matching reference and feedback divider */
- *ref_div = min(max(DIV_ROUND_CLOSEST(den, post_div), 1u), ref_div_max);
+ *ref_div = clamp(DIV_ROUND_CLOSEST(den, post_div), 1u, ref_div_max);
*fb_div = DIV_ROUND_CLOSEST(nom * *ref_div * post_div, den);
/* limit fb divider to its maximum */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 863b2a34b2d6..f9ff493c100e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -22,7 +22,6 @@
* Authors: Andres Rodriguez <andresx7@gmail.com>
*/
-#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/pid.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e20d19ae01b2..2452dfa6314f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -844,7 +844,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL);
if (r)
goto error;
@@ -908,10 +908,12 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
{
struct amdgpu_vm *vm = params->vm;
- if (!fence || !*fence)
+ tlb_cb->vm = vm;
+ if (!fence || !*fence) {
+ amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
return;
+ }
- tlb_cb->vm = vm;
if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
amdgpu_vm_tlb_seq_cb)) {
dma_fence_put(vm->last_tlb_flush);
@@ -939,7 +941,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
* @unlocked: unlocked invalidation during MM callback
* @flush_tlb: trigger tlb invalidation after update completed
* @allow_override: change MTYPE for local NUMA nodes
- * @resv: fences we need to sync to
+ * @sync: fences we need to sync to
* @start: start of mapped range
* @last: last mapped entry
* @flags: flags for the entries
@@ -955,16 +957,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
* 0 for success, negative erro code for failure.
*/
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
- struct dma_resv *resv, uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset, uint64_t vram_base,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence)
{
struct amdgpu_vm_tlb_seq_struct *tlb_cb;
struct amdgpu_vm_update_params params;
struct amdgpu_res_cursor cursor;
- enum amdgpu_sync_mode sync_mode;
int r, idx;
if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -997,14 +999,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.allow_override = allow_override;
INIT_LIST_HEAD(&params.tlb_flush_waitlist);
- /* Implicitly sync to command submissions in the same VM before
- * unmapping. Sync to moving fences before mapping.
- */
- if (!(flags & AMDGPU_PTE_VALID))
- sync_mode = AMDGPU_SYNC_EQ_OWNER;
- else
- sync_mode = AMDGPU_SYNC_EXPLICIT;
-
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
@@ -1019,7 +1013,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
dma_fence_put(tmp);
}
- r = vm->update_funcs->prepare(&params, resv, sync_mode);
+ r = vm->update_funcs->prepare(&params, sync);
if (r)
goto error_free;
@@ -1161,23 +1155,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
struct amdgpu_bo *bo = bo_va->base.bo;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_va_mapping *mapping;
+ struct dma_fence **last_update;
dma_addr_t *pages_addr = NULL;
struct ttm_resource *mem;
- struct dma_fence **last_update;
+ struct amdgpu_sync sync;
bool flush_tlb = clear;
- bool uncached;
- struct dma_resv *resv;
uint64_t vram_base;
uint64_t flags;
+ bool uncached;
int r;
+ amdgpu_sync_create(&sync);
if (clear || !bo) {
mem = NULL;
- resv = vm->root.bo->tbo.base.resv;
+
+ /* Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
} else {
struct drm_gem_object *obj = &bo->tbo.base;
- resv = bo->tbo.base.resv;
if (obj->import_attach && bo_va->is_xgmi) {
struct dma_buf *dma_buf = obj->import_attach->dmabuf;
struct drm_gem_object *gobj = dma_buf->priv;
@@ -1191,6 +1192,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
if (mem && (mem->mem_type == TTM_PL_TT ||
mem->mem_type == AMDGPU_PL_PREEMPT))
pages_addr = bo->tbo.ttm->dma_address;
+
+ /* Implicitly sync to moving fences before mapping anything */
+ r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
+ AMDGPU_SYNC_EXPLICIT, vm);
+ if (r)
+ goto error_free;
}
if (bo) {
@@ -1240,12 +1247,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_update(mapping);
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
- !uncached, resv, mapping->start, mapping->last,
- update_flags, mapping->offset,
- vram_base, mem, pages_addr,
- last_update);
+ !uncached, &sync, mapping->start,
+ mapping->last, update_flags,
+ mapping->offset, vram_base, mem,
+ pages_addr, last_update);
if (r)
- return r;
+ goto error_free;
}
/* If the BO is not in its preferred location add it back to
@@ -1273,7 +1280,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
trace_amdgpu_vm_bo_mapping(mapping);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
/**
@@ -1420,25 +1429,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct dma_fence **fence)
{
- struct dma_resv *resv = vm->root.bo->tbo.base.resv;
struct amdgpu_bo_va_mapping *mapping;
- uint64_t init_pte_value = 0;
struct dma_fence *f = NULL;
+ struct amdgpu_sync sync;
int r;
+
+ /*
+ * Implicitly sync to command submissions in the same VM before
+ * unmapping.
+ */
+ amdgpu_sync_create(&sync);
+ r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
+ AMDGPU_SYNC_EQ_OWNER, vm);
+ if (r)
+ goto error_free;
+
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
- resv, mapping->start, mapping->last,
- init_pte_value, 0, 0, NULL, NULL,
- &f);
+ &sync, mapping->start, mapping->last,
+ 0, 0, 0, NULL, NULL, &f);
amdgpu_vm_free_mapping(adev, vm, mapping, f);
if (r) {
dma_fence_put(f);
- return r;
+ goto error_free;
}
}
@@ -1449,7 +1467,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
dma_fence_put(f);
}
- return 0;
+error_free:
+ amdgpu_sync_free(&sync);
+ return r;
}
@@ -2224,7 +2244,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
phys_ram_gb = ((uint64_t)si.totalram * si.mem_unit +
(1 << 30) - 1) >> 30;
vm_size = roundup_pow_of_two(
- min(max(phys_ram_gb * 3, min_vm_size), max_size));
+ clamp(phys_ram_gb * 3, min_vm_size, max_size));
}
adev->vm_manager.max_pfn = (uint64_t)vm_size << 18;
@@ -2403,7 +2423,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int32_t xcp_id)
{
- struct amdgpu_ip_block *ip_block;
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
int r, i;
@@ -2435,11 +2454,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
- /* use CPU for page table update if SDMA is unavailable */
- ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_SDMA);
- if (!ip_block || ip_block->status.valid == false)
- vm->use_cpu_for_update = true;
-
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
@@ -2770,6 +2784,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
* amdgpu_vm_handle_fault - graceful handling of VM faults.
* @adev: amdgpu device pointer
* @pasid: PASID of the VM
+ * @ts: Timestamp of the fault
* @vmid: VMID, only used for GFX 9.4.3.
* @node_id: Node_id received in IH cookie. Only applicable for
* GFX 9.4.3.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d12d66dca8e9..52dd7cdfdc81 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -304,8 +304,8 @@ struct amdgpu_vm_update_params {
struct amdgpu_vm_update_funcs {
int (*map_table)(struct amdgpu_bo_vm *bo);
- int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode);
+ int (*prepare)(struct amdgpu_vm_update_params *p,
+ struct amdgpu_sync *sync);
int (*update)(struct amdgpu_vm_update_params *p,
struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr, uint64_t flags);
@@ -505,9 +505,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm, struct amdgpu_bo *bo);
int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
- struct dma_resv *resv, uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset, uint64_t vram_base,
+ bool immediate, bool unlocked, bool flush_tlb,
+ bool allow_override, struct amdgpu_sync *sync,
+ uint64_t start, uint64_t last, uint64_t flags,
+ uint64_t offset, uint64_t vram_base,
struct ttm_resource *res, dma_addr_t *pages_addr,
struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 3895bd7d176a..0c1ef5850a5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -39,20 +39,18 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
* amdgpu_vm_cpu_prepare - prepare page table update with the CPU
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: sync obj with fences to wait on
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync)
{
- if (!resv)
+ if (!sync)
return 0;
- return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
+ return amdgpu_sync_wait(sync, true);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index e39d6e7643bf..a076f43097e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -403,7 +403,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
params.vm = vm;
params.immediate = immediate;
- r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
+ r = vm->update_funcs->prepare(&params, NULL);
if (r)
goto exit;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 9b748d7058b5..4772fba33285 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -77,32 +77,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
* amdgpu_vm_sdma_prepare - prepare SDMA command submission
*
* @p: see amdgpu_vm_update_params definition
- * @resv: reservation object with embedded fence
- * @sync_mode: synchronization mode
+ * @sync: amdgpu_sync object with fences to wait for
*
* Returns:
* Negativ errno, 0 for success.
*/
static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
- struct dma_resv *resv,
- enum amdgpu_sync_mode sync_mode)
+ struct amdgpu_sync *sync)
{
- struct amdgpu_sync sync;
int r;
r = amdgpu_vm_sdma_alloc_job(p, 0);
if (r)
return r;
- if (!resv)
+ if (!sync)
return 0;
- amdgpu_sync_create(&sync);
- r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
- if (!r)
- r = amdgpu_sync_push_to_job(&sync, p->job);
- amdgpu_sync_free(&sync);
-
+ r = amdgpu_sync_push_to_job(sync, p->job);
if (r) {
p->num_dw_left = 0;
amdgpu_job_free(p->job);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 8cf5d7925b51..23f0573ae47b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1345,6 +1345,10 @@ static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
{ 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
/* Apple MacBook Pro (15-inch, 2019) Radeon Pro Vega 20 4 GB */
{ 0x1002, 0x69af, 0x106b, 0x019a, 0xc0 },
+ /* https://bbs.openkylin.top/t/topic/171497 */
+ { 0x1002, 0x15d8, 0x19e5, 0x3e14, 0xc2 },
+ /* HP 705G4 DM with R5 2400G */
+ { 0x1002, 0x15dd, 0x103c, 0x8464, 0xd6 },
{ 0, 0, 0, 0, 0 },
};
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
index 71f43a5c7f72..6e0e88076224 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v1_0.h"
@@ -34,6 +35,9 @@
static void jpeg_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev);
static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring);
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
static void jpeg_v1_0_decode_ring_patch_wreg(struct amdgpu_ring *ring, uint32_t *ptr, uint32_t reg_offset, uint32_t val)
{
@@ -300,7 +304,10 @@ static void jpeg_v1_0_decode_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4)));
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4)));
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(JPEG, 0, mmUVD_LMI_JPEG_VMID), 0, 0, PACKETJ_TYPE0));
@@ -554,6 +561,7 @@ static const struct amdgpu_ring_funcs jpeg_v1_0_decode_ring_vm_funcs = {
.get_rptr = jpeg_v1_0_decode_ring_get_rptr,
.get_wptr = jpeg_v1_0_decode_ring_get_wptr,
.set_wptr = jpeg_v1_0_decode_ring_set_wptr,
+ .parse_cs = jpeg_v1_dec_ring_parse_cs,
.emit_frame_size =
6 + 6 + /* hdp invalidate / flush */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
@@ -611,3 +619,69 @@ static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring)
vcn_v1_0_set_pg_for_begin_use(ring, set_clocks);
}
+
+/**
+ * jpeg_v1_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+static int jpeg_v1_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ int ret = 0;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res || cond != PACKETJ_CONDITION_CHECK0) /* only allow 0 for now */
+ return -EINVAL;
+
+ if (reg >= JPEG_V1_REG_RANGE_START && reg <= JPEG_V1_REG_RANGE_END)
+ continue;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH &&
+ reg != JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW &&
+ reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH &&
+ reg != JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW &&
+ reg != JPEG_V1_REG_CTX_INDEX &&
+ reg != JPEG_V1_REG_CTX_DATA) {
+ ret = -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE1:
+ if (reg != JPEG_V1_REG_CTX_DATA)
+ ret = -EINVAL;
+ break;
+ case PACKETJ_TYPE3:
+ if (reg != JPEG_V1_REG_SOFT_RESET)
+ ret = -EINVAL;
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] != CP_PACKETJ_NOP)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
index bbf33a6a3972..9654d22e0376 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.h
@@ -29,4 +29,15 @@ int jpeg_v1_0_sw_init(void *handle);
void jpeg_v1_0_sw_fini(void *handle);
void jpeg_v1_0_start(struct amdgpu_device *adev, int mode);
+#define JPEG_V1_REG_RANGE_START 0x8000
+#define JPEG_V1_REG_RANGE_END 0x803f
+
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_HIGH 0x8238
+#define JPEG_V1_LMI_JPEG_WRITE_64BIT_BAR_LOW 0x8239
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_HIGH 0x825a
+#define JPEG_V1_LMI_JPEG_READ_64BIT_BAR_LOW 0x825b
+#define JPEG_V1_REG_CTX_INDEX 0x8328
+#define JPEG_V1_REG_CTX_DATA 0x8329
+#define JPEG_V1_REG_SOFT_RESET 0x83a0
+
#endif /*__JPEG_V1_0_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index 98aa3ccd0d20..41c0f8750dc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -23,6 +23,7 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
+#include "amdgpu_cs.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
@@ -538,7 +539,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
+
+ if (ring->funcs->parse_cs)
+ amdgpu_ring_write(ring, 0);
+ else
+ amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8)));
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
@@ -764,6 +769,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_0_dec_ring_get_rptr,
.get_wptr = jpeg_v2_0_dec_ring_get_wptr,
.set_wptr = jpeg_v2_0_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -810,3 +816,58 @@ const struct amdgpu_ip_block_version jpeg_v2_0_ip_block = {
.rev = 0,
.funcs = &jpeg_v2_0_ip_funcs,
};
+
+/**
+ * jpeg_v2_dec_ring_parse_cs - command submission parser
+ *
+ * @parser: Command submission parser context
+ * @job: the job to parse
+ * @ib: the IB to parse
+ *
+ * Parse the command stream, return -EINVAL for invalid packet,
+ * 0 otherwise
+ */
+int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib)
+{
+ u32 i, reg, res, cond, type;
+ struct amdgpu_device *adev = parser->adev;
+
+ for (i = 0; i < ib->length_dw ; i += 2) {
+ reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
+ res = CP_PACKETJ_GET_RES(ib->ptr[i]);
+ cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
+ type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
+
+ if (res) /* only support 0 at the moment */
+ return -EINVAL;
+
+ switch (type) {
+ case PACKETJ_TYPE0:
+ if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START ||
+ reg > JPEG_REG_RANGE_END) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE3:
+ if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START ||
+ reg > JPEG_REG_RANGE_END) {
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ }
+ break;
+ case PACKETJ_TYPE6:
+ if (ib->ptr[i] == CP_PACKETJ_NOP)
+ continue;
+ dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
+ return -EINVAL;
+ default:
+ dev_err(adev->dev, "Unknown packet type %d !\n", type);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 654e43e83e2c..63fadda7a673 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -45,6 +45,9 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
+#define JPEG_REG_RANGE_START 0x4000
+#define JPEG_REG_RANGE_END 0x41c2
+
void jpeg_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
@@ -57,6 +60,9 @@ void jpeg_v2_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr);
void jpeg_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count);
+int jpeg_v2_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
+ struct amdgpu_job *job,
+ struct amdgpu_ib *ib);
extern const struct amdgpu_ip_block_version jpeg_v2_0_ip_block;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index d8ef95c847c2..eedb9a829d95 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -662,6 +662,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -691,6 +692,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = {
.get_rptr = jpeg_v2_5_dec_ring_get_rptr,
.get_wptr = jpeg_v2_5_dec_ring_get_wptr,
.set_wptr = jpeg_v2_5_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
index 31cfa3ce6528..b1e7fd25afbc 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c
@@ -560,6 +560,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v3_0_dec_ring_get_rptr,
.get_wptr = jpeg_v3_0_dec_ring_get_wptr,
.set_wptr = jpeg_v3_0_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 3dac8f259d7f..6c5c1a68a9b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -727,6 +727,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
index 07d36c2abd6b..47638fd4d4e2 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -32,5 +32,4 @@ enum amdgpu_jpeg_v4_0_sub_block {
};
extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
-
#endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index 6ae5a784e187..86958cb2c2ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -23,9 +23,9 @@
#include "amdgpu.h"
#include "amdgpu_jpeg.h"
-#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
+#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "mmsch_v4_0_3.h"
@@ -59,6 +59,12 @@ static int amdgpu_ih_srcid_jpeg[] = {
VCN_4_0__SRCID__JPEG7_DECODE
};
+static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev)
+{
+ return amdgpu_sriov_vf(adev) ||
+ (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4));
+}
+
/**
* jpeg_v4_0_3_early_init - set function pointers
*
@@ -734,32 +740,20 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE4));
amdgpu_ring_write(ring, 0);
- if (ring->adev->jpeg.inst[ring->me].aid_id) {
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x4);
- } else {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_REG_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x3fbc);
- if (ring->adev->jpeg.inst[ring->me].aid_id) {
- amdgpu_ring_write(ring, PACKETJ(regUVD_JRBC_EXTERNAL_MCM_ADDR_INTERNAL_OFFSET,
- 0, PACKETJ_CONDITION_CHECK0, PACKETJ_TYPE0));
- amdgpu_ring_write(ring, 0x0);
- } else {
- amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
- amdgpu_ring_write(ring, 0);
- }
-
amdgpu_ring_write(ring, PACKETJ(JRBC_DEC_EXTERNAL_REG_WRITE_ADDR,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, 0x1);
+ amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE6));
+ amdgpu_ring_write(ring, 0);
+
amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7));
amdgpu_ring_write(ring, 0);
}
@@ -834,8 +828,8 @@ void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
{
uint32_t reg_offset;
- /* For VF, only local offsets should be used */
- if (amdgpu_sriov_vf(ring->adev))
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_JPEG_REG_OFFSET(reg);
reg_offset = (reg << 2);
@@ -881,8 +875,8 @@ void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint
{
uint32_t reg_offset;
- /* For VF, only local offsets should be used */
- if (amdgpu_sriov_vf(ring->adev))
+ /* Use normalized offsets if required */
+ if (jpeg_v4_0_3_normalizn_reqd(ring->adev))
reg = NORMALIZE_JPEG_REG_OFFSET(reg);
reg_offset = (reg << 2);
@@ -1089,7 +1083,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_3_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_3_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_3_dec_ring_set_wptr,
- .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
@@ -1254,56 +1248,3 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev)
{
adev->jpeg.ras = &jpeg_v4_0_3_ras;
}
-
-/**
- * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser
- *
- * @parser: Command submission parser context
- * @job: the job to parse
- * @ib: the IB to parse
- *
- * Parse the command stream, return -EINVAL for invalid packet,
- * 0 otherwise
- */
-int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib)
-{
- uint32_t i, reg, res, cond, type;
- struct amdgpu_device *adev = parser->adev;
-
- for (i = 0; i < ib->length_dw ; i += 2) {
- reg = CP_PACKETJ_GET_REG(ib->ptr[i]);
- res = CP_PACKETJ_GET_RES(ib->ptr[i]);
- cond = CP_PACKETJ_GET_COND(ib->ptr[i]);
- type = CP_PACKETJ_GET_TYPE(ib->ptr[i]);
-
- if (res) /* only support 0 at the moment */
- return -EINVAL;
-
- switch (type) {
- case PACKETJ_TYPE0:
- if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- }
- break;
- case PACKETJ_TYPE3:
- if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) {
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- }
- break;
- case PACKETJ_TYPE6:
- if (ib->ptr[i] == CP_PACKETJ_NOP)
- continue;
- dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]);
- return -EINVAL;
- default:
- dev_err(adev->dev, "Unknown packet type %d !\n", type);
- return -EINVAL;
- }
- }
-
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
index 71c54b294e15..747a3e5f6856 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h
@@ -46,9 +46,6 @@
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
-#define JPEG_REG_RANGE_START 0x4000
-#define JPEG_REG_RANGE_END 0x41c2
-
extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block;
void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring,
@@ -65,7 +62,5 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring);
void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val, uint32_t mask);
-int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser,
- struct amdgpu_job *job,
- struct amdgpu_ib *ib);
+
#endif /* __JPEG_V4_0_3_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
index f96ac6bce526..44eeed445ea9 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c
@@ -768,6 +768,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = {
.get_rptr = jpeg_v4_0_5_dec_ring_get_rptr,
.get_wptr = jpeg_v4_0_5_dec_ring_get_wptr,
.set_wptr = jpeg_v4_0_5_dec_ring_set_wptr,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
index f4daff90c770..d662aa841f97 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c
@@ -26,6 +26,7 @@
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
+#include "jpeg_v2_0.h"
#include "jpeg_v4_0_3.h"
#include "vcn/vcn_5_0_0_offset.h"
@@ -646,7 +647,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = {
.get_rptr = jpeg_v5_0_0_dec_ring_get_rptr,
.get_wptr = jpeg_v5_0_0_dec_ring_get_wptr,
.set_wptr = jpeg_v5_0_0_dec_ring_set_wptr,
- .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs,
+ .parse_cs = jpeg_v2_dec_ring_parse_cs,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 0f055d1b1da6..ee91ff9e52a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -415,7 +415,7 @@ static int mes_v11_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_typ
/* wait till dequeue take effects */
for (i = 0; i < adev->usec_timeout; i++) {
if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1))
- break;
+ break;
udelay(1);
}
if (i >= adev->usec_timeout) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 00350eccd571..9044bdb38cf4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -36,7 +36,6 @@
#include <linux/mman.h>
#include <linux/ptrace.h>
#include <linux/dma-buf.h>
-#include <linux/fdtable.h>
#include <linux/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
@@ -1835,7 +1834,8 @@ static uint32_t get_process_num_bos(struct kfd_process *p)
}
static int criu_get_prime_handle(struct kgd_mem *mem,
- int flags, u32 *shared_fd)
+ int flags, u32 *shared_fd,
+ struct file **file)
{
struct dma_buf *dmabuf;
int ret;
@@ -1846,13 +1846,14 @@ static int criu_get_prime_handle(struct kgd_mem *mem,
return ret;
}
- ret = dma_buf_fd(dmabuf, flags);
+ ret = get_unused_fd_flags(flags);
if (ret < 0) {
pr_err("dmabuf create fd failed, ret:%d\n", ret);
goto out_free_dmabuf;
}
*shared_fd = ret;
+ *file = dmabuf->file;
return 0;
out_free_dmabuf:
@@ -1860,6 +1861,25 @@ out_free_dmabuf:
return ret;
}
+static void commit_files(struct file **files,
+ struct kfd_criu_bo_bucket *bo_buckets,
+ unsigned int count,
+ int err)
+{
+ while (count--) {
+ struct file *file = files[count];
+
+ if (!file)
+ continue;
+ if (err) {
+ fput(file);
+ put_unused_fd(bo_buckets[count].dmabuf_fd);
+ } else {
+ fd_install(bo_buckets[count].dmabuf_fd, file);
+ }
+ }
+}
+
static int criu_checkpoint_bos(struct kfd_process *p,
uint32_t num_bos,
uint8_t __user *user_bos,
@@ -1868,6 +1888,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets;
struct kfd_criu_bo_priv_data *bo_privs;
+ struct file **files = NULL;
int ret = 0, pdd_index, bo_index = 0, id;
void *mem;
@@ -1881,6 +1902,12 @@ static int criu_checkpoint_bos(struct kfd_process *p,
goto exit;
}
+ files = kvzalloc(num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
struct kfd_process_device *pdd = p->pdds[pdd_index];
struct amdgpu_bo *dumper_bo;
@@ -1923,7 +1950,7 @@ static int criu_checkpoint_bos(struct kfd_process *p,
ret = criu_get_prime_handle(kgd_mem,
bo_bucket->alloc_flags &
KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DRM_RDWR : 0,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, &files[bo_index]);
if (ret)
goto exit;
} else {
@@ -1974,12 +2001,8 @@ static int criu_checkpoint_bos(struct kfd_process *p,
*priv_offset += num_bos * sizeof(*bo_privs);
exit:
- while (ret && bo_index--) {
- if (bo_buckets[bo_index].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[bo_index].dmabuf_fd);
- }
-
+ commit_files(files, bo_buckets, bo_index, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
@@ -2331,7 +2354,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd,
static int criu_restore_bo(struct kfd_process *p,
struct kfd_criu_bo_bucket *bo_bucket,
- struct kfd_criu_bo_priv_data *bo_priv)
+ struct kfd_criu_bo_priv_data *bo_priv,
+ struct file **file)
{
struct kfd_process_device *pdd;
struct kgd_mem *kgd_mem;
@@ -2383,7 +2407,7 @@ static int criu_restore_bo(struct kfd_process *p,
if (bo_bucket->alloc_flags
& (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT)) {
ret = criu_get_prime_handle(kgd_mem, DRM_RDWR,
- &bo_bucket->dmabuf_fd);
+ &bo_bucket->dmabuf_fd, file);
if (ret)
return ret;
} else {
@@ -2400,6 +2424,7 @@ static int criu_restore_bos(struct kfd_process *p,
{
struct kfd_criu_bo_bucket *bo_buckets = NULL;
struct kfd_criu_bo_priv_data *bo_privs = NULL;
+ struct file **files = NULL;
int ret = 0;
uint32_t i = 0;
@@ -2413,6 +2438,12 @@ static int criu_restore_bos(struct kfd_process *p,
if (!bo_buckets)
return -ENOMEM;
+ files = kvzalloc(args->num_bos * sizeof(struct file *), GFP_KERNEL);
+ if (!files) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
ret = copy_from_user(bo_buckets, (void __user *)args->bos,
args->num_bos * sizeof(*bo_buckets));
if (ret) {
@@ -2438,7 +2469,7 @@ static int criu_restore_bos(struct kfd_process *p,
/* Create and map new BOs */
for (; i < args->num_bos; i++) {
- ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i]);
+ ret = criu_restore_bo(p, &bo_buckets[i], &bo_privs[i], &files[i]);
if (ret) {
pr_debug("Failed to restore BO[%d] ret%d\n", i, ret);
goto exit;
@@ -2453,11 +2484,8 @@ static int criu_restore_bos(struct kfd_process *p,
ret = -EFAULT;
exit:
- while (ret && i--) {
- if (bo_buckets[i].alloc_flags
- & (KFD_IOC_ALLOC_MEM_FLAGS_VRAM | KFD_IOC_ALLOC_MEM_FLAGS_GTT))
- close_fd(bo_buckets[i].dmabuf_fd);
- }
+ commit_files(files, bo_buckets, i, ret);
+ kvfree(files);
kvfree(bo_buckets);
kvfree(bo_privs);
return ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cd7b81b7b939..48caecf7e72e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1434,7 +1434,8 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
- pcache_info[0].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_tcp_per_wpg / 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcp_cache_line_size;
i++;
}
/* Scalar L1 Instruction Cache per SQC */
@@ -1446,6 +1447,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_INST_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_instruction_cache_line_size;
i++;
}
/* Scalar L1 Data Cache per SQC */
@@ -1456,6 +1458,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.gc_num_sqc_per_wgp * 2;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_scalar_data_cache_line_size;
i++;
}
/* GL1 Data Cache per SA */
@@ -1468,6 +1471,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 0;
i++;
}
/* L2 Data Cache per GPU (Total Tex Cache) */
@@ -1478,6 +1482,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = adev->gfx.config.gc_tcc_cache_line_size;
i++;
}
/* L3 Data Cache per GPU */
@@ -1488,6 +1493,7 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
CRAT_CACHE_FLAGS_DATA_CACHE |
CRAT_CACHE_FLAGS_SIMD_CACHE);
pcache_info[i].num_cu_shared = adev->gfx.config.max_cu_per_sh;
+ pcache_info[i].cache_line_size = 0;
i++;
}
return i;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 577d121cc6d1..71b465f8d83e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2407,10 +2407,9 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
pdd->sdma_past_activity_counter += sdma_val;
}
- list_del(&q->list);
- qpd->queue_count--;
if (q->properties.is_active) {
decrement_queue_count(dqm, qpd, q);
+ q->properties.is_active = false;
if (!dqm->dev->kfd->shared_resources.enable_mes) {
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
@@ -2421,6 +2420,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
retval = remove_queue_mes(dqm, q, qpd);
}
}
+ list_del(&q->list);
+ qpd->queue_count--;
/*
* Unconditionally decrement this counter, regardless of the queue's
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index fecdbbab9894..d46a13156ee9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -167,11 +167,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SE3SH:
case SOC15_IH_CLIENTID_UTCL2:
block = AMDGPU_RAS_BLOCK__GFX;
- if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
- reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
- else
+ if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
break;
case SOC15_IH_CLIENTID_VMC:
case SOC15_IH_CLIENTID_VMC1:
@@ -184,11 +196,23 @@ static void event_interrupt_poison_consumption_v9(struct kfd_node *dev,
case SOC15_IH_CLIENTID_SDMA3:
case SOC15_IH_CLIENTID_SDMA4:
block = AMDGPU_RAS_BLOCK__SDMA;
- if (amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
- amdgpu_ip_version(dev->adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))
- reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
- else
+ if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x00557300 and onwards */
+ if (dev->adev->pm.fw_version < 0x00557300)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else if (amdgpu_ip_version(dev->adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) {
+ /* driver mode-2 for gfx poison is only supported by
+ * pmfw 0x05550C00 and onwards */
+ if (dev->adev->pm.fw_version < 0x05550C00)
+ reset = AMDGPU_RAS_GPU_RESET_MODE1_RESET;
+ else
+ reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ } else {
reset = AMDGPU_RAS_GPU_RESET_MODE2_RESET;
+ }
break;
default:
dev_warn(dev->adev->dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 9ae9abc6eb43..d6530febabad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -868,6 +868,12 @@ struct svm_range_list {
struct task_struct *faulting_task;
/* check point ts decides if page fault recovery need be dropped */
uint64_t checkpoint_ts[MAX_GPU_INSTANCE];
+
+ /* Default granularity to use in buffer migration
+ * and restoration of backing memory while handling
+ * recoverable page faults
+ */
+ uint8_t default_granularity;
};
/* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 20ea745729ee..b439d4d0bd84 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1046,6 +1046,7 @@ exit:
pr_debug("Queue id %d was restored successfully\n", queue_id);
kfree(q_data);
+ kfree(q_extra_data);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
index ea6a8e43bd5b..de8b9abf7afc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
@@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset,
amdgpu_reset_get_desc(reset_context, reset_cause,
sizeof(reset_cause));
- kfd_smi_event_add(0, dev, event, "%x %s\n",
- dev->reset_seq_num,
- reset_cause);
+ kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET(
+ dev->reset_seq_num, reset_cause));
}
void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev,
uint64_t throttle_bitmask)
{
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING(
throttle_bitmask,
- amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
+ amdgpu_dpm_get_thermal_throttling_counter(dev->adev)));
}
void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
@@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid)
if (task_info) {
/* Report VM faults from user applications, not retry from kernel */
if (task_info->pid)
- kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
- task_info->pid, task_info->task_name);
+ kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT(
+ task_info->pid, task_info->task_name));
amdgpu_vm_put_task_info(task_info);
}
}
@@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid,
ktime_t ts)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START,
- "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid,
- address, node->id, write_fault ? 'W' : 'R');
+ KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid,
+ address, node->id, write_fault ? 'W' : 'R'));
}
void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid,
unsigned long address, bool migration)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END,
- "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(),
- pid, address, node->id, migration ? 'M' : 'U');
+ KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(),
+ pid, address, node->id, migration ? 'M' : 'U'));
}
void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
@@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START,
- "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_START(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, prefetch_loc, preferred_loc, trigger);
+ from, to, prefetch_loc, preferred_loc, trigger));
}
void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
@@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid,
uint32_t from, uint32_t to, uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END,
- "%lld -%d @%lx(%lx) %x->%x %d\n",
+ KFD_EVENT_FMT_MIGRATE_END(
ktime_get_boottime_ns(), pid, start, end - start,
- from, to, trigger);
+ from, to, trigger));
}
void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION,
- "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid,
- node->id, trigger);
+ KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid,
+ node->id, trigger));
}
void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x\n", ktime_get_boottime_ns(), pid,
- node->id);
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid,
+ node->id, 0));
}
void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
@@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm)
kfd_smi_event_add(p->lead_thread->pid, pdd->dev,
KFD_SMI_EVENT_QUEUE_RESTORE,
- "%lld -%d %x %c\n", ktime_get_boottime_ns(),
- p->lead_thread->pid, pdd->dev->id, 'R');
+ KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(),
+ p->lead_thread->pid, pdd->dev->id, 'R'));
}
kfd_unref_process(p);
}
@@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid,
uint32_t trigger)
{
kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU,
- "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(),
- pid, address, last - address + 1, node->id, trigger);
+ KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(),
+ pid, address, last - address + 1, node->id, trigger));
}
int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 40c94c4cdd96..04e746923697 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -309,12 +309,13 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
}
static void
-svm_range_set_default_attributes(int32_t *location, int32_t *prefetch_loc,
- uint8_t *granularity, uint32_t *flags)
+svm_range_set_default_attributes(struct svm_range_list *svms, int32_t *location,
+ int32_t *prefetch_loc, uint8_t *granularity,
+ uint32_t *flags)
{
*location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
*prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
- *granularity = 9;
+ *granularity = svms->default_granularity;
*flags =
KFD_IOCTL_SVM_FLAG_HOST_ACCESS | KFD_IOCTL_SVM_FLAG_COHERENT;
}
@@ -358,7 +359,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
MAX_GPU_INSTANCE);
- svm_range_set_default_attributes(&prange->preferred_loc,
+ svm_range_set_default_attributes(svms, &prange->preferred_loc,
&prange->prefetch_loc,
&prange->granularity, &prange->flags);
@@ -2703,9 +2704,10 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
*is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
- (unsigned long)ALIGN_DOWN(addr, 2UL << 8));
+ (unsigned long)ALIGN_DOWN(addr, 1UL << p->svms.default_granularity));
end_limit = min(vma->vm_end >> PAGE_SHIFT,
- (unsigned long)ALIGN(addr + 1, 2UL << 8));
+ (unsigned long)ALIGN(addr + 1, 1UL << p->svms.default_granularity));
+
/* First range that starts after the fault address */
node = interval_tree_iter_first(&p->svms.objects, addr + 1, ULONG_MAX);
if (node) {
@@ -3249,6 +3251,12 @@ int svm_range_list_init(struct kfd_process *p)
if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev))
bitmap_set(svms->bitmap_supported, i, 1);
+ /* Value of default granularity cannot exceed 0x1B, the
+ * number of pages supported by a 4-level paging table
+ */
+ svms->default_granularity = min_t(u8, amdgpu_svm_default_granularity, 0x1B);
+ pr_debug("Default SVM Granularity to use: %d\n", svms->default_granularity);
+
return 0;
}
@@ -3776,7 +3784,7 @@ svm_range_get_attr(struct kfd_process *p, struct mm_struct *mm,
node = interval_tree_iter_first(&svms->objects, start, last);
if (!node) {
pr_debug("range attrs not found return default values\n");
- svm_range_set_default_attributes(&location, &prefetch_loc,
+ svm_range_set_default_attributes(svms, &location, &prefetch_loc,
&granularity, &flags_and);
flags_or = flags_and;
if (p->xnack_enabled)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index a8d0d1b71723..0cff66735cfe 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -10610,7 +10610,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
* TODO: We can likely skip bandwidth validation if the only thing that
* changed about the plane was it'z z-ordering.
*/
- if (new_crtc_state->zpos_changed)
+ if (old_plane_state->normalized_zpos != new_plane_state->normalized_zpos)
return true;
if (drm_atomic_crtc_needs_modeset(new_crtc_state))
@@ -11458,6 +11458,17 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
drm_dbg(dev, "Failed to determine cursor mode\n");
goto fail;
}
+
+ /*
+ * If overlay cursor is needed, DC cannot go through the
+ * native cursor update path. All enabled planes on the CRTC
+ * need to be added for DC to not disable a plane by mistake
+ */
+ if (dm_new_crtc_state->cursor_mode == DM_CURSOR_OVERLAY_MODE) {
+ ret = drm_atomic_add_affected_planes(state, crtc);
+ if (ret)
+ goto fail;
+ }
}
/* Remove exiting planes if they are modified */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 6b5eed37532b..c0c61c03984c 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -1325,7 +1325,7 @@ static bool is_dsc_need_re_compute(
if (new_crtc_state->enable && new_crtc_state->active) {
if (new_crtc_state->mode_changed || new_crtc_state->active_changed ||
new_crtc_state->connectors_changed) {
- DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompte required."
+ DRM_DEBUG_DRIVER("%s:%d MST_DSC dsc recompute required."
"stream 0x%p in new dc_state\n",
__func__, __LINE__, stream);
is_dsc_need_re_compute = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
index ee02b78e290f..838d72eaa87f 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.c
@@ -1748,10 +1748,6 @@ void dccg35_init(struct dccg *dccg)
dccg35_set_dpstreamclk_root_clock_gating(dccg, otg_inst, false);
}
- if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp)
- for (otg_inst = 0; otg_inst < 4; otg_inst++)
- dccg35_set_dppclk_root_clock_gating(dccg, otg_inst, 0);
-
/*
dccg35_enable_global_fgcg_rep(
dccg, dccg->ctx->dc->debug.enable_fine_grain_clock_gating.bits
@@ -1932,47 +1928,32 @@ static void dccg35_enable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst,
}
/*get other front end connected to this backend*/
-static uint8_t dccg35_get_other_enabled_symclk_fe(struct dccg *dccg, uint32_t stream_enc_inst, uint32_t link_enc_inst)
+static uint8_t dccg35_get_number_enabled_symclk_fe_connected_to_be(struct dccg *dccg, uint32_t link_enc_inst)
{
uint8_t num_enabled_symclk_fe = 0;
- uint32_t be_clk_en = 0, fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
+ uint32_t fe_clk_en[5] = {0}, be_clk_sel[5] = {0};
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
- switch (link_enc_inst) {
- case 0:
- REG_GET_3(SYMCLKA_CLOCK_ENABLE, SYMCLKA_CLOCK_ENABLE, &be_clk_en,
- SYMCLKA_FE_EN, &fe_clk_en[0],
- SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
- break;
- case 1:
- REG_GET_3(SYMCLKB_CLOCK_ENABLE, SYMCLKB_CLOCK_ENABLE, &be_clk_en,
- SYMCLKB_FE_EN, &fe_clk_en[1],
- SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
- break;
- case 2:
- REG_GET_3(SYMCLKC_CLOCK_ENABLE, SYMCLKC_CLOCK_ENABLE, &be_clk_en,
- SYMCLKC_FE_EN, &fe_clk_en[2],
- SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
- break;
- case 3:
- REG_GET_3(SYMCLKD_CLOCK_ENABLE, SYMCLKD_CLOCK_ENABLE, &be_clk_en,
- SYMCLKD_FE_EN, &fe_clk_en[3],
- SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
- break;
- case 4:
- REG_GET_3(SYMCLKE_CLOCK_ENABLE, SYMCLKE_CLOCK_ENABLE, &be_clk_en,
- SYMCLKE_FE_EN, &fe_clk_en[4],
- SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
- break;
- }
- if (be_clk_en) {
- /* for DPMST, this backend could be used by multiple front end.
- only disable the backend if this stream_enc_ins is the last active stream enc connected to this back_end*/
- uint8_t i;
- for (i = 0; i != link_enc_inst && i < ARRAY_SIZE(fe_clk_en); i++) {
- if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
- num_enabled_symclk_fe++;
- }
+ REG_GET_2(SYMCLKA_CLOCK_ENABLE, SYMCLKA_FE_EN, &fe_clk_en[0],
+ SYMCLKA_FE_SRC_SEL, &be_clk_sel[0]);
+
+ REG_GET_2(SYMCLKB_CLOCK_ENABLE, SYMCLKB_FE_EN, &fe_clk_en[1],
+ SYMCLKB_FE_SRC_SEL, &be_clk_sel[1]);
+
+ REG_GET_2(SYMCLKC_CLOCK_ENABLE, SYMCLKC_FE_EN, &fe_clk_en[2],
+ SYMCLKC_FE_SRC_SEL, &be_clk_sel[2]);
+
+ REG_GET_2(SYMCLKD_CLOCK_ENABLE, SYMCLKD_FE_EN, &fe_clk_en[3],
+ SYMCLKD_FE_SRC_SEL, &be_clk_sel[3]);
+
+ REG_GET_2(SYMCLKE_CLOCK_ENABLE, SYMCLKE_FE_EN, &fe_clk_en[4],
+ SYMCLKE_FE_SRC_SEL, &be_clk_sel[4]);
+
+ uint8_t i;
+
+ for (i = 0; i < ARRAY_SIZE(fe_clk_en); i++) {
+ if (fe_clk_en[i] && be_clk_sel[i] == link_enc_inst)
+ num_enabled_symclk_fe++;
}
return num_enabled_symclk_fe;
}
@@ -2020,9 +2001,9 @@ static void dccg35_disable_symclk_se(struct dccg *dccg, uint32_t stream_enc_inst
break;
}
- /*check other enabled symclk fe */
- num_enabled_symclk_fe = dccg35_get_other_enabled_symclk_fe(dccg, stream_enc_inst, link_enc_inst);
- /*only turn off backend clk if other front end attachecd to this backend are all off,
+ /*check other enabled symclk fe connected to this be */
+ num_enabled_symclk_fe = dccg35_get_number_enabled_symclk_fe_connected_to_be(dccg, link_enc_inst);
+ /*only turn off backend clk if other front end attached to this backend are all off,
for mst, only turn off the backend if this is the last front end*/
if (num_enabled_symclk_fe == 0) {
switch (link_enc_inst) {
@@ -2351,6 +2332,14 @@ static void dccg35_disable_symclk_se_cb(
/* DMU PHY sequence switches SYMCLK_BE (link_enc_inst) to ref clock once PHY is turned off */
}
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating)
+{
+
+ if (dccg->ctx->dc->debug.root_clock_optimization.bits.dpp) {
+ dccg35_set_dppclk_root_clock_gating(dccg, pipe_idx, disable_clock_gating);
+ }
+}
+
static const struct dccg_funcs dccg35_funcs_new = {
.update_dpp_dto = dccg35_update_dpp_dto_cb,
.dpp_root_clock_control = dccg35_dpp_root_clock_control_cb,
@@ -2411,7 +2400,7 @@ static const struct dccg_funcs dccg35_funcs = {
.enable_symclk_se = dccg35_enable_symclk_se,
.disable_symclk_se = dccg35_disable_symclk_se,
.set_dtbclk_p_src = dccg35_set_dtbclk_p_src,
-
+ .dccg_root_gate_disable_control = dccg35_root_gate_disable_control,
};
struct dccg *dccg35_create(
diff --git a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
index 1586a45ca3bd..51f98c5c51c4 100644
--- a/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/dccg/dcn35/dcn35_dccg.h
@@ -241,6 +241,7 @@ struct dccg *dccg35_create(
void dccg35_init(struct dccg *dccg);
void dccg35_enable_global_fgcg_rep(struct dccg *dccg, bool value);
+void dccg35_root_gate_disable_control(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
#endif //__DCN35_DCCG_H__
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
index 01f98139292e..5105fd580017 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_dscl.c
@@ -951,6 +951,8 @@ static void dpp401_dscl_set_isharp_filter(
*
* @dpp_base: High level DPP struct
* @scl_data: scalaer_data info
+ * @program_isharp_1dlut: flag to program isharp 1D LUT
+ * @bs_coeffs_updated: Blur and Scale Coefficients update flag
*
* This is the primary function to program isharp
*
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
index 01dffed4d30b..a6a1db5ba8ba 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c
@@ -3212,15 +3212,19 @@ void dcn10_set_drr(struct pipe_ctx **pipe_ctx,
* as well.
*/
for (i = 0; i < num_pipes; i++) {
- if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
- if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
+ if (tg->funcs->set_drr)
+ tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
}
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
index a4c6decee0f8..479fd3e89e5a 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c
@@ -240,6 +240,10 @@ void dcn35_init_hw(struct dc *dc)
dc->res_pool->hubbub->funcs->allow_self_refresh_control(dc->res_pool->hubbub,
!dc->res_pool->hubbub->ctx->dc->debug.disable_stutter);
}
+ if (res_pool->dccg->funcs->dccg_root_gate_disable_control) {
+ for (i = 0; i < res_pool->pipe_count; i++)
+ res_pool->dccg->funcs->dccg_root_gate_disable_control(res_pool->dccg, i, 0);
+ }
for (i = 0; i < res_pool->audio_count; i++) {
struct audio *audio = res_pool->audios[i];
@@ -1414,7 +1418,13 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
for (i = 0; i < num_pipes; i++) {
- if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+ /* dc_state_destruct() might null the stream resources, so fetch tg
+ * here first to avoid a race condition. The lifetime of the pointee
+ * itself (the timing_generator object) is not a problem here.
+ */
+ struct timing_generator *tg = pipe_ctx[i]->stream_res.tg;
+
+ if ((tg != NULL) && tg->funcs) {
if (pipe_ctx[i]->stream && pipe_ctx[i]->stream->ctx->dc->debug.static_screen_wait_frames) {
struct dc_crtc_timing *timing = &pipe_ctx[i]->stream->timing;
struct dc *dc = pipe_ctx[i]->stream->ctx->dc;
@@ -1426,14 +1436,12 @@ void dcn35_set_drr(struct pipe_ctx **pipe_ctx,
num_frames = 2 * (frame_rate % 60);
}
}
- if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
- pipe_ctx[i]->stream_res.tg->funcs->set_drr(
- pipe_ctx[i]->stream_res.tg, &params);
+ if (tg->funcs->set_drr)
+ tg->funcs->set_drr(tg, &params);
if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
- if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
- pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
- pipe_ctx[i]->stream_res.tg,
- event_triggers, num_frames);
+ if (tg->funcs->set_static_screen_control)
+ tg->funcs->set_static_screen_control(
+ tg, event_triggers, num_frames);
}
}
}
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
index d619eb229a62..e94e9ba60f55 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h
@@ -213,6 +213,7 @@ struct dccg_funcs {
uint32_t otg_inst);
void (*set_dto_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
void (*set_ref_dscclk)(struct dccg *dccg, uint32_t dsc_inst);
+ void (*dccg_root_gate_disable_control)(struct dccg *dccg, uint32_t pipe_idx, uint32_t disable_clock_gating);
};
#endif //__DAL_DCCG_H__
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index 09cbc3afd6d8..b0fc22383e28 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -1038,7 +1038,7 @@ struct display_object_info_table_v1_4
uint16_t supporteddevices;
uint8_t number_of_path;
uint8_t reserved;
- struct atom_display_object_path_v2 display_path[8]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
+ struct atom_display_object_path_v2 display_path[]; //the real number of this included in the structure is calculated by using the (whole structure size - the header size- number_of_path)/size of atom_display_object_path
};
struct display_object_info_table_v1_5 {
@@ -1048,7 +1048,7 @@ struct display_object_info_table_v1_5 {
uint8_t reserved;
// the real number of this included in the structure is calculated by using the
// (whole structure size - the header size- number_of_path)/size of atom_display_object_path
- struct atom_display_object_path_v3 display_path[8];
+ struct atom_display_object_path_v3 display_path[];
};
/*
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index ac0dd6b97f8d..e71a721c12b9 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -439,7 +439,16 @@ enum smu_clk_type {
__SMU_DUMMY_MAP(BACO_CG), \
__SMU_DUMMY_MAP(SOC_CG), \
__SMU_DUMMY_MAP(LOW_POWER_DCNCLKS), \
- __SMU_DUMMY_MAP(WHISPER_MODE),
+ __SMU_DUMMY_MAP(WHISPER_MODE), \
+ __SMU_DUMMY_MAP(EDC_PWRBRK), \
+ __SMU_DUMMY_MAP(SOC_EDC_XVMIN), \
+ __SMU_DUMMY_MAP(GFX_PSM_DIDT), \
+ __SMU_DUMMY_MAP(APT_ALL_ENABLE), \
+ __SMU_DUMMY_MAP(APT_SQ_THROTTLE), \
+ __SMU_DUMMY_MAP(APT_PF_DCS), \
+ __SMU_DUMMY_MAP(GFX_EDC_XVMIN), \
+ __SMU_DUMMY_MAP(GFX_DIDT_XVMIN), \
+ __SMU_DUMMY_MAP(FAN_ABNORMAL),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(feature) SMU_FEATURE_##feature##_BIT
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
index a31fae5feedf..43820d7d2c54 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
@@ -199,6 +199,15 @@ static struct cmn2asic_mapping smu_v14_0_2_feature_mask_map[SMU_FEATURE_COUNT] =
FEA_MAP(MEM_TEMP_READ),
FEA_MAP(ATHUB_MMHUB_PG),
FEA_MAP(SOC_PCC),
+ FEA_MAP(EDC_PWRBRK),
+ FEA_MAP(SOC_EDC_XVMIN),
+ FEA_MAP(GFX_PSM_DIDT),
+ FEA_MAP(APT_ALL_ENABLE),
+ FEA_MAP(APT_SQ_THROTTLE),
+ FEA_MAP(APT_PF_DCS),
+ FEA_MAP(GFX_EDC_XVMIN),
+ FEA_MAP(GFX_DIDT_XVMIN),
+ FEA_MAP(FAN_ABNORMAL),
[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
[SMU_FEATURE_PPT_BIT] = {1, FEATURE_THROTTLERS_BIT},
@@ -687,6 +696,9 @@ static int smu_v14_0_2_set_default_dpm_table(struct smu_context *smu)
pcie_table->clk_freq[pcie_table->num_of_link_levels] =
skutable->LclkFreq[link_level];
pcie_table->num_of_link_levels++;
+
+ if (link_level == 0)
+ link_level++;
}
/* dcefclk dpm table setup */
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 03bd3c7bd0dc..0e3f8adf162f 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -410,22 +410,30 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
}
/**
- * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
+ * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers
* @dev: dev to export the buffer from
* @file_priv: drm file-private structure
* @handle: buffer handle to export
* @flags: flags like DRM_CLOEXEC
- * @prime_fd: pointer to storage for the fd id of the create dma-buf
*
* This is the PRIME export function which must be used mandatorily by GEM
* drivers to ensure correct lifetime management of the underlying GEM object.
* The actual exporting from GEM object to a dma-buf is done through the
* &drm_gem_object_funcs.export callback.
+ *
+ * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it
+ * has created, without attaching it to any file descriptors. The difference
+ * between those two is similar to that between anon_inode_getfile() and
+ * anon_inode_getfd(); insertion into descriptor table is something you
+ * can not revert if any cleanup is needed, so the descriptor-returning
+ * variants should only be used when you are past the last failure exit
+ * and the only thing left is passing the new file descriptor to userland.
+ * When all you need is the object itself or when you need to do something
+ * else that might fail, use that one instead.
*/
-int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle,
- uint32_t flags,
- int *prime_fd)
+ uint32_t flags)
{
struct drm_gem_object *obj;
int ret = 0;
@@ -434,14 +442,14 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev,
mutex_lock(&file_priv->prime.lock);
obj = drm_gem_object_lookup(file_priv, handle);
if (!obj) {
- ret = -ENOENT;
+ dmabuf = ERR_PTR(-ENOENT);
goto out_unlock;
}
dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle);
if (dmabuf) {
get_dma_buf(dmabuf);
- goto out_have_handle;
+ goto out;
}
mutex_lock(&dev->object_name_lock);
@@ -463,7 +471,6 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev,
/* normally the created dma-buf takes ownership of the ref,
* but if that fails then drop the ref
*/
- ret = PTR_ERR(dmabuf);
mutex_unlock(&dev->object_name_lock);
goto out;
}
@@ -478,34 +485,51 @@ out_have_obj:
ret = drm_prime_add_buf_handle(&file_priv->prime,
dmabuf, handle);
mutex_unlock(&dev->object_name_lock);
- if (ret)
- goto fail_put_dmabuf;
-
-out_have_handle:
- ret = dma_buf_fd(dmabuf, flags);
- /*
- * We must _not_ remove the buffer from the handle cache since the newly
- * created dma buf is already linked in the global obj->dma_buf pointer,
- * and that is invariant as long as a userspace gem handle exists.
- * Closing the handle will clean out the cache anyway, so we don't leak.
- */
- if (ret < 0) {
- goto fail_put_dmabuf;
- } else {
- *prime_fd = ret;
- ret = 0;
+ if (ret) {
+ dma_buf_put(dmabuf);
+ dmabuf = ERR_PTR(ret);
}
-
- goto out;
-
-fail_put_dmabuf:
- dma_buf_put(dmabuf);
out:
drm_gem_object_put(obj);
out_unlock:
mutex_unlock(&file_priv->prime.lock);
+ return dmabuf;
+}
+EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf);
- return ret;
+/**
+ * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
+ * @dev: dev to export the buffer from
+ * @file_priv: drm file-private structure
+ * @handle: buffer handle to export
+ * @flags: flags like DRM_CLOEXEC
+ * @prime_fd: pointer to storage for the fd id of the create dma-buf
+ *
+ * This is the PRIME export function which must be used mandatorily by GEM
+ * drivers to ensure correct lifetime management of the underlying GEM object.
+ * The actual exporting from GEM object to a dma-buf is done through the
+ * &drm_gem_object_funcs.export callback.
+ */
+int drm_gem_prime_handle_to_fd(struct drm_device *dev,
+ struct drm_file *file_priv, uint32_t handle,
+ uint32_t flags,
+ int *prime_fd)
+{
+ struct dma_buf *dmabuf;
+ int fd = get_unused_fd_flags(flags);
+
+ if (fd < 0)
+ return fd;
+
+ dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags);
+ if (IS_ERR(dmabuf)) {
+ put_unused_fd(fd);
+ return PTR_ERR(dmabuf);
+ }
+
+ fd_install(fd, dmabuf->file);
+ *prime_fd = fd;
+ return 0;
}
EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);
diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h
index 2a1d01e5b56b..fa085c44d4ca 100644
--- a/include/drm/drm_prime.h
+++ b/include/drm/drm_prime.h
@@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf);
int drm_gem_prime_fd_to_handle(struct drm_device *dev,
struct drm_file *file_priv, int prime_fd, uint32_t *handle);
+struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev,
+ struct drm_file *file_priv, uint32_t handle,
+ uint32_t flags);
int drm_gem_prime_handle_to_fd(struct drm_device *dev,
struct drm_file *file_priv, uint32_t handle, uint32_t flags,
int *prime_fd);
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 71a7ce5f2d4c..717307d6b5b7 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -540,26 +540,29 @@ enum kfd_smi_event {
KFD_SMI_EVENT_ALL_PROCESS = 64
};
+/* The reason of the page migration event */
enum KFD_MIGRATE_TRIGGERS {
- KFD_MIGRATE_TRIGGER_PREFETCH,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
- KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU,
- KFD_MIGRATE_TRIGGER_TTM_EVICTION
+ KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */
+ KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */
+ KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */
};
+/* The reason of user queue evition event */
enum KFD_QUEUE_EVICTION_TRIGGERS {
- KFD_QUEUE_EVICTION_TRIGGER_SVM,
- KFD_QUEUE_EVICTION_TRIGGER_USERPTR,
- KFD_QUEUE_EVICTION_TRIGGER_TTM,
- KFD_QUEUE_EVICTION_TRIGGER_SUSPEND,
- KFD_QUEUE_EVICTION_CRIU_CHECKPOINT,
- KFD_QUEUE_EVICTION_CRIU_RESTORE
+ KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */
+ KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */
+ KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */
+ KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */
+ KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */
+ KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */
};
+/* The reason of unmap buffer from GPU event */
enum KFD_SVM_UNMAP_TRIGGERS {
- KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY,
- KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,
- KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU
+ KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */
+ KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */
+ KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */
};
#define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
@@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args {
__u32 anon_fd; /* from KFD */
};
+/*
+ * SVM event tracing via SMI system management interface
+ *
+ * Open event file descriptor
+ * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return a anonymous file
+ * descriptor to receive SMI events.
+ * If calling with sudo permission, then file descriptor can be used to receive
+ * SVM events from all processes, otherwise, to only receive SVM events of same
+ * process.
+ *
+ * To enable the SVM event
+ * Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap
+ * mask to start record the event to the kfifo, use bitmap mask combination
+ * for multiple events. New event mask will overwrite the previous event mask.
+ * KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo
+ * permisson to receive SVM events from all process.
+ *
+ * To receive the event
+ * Application can poll file descriptor to wait for the events, then read event
+ * from the file into a buffer. Each event is one line string message, starting
+ * with the event id, then the event specific information.
+ *
+ * To decode event information
+ * The following event format string macro can be used with sscanf to decode
+ * the specific event information.
+ * event triggers: the reason to generate the event, defined as enum for unmap,
+ * eviction and migrate events.
+ * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory.
+ * addr: user mode address, in pages
+ * size: in pages
+ * pid: the process ID to generate the event
+ * ns: timestamp in nanosecond-resolution, starts at system boot time but
+ * stops during suspend
+ * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update
+ * rw: 'W' for write page fault, 'R' for read page fault
+ * rescheduled: 'R' if the queue restore failed and rescheduled to try again
+ */
+#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\
+ "%x %s\n", (reset_seq_num), (reset_cause)
+
+#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\
+ "%llx:%llx\n", (bitmask), (counter)
+
+#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\
+ "%x:%s\n", (pid), (task_name)
+
+#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\
+ "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw)
+
+#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\
+ "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update)
+
+#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\
+ preferred_loc, migrate_trigger)\
+ "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\
+ (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger)
+
+#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\
+ "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\
+ (from), (to), (migrate_trigger)
+
+#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\
+ "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger)
+
+#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\
+ "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled)
+
+#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\
+ "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\
+ (node), (unmap_trigger)
+
/**************************************************************************************************
* CRIU IOCTLs (Checkpoint Restore In Userspace)
*