summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: David Francis <David.Francis@amd.com> 2026-01-16 18:21:15 +0300
committer: Alex Deucher <alexander.deucher@amd.com> 2026-03-07 00:33:59 +0300
commit: 421c0f19043337a553e802b2dfe4b56d538ef4d6 (patch)
tree: a252e5d2ae1a1927401b0ae6097673478c1dd857
parent: 8c78845bf9b18d336e5b5d00c966176a3c3f9581 (diff)
download: linux-421c0f19043337a553e802b2dfe4b56d538ef4d6.tar.xz
drm/amdgpu: Check for multiplication overflow in checkpoint stack size
get_checkpoint_info() in kfd_mqd_manager_v9.c computes the 32-bit value ctl_stack_size by multiplying two 32-bit values. This can overflow to a lower value, which could result in copying outside the bounds of a buffer in checkpoint_mqd() in the same file.

Put in a check for the overflow, and fail with -EINVAL if detected.

v2: use check_mul_overflow()

Signed-off-by: David Francis <David.Francis@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h | 3
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 7
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 3
-rw-r--r-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 8
6 files changed, 22 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 3ddf06c755b5..ab3b2e7be9bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2720,7 +2720,7 @@ static int get_wave_state(struct device_queue_manager *dqm,
ctl_stack, ctl_stack_used_size, save_area_used_size);
}
-static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
+static int get_queue_checkpoint_info(struct device_queue_manager *dqm,
const struct queue *q,
u32 *mqd_size,
u32 *ctl_stack_size)
@@ -2728,6 +2728,7 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
struct mqd_manager *mqd_mgr;
enum KFD_MQD_TYPE mqd_type =
get_mqd_type_from_queue_type(q->properties.type);
+ int ret = 0;
dqm_lock(dqm);
mqd_mgr = dqm->mqd_mgrs[mqd_type];
@@ -2735,9 +2736,11 @@ static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
*ctl_stack_size = 0;
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
- mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
+ ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);
dqm_unlock(dqm);
+
+ return ret;
}
static int checkpoint_mqd(struct device_queue_manager *dqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index ef07e44916f8..3272328da11f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -192,7 +192,7 @@ struct device_queue_manager_ops {
int (*reset_queues)(struct device_queue_manager *dqm,
uint16_t pasid);
- void (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
+ int (*get_queue_checkpoint_info)(struct device_queue_manager *dqm,
const struct queue *q, u32 *mqd_size,
u32 *ctl_stack_size);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 2429d278ef0e..06ca6235ff1b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -102,7 +102,8 @@ struct mqd_manager {
u32 *ctl_stack_used_size,
u32 *save_area_used_size);
- void (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd, uint32_t *ctl_stack_size);
+ int (*get_checkpoint_info)(struct mqd_manager *mm, void *mqd,
+ uint32_t *ctl_stack_size);
void (*checkpoint_mqd)(struct mqd_manager *mm,
void *mqd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 19f21932a5ce..979ae94ac966 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -385,11 +385,14 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
-static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
struct v9_mqd *m = get_mqd(mqd);
- *ctl_stack_size = m->cp_hqd_cntl_stack_size * NUM_XCC(mm->dev->xcc_mask);
+ if (check_mul_overflow(m->cp_hqd_cntl_stack_size, NUM_XCC(mm->dev->xcc_mask), ctl_stack_size))
+ return -EINVAL;
+
+ return 0;
}
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index f02ef2d44a07..431a20323146 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -274,10 +274,11 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
return 0;
}
-static void get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
+static int get_checkpoint_info(struct mqd_manager *mm, void *mqd, u32 *ctl_stack_size)
{
/* Control stack is stored in user mode */
*ctl_stack_size = 0;
+ return 0;
}
static void checkpoint_mqd(struct mqd_manager *mm, void *mqd, void *mqd_dst, void *ctl_stack_dst)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 8ea31699d38b..586d409ebe4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -1069,6 +1069,7 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
uint32_t *ctl_stack_size)
{
struct process_queue_node *pqn;
+ int ret;
pqn = get_queue_by_qid(pqm, qid);
if (!pqn) {
@@ -1081,9 +1082,14 @@ int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
return -EOPNOTSUPP;
}
- pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
+ ret = pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
pqn->q, mqd_size,
ctl_stack_size);
+ if (ret) {
+ pr_debug("amdkfd: Overflow while computing stack size for queue %d\n", qid);
+ return ret;
+ }
+
return 0;
}