summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_chardev.c183
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c102
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c21
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h27
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c123
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c318
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c8
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c40
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_events.c33
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c2
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c63
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_module.c10
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h3
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c62
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c46
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c294
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pasid.c7
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h330
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h140
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process.c25
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c71
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_queue.c12
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c46
26 files changed, 825 insertions, 1183 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6316aad43a73..e4a8c2e52cb2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -142,12 +142,12 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
struct kfd_ioctl_create_queue_args *args)
{
if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
- pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+ pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
- pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+ pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
return -EINVAL;
}
@@ -155,26 +155,26 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
(!access_ok(VERIFY_WRITE,
(const void __user *) args->ring_base_address,
sizeof(uint64_t)))) {
- pr_err("kfd: can't access ring base address\n");
+ pr_err("Can't access ring base address\n");
return -EFAULT;
}
if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
- pr_err("kfd: ring size must be a power of 2 or 0\n");
+ pr_err("Ring size must be a power of 2 or 0\n");
return -EINVAL;
}
if (!access_ok(VERIFY_WRITE,
(const void __user *) args->read_pointer_address,
sizeof(uint32_t))) {
- pr_err("kfd: can't access read pointer\n");
+ pr_err("Can't access read pointer\n");
return -EFAULT;
}
if (!access_ok(VERIFY_WRITE,
(const void __user *) args->write_pointer_address,
sizeof(uint32_t))) {
- pr_err("kfd: can't access write pointer\n");
+ pr_err("Can't access write pointer\n");
return -EFAULT;
}
@@ -182,7 +182,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
!access_ok(VERIFY_WRITE,
(const void __user *) args->eop_buffer_address,
sizeof(uint32_t))) {
- pr_debug("kfd: can't access eop buffer");
+ pr_debug("Can't access eop buffer");
return -EFAULT;
}
@@ -190,7 +190,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
!access_ok(VERIFY_WRITE,
(const void __user *) args->ctx_save_restore_address,
sizeof(uint32_t))) {
- pr_debug("kfd: can't access ctx save restore buffer");
+ pr_debug("Can't access ctx save restore buffer");
return -EFAULT;
}
@@ -219,27 +219,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
else
q_properties->format = KFD_QUEUE_FORMAT_PM4;
- pr_debug("Queue Percentage (%d, %d)\n",
+ pr_debug("Queue Percentage: %d, %d\n",
q_properties->queue_percent, args->queue_percentage);
- pr_debug("Queue Priority (%d, %d)\n",
+ pr_debug("Queue Priority: %d, %d\n",
q_properties->priority, args->queue_priority);
- pr_debug("Queue Address (0x%llX, 0x%llX)\n",
+ pr_debug("Queue Address: 0x%llX, 0x%llX\n",
q_properties->queue_address, args->ring_base_address);
- pr_debug("Queue Size (0x%llX, %u)\n",
+ pr_debug("Queue Size: 0x%llX, %u\n",
q_properties->queue_size, args->ring_size);
- pr_debug("Queue r/w Pointers (0x%llX, 0x%llX)\n",
- (uint64_t) q_properties->read_ptr,
- (uint64_t) q_properties->write_ptr);
+ pr_debug("Queue r/w Pointers: %p, %p\n",
+ q_properties->read_ptr,
+ q_properties->write_ptr);
- pr_debug("Queue Format (%d)\n", q_properties->format);
+ pr_debug("Queue Format: %d\n", q_properties->format);
- pr_debug("Queue EOP (0x%llX)\n", q_properties->eop_ring_buffer_address);
+ pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
- pr_debug("Queue CTX save arex (0x%llX)\n",
+ pr_debug("Queue CTX save area: 0x%llX\n",
q_properties->ctx_save_restore_area_address);
return 0;
@@ -257,16 +257,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
memset(&q_properties, 0, sizeof(struct queue_properties));
- pr_debug("kfd: creating queue ioctl\n");
+ pr_debug("Creating queue ioctl\n");
err = set_queue_properties_from_user(&q_properties, args);
if (err)
return err;
- pr_debug("kfd: looking for gpu id 0x%x\n", args->gpu_id);
+ pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL) {
- pr_debug("kfd: gpu id 0x%x was not found\n", args->gpu_id);
+ if (!dev) {
+ pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
return -EINVAL;
}
@@ -278,7 +278,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
goto err_bind_process;
}
- pr_debug("kfd: creating queue for PASID %d on GPU 0x%x\n",
+ pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
p->pasid,
dev->id);
@@ -296,15 +296,15 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
mutex_unlock(&p->mutex);
- pr_debug("kfd: queue id %d was created successfully\n", args->queue_id);
+ pr_debug("Queue id %d was created successfully\n", args->queue_id);
- pr_debug("ring buffer address == 0x%016llX\n",
+ pr_debug("Ring buffer address == 0x%016llX\n",
args->ring_base_address);
- pr_debug("read ptr address == 0x%016llX\n",
+ pr_debug("Read ptr address == 0x%016llX\n",
args->read_pointer_address);
- pr_debug("write ptr address == 0x%016llX\n",
+ pr_debug("Write ptr address == 0x%016llX\n",
args->write_pointer_address);
return 0;
@@ -321,7 +321,7 @@ static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
int retval;
struct kfd_ioctl_destroy_queue_args *args = data;
- pr_debug("kfd: destroying queue id %d for PASID %d\n",
+ pr_debug("Destroying queue id %d for pasid %d\n",
args->queue_id,
p->pasid);
@@ -341,12 +341,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
struct queue_properties properties;
if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
- pr_err("kfd: queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
+ pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
return -EINVAL;
}
if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
- pr_err("kfd: queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
+ pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
return -EINVAL;
}
@@ -354,12 +354,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
(!access_ok(VERIFY_WRITE,
(const void __user *) args->ring_base_address,
sizeof(uint64_t)))) {
- pr_err("kfd: can't access ring base address\n");
+ pr_err("Can't access ring base address\n");
return -EFAULT;
}
if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
- pr_err("kfd: ring size must be a power of 2 or 0\n");
+ pr_err("Ring size must be a power of 2 or 0\n");
return -EINVAL;
}
@@ -368,7 +368,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
properties.queue_percent = args->queue_percentage;
properties.priority = args->queue_priority;
- pr_debug("kfd: updating queue id %d for PASID %d\n",
+ pr_debug("Updating queue id %d for pasid %d\n",
args->queue_id, p->pasid);
mutex_lock(&p->mutex);
@@ -400,7 +400,7 @@ static int kfd_ioctl_set_memory_policy(struct file *filep,
}
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
mutex_lock(&p->mutex);
@@ -443,7 +443,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
long status = 0;
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -460,12 +460,11 @@ static int kfd_ioctl_dbg_register(struct file *filep,
*/
pdd = kfd_bind_process_to_device(dev, p);
if (IS_ERR(pdd)) {
- mutex_unlock(&p->mutex);
- mutex_unlock(kfd_get_dbgmgr_mutex());
- return PTR_ERR(pdd);
+ status = PTR_ERR(pdd);
+ goto out;
}
- if (dev->dbgmgr == NULL) {
+ if (!dev->dbgmgr) {
/* In case of a legal call, we have no dbgmgr yet */
create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
if (create_ok) {
@@ -480,6 +479,7 @@ static int kfd_ioctl_dbg_register(struct file *filep,
status = -EINVAL;
}
+out:
mutex_unlock(&p->mutex);
mutex_unlock(kfd_get_dbgmgr_mutex());
@@ -494,7 +494,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
long status;
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -505,7 +505,7 @@ static int kfd_ioctl_dbg_unregister(struct file *filep,
mutex_lock(kfd_get_dbgmgr_mutex());
status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
- if (status == 0) {
+ if (!status) {
kfd_dbgmgr_destroy(dev->dbgmgr);
dev->dbgmgr = NULL;
}
@@ -539,7 +539,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -580,8 +580,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
- kfree(args_buff);
- return -EINVAL;
+ status = -EINVAL;
+ goto out;
}
watch_mask_value = (uint64_t) args_buff[args_idx];
@@ -604,8 +604,8 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
}
if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
- kfree(args_buff);
- return -EINVAL;
+ status = -EINVAL;
+ goto out;
}
/* Currently HSA Event is not supported for DBG */
@@ -617,6 +617,7 @@ static int kfd_ioctl_dbg_address_watch(struct file *filep,
mutex_unlock(kfd_get_dbgmgr_mutex());
+out:
kfree(args_buff);
return status;
@@ -646,7 +647,7 @@ static int kfd_ioctl_dbg_wave_control(struct file *filep,
sizeof(wac_info.trapId);
dev = kfd_device_by_id(args->gpu_id);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
if (dev->device_info->asic_family == CHIP_CARRIZO) {
@@ -782,8 +783,9 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
"scratch_limit %llX\n", pdd->scratch_limit);
args->num_of_nodes++;
- } while ((pdd = kfd_get_next_process_device_data(p, pdd)) != NULL &&
- (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
+
+ pdd = kfd_get_next_process_device_data(p, pdd);
+ } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
}
mutex_unlock(&p->mutex);
@@ -846,9 +848,84 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
return err;
}
+static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_set_scratch_backing_va_args *args = data;
+ struct kfd_process_device *pdd;
+ struct kfd_dev *dev;
+ long err;
+
+ dev = kfd_device_by_id(args->gpu_id);
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&p->mutex);
+
+ pdd = kfd_bind_process_to_device(dev, p);
+ if (IS_ERR(pdd)) {
+ err = PTR_ERR(pdd);
+ goto bind_process_to_device_fail;
+ }
+
+ pdd->qpd.sh_hidden_private_base = args->va_addr;
+
+ mutex_unlock(&p->mutex);
+
+ if (sched_policy == KFD_SCHED_POLICY_NO_HWS && pdd->qpd.vmid != 0)
+ dev->kfd2kgd->set_scratch_backing_va(
+ dev->kgd, args->va_addr, pdd->qpd.vmid);
+
+ return 0;
+
+bind_process_to_device_fail:
+ mutex_unlock(&p->mutex);
+ return err;
+}
+
+static int kfd_ioctl_get_tile_config(struct file *filep,
+ struct kfd_process *p, void *data)
+{
+ struct kfd_ioctl_get_tile_config_args *args = data;
+ struct kfd_dev *dev;
+ struct tile_config config;
+ int err = 0;
+
+ dev = kfd_device_by_id(args->gpu_id);
+
+ dev->kfd2kgd->get_tile_config(dev->kgd, &config);
+
+ args->gb_addr_config = config.gb_addr_config;
+ args->num_banks = config.num_banks;
+ args->num_ranks = config.num_ranks;
+
+ if (args->num_tile_configs > config.num_tile_configs)
+ args->num_tile_configs = config.num_tile_configs;
+ err = copy_to_user((void __user *)args->tile_config_ptr,
+ config.tile_config_ptr,
+ args->num_tile_configs * sizeof(uint32_t));
+ if (err) {
+ args->num_tile_configs = 0;
+ return -EFAULT;
+ }
+
+ if (args->num_macro_tile_configs > config.num_macro_tile_configs)
+ args->num_macro_tile_configs =
+ config.num_macro_tile_configs;
+ err = copy_to_user((void __user *)args->macro_tile_config_ptr,
+ config.macro_tile_config_ptr,
+ args->num_macro_tile_configs * sizeof(uint32_t));
+ if (err) {
+ args->num_macro_tile_configs = 0;
+ return -EFAULT;
+ }
+
+ return 0;
+}
#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
- [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
+ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
+ .cmd_drv = 0, .name = #ioctl}
/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
@@ -899,6 +976,12 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
kfd_ioctl_dbg_wave_control, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
+ kfd_ioctl_set_scratch_backing_va, 0),
+
+ AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
+ kfd_ioctl_get_tile_config, 0)
};
#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index d5e19b5fbbfb..0aa021aa0aa1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -42,8 +42,6 @@
static void dbgdev_address_watch_disable_nodiq(struct kfd_dev *dev)
{
- BUG_ON(!dev || !dev->kfd2kgd);
-
dev->kfd2kgd->address_watch_disable(dev->kgd);
}
@@ -62,7 +60,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
unsigned int *ib_packet_buff;
int status;
- BUG_ON(!dbgdev || !dbgdev->kq || !packet_buff || !size_in_bytes);
+ if (WARN_ON(!size_in_bytes))
+ return -EINVAL;
kq = dbgdev->kq;
@@ -77,8 +76,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
status = kq->ops.acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
&ib_packet_buff);
- if (status != 0) {
- pr_err("amdkfd: acquire_packet_buffer failed\n");
+ if (status) {
+ pr_err("acquire_packet_buffer failed\n");
return status;
}
@@ -115,8 +114,8 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
status = kfd_gtt_sa_allocate(dbgdev->dev, sizeof(uint64_t),
&mem_obj);
- if (status != 0) {
- pr_err("amdkfd: Failed to allocate GART memory\n");
+ if (status) {
+ pr_err("Failed to allocate GART memory\n");
kq->ops.rollback_packet(kq);
return status;
}
@@ -168,8 +167,6 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
static int dbgdev_register_nodiq(struct kfd_dbgdev *dbgdev)
{
- BUG_ON(!dbgdev);
-
/*
* no action is needed in this case,
* just make sure diq will not be used
@@ -187,14 +184,12 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
struct kernel_queue *kq = NULL;
int status;
- BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->dev);
-
status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
&properties, 0, KFD_QUEUE_TYPE_DIQ,
&qid);
if (status) {
- pr_err("amdkfd: Failed to create DIQ\n");
+ pr_err("Failed to create DIQ\n");
return status;
}
@@ -202,8 +197,8 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
kq = pqm_get_kernel_queue(dbgdev->pqm, qid);
- if (kq == NULL) {
- pr_err("amdkfd: Error getting DIQ\n");
+ if (!kq) {
+ pr_err("Error getting DIQ\n");
pqm_destroy_queue(dbgdev->pqm, qid);
return -EFAULT;
}
@@ -215,8 +210,6 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
static int dbgdev_unregister_nodiq(struct kfd_dbgdev *dbgdev)
{
- BUG_ON(!dbgdev || !dbgdev->dev);
-
/* disable watch address */
dbgdev_address_watch_disable_nodiq(dbgdev->dev);
return 0;
@@ -227,8 +220,6 @@ static int dbgdev_unregister_diq(struct kfd_dbgdev *dbgdev)
/* todo - disable address watch */
int status;
- BUG_ON(!dbgdev || !dbgdev->pqm || !dbgdev->kq);
-
status = pqm_destroy_queue(dbgdev->pqm,
dbgdev->kq->queue->properties.queue_id);
dbgdev->kq = NULL;
@@ -245,14 +236,12 @@ static void dbgdev_address_watch_set_registers(
{
union ULARGE_INTEGER addr;
- BUG_ON(!adw_info || !addrHi || !addrLo || !cntl);
-
addr.quad_part = 0;
addrHi->u32All = 0;
addrLo->u32All = 0;
cntl->u32All = 0;
- if (adw_info->watch_mask != NULL)
+ if (adw_info->watch_mask)
cntl->bitfields.mask =
(uint32_t) (adw_info->watch_mask[index] &
ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK);
@@ -279,7 +268,7 @@ static void dbgdev_address_watch_set_registers(
}
static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
+ struct dbg_address_watch_info *adw_info)
{
union TCP_WATCH_ADDR_H_BITS addrHi;
union TCP_WATCH_ADDR_L_BITS addrLo;
@@ -287,13 +276,11 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
struct kfd_process_device *pdd;
unsigned int i;
- BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
-
/* taking the vmid for that process on the safe way using pdd */
pdd = kfd_get_process_device_data(dbgdev->dev,
adw_info->process);
if (!pdd) {
- pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ pr_err("Failed to get pdd for wave control no DIQ\n");
return -EFAULT;
}
@@ -303,17 +290,16 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
(adw_info->num_watch_points == 0)) {
- pr_err("amdkfd: num_watch_points is invalid\n");
+ pr_err("num_watch_points is invalid\n");
return -EINVAL;
}
- if ((adw_info->watch_mode == NULL) ||
- (adw_info->watch_address == NULL)) {
- pr_err("amdkfd: adw_info fields are not valid\n");
+ if (!adw_info->watch_mode || !adw_info->watch_address) {
+ pr_err("adw_info fields are not valid\n");
return -EINVAL;
}
- for (i = 0 ; i < adw_info->num_watch_points ; i++) {
+ for (i = 0; i < adw_info->num_watch_points; i++) {
dbgdev_address_watch_set_registers(adw_info, &addrHi, &addrLo,
&cntl, i, pdd->qpd.vmid);
@@ -348,7 +334,7 @@ static int dbgdev_address_watch_nodiq(struct kfd_dbgdev *dbgdev,
}
static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
- struct dbg_address_watch_info *adw_info)
+ struct dbg_address_watch_info *adw_info)
{
struct pm4__set_config_reg *packets_vec;
union TCP_WATCH_ADDR_H_BITS addrHi;
@@ -363,28 +349,25 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
/* we do not control the vmid in DIQ mode, just a place holder */
unsigned int vmid = 0;
- BUG_ON(!dbgdev || !dbgdev->dev || !adw_info);
-
addrHi.u32All = 0;
addrLo.u32All = 0;
cntl.u32All = 0;
if ((adw_info->num_watch_points > MAX_WATCH_ADDRESSES) ||
(adw_info->num_watch_points == 0)) {
- pr_err("amdkfd: num_watch_points is invalid\n");
+ pr_err("num_watch_points is invalid\n");
return -EINVAL;
}
- if ((NULL == adw_info->watch_mode) ||
- (NULL == adw_info->watch_address)) {
- pr_err("amdkfd: adw_info fields are not valid\n");
+ if (!adw_info->watch_mode || !adw_info->watch_address) {
+ pr_err("adw_info fields are not valid\n");
return -EINVAL;
}
status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
- if (status != 0) {
- pr_err("amdkfd: Failed to allocate GART memory\n");
+ if (status) {
+ pr_err("Failed to allocate GART memory\n");
return status;
}
@@ -442,8 +425,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
i,
ADDRESS_WATCH_REG_CNTL);
- aw_reg_add_dword /= sizeof(uint32_t);
-
packets_vec[0].bitfields2.reg_offset =
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
@@ -455,8 +436,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
i,
ADDRESS_WATCH_REG_ADDR_HI);
- aw_reg_add_dword /= sizeof(uint32_t);
-
packets_vec[1].bitfields2.reg_offset =
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[1].reg_data[0] = addrHi.u32All;
@@ -467,8 +446,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
i,
ADDRESS_WATCH_REG_ADDR_LO);
- aw_reg_add_dword /= sizeof(uint32_t);
-
packets_vec[2].bitfields2.reg_offset =
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[2].reg_data[0] = addrLo.u32All;
@@ -485,8 +462,6 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
i,
ADDRESS_WATCH_REG_CNTL);
- aw_reg_add_dword /= sizeof(uint32_t);
-
packets_vec[3].bitfields2.reg_offset =
aw_reg_add_dword - AMD_CONFIG_REG_BASE;
packets_vec[3].reg_data[0] = cntl.u32All;
@@ -498,8 +473,8 @@ static int dbgdev_address_watch_diq(struct kfd_dbgdev *dbgdev,
packet_buff_uint,
ib_size);
- if (status != 0) {
- pr_err("amdkfd: Failed to submit IB to DIQ\n");
+ if (status) {
+ pr_err("Failed to submit IB to DIQ\n");
break;
}
}
@@ -518,8 +493,6 @@ static int dbgdev_wave_control_set_registers(
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct HsaDbgWaveMsgAMDGen2 *pMsg;
- BUG_ON(!wac_info || !in_reg_sq_cmd || !in_reg_gfx_index);
-
reg_sq_cmd.u32All = 0;
reg_gfx_index.u32All = 0;
pMsg = &wac_info->dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2;
@@ -620,18 +593,16 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
struct pm4__set_config_reg *packets_vec;
size_t ib_size = sizeof(struct pm4__set_config_reg) * 3;
- BUG_ON(!dbgdev || !wac_info);
-
reg_sq_cmd.u32All = 0;
status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
&reg_gfx_index);
if (status) {
- pr_err("amdkfd: Failed to set wave control registers\n");
+ pr_err("Failed to set wave control registers\n");
return status;
}
- /* we do not control the VMID in DIQ,so reset it to a known value */
+ /* we do not control the VMID in DIQ, so reset it to a known value */
reg_sq_cmd.bits.vm_id = 0;
pr_debug("\t\t %30s\n", "* * * * * * * * * * * * * * * * * *");
@@ -667,7 +638,7 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
status = kfd_gtt_sa_allocate(dbgdev->dev, ib_size, &mem_obj);
if (status != 0) {
- pr_err("amdkfd: Failed to allocate GART memory\n");
+ pr_err("Failed to allocate GART memory\n");
return status;
}
@@ -719,8 +690,8 @@ static int dbgdev_wave_control_diq(struct kfd_dbgdev *dbgdev,
packet_buff_uint,
ib_size);
- if (status != 0)
- pr_err("amdkfd: Failed to submit IB to DIQ\n");
+ if (status)
+ pr_err("Failed to submit IB to DIQ\n");
kfd_gtt_sa_free(dbgdev->dev, mem_obj);
@@ -735,21 +706,19 @@ static int dbgdev_wave_control_nodiq(struct kfd_dbgdev *dbgdev,
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct kfd_process_device *pdd;
- BUG_ON(!dbgdev || !dbgdev->dev || !wac_info);
-
reg_sq_cmd.u32All = 0;
/* taking the VMID for that process on the safe way using PDD */
pdd = kfd_get_process_device_data(dbgdev->dev, wac_info->process);
if (!pdd) {
- pr_err("amdkfd: Failed to get pdd for wave control no DIQ\n");
+ pr_err("Failed to get pdd for wave control no DIQ\n");
return -EFAULT;
}
status = dbgdev_wave_control_set_registers(wac_info, &reg_sq_cmd,
&reg_gfx_index);
if (status) {
- pr_err("amdkfd: Failed to set wave control registers\n");
+ pr_err("Failed to set wave control registers\n");
return status;
}
@@ -818,12 +787,13 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
* ATC_VMID15_PASID_MAPPING
- * to check which VMID the current process is mapped to. */
+ * to check which VMID the current process is mapped to.
+ */
for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
(dev->kgd, vmid)) {
- if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_valid
+ if (dev->kfd2kgd->get_atc_vmid_pasid_mapping_pasid
(dev->kgd, vmid) == p->pasid) {
pr_debug("Killing wave fronts of vmid %d and pasid %d\n",
vmid, p->pasid);
@@ -833,7 +803,7 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
}
if (vmid > last_vmid_to_scan) {
- pr_err("amdkfd: didn't found vmid for pasid (%d)\n", p->pasid);
+ pr_err("Didn't find vmid for pasid %d\n", p->pasid);
return -EFAULT;
}
@@ -860,8 +830,6 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
void kfd_dbgdev_init(struct kfd_dbgdev *pdbgdev, struct kfd_dev *pdev,
enum DBGDEV_TYPE type)
{
- BUG_ON(!pdbgdev || !pdev);
-
pdbgdev->dev = pdev;
pdbgdev->kq = NULL;
pdbgdev->type = type;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
index 56d676396342..3da25f7bda6b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.c
@@ -44,8 +44,6 @@ struct mutex *kfd_get_dbgmgr_mutex(void)
static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
{
- BUG_ON(!pmgr);
-
kfree(pmgr->dbgdev);
pmgr->dbgdev = NULL;
@@ -55,7 +53,7 @@ static void kfd_dbgmgr_uninitialize(struct kfd_dbgmgr *pmgr)
void kfd_dbgmgr_destroy(struct kfd_dbgmgr *pmgr)
{
- if (pmgr != NULL) {
+ if (pmgr) {
kfd_dbgmgr_uninitialize(pmgr);
kfree(pmgr);
}
@@ -66,12 +64,12 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
enum DBGDEV_TYPE type = DBGDEV_TYPE_DIQ;
struct kfd_dbgmgr *new_buff;
- BUG_ON(pdev == NULL);
- BUG_ON(!pdev->init_complete);
+ if (WARN_ON(!pdev->init_complete))
+ return false;
new_buff = kfd_alloc_struct(new_buff);
if (!new_buff) {
- pr_err("amdkfd: Failed to allocate dbgmgr instance\n");
+ pr_err("Failed to allocate dbgmgr instance\n");
return false;
}
@@ -79,7 +77,7 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
new_buff->dev = pdev;
new_buff->dbgdev = kfd_alloc_struct(new_buff->dbgdev);
if (!new_buff->dbgdev) {
- pr_err("amdkfd: Failed to allocate dbgdev instance\n");
+ pr_err("Failed to allocate dbgdev instance\n");
kfree(new_buff);
return false;
}
@@ -96,8 +94,6 @@ bool kfd_dbgmgr_create(struct kfd_dbgmgr **ppmgr, struct kfd_dev *pdev)
long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
- BUG_ON(!p || !pmgr || !pmgr->dbgdev);
-
if (pmgr->pasid != 0) {
pr_debug("H/W debugger is already active using pasid %d\n",
pmgr->pasid);
@@ -118,8 +114,6 @@ long kfd_dbgmgr_register(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
{
- BUG_ON(!p || !pmgr || !pmgr->dbgdev);
-
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != p->pasid) {
pr_debug("H/W debugger is not registered by calling pasid %d\n",
@@ -137,8 +131,6 @@ long kfd_dbgmgr_unregister(struct kfd_dbgmgr *pmgr, struct kfd_process *p)
long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
struct dbg_wave_control_info *wac_info)
{
- BUG_ON(!pmgr || !pmgr->dbgdev || !wac_info);
-
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != wac_info->process->pasid) {
pr_debug("H/W debugger support was not registered for requester pasid %d\n",
@@ -152,9 +144,6 @@ long kfd_dbgmgr_wave_control(struct kfd_dbgmgr *pmgr,
long kfd_dbgmgr_address_watch(struct kfd_dbgmgr *pmgr,
struct dbg_address_watch_info *adw_info)
{
- BUG_ON(!pmgr || !pmgr->dbgdev || !adw_info);
-
-
/* Is the requests coming from the already registered process? */
if (pmgr->pasid != adw_info->process->pasid) {
pr_debug("H/W debugger support was not registered for requester pasid %d\n",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
index 257a745ad0b5..a04a1fe1d0d9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgmgr.h
@@ -30,13 +30,11 @@
#pragma pack(push, 4)
enum HSA_DBG_WAVEOP {
- HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
- HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
- HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
- HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter
- debug mode */
- HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take
- a trap */
+ HSA_DBG_WAVEOP_HALT = 1, /* Halts a wavefront */
+ HSA_DBG_WAVEOP_RESUME = 2, /* Resumes a wavefront */
+ HSA_DBG_WAVEOP_KILL = 3, /* Kills a wavefront */
+ HSA_DBG_WAVEOP_DEBUG = 4, /* Causes wavefront to enter dbg mode */
+ HSA_DBG_WAVEOP_TRAP = 5, /* Causes wavefront to take a trap */
HSA_DBG_NUM_WAVEOP = 5,
HSA_DBG_MAX_WAVEOP = 0xFFFFFFFF
};
@@ -81,15 +79,13 @@ struct HsaDbgWaveMsgAMDGen2 {
uint32_t UserData:8; /* user data */
uint32_t ShaderArray:1; /* Shader array */
uint32_t Priv:1; /* Privileged */
- uint32_t Reserved0:4; /* This field is reserved,
- should be 0 */
+ uint32_t Reserved0:4; /* Reserved, should be 0 */
uint32_t WaveId:4; /* wave id */
uint32_t SIMD:2; /* SIMD id */
uint32_t HSACU:4; /* Compute unit */
uint32_t ShaderEngine:2;/* Shader engine */
uint32_t MessageType:2; /* see HSA_DBG_WAVEMSG_TYPE */
- uint32_t Reserved1:4; /* This field is reserved,
- should be 0 */
+ uint32_t Reserved1:4; /* Reserved, should be 0 */
} ui32;
uint32_t Value;
};
@@ -121,20 +117,23 @@ struct HsaDbgWaveMessage {
* in the user mode instruction stream. The OS scheduler event is typically
* associated and signaled by an interrupt issued by the GPU, but other HSA
* system interrupt conditions from other HW (e.g. IOMMUv2) may be surfaced
- * by the KFD by this mechanism, too. */
+ * by the KFD by this mechanism, too.
+ */
/* these are the new definitions for events */
enum HSA_EVENTTYPE {
HSA_EVENTTYPE_SIGNAL = 0, /* user-mode generated GPU signal */
HSA_EVENTTYPE_NODECHANGE = 1, /* HSA node change (attach/detach) */
HSA_EVENTTYPE_DEVICESTATECHANGE = 2, /* HSA device state change
- (start/stop) */
+ * (start/stop)
+ */
HSA_EVENTTYPE_HW_EXCEPTION = 3, /* GPU shader exception event */
HSA_EVENTTYPE_SYSTEM_EVENT = 4, /* GPU SYSCALL with parameter info */
HSA_EVENTTYPE_DEBUG_EVENT = 5, /* GPU signal for debugging */
HSA_EVENTTYPE_PROFILE_EVENT = 6,/* GPU signal for profiling */
HSA_EVENTTYPE_QUEUE_EVENT = 7, /* GPU signal queue idle state
- (EOP pm4) */
+ * (EOP pm4)
+ */
/* ... */
HSA_EVENTTYPE_MAXID,
HSA_EVENTTYPE_TYPE_SIZE = 0xFFFFFFFF
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7cb4019..61fff25b4ce7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -26,7 +26,7 @@
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
-#include "kfd_pm4_headers.h"
+#include "kfd_pm4_headers_vi.h"
#define MQD_SIZE_ALIGNED 768
@@ -98,11 +98,14 @@ static const struct kfd_device_info *lookup_device_info(unsigned short did)
for (i = 0; i < ARRAY_SIZE(supported_devices); i++) {
if (supported_devices[i].did == did) {
- BUG_ON(supported_devices[i].device_info == NULL);
+ WARN_ON(!supported_devices[i].device_info);
return supported_devices[i].device_info;
}
}
+ dev_warn(kfd_device, "DID %04x is missing in supported_devices\n",
+ did);
+
return NULL;
}
@@ -114,8 +117,10 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
- if (!device_info)
+ if (!device_info) {
+ dev_err(kfd_device, "kgd2kfd_probe failed\n");
return NULL;
+ }
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
if (!kfd)
@@ -152,15 +157,16 @@ static bool device_iommu_pasid_init(struct kfd_dev *kfd)
}
if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) {
- dev_err(kfd_device, "error required iommu flags ats(%i), pri(%i), pasid(%i)\n",
+ dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n",
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0,
(iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0,
- (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) != 0);
+ (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP)
+ != 0);
return false;
}
pasid_limit = min_t(unsigned int,
- (unsigned int)1 << kfd->device_info->max_pasid_bits,
+ (unsigned int)(1 << kfd->device_info->max_pasid_bits),
iommu_info.max_pasids);
/*
* last pasid is used for kernel queues doorbells
@@ -211,9 +217,8 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
flags);
dev = kfd_device_by_pci_dev(pdev);
- BUG_ON(dev == NULL);
-
- kfd_signal_iommu_event(dev, pasid, address,
+ if (!WARN_ON(!dev))
+ kfd_signal_iommu_event(dev, pasid, address,
flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
return AMD_IOMMU_INV_PRI_RSP_INVALID;
@@ -234,9 +239,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
* calculate max size of runlist packet.
* There can be only 2 packets at once
*/
- size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
- max_num_of_queues_per_device *
- sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
+ size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_mes_map_process) +
+ max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
+ + sizeof(struct pm4_mes_runlist)) * 2;
/* Add size of HIQ & DIQ */
size += KFD_KERNEL_QUEUE_SIZE * 2;
@@ -247,42 +252,37 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
if (kfd->kfd2kgd->init_gtt_mem_allocation(
kfd->kgd, size, &kfd->gtt_mem,
&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr)){
- dev_err(kfd_device,
- "Could not allocate %d bytes for device (%x:%x)\n",
- size, kfd->pdev->vendor, kfd->pdev->device);
+ dev_err(kfd_device, "Could not allocate %d bytes\n", size);
goto out;
}
- dev_info(kfd_device,
- "Allocated %d bytes on gart for device(%x:%x)\n",
- size, kfd->pdev->vendor, kfd->pdev->device);
+ dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
/* Initialize GTT sa with 512 byte chunk size */
if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
- dev_err(kfd_device,
- "Error initializing gtt sub-allocator\n");
+ dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
goto kfd_gtt_sa_init_error;
}
- kfd_doorbell_init(kfd);
-
- if (kfd_topology_add_device(kfd) != 0) {
+ if (kfd_doorbell_init(kfd)) {
dev_err(kfd_device,
- "Error adding device (%x:%x) to topology\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ "Error initializing doorbell aperture\n");
+ goto kfd_doorbell_error;
+ }
+
+ if (kfd_topology_add_device(kfd)) {
+ dev_err(kfd_device, "Error adding device to topology\n");
goto kfd_topology_add_device_error;
}
if (kfd_interrupt_init(kfd)) {
- dev_err(kfd_device,
- "Error initializing interrupts for device (%x:%x)\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ dev_err(kfd_device, "Error initializing interrupts\n");
goto kfd_interrupt_error;
}
if (!device_iommu_pasid_init(kfd)) {
dev_err(kfd_device,
- "Error initializing iommuv2 for device (%x:%x)\n",
+ "Error initializing iommuv2 for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
goto device_iommu_pasid_error;
}
@@ -292,15 +292,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->dqm = device_queue_manager_init(kfd);
if (!kfd->dqm) {
- dev_err(kfd_device,
- "Error initializing queue manager for device (%x:%x)\n",
- kfd->pdev->vendor, kfd->pdev->device);
+ dev_err(kfd_device, "Error initializing queue manager\n");
goto device_queue_manager_error;
}
- if (kfd->dqm->ops.start(kfd->dqm) != 0) {
+ if (kfd->dqm->ops.start(kfd->dqm)) {
dev_err(kfd_device,
- "Error starting queuen manager for device (%x:%x)\n",
+ "Error starting queue manager for device %x:%x\n",
kfd->pdev->vendor, kfd->pdev->device);
goto dqm_start_error;
}
@@ -308,10 +306,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->dbgmgr = NULL;
kfd->init_complete = true;
- dev_info(kfd_device, "added device (%x:%x)\n", kfd->pdev->vendor,
+ dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
kfd->pdev->device);
- pr_debug("kfd: Starting kfd with the following scheduling policy %d\n",
+ pr_debug("Starting kfd with the following scheduling policy %d\n",
sched_policy);
goto out;
@@ -325,11 +323,13 @@ device_iommu_pasid_error:
kfd_interrupt_error:
kfd_topology_remove_device(kfd);
kfd_topology_add_device_error:
+ kfd_doorbell_fini(kfd);
+kfd_doorbell_error:
kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
dev_err(kfd_device,
- "device (%x:%x) NOT added due to errors\n",
+ "device %x:%x NOT added due to errors\n",
kfd->pdev->vendor, kfd->pdev->device);
out:
return kfd->init_complete;
@@ -342,6 +342,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
amd_iommu_free_device(kfd->pdev);
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
+ kfd_doorbell_fini(kfd);
kfd_gtt_sa_fini(kfd);
kfd->kfd2kgd->free_gtt_mem(kfd->kgd, kfd->gtt_mem);
}
@@ -351,8 +352,6 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
void kgd2kfd_suspend(struct kfd_dev *kfd)
{
- BUG_ON(kfd == NULL);
-
if (kfd->init_complete) {
kfd->dqm->ops.stop(kfd->dqm);
amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
@@ -366,14 +365,15 @@ int kgd2kfd_resume(struct kfd_dev *kfd)
unsigned int pasid_limit;
int err;
- BUG_ON(kfd == NULL);
-
pasid_limit = kfd_get_pasid_limit();
if (kfd->init_complete) {
err = amd_iommu_init_device(kfd->pdev, pasid_limit);
- if (err < 0)
+ if (err < 0) {
+ dev_err(kfd_device, "failed to initialize iommu\n");
return -ENXIO;
+ }
+
amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
iommu_pasid_shutdown_callback);
amd_iommu_set_invalid_ppr_cb(kfd->pdev, iommu_invalid_ppr_cb);
@@ -402,26 +402,27 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
unsigned int chunk_size)
{
- unsigned int num_of_bits;
+ unsigned int num_of_longs;
- BUG_ON(!kfd);
- BUG_ON(!kfd->gtt_mem);
- BUG_ON(buf_size < chunk_size);
- BUG_ON(buf_size == 0);
- BUG_ON(chunk_size == 0);
+ if (WARN_ON(buf_size < chunk_size))
+ return -EINVAL;
+ if (WARN_ON(buf_size == 0))
+ return -EINVAL;
+ if (WARN_ON(chunk_size == 0))
+ return -EINVAL;
kfd->gtt_sa_chunk_size = chunk_size;
kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
- num_of_bits = kfd->gtt_sa_num_of_chunks / BITS_PER_BYTE;
- BUG_ON(num_of_bits == 0);
+ num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
+ BITS_PER_LONG;
- kfd->gtt_sa_bitmap = kzalloc(num_of_bits, GFP_KERNEL);
+ kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
if (!kfd->gtt_sa_bitmap)
return -ENOMEM;
- pr_debug("kfd: gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
+ pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
mutex_init(&kfd->gtt_sa_lock);
@@ -455,8 +456,6 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
{
unsigned int found, start_search, cur_size;
- BUG_ON(!kfd);
-
if (size == 0)
return -EINVAL;
@@ -467,7 +466,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if ((*mem_obj) == NULL)
return -ENOMEM;
- pr_debug("kfd: allocated mem_obj = %p for size = %d\n", *mem_obj, size);
+ pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
start_search = 0;
@@ -479,7 +478,7 @@ kfd_gtt_restart_search:
kfd->gtt_sa_num_of_chunks,
start_search);
- pr_debug("kfd: found = %d\n", found);
+ pr_debug("Found = %d\n", found);
/* If there wasn't any free chunk, bail out */
if (found == kfd->gtt_sa_num_of_chunks)
@@ -497,12 +496,12 @@ kfd_gtt_restart_search:
found,
kfd->gtt_sa_chunk_size);
- pr_debug("kfd: gpu_addr = %p, cpu_addr = %p\n",
+ pr_debug("gpu_addr = %p, cpu_addr = %p\n",
(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
/* If we need only one chunk, mark it as allocated and get out */
if (size <= kfd->gtt_sa_chunk_size) {
- pr_debug("kfd: single bit\n");
+ pr_debug("Single bit\n");
set_bit(found, kfd->gtt_sa_bitmap);
goto kfd_gtt_out;
}
@@ -537,7 +536,7 @@ kfd_gtt_restart_search:
} while (cur_size > 0);
- pr_debug("kfd: range_start = %d, range_end = %d\n",
+ pr_debug("range_start = %d, range_end = %d\n",
(*mem_obj)->range_start, (*mem_obj)->range_end);
/* Mark the chunks as allocated */
@@ -551,7 +550,7 @@ kfd_gtt_out:
return 0;
kfd_gtt_no_free_chunk:
- pr_debug("kfd: allocation failed with mem_obj = %p\n", mem_obj);
+ pr_debug("Allocation failed with mem_obj = %p\n", mem_obj);
mutex_unlock(&kfd->gtt_sa_lock);
kfree(mem_obj);
return -ENOMEM;
@@ -561,13 +560,11 @@ int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
unsigned int bit;
- BUG_ON(!kfd);
-
/* Act like kfree when trying to free a NULL object */
if (!mem_obj)
return 0;
- pr_debug("kfd: free mem_obj = %p, range_start = %d, range_end = %d\n",
+ pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
mem_obj, mem_obj->range_start, mem_obj->range_end);
mutex_lock(&kfd->gtt_sa_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 602769ced3bd..53a66e821624 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -79,20 +79,17 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
unsigned int get_queues_num(struct device_queue_manager *dqm)
{
- BUG_ON(!dqm || !dqm->dev);
return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
KGD_MAX_QUEUES);
}
unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
- BUG_ON(!dqm || !dqm->dev);
return dqm->dev->shared_resources.num_queue_per_pipe;
}
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
- BUG_ON(!dqm || !dqm->dev);
return dqm->dev->shared_resources.num_pipe_per_mec;
}
@@ -121,7 +118,7 @@ static int allocate_vmid(struct device_queue_manager *dqm,
/* Kaveri kfd vmid's starts from vmid 8 */
allocated_vmid = bit + KFD_VMID_START_OFFSET;
- pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
+ pr_debug("vmid allocation %d\n", allocated_vmid);
qpd->vmid = allocated_vmid;
q->properties.vmid = allocated_vmid;
@@ -152,42 +149,38 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
{
int retval;
- BUG_ON(!dqm || !q || !qpd || !allocated_vmid);
-
- pr_debug("kfd: In func %s\n", __func__);
print_queue(q);
mutex_lock(&dqm->lock);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
- pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+ pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
- mutex_unlock(&dqm->lock);
- return -EPERM;
+ retval = -EPERM;
+ goto out_unlock;
}
if (list_empty(&qpd->queues_list)) {
retval = allocate_vmid(dqm, qpd, q);
- if (retval != 0) {
- mutex_unlock(&dqm->lock);
- return retval;
- }
+ if (retval)
+ goto out_unlock;
}
*allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
retval = create_compute_queue_nocpsch(dqm, q, qpd);
- if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
retval = create_sdma_queue_nocpsch(dqm, q, qpd);
+ else
+ retval = -EINVAL;
- if (retval != 0) {
+ if (retval) {
if (list_empty(&qpd->queues_list)) {
deallocate_vmid(dqm, qpd, q);
*allocated_vmid = 0;
}
- mutex_unlock(&dqm->lock);
- return retval;
+ goto out_unlock;
}
list_add(&q->list, &qpd->queues_list);
@@ -205,8 +198,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
pr_debug("Total of %d queues are accountable so far\n",
dqm->total_queue_count);
+out_unlock:
mutex_unlock(&dqm->lock);
- return 0;
+ return retval;
}
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
@@ -216,7 +210,8 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
set = false;
- for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+ for (pipe = dqm->next_pipe_to_allocate, i = 0;
+ i < get_pipes_per_mec(dqm);
pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
if (!is_pipe_enabled(dqm, 0, pipe))
@@ -239,8 +234,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
if (!set)
return -EBUSY;
- pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
- __func__, q->pipe, q->queue);
+ pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
/* horizontal hqd allocation */
dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
@@ -260,36 +254,38 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
int retval;
struct mqd_manager *mqd;
- BUG_ON(!dqm || !q || !qpd);
-
mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
- if (mqd == NULL)
+ if (!mqd)
return -ENOMEM;
retval = allocate_hqd(dqm, q);
- if (retval != 0)
+ if (retval)
return retval;
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
- if (retval != 0) {
- deallocate_hqd(dqm, q);
- return retval;
- }
+ if (retval)
+ goto out_deallocate_hqd;
- pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
- q->pipe,
- q->queue);
+ pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
+ q->pipe, q->queue);
- retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
- q->queue, (uint32_t __user *) q->properties.write_ptr);
- if (retval != 0) {
- deallocate_hqd(dqm, q);
- mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
- return retval;
- }
+ dqm->dev->kfd2kgd->set_scratch_backing_va(
+ dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);
+
+ retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue, &q->properties,
+ q->process->mm);
+ if (retval)
+ goto out_uninit_mqd;
return 0;
+
+out_uninit_mqd:
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_hqd:
+ deallocate_hqd(dqm, q);
+
+ return retval;
}
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
@@ -299,12 +295,8 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
int retval;
struct mqd_manager *mqd;
- BUG_ON(!dqm || !q || !q->mqd || !qpd);
-
retval = 0;
- pr_debug("kfd: In Func %s\n", __func__);
-
mutex_lock(&dqm->lock);
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
@@ -323,7 +315,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
} else {
- pr_debug("q->properties.type is invalid (%d)\n",
+ pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
retval = -EINVAL;
goto out;
@@ -334,7 +326,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
q->pipe, q->queue);
- if (retval != 0)
+ if (retval)
goto out;
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
@@ -364,14 +356,12 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
struct mqd_manager *mqd;
bool prev_active = false;
- BUG_ON(!dqm || !q || !q->mqd);
-
mutex_lock(&dqm->lock);
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
- if (mqd == NULL) {
- mutex_unlock(&dqm->lock);
- return -ENOMEM;
+ if (!mqd) {
+ retval = -ENOMEM;
+ goto out_unlock;
}
if (q->properties.is_active)
@@ -385,12 +375,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
if ((q->properties.is_active) && (!prev_active))
dqm->queue_count++;
- else if ((!q->properties.is_active) && (prev_active))
+ else if (!q->properties.is_active && prev_active)
dqm->queue_count--;
if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = execute_queues_cpsch(dqm, false);
+out_unlock:
mutex_unlock(&dqm->lock);
return retval;
}
@@ -400,15 +391,16 @@ static struct mqd_manager *get_mqd_manager_nocpsch(
{
struct mqd_manager *mqd;
- BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
- pr_debug("kfd: In func %s mqd type %d\n", __func__, type);
+ pr_debug("mqd type %d\n", type);
mqd = dqm->mqds[type];
if (!mqd) {
mqd = mqd_manager_init(type, dqm->dev);
- if (mqd == NULL)
- pr_err("kfd: mqd manager is NULL");
+ if (!mqd)
+ pr_err("mqd manager is NULL");
dqm->mqds[type] = mqd;
}
@@ -421,11 +413,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm,
struct device_process_node *n;
int retval;
- BUG_ON(!dqm || !qpd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
if (!n)
return -ENOMEM;
@@ -449,10 +437,6 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
int retval;
struct device_process_node *cur, *next;
- BUG_ON(!dqm || !qpd);
-
- pr_debug("In func %s\n", __func__);
-
pr_debug("qpd->queues_list is %s\n",
list_empty(&qpd->queues_list) ? "empty" : "not empty");
@@ -493,51 +477,39 @@ static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i;
- BUG_ON(dqm == NULL);
-
for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
if (is_pipe_enabled(dqm, 0, i))
dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}
-static int init_scheduler(struct device_queue_manager *dqm)
-{
- int retval = 0;
-
- BUG_ON(!dqm);
-
- pr_debug("kfd: In %s\n", __func__);
-
- return retval;
-}
-
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
- int i;
+ int pipe, queue;
- BUG_ON(!dqm);
+ pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
- pr_debug("kfd: In func %s num of pipes: %d\n",
- __func__, get_pipes_per_mec(dqm));
+ dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
+ sizeof(unsigned int), GFP_KERNEL);
+ if (!dqm->allocated_queues)
+ return -ENOMEM;
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
- dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
- sizeof(unsigned int), GFP_KERNEL);
- if (!dqm->allocated_queues) {
- mutex_destroy(&dqm->lock);
- return -ENOMEM;
- }
- for (i = 0; i < get_pipes_per_mec(dqm); i++)
- dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
+ for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
+ int pipe_offset = pipe * get_queues_per_pipe(dqm);
+
+ for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
+ if (test_bit(pipe_offset + queue,
+ dqm->dev->shared_resources.queue_bitmap))
+ dqm->allocated_queues[pipe] |= 1 << queue;
+ }
dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
- init_scheduler(dqm);
return 0;
}
@@ -545,9 +517,7 @@ static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
int i;
- BUG_ON(!dqm);
-
- BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+ WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -604,33 +574,34 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
return -ENOMEM;
retval = allocate_sdma_queue(dqm, &q->sdma_id);
- if (retval != 0)
+ if (retval)
return retval;
q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;
- pr_debug("kfd: sdma id is: %d\n", q->sdma_id);
- pr_debug(" sdma queue id: %d\n", q->properties.sdma_queue_id);
- pr_debug(" sdma engine id: %d\n", q->properties.sdma_engine_id);
+ pr_debug("SDMA id is: %d\n", q->sdma_id);
+ pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
+ pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
- if (retval != 0) {
- deallocate_sdma_queue(dqm, q->sdma_id);
- return retval;
- }
+ if (retval)
+ goto out_deallocate_sdma_queue;
- retval = mqd->load_mqd(mqd, q->mqd, 0,
- 0, NULL);
- if (retval != 0) {
- deallocate_sdma_queue(dqm, q->sdma_id);
- mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
- return retval;
- }
+ retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
+ if (retval)
+ goto out_uninit_mqd;
return 0;
+
+out_uninit_mqd:
+ mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_sdma_queue:
+ deallocate_sdma_queue(dqm, q->sdma_id);
+
+ return retval;
}
/*
@@ -642,10 +613,6 @@ static int set_sched_resources(struct device_queue_manager *dqm)
int i, mec;
struct scheduling_resources res;
- BUG_ON(!dqm);
-
- pr_debug("kfd: In func %s\n", __func__);
-
res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
res.vmid_mask <<= KFD_VMID_START_OFFSET;
@@ -663,8 +630,9 @@ static int set_sched_resources(struct device_queue_manager *dqm)
/* This situation may be hit in the future if a new HW
* generation exposes more than 64 queues. If so, the
- * definition of res.queue_mask needs updating */
- if (WARN_ON(i > (sizeof(res.queue_mask)*8))) {
+ * definition of res.queue_mask needs updating
+ */
+ if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
pr_err("Invalid queue enabled by amdgpu: %d\n", i);
break;
}
@@ -674,9 +642,9 @@ static int set_sched_resources(struct device_queue_manager *dqm)
res.gws_mask = res.oac_mask = res.gds_heap_base =
res.gds_heap_size = 0;
- pr_debug("kfd: scheduling resources:\n"
- " vmid mask: 0x%8X\n"
- " queue mask: 0x%8llX\n",
+ pr_debug("Scheduling resources:\n"
+ "vmid mask: 0x%8X\n"
+ "queue mask: 0x%8llX\n",
res.vmid_mask, res.queue_mask);
return pm_send_set_resources(&dqm->packets, &res);
@@ -686,10 +654,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
{
int retval;
- BUG_ON(!dqm);
-
- pr_debug("kfd: In func %s num of pipes: %d\n",
- __func__, get_pipes_per_mec(dqm));
+ pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
@@ -697,13 +662,9 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
dqm->sdma_queue_count = 0;
dqm->active_runlist = false;
retval = dqm->ops_asic_specific.initialize(dqm);
- if (retval != 0)
- goto fail_init_pipelines;
-
- return 0;
+ if (retval)
+ mutex_destroy(&dqm->lock);
-fail_init_pipelines:
- mutex_destroy(&dqm->lock);
return retval;
}
@@ -712,25 +673,23 @@ static int start_cpsch(struct device_queue_manager *dqm)
struct device_process_node *node;
int retval;
- BUG_ON(!dqm);
-
retval = 0;
retval = pm_init(&dqm->packets, dqm);
- if (retval != 0)
+ if (retval)
goto fail_packet_manager_init;
retval = set_sched_resources(dqm);
- if (retval != 0)
+ if (retval)
goto fail_set_sched_resources;
- pr_debug("kfd: allocating fence memory\n");
+ pr_debug("Allocating fence memory\n");
/* allocate fence memory on the gart */
retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
&dqm->fence_mem);
- if (retval != 0)
+ if (retval)
goto fail_allocate_vidmem;
dqm->fence_addr = dqm->fence_mem->cpu_ptr;
@@ -758,8 +717,6 @@ static int stop_cpsch(struct device_queue_manager *dqm)
struct device_process_node *node;
struct kfd_process_device *pdd;
- BUG_ON(!dqm);
-
destroy_queues_cpsch(dqm, true, true);
list_for_each_entry(node, &dqm->queues, list) {
@@ -776,13 +733,9 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd)
{
- BUG_ON(!dqm || !kq || !qpd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
mutex_lock(&dqm->lock);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
- pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
+ pr_warn("Can't create new kernel queue because %d queues were already created\n",
dqm->total_queue_count);
mutex_unlock(&dqm->lock);
return -EPERM;
@@ -809,10 +762,6 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
struct kernel_queue *kq,
struct qcm_process_device *qpd)
{
- BUG_ON(!dqm || !kq);
-
- pr_debug("kfd: In %s\n", __func__);
-
mutex_lock(&dqm->lock);
/* here we actually preempt the DIQ */
destroy_queues_cpsch(dqm, true, false);
@@ -844,8 +793,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
int retval;
struct mqd_manager *mqd;
- BUG_ON(!dqm || !q || !qpd);
-
retval = 0;
if (allocate_vmid)
@@ -854,7 +801,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_lock(&dqm->lock);
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
- pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
+ pr_warn("Can't create new usermode queue because %d queues were already created\n",
dqm->total_queue_count);
retval = -EPERM;
goto out;
@@ -866,15 +813,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
- if (mqd == NULL) {
- mutex_unlock(&dqm->lock);
- return -ENOMEM;
+ if (!mqd) {
+ retval = -ENOMEM;
+ goto out;
}
dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
- if (retval != 0)
+ if (retval)
goto out;
list_add(&q->list, &qpd->queues_list);
@@ -884,7 +831,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
}
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
- dqm->sdma_queue_count++;
+ dqm->sdma_queue_count++;
/*
* Unconditionally increment this counter, regardless of the queue's
* type or whether the queue is active.
@@ -903,12 +850,11 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
unsigned int fence_value,
unsigned long timeout)
{
- BUG_ON(!fence_addr);
timeout += jiffies;
while (*fence_addr != fence_value) {
if (time_after(jiffies, timeout)) {
- pr_err("kfd: qcm fence wait loop timeout expired\n");
+ pr_err("qcm fence wait loop timeout expired\n");
return -ETIME;
}
schedule();
@@ -932,8 +878,6 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
enum kfd_preempt_type_filter preempt_type;
struct kfd_process_device *pdd;
- BUG_ON(!dqm);
-
retval = 0;
if (lock)
@@ -941,7 +885,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
if (!dqm->active_runlist)
goto out;
- pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
+ pr_debug("Before destroying queues, sdma queue count is : %u\n",
dqm->sdma_queue_count);
if (dqm->sdma_queue_count > 0) {
@@ -955,7 +899,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
preempt_type, 0, false, 0);
- if (retval != 0)
+ if (retval)
goto out;
*dqm->fence_addr = KFD_FENCE_INIT;
@@ -964,7 +908,7 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
/* should be timed out */
retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
- if (retval != 0) {
+ if (retval) {
pdd = kfd_get_process_device_data(dqm->dev,
kfd_get_process(current));
pdd->reset_wavefronts = true;
@@ -983,14 +927,12 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
int retval;
- BUG_ON(!dqm);
-
if (lock)
mutex_lock(&dqm->lock);
retval = destroy_queues_cpsch(dqm, false, false);
- if (retval != 0) {
- pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
+ if (retval) {
+ pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
goto out;
}
@@ -1005,8 +947,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
}
retval = pm_send_runlist(&dqm->packets, &dqm->queues);
- if (retval != 0) {
- pr_err("kfd: failed to execute runlist");
+ if (retval) {
+ pr_err("failed to execute runlist");
goto out;
}
dqm->active_runlist = true;
@@ -1025,8 +967,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
struct mqd_manager *mqd;
bool preempt_all_queues;
- BUG_ON(!dqm || !qpd || !q);
-
preempt_all_queues = false;
retval = 0;
@@ -1098,8 +1038,6 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
{
bool retval;
- pr_debug("kfd: In func %s\n", __func__);
-
mutex_lock(&dqm->lock);
if (alternate_aperture_size == 0) {
@@ -1120,14 +1058,11 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
uint64_t base = (uintptr_t)alternate_aperture_base;
uint64_t limit = base + alternate_aperture_size - 1;
- if (limit <= base)
- goto out;
-
- if ((base & APE1_FIXED_BITS_MASK) != 0)
- goto out;
-
- if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
+ if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
+ (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
+ retval = false;
goto out;
+ }
qpd->sh_mem_ape1_base = base >> 16;
qpd->sh_mem_ape1_limit = limit >> 16;
@@ -1144,27 +1079,22 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
program_sh_mem_settings(dqm, qpd);
- pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
+ pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
qpd->sh_mem_config, qpd->sh_mem_ape1_base,
qpd->sh_mem_ape1_limit);
- mutex_unlock(&dqm->lock);
- return retval;
-
out:
mutex_unlock(&dqm->lock);
- return false;
+ return retval;
}
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
struct device_queue_manager *dqm;
- BUG_ON(!dev);
+ pr_debug("Loading device queue manager\n");
- pr_debug("kfd: loading device queue manager\n");
-
- dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
+ dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
if (!dqm)
return NULL;
@@ -1202,8 +1132,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
break;
default:
- BUG();
- break;
+ pr_err("Invalid scheduling policy %d\n", sched_policy);
+ goto out_free;
}
switch (dev->device_info->asic_family) {
@@ -1216,18 +1146,16 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
break;
}
- if (dqm->ops.initialize(dqm) != 0) {
- kfree(dqm);
- return NULL;
- }
+ if (!dqm->ops.initialize(dqm))
+ return dqm;
- return dqm;
+out_free:
+ kfree(dqm);
+ return NULL;
}
void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
- BUG_ON(!dqm);
-
dqm->ops.uninitialize(dqm);
kfree(dqm);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index 48dc0561b402..72c3cbabc0a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -24,6 +24,7 @@
#include "kfd_device_queue_manager.h"
#include "cik_regs.h"
#include "oss/oss_2_4_sh_mask.h"
+#include "gca/gfx_7_2_sh_mask.h"
static bool set_cache_memory_policy_cik(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
@@ -65,7 +66,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
* for LDS/Scratch and GPUVM.
*/
- BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+ WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
top_address_nybble == 0);
return PRIVATE_BASE(top_address_nybble << 12) |
@@ -104,8 +105,6 @@ static int register_process_cik(struct device_queue_manager *dqm,
struct kfd_process_device *pdd;
unsigned int temp;
- BUG_ON(!dqm || !qpd);
-
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
@@ -125,9 +124,10 @@ static int register_process_cik(struct device_queue_manager *dqm,
} else {
temp = get_sh_mem_bases_nybble_64(pdd);
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
+ qpd->sh_mem_config |= 1 << SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
}
- pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
index 7e9cae9d349b..40e9ddd096cd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c
@@ -67,7 +67,7 @@ static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
* for LDS/Scratch and GPUVM.
*/
- BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
+ WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
top_address_nybble == 0);
return top_address_nybble << 12 |
@@ -110,8 +110,6 @@ static int register_process_vi(struct device_queue_manager *dqm,
struct kfd_process_device *pdd;
unsigned int temp;
- BUG_ON(!dqm || !qpd);
-
pdd = qpd_to_pdd(qpd);
/* check if sh_mem_config register already configured */
@@ -137,9 +135,11 @@ static int register_process_vi(struct device_queue_manager *dqm,
qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 <<
SH_MEM_CONFIG__ADDRESS_MODE__SHIFT;
+ qpd->sh_mem_config |= 1 <<
+ SH_MEM_CONFIG__PRIVATE_ATC__SHIFT;
}
- pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
+ pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index 453c5d66e5c3..acf4d2a977ad 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -59,7 +59,7 @@ static inline size_t doorbell_process_allocation(void)
}
/* Doorbell calculations for device init. */
-void kfd_doorbell_init(struct kfd_dev *kfd)
+int kfd_doorbell_init(struct kfd_dev *kfd)
{
size_t doorbell_start_offset;
size_t doorbell_aperture_size;
@@ -95,26 +95,35 @@ void kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
doorbell_process_allocation());
- BUG_ON(!kfd->doorbell_kernel_ptr);
+ if (!kfd->doorbell_kernel_ptr)
+ return -ENOMEM;
- pr_debug("kfd: doorbell initialization:\n");
- pr_debug("kfd: doorbell base == 0x%08lX\n",
+ pr_debug("Doorbell initialization:\n");
+ pr_debug("doorbell base == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
- pr_debug("kfd: doorbell_id_offset == 0x%08lX\n",
+ pr_debug("doorbell_id_offset == 0x%08lX\n",
kfd->doorbell_id_offset);
- pr_debug("kfd: doorbell_process_limit == 0x%08lX\n",
+ pr_debug("doorbell_process_limit == 0x%08lX\n",
doorbell_process_limit);
- pr_debug("kfd: doorbell_kernel_offset == 0x%08lX\n",
+ pr_debug("doorbell_kernel_offset == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
- pr_debug("kfd: doorbell aperture size == 0x%08lX\n",
+ pr_debug("doorbell aperture size == 0x%08lX\n",
kfd->shared_resources.doorbell_aperture_size);
- pr_debug("kfd: doorbell kernel address == 0x%08lX\n",
+ pr_debug("doorbell kernel address == 0x%08lX\n",
(uintptr_t)kfd->doorbell_kernel_ptr);
+
+ return 0;
+}
+
+void kfd_doorbell_fini(struct kfd_dev *kfd)
+{
+ if (kfd->doorbell_kernel_ptr)
+ iounmap(kfd->doorbell_kernel_ptr);
}
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
@@ -131,7 +140,7 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
/* Find kfd device according to gpu id */
dev = kfd_device_by_id(vma->vm_pgoff);
- if (dev == NULL)
+ if (!dev)
return -EINVAL;
/* Calculate physical address of doorbell */
@@ -142,12 +151,11 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- pr_debug("kfd: mapping doorbell page in %s\n"
+ pr_debug("Mapping doorbell page\n"
" target user address == 0x%08llX\n"
" physical address == 0x%08llX\n"
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
- __func__,
(unsigned long long) vma->vm_start, address, vma->vm_flags,
doorbell_process_allocation());
@@ -166,8 +174,6 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
{
u32 inx;
- BUG_ON(!kfd || !doorbell_off);
-
mutex_lock(&kfd->doorbell_mutex);
inx = find_first_zero_bit(kfd->doorbell_available_index,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
@@ -185,7 +191,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
*doorbell_off = KERNEL_DOORBELL_PASID * (doorbell_process_allocation() /
sizeof(u32)) + inx;
- pr_debug("kfd: get kernel queue doorbell\n"
+ pr_debug("Get kernel queue doorbell\n"
" doorbell offset == 0x%08X\n"
" kernel address == 0x%08lX\n",
*doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx));
@@ -197,8 +203,6 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
{
unsigned int inx;
- BUG_ON(!kfd || !db_addr);
-
inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr);
mutex_lock(&kfd->doorbell_mutex);
@@ -210,7 +214,7 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
{
if (db) {
writel(value, db);
- pr_debug("writing %d to doorbell address 0x%p\n", value, db);
+ pr_debug("Writing %d to doorbell address 0x%p\n", value, db);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index d1ce83d73a87..5979158c3f7b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -110,7 +110,7 @@ static bool allocate_free_slot(struct kfd_process *process,
*out_page = page;
*out_slot_index = slot;
- pr_debug("allocated event signal slot in page %p, slot %d\n",
+ pr_debug("Allocated event signal slot in page %p, slot %d\n",
page, slot);
return true;
@@ -155,9 +155,9 @@ static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
struct signal_page,
event_pages)->page_index + 1;
- pr_debug("allocated new event signal page at %p, for process %p\n",
+ pr_debug("Allocated new event signal page at %p, for process %p\n",
page, p);
- pr_debug("page index is %d\n", page->page_index);
+ pr_debug("Page index is %d\n", page->page_index);
list_add(&page->event_pages, &p->signal_event_pages);
@@ -194,7 +194,8 @@ static void release_event_notification_slot(struct signal_page *page,
page->free_slots++;
/* We don't free signal pages, they are retained by the process
- * and reused until it exits. */
+ * and reused until it exits.
+ */
}
static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
@@ -246,7 +247,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
for (id = p->next_nonsignal_event_id;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
- lookup_event_by_id(p, id) != NULL;
+ lookup_event_by_id(p, id);
id++)
;
@@ -265,7 +266,7 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
id < KFD_LAST_NONSIGNAL_EVENT_ID &&
- lookup_event_by_id(p, id) != NULL;
+ lookup_event_by_id(p, id);
id++)
;
@@ -291,13 +292,13 @@ static int create_signal_event(struct file *devkfd,
struct kfd_event *ev)
{
if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
- pr_warn("amdkfd: Signal event wasn't created because limit was reached\n");
+ pr_warn("Signal event wasn't created because limit was reached\n");
return -ENOMEM;
}
if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
&ev->signal_slot_index)) {
- pr_warn("amdkfd: Signal event wasn't created because out of kernel memory\n");
+ pr_warn("Signal event wasn't created because out of kernel memory\n");
return -ENOMEM;
}
@@ -309,11 +310,7 @@ static int create_signal_event(struct file *devkfd,
ev->event_id = make_signal_event_id(ev->signal_page,
ev->signal_slot_index);
- pr_debug("signal event number %zu created with id %d, address %p\n",
- p->signal_event_count, ev->event_id,
- ev->user_signal_address);
-
- pr_debug("signal event number %zu created with id %d, address %p\n",
+ pr_debug("Signal event number %zu created with id %d, address %p\n",
p->signal_event_count, ev->event_id,
ev->user_signal_address);
@@ -345,7 +342,7 @@ void kfd_event_init_process(struct kfd_process *p)
static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
{
- if (ev->signal_page != NULL) {
+ if (ev->signal_page) {
release_event_notification_slot(ev->signal_page,
ev->signal_slot_index);
p->signal_event_count--;
@@ -584,7 +581,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
* search faster.
*/
struct signal_page *page;
- unsigned i;
+ unsigned int i;
list_for_each_entry(page, &p->signal_event_pages, event_pages)
for (i = 0; i < SLOTS_PER_PAGE; i++)
@@ -816,7 +813,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
/* check required size is logical */
if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
get_order(vma->vm_end - vma->vm_start)) {
- pr_err("amdkfd: event page mmap requested illegal size\n");
+ pr_err("Event page mmap requested illegal size\n");
return -EINVAL;
}
@@ -825,7 +822,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
page = lookup_signal_page_by_index(p, page_index);
if (!page) {
/* Probably KFD bug, but mmap is user-accessible. */
- pr_debug("signal page could not be found for page_index %u\n",
+ pr_debug("Signal page could not be found for page_index %u\n",
page_index);
return -EINVAL;
}
@@ -836,7 +833,7 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE
| VM_DONTDUMP | VM_PFNMAP;
- pr_debug("mapping signal page\n");
+ pr_debug("Mapping signal page\n");
pr_debug(" start user address == 0x%08lx\n", vma->vm_start);
pr_debug(" end user address == 0x%08lx\n", vma->vm_end);
pr_debug(" pfn == 0x%016lX\n", pfn);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 2b655103ba79..c59384bbbc5f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -304,7 +304,7 @@ int kfd_init_apertures(struct kfd_process *process)
id < NUM_OF_SUPPORTED_GPUS) {
pdd = kfd_create_process_device_data(dev, process);
- if (pdd == NULL) {
+ if (!pdd) {
pr_err("Failed to create process device data\n");
return -1;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 7f134aa9bfd3..70b3a99cffc2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -179,7 +179,7 @@ static void interrupt_wq(struct work_struct *work)
bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
{
/* integer and bitwise OR so there is no boolean short-circuiting */
- unsigned wanted = 0;
+ unsigned int wanted = 0;
wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
ih_ring_entry);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index d135cd002a95..681b639f5133 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -41,11 +41,11 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
int retval;
union PM4_MES_TYPE_3_HEADER nop;
- BUG_ON(!kq || !dev);
- BUG_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ);
+ if (WARN_ON(type != KFD_QUEUE_TYPE_DIQ && type != KFD_QUEUE_TYPE_HIQ))
+ return false;
- pr_debug("amdkfd: In func %s initializing queue type %d size %d\n",
- __func__, KFD_QUEUE_TYPE_HIQ, queue_size);
+ pr_debug("Initializing queue type %d size %d\n", KFD_QUEUE_TYPE_HIQ,
+ queue_size);
memset(&prop, 0, sizeof(prop));
memset(&nop, 0, sizeof(nop));
@@ -63,23 +63,23 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
KFD_MQD_TYPE_HIQ);
break;
default:
- BUG();
- break;
+ pr_err("Invalid queue type %d\n", type);
+ return false;
}
- if (kq->mqd == NULL)
+ if (!kq->mqd)
return false;
prop.doorbell_ptr = kfd_get_kernel_doorbell(dev, &prop.doorbell_off);
- if (prop.doorbell_ptr == NULL) {
- pr_err("amdkfd: error init doorbell");
+ if (!prop.doorbell_ptr) {
+ pr_err("Failed to initialize doorbell");
goto err_get_kernel_doorbell;
}
retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
if (retval != 0) {
- pr_err("amdkfd: error init pq queues size (%d)\n", queue_size);
+ pr_err("Failed to init pq queues size %d\n", queue_size);
goto err_pq_allocate_vidmem;
}
@@ -87,7 +87,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->pq_gpu_addr = kq->pq->gpu_addr;
retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
- if (retval == false)
+ if (!retval)
goto err_eop_allocate_vidmem;
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
@@ -139,11 +139,12 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
/* assign HIQ to HQD */
if (type == KFD_QUEUE_TYPE_HIQ) {
- pr_debug("assigning hiq to hqd\n");
+ pr_debug("Assigning hiq to hqd\n");
kq->queue->pipe = KFD_CIK_HIQ_PIPE;
kq->queue->queue = KFD_CIK_HIQ_QUEUE;
kq->mqd->load_mqd(kq->mqd, kq->queue->mqd, kq->queue->pipe,
- kq->queue->queue, NULL);
+ kq->queue->queue, &kq->queue->properties,
+ NULL);
} else {
/* allocate fence for DIQ */
@@ -180,8 +181,6 @@ err_get_kernel_doorbell:
static void uninitialize(struct kernel_queue *kq)
{
- BUG_ON(!kq);
-
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd->destroy_mqd(kq->mqd,
NULL,
@@ -211,8 +210,6 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
uint32_t wptr, rptr;
unsigned int *queue_address;
- BUG_ON(!kq || !buffer_ptr);
-
rptr = *kq->rptr_kernel;
wptr = *kq->wptr_kernel;
queue_address = (unsigned int *)kq->pq_kernel_addr;
@@ -252,11 +249,7 @@ static void submit_packet(struct kernel_queue *kq)
{
#ifdef DEBUG
int i;
-#endif
-
- BUG_ON(!kq);
-#ifdef DEBUG
for (i = *kq->wptr_kernel; i < kq->pending_wptr; i++) {
pr_debug("0x%2X ", kq->pq_kernel_addr[i]);
if (i % 15 == 0)
@@ -272,7 +265,6 @@ static void submit_packet(struct kernel_queue *kq)
static void rollback_packet(struct kernel_queue *kq)
{
- BUG_ON(!kq);
kq->pending_wptr = *kq->queue->properties.write_ptr;
}
@@ -281,9 +273,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
{
struct kernel_queue *kq;
- BUG_ON(!dev);
-
- kq = kzalloc(sizeof(struct kernel_queue), GFP_KERNEL);
+ kq = kzalloc(sizeof(*kq), GFP_KERNEL);
if (!kq)
return NULL;
@@ -304,7 +294,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
}
if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) {
- pr_err("amdkfd: failed to init kernel queue\n");
+ pr_err("Failed to init kernel queue\n");
kfree(kq);
return NULL;
}
@@ -313,32 +303,37 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
void kernel_queue_uninit(struct kernel_queue *kq)
{
- BUG_ON(!kq);
-
kq->ops.uninitialize(kq);
kfree(kq);
}
+/* FIXME: Can this test be removed? */
static __attribute__((unused)) void test_kq(struct kfd_dev *dev)
{
struct kernel_queue *kq;
uint32_t *buffer, i;
int retval;
- BUG_ON(!dev);
-
- pr_err("amdkfd: starting kernel queue test\n");
+ pr_err("Starting kernel queue test\n");
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
- BUG_ON(!kq);
+ if (unlikely(!kq)) {
+ pr_err(" Failed to initialize HIQ\n");
+ pr_err("Kernel queue test failed\n");
+ return;
+ }
retval = kq->ops.acquire_packet_buffer(kq, 5, &buffer);
- BUG_ON(retval != 0);
+ if (unlikely(retval != 0)) {
+ pr_err(" Failed to acquire packet buffer\n");
+ pr_err("Kernel queue test failed\n");
+ return;
+ }
for (i = 0; i < 5; i++)
buffer[i] = kq->nop_packet;
kq->ops.submit_packet(kq);
- pr_err("amdkfd: ending kernel queue test\n");
+ pr_err("Ending kernel queue test\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 850a5623661f..0d73bea22c45 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -61,7 +61,8 @@ MODULE_PARM_DESC(send_sigterm,
static int amdkfd_init_completed;
-int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
+int kgd2kfd_init(unsigned int interface_version,
+ const struct kgd2kfd_calls **g2f)
{
if (!amdkfd_init_completed)
return -EPROBE_DEFER;
@@ -90,7 +91,7 @@ static int __init kfd_module_init(void)
/* Verify module parameters */
if ((sched_policy < KFD_SCHED_POLICY_HWS) ||
(sched_policy > KFD_SCHED_POLICY_NO_HWS)) {
- pr_err("kfd: sched_policy has invalid value\n");
+ pr_err("sched_policy has invalid value\n");
return -1;
}
@@ -98,13 +99,13 @@ static int __init kfd_module_init(void)
if ((max_num_of_queues_per_device < 1) ||
(max_num_of_queues_per_device >
KFD_MAX_NUM_OF_QUEUES_PER_DEVICE)) {
- pr_err("kfd: max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
+ pr_err("max_num_of_queues_per_device must be between 1 to KFD_MAX_NUM_OF_QUEUES_PER_DEVICE\n");
return -1;
}
err = kfd_pasid_init();
if (err < 0)
- goto err_pasid;
+ return err;
err = kfd_chardev_init();
if (err < 0)
@@ -126,7 +127,6 @@ err_topology:
kfd_chardev_exit();
err_ioctl:
kfd_pasid_exit();
-err_pasid:
return err;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
index 213a71e0b6c7..1f3a6ba7eed2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
@@ -67,7 +67,8 @@ struct mqd_manager {
int (*load_mqd)(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t __user *wptr);
+ struct queue_properties *p,
+ struct mm_struct *mms);
int (*update_mqd)(struct mqd_manager *mm, void *mqd,
struct queue_properties *q);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 6acc4313363e..44ffd23348fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -44,10 +44,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
struct cik_mqd *m;
int retval;
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
mqd_mem_obj);
@@ -101,7 +97,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_iq_rptr = AQL_ENABLE;
*mqd = m;
- if (gart_addr != NULL)
+ if (gart_addr)
*gart_addr = addr;
retval = mm->update_mqd(mm, m, q);
@@ -115,8 +111,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
int retval;
struct cik_sdma_rlc_registers *m;
- BUG_ON(!mm || !mqd || !mqd_mem_obj);
-
retval = kfd_gtt_sa_allocate(mm->dev,
sizeof(struct cik_sdma_rlc_registers),
mqd_mem_obj);
@@ -129,7 +123,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
memset(m, 0, sizeof(struct cik_sdma_rlc_registers));
*mqd = m;
- if (gart_addr != NULL)
+ if (gart_addr)
*gart_addr = (*mqd_mem_obj)->gpu_addr;
retval = mm->update_mqd(mm, m, q);
@@ -140,27 +134,31 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
static void uninit_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- BUG_ON(!mm || !mqd);
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- BUG_ON(!mm || !mqd);
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id,
- uint32_t queue_id, uint32_t __user *wptr)
+ uint32_t queue_id, struct queue_properties *p,
+ struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_load
- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, wptr_mask, mms);
}
static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
- uint32_t pipe_id, uint32_t queue_id,
- uint32_t __user *wptr)
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
{
return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd);
}
@@ -170,10 +168,6 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
{
struct cik_mqd *m;
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
m = get_mqd(mqd);
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
DEFAULT_MIN_AVAIL_SIZE | PQ_ATC_EN;
@@ -188,21 +182,17 @@ static int update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
- m->cp_hqd_pq_doorbell_control = DOORBELL_EN |
- DOORBELL_OFFSET(q->doorbell_off);
+ m->cp_hqd_pq_doorbell_control = DOORBELL_OFFSET(q->doorbell_off);
m->cp_hqd_vmid = q->vmid;
- if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ if (q->format == KFD_QUEUE_FORMAT_AQL)
m->cp_hqd_pq_control |= NO_UPDATE_RPTR;
- }
- m->cp_hqd_active = 0;
q->is_active = false;
if (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0) {
- m->cp_hqd_active = 1;
q->is_active = true;
}
@@ -214,8 +204,6 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
{
struct cik_sdma_rlc_registers *m;
- BUG_ON(!mm || !mqd || !q);
-
m = get_sdma_mqd(mqd);
m->sdma_rlc_rb_cntl = ffs(q->queue_size / sizeof(unsigned int)) <<
SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
@@ -254,7 +242,7 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
unsigned int timeout, uint32_t pipe_id,
uint32_t queue_id)
{
- return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, type, timeout,
+ return mm->dev->kfd2kgd->hqd_destroy(mm->dev->kgd, mqd, type, timeout,
pipe_id, queue_id);
}
@@ -301,10 +289,6 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
struct cik_mqd *m;
int retval;
- BUG_ON(!mm || !q || !mqd || !mqd_mem_obj);
-
- pr_debug("kfd: In func %s\n", __func__);
-
retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd),
mqd_mem_obj);
@@ -359,10 +343,6 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
{
struct cik_mqd *m;
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
m = get_mqd(mqd);
m->cp_hqd_pq_control = DEFAULT_RPTR_BLOCK_SIZE |
DEFAULT_MIN_AVAIL_SIZE |
@@ -400,8 +380,6 @@ struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
{
struct cik_sdma_rlc_registers *m;
- BUG_ON(!mqd);
-
m = (struct cik_sdma_rlc_registers *)mqd;
return m;
@@ -412,12 +390,10 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
{
struct mqd_manager *mqd;
- BUG_ON(!dev);
- BUG_ON(type >= KFD_MQD_TYPE_MAX);
-
- pr_debug("kfd: In func %s\n", __func__);
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
- mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index a9b9882a9a77..73cbfe186dd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -85,7 +85,7 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_hqd_iq_rptr = 1;
*mqd = m;
- if (gart_addr != NULL)
+ if (gart_addr)
*gart_addr = addr;
retval = mm->update_mqd(mm, m, q);
@@ -94,10 +94,15 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
static int load_mqd(struct mqd_manager *mm, void *mqd,
uint32_t pipe_id, uint32_t queue_id,
- uint32_t __user *wptr)
+ struct queue_properties *p, struct mm_struct *mms)
{
- return mm->dev->kfd2kgd->hqd_load
- (mm->dev->kgd, mqd, pipe_id, queue_id, wptr);
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+ uint32_t wptr_mask = (uint32_t)((p->queue_size / sizeof(uint32_t)) - 1);
+
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, wptr_mask, mms);
}
static int __update_mqd(struct mqd_manager *mm, void *mqd,
@@ -106,10 +111,6 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
{
struct vi_mqd *m;
- BUG_ON(!mm || !q || !mqd);
-
- pr_debug("kfd: In func %s\n", __func__);
-
m = get_mqd(mqd);
m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT |
@@ -117,7 +118,7 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
mtype << CP_HQD_PQ_CONTROL__MTYPE__SHIFT;
m->cp_hqd_pq_control |=
ffs(q->queue_size / sizeof(unsigned int)) - 1 - 1;
- pr_debug("kfd: cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
@@ -126,10 +127,9 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
m->cp_hqd_pq_doorbell_control =
- 1 << CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN__SHIFT |
q->doorbell_off <<
CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
- pr_debug("kfd: cp_hqd_pq_doorbell_control 0x%x\n",
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
m->cp_hqd_pq_doorbell_control);
m->cp_hqd_eop_control = atc_bit << CP_HQD_EOP_CONTROL__EOP_ATC__SHIFT |
@@ -139,8 +139,15 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
mtype << CP_HQD_IB_CONTROL__MTYPE__SHIFT;
- m->cp_hqd_eop_control |=
- ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1;
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control |= min(0xA,
+ ffs(q->eop_ring_buffer_size / sizeof(unsigned int)) - 1 - 1);
m->cp_hqd_eop_base_addr_lo =
lower_32_bits(q->eop_ring_buffer_address >> 8);
m->cp_hqd_eop_base_addr_hi =
@@ -156,12 +163,10 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT;
}
- m->cp_hqd_active = 0;
q->is_active = false;
if (q->queue_size > 0 &&
q->queue_address != 0 &&
q->queue_percent > 0) {
- m->cp_hqd_active = 1;
q->is_active = true;
}
@@ -181,14 +186,13 @@ static int destroy_mqd(struct mqd_manager *mm, void *mqd,
uint32_t queue_id)
{
return mm->dev->kfd2kgd->hqd_destroy
- (mm->dev->kgd, type, timeout,
+ (mm->dev->kgd, mqd, type, timeout,
pipe_id, queue_id);
}
static void uninit_mqd(struct mqd_manager *mm, void *mqd,
struct kfd_mem_obj *mqd_mem_obj)
{
- BUG_ON(!mm || !mqd);
kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
}
@@ -238,12 +242,10 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
{
struct mqd_manager *mqd;
- BUG_ON(!dev);
- BUG_ON(type >= KFD_MQD_TYPE_MAX);
-
- pr_debug("kfd: In func %s\n", __func__);
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
- mqd = kzalloc(sizeof(struct mqd_manager), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 7131998848d7..1d312603de9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,7 +26,6 @@
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
-#include "kfd_pm4_headers.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_opcodes.h"
@@ -35,7 +34,8 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
{
unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);
- BUG_ON((temp * sizeof(uint32_t)) > buffer_size_bytes);
+ WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
+ "Runlist IB overflow");
*wptr = temp;
}
@@ -43,12 +43,12 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
{
union PM4_MES_TYPE_3_HEADER header;
- header.u32all = 0;
+ header.u32All = 0;
header.opcode = opcode;
header.count = packet_size/sizeof(uint32_t) - 2;
header.type = PM4_TYPE_3;
- return header.u32all;
+ return header.u32All;
}
static void pm_calc_rlib_size(struct packet_manager *pm,
@@ -58,8 +58,6 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int process_count, queue_count;
unsigned int map_queue_size;
- BUG_ON(!pm || !rlib_size || !over_subscription);
-
process_count = pm->dqm->processes_count;
queue_count = pm->dqm->queue_count;
@@ -67,15 +65,12 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
*over_subscription = false;
if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
*over_subscription = true;
- pr_debug("kfd: over subscribed runlist\n");
+ pr_debug("Over subscribed runlist\n");
}
- map_queue_size =
- (pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
- sizeof(struct pm4_mes_map_queues) :
- sizeof(struct pm4_map_queues);
+ map_queue_size = sizeof(struct pm4_mes_map_queues);
/* calculate run list ib allocation size */
- *rlib_size = process_count * sizeof(struct pm4_map_process) +
+ *rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
queue_count * map_queue_size;
/*
@@ -83,9 +78,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* when over subscription
*/
if (*over_subscription)
- *rlib_size += sizeof(struct pm4_runlist);
+ *rlib_size += sizeof(struct pm4_mes_runlist);
- pr_debug("kfd: runlist ib size %d\n", *rlib_size);
+ pr_debug("runlist ib size %d\n", *rlib_size);
}
static int pm_allocate_runlist_ib(struct packet_manager *pm,
@@ -96,17 +91,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
{
int retval;
- BUG_ON(!pm);
- BUG_ON(pm->allocated);
- BUG_ON(is_over_subscription == NULL);
+ if (WARN_ON(pm->allocated))
+ return -EINVAL;
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
&pm->ib_buffer_obj);
- if (retval != 0) {
- pr_err("kfd: failed to allocate runlist IB\n");
+ if (retval) {
+ pr_err("Failed to allocate runlist IB\n");
return retval;
}
@@ -121,15 +115,16 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
{
- struct pm4_runlist *packet;
+ struct pm4_mes_runlist *packet;
- BUG_ON(!pm || !buffer || !ib);
+ if (WARN_ON(!ib))
+ return -EFAULT;
- packet = (struct pm4_runlist *)buffer;
+ packet = (struct pm4_mes_runlist *)buffer;
- memset(buffer, 0, sizeof(struct pm4_runlist));
- packet->header.u32all = build_pm4_header(IT_RUN_LIST,
- sizeof(struct pm4_runlist));
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
packet->bitfields4.ib_size = ib_size_in_dwords;
packet->bitfields4.chain = chain ? 1 : 0;
@@ -144,20 +139,16 @@ static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
struct qcm_process_device *qpd)
{
- struct pm4_map_process *packet;
+ struct pm4_mes_map_process *packet;
struct queue *cur;
uint32_t num_queues;
- BUG_ON(!pm || !buffer || !qpd);
-
- packet = (struct pm4_map_process *)buffer;
-
- pr_debug("kfd: In func %s\n", __func__);
+ packet = (struct pm4_mes_map_process *)buffer;
- memset(buffer, 0, sizeof(struct pm4_map_process));
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
- packet->header.u32all = build_pm4_header(IT_MAP_PROCESS,
- sizeof(struct pm4_map_process));
+ packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
packet->bitfields2.process_quantum = 1;
packet->bitfields2.pasid = qpd->pqm->process->pasid;
@@ -175,27 +166,26 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+ /* TODO: scratch support */
+ packet->sh_hidden_private_base_vmid = 0;
+
packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
return 0;
}
-static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
+static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
{
struct pm4_mes_map_queues *packet;
bool use_static = is_static;
- BUG_ON(!pm || !buffer || !q);
-
- pr_debug("kfd: In func %s\n", __func__);
-
packet = (struct pm4_mes_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_map_queues));
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
- packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_map_queues));
+ packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
packet->bitfields2.alloc_format =
alloc_format__mes_map_queues__one_per_pipe_vi;
packet->bitfields2.num_queues = 1;
@@ -223,10 +213,8 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
use_static = false; /* no static queues under SDMA */
break;
default:
- pr_err("kfd: in %s queue type %d\n", __func__,
- q->properties.type);
- BUG();
- break;
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
}
packet->bitfields3.doorbell_offset =
q->properties.doorbell_off;
@@ -246,68 +234,6 @@ static int pm_create_map_queue_vi(struct packet_manager *pm, uint32_t *buffer,
return 0;
}
-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q, bool is_static)
-{
- struct pm4_map_queues *packet;
- bool use_static = is_static;
-
- BUG_ON(!pm || !buffer || !q);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- packet = (struct pm4_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_map_queues));
-
- packet->header.u32all = build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe;
- packet->bitfields2.num_queues = 1;
- packet->bitfields2.queue_sel =
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots;
-
- packet->bitfields2.vidmem = (q->properties.is_interop) ?
- vidmem__mes_map_queues__uses_video_memory :
- vidmem__mes_map_queues__uses_no_video_memory;
-
- switch (q->properties.type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__sdma0;
- use_static = false; /* no static queues under SDMA */
- break;
- default:
- BUG();
- break;
- }
-
- packet->mes_map_queues_ordinals[0].bitfields3.doorbell_offset =
- q->properties.doorbell_off;
-
- packet->mes_map_queues_ordinals[0].bitfields3.is_static =
- (use_static) ? 1 : 0;
-
- packet->mes_map_queues_ordinals[0].mqd_addr_lo =
- lower_32_bits(q->gart_mqd_addr);
-
- packet->mes_map_queues_ordinals[0].mqd_addr_hi =
- upper_32_bits(q->gart_mqd_addr);
-
- packet->mes_map_queues_ordinals[0].wptr_addr_lo =
- lower_32_bits((uint64_t)q->properties.write_ptr);
-
- packet->mes_map_queues_ordinals[0].wptr_addr_hi =
- upper_32_bits((uint64_t)q->properties.write_ptr);
-
- return 0;
-}
-
static int pm_create_runlist_ib(struct packet_manager *pm,
struct list_head *queues,
uint64_t *rl_gpu_addr,
@@ -322,19 +248,16 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
struct kernel_queue *kq;
bool is_over_subscription;
- BUG_ON(!pm || !queues || !rl_size_bytes || !rl_gpu_addr);
-
rl_wptr = retval = proccesses_mapped = 0;
retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
&alloc_size_bytes, &is_over_subscription);
- if (retval != 0)
+ if (retval)
return retval;
*rl_size_bytes = alloc_size_bytes;
- pr_debug("kfd: In func %s\n", __func__);
- pr_debug("kfd: building runlist ib process count: %d queues count %d\n",
+ pr_debug("Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->queue_count);
/* build the run list ib packet */
@@ -342,42 +265,35 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
qpd = cur->qpd;
/* build map process packet */
if (proccesses_mapped >= pm->dqm->processes_count) {
- pr_debug("kfd: not enough space left in runlist IB\n");
+ pr_debug("Not enough space left in runlist IB\n");
pm_release_ib(pm);
return -ENOMEM;
}
retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
- if (retval != 0)
+ if (retval)
return retval;
proccesses_mapped++;
- inc_wptr(&rl_wptr, sizeof(struct pm4_map_process),
+ inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
alloc_size_bytes);
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
if (!kq->queue->properties.is_active)
continue;
- pr_debug("kfd: static_queue, mapping kernel q %d, is debug status %d\n",
+ pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- if (pm->dqm->dev->device_info->asic_family ==
- CHIP_CARRIZO)
- retval = pm_create_map_queue_vi(pm,
- &rl_buffer[rl_wptr],
- kq->queue,
- qpd->is_debug);
- else
- retval = pm_create_map_queue(pm,
+ retval = pm_create_map_queue(pm,
&rl_buffer[rl_wptr],
kq->queue,
qpd->is_debug);
- if (retval != 0)
+ if (retval)
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_map_queues),
+ sizeof(struct pm4_mes_map_queues),
alloc_size_bytes);
}
@@ -385,51 +301,44 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
if (!q->properties.is_active)
continue;
- pr_debug("kfd: static_queue, mapping user queue %d, is debug status %d\n",
+ pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- if (pm->dqm->dev->device_info->asic_family ==
- CHIP_CARRIZO)
- retval = pm_create_map_queue_vi(pm,
- &rl_buffer[rl_wptr],
- q,
- qpd->is_debug);
- else
- retval = pm_create_map_queue(pm,
+ retval = pm_create_map_queue(pm,
&rl_buffer[rl_wptr],
q,
qpd->is_debug);
- if (retval != 0)
+ if (retval)
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_map_queues),
+ sizeof(struct pm4_mes_map_queues),
alloc_size_bytes);
}
}
- pr_debug("kfd: finished map process and queues to runlist\n");
+ pr_debug("Finished map process and queues to runlist\n");
if (is_over_subscription)
- pm_create_runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr,
- alloc_size_bytes / sizeof(uint32_t), true);
+ retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+ *rl_gpu_addr,
+ alloc_size_bytes / sizeof(uint32_t),
+ true);
for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
pr_debug("0x%2X ", rl_buffer[i]);
pr_debug("\n");
- return 0;
+ return retval;
}
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
- BUG_ON(!dqm);
-
pm->dqm = dqm;
mutex_init(&pm->lock);
pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
- if (pm->priv_queue == NULL) {
+ if (!pm->priv_queue) {
mutex_destroy(&pm->lock);
return -ENOMEM;
}
@@ -440,8 +349,6 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
void pm_uninit(struct packet_manager *pm)
{
- BUG_ON(!pm);
-
mutex_destroy(&pm->lock);
kernel_queue_uninit(pm->priv_queue);
}
@@ -449,25 +356,22 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
- struct pm4_set_resources *packet;
-
- BUG_ON(!pm || !res);
-
- pr_debug("kfd: In func %s\n", __func__);
+ struct pm4_mes_set_resources *packet;
+ int retval = 0;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
sizeof(*packet) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (packet == NULL) {
- mutex_unlock(&pm->lock);
- pr_err("kfd: failed to allocate buffer on kernel queue\n");
- return -ENOMEM;
+ (unsigned int **)&packet);
+ if (!packet) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
}
- memset(packet, 0, sizeof(struct pm4_set_resources));
- packet->header.u32all = build_pm4_header(IT_SET_RESOURCES,
- sizeof(struct pm4_set_resources));
+ memset(packet, 0, sizeof(struct pm4_mes_set_resources));
+ packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
+ sizeof(struct pm4_mes_set_resources));
packet->bitfields2.queue_type =
queue_type__mes_set_resources__hsa_interface_queue_hiq;
@@ -485,9 +389,10 @@ int pm_send_set_resources(struct packet_manager *pm,
pm->priv_queue->ops.submit_packet(pm->priv_queue);
+out:
mutex_unlock(&pm->lock);
- return 0;
+ return retval;
}
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
@@ -497,26 +402,24 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
size_t rl_ib_size, packet_size_dwords;
int retval;
- BUG_ON(!pm || !dqm_queues);
-
retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
&rl_ib_size);
- if (retval != 0)
+ if (retval)
goto fail_create_runlist_ib;
- pr_debug("kfd: runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
+ pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
- packet_size_dwords = sizeof(struct pm4_runlist) / sizeof(uint32_t);
+ packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
packet_size_dwords, &rl_buffer);
- if (retval != 0)
+ if (retval)
goto fail_acquire_packet_buffer;
retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
rl_ib_size / sizeof(uint32_t), false);
- if (retval != 0)
+ if (retval)
goto fail_create_runlist;
pm->priv_queue->ops.submit_packet(pm->priv_queue);
@@ -530,8 +433,7 @@ fail_create_runlist:
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
fail_create_runlist_ib:
- if (pm->allocated)
- pm_release_ib(pm);
+ pm_release_ib(pm);
return retval;
}
@@ -539,20 +441,21 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint32_t fence_value)
{
int retval;
- struct pm4_query_status *packet;
+ struct pm4_mes_query_status *packet;
- BUG_ON(!pm || !fence_address);
+ if (WARN_ON(!fence_address))
+ return -EFAULT;
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
- sizeof(struct pm4_query_status) / sizeof(uint32_t),
+ sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
(unsigned int **)&packet);
- if (retval != 0)
+ if (retval)
goto fail_acquire_packet_buffer;
- packet->header.u32all = build_pm4_header(IT_QUERY_STATUS,
- sizeof(struct pm4_query_status));
+ packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
packet->bitfields2.context_id = 0;
packet->bitfields2.interrupt_sel =
@@ -566,9 +469,6 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
packet->data_lo = lower_32_bits((uint64_t)fence_value);
pm->priv_queue->ops.submit_packet(pm->priv_queue);
- mutex_unlock(&pm->lock);
-
- return 0;
fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
@@ -582,24 +482,22 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
{
int retval;
uint32_t *buffer;
- struct pm4_unmap_queues *packet;
-
- BUG_ON(!pm);
+ struct pm4_mes_unmap_queues *packet;
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(
pm->priv_queue,
- sizeof(struct pm4_unmap_queues) / sizeof(uint32_t),
+ sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
&buffer);
- if (retval != 0)
+ if (retval)
goto err_acquire_packet_buffer;
- packet = (struct pm4_unmap_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_unmap_queues));
- pr_debug("kfd: static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+ pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
mode, reset, type);
- packet->header.u32all = build_pm4_header(IT_UNMAP_QUEUES,
- sizeof(struct pm4_unmap_queues));
+ packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
switch (type) {
case KFD_QUEUE_TYPE_COMPUTE:
case KFD_QUEUE_TYPE_DIQ:
@@ -611,8 +509,9 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
break;
default:
- BUG();
- break;
+ WARN(1, "queue type %d", type);
+ retval = -EINVAL;
+ goto err_invalid;
}
if (reset)
@@ -636,16 +535,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
break;
case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES:
packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_all_active_queues;
+ queue_sel__mes_unmap_queues__unmap_all_queues;
break;
case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
/* in this case, we do not preempt static queues */
packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only;
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
break;
default:
- BUG();
- break;
+ WARN(1, "filter %d", mode);
+ retval = -EINVAL;
+ goto err_invalid;
}
pm->priv_queue->ops.submit_packet(pm->priv_queue);
@@ -653,6 +553,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
mutex_unlock(&pm->lock);
return 0;
+err_invalid:
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
err_acquire_packet_buffer:
mutex_unlock(&pm->lock);
return retval;
@@ -660,8 +562,6 @@ err_acquire_packet_buffer:
void pm_release_ib(struct packet_manager *pm)
{
- BUG_ON(!pm);
-
mutex_lock(&pm->lock);
if (pm->allocated) {
kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
index 6cfe7f1f18cf..1e06de0bc673 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pasid.c
@@ -32,7 +32,8 @@ int kfd_pasid_init(void)
{
pasid_limit = KFD_MAX_NUM_OF_PROCESSES;
- pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long), GFP_KERNEL);
+ pasid_bitmap = kcalloc(BITS_TO_LONGS(pasid_limit), sizeof(long),
+ GFP_KERNEL);
if (!pasid_bitmap)
return -ENOMEM;
@@ -91,6 +92,6 @@ unsigned int kfd_pasid_alloc(void)
void kfd_pasid_free(unsigned int pasid)
{
- BUG_ON(pasid == 0 || pasid >= pasid_limit);
- clear_bit(pasid, pasid_bitmap);
+ if (!WARN_ON(pasid == 0 || pasid >= pasid_limit))
+ clear_bit(pasid, pasid_bitmap);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
index 5b393f3e34a9..e50f73d25de6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers.h
@@ -28,112 +28,19 @@
#define PM4_MES_HEADER_DEFINED
union PM4_MES_TYPE_3_HEADER {
struct {
- uint32_t reserved1:8; /* < reserved */
- uint32_t opcode:8; /* < IT opcode */
- uint32_t count:14; /* < number of DWORDs - 1
- * in the information body.
- */
- uint32_t type:2; /* < packet identifier.
- * It should be 3 for type 3 packets
- */
+ /* reserved */
+ uint32_t reserved1:8;
+ /* IT opcode */
+ uint32_t opcode:8;
+ /* number of DWORDs - 1 in the information body */
+ uint32_t count:14;
+ /* packet identifier. It should be 3 for type 3 packets */
+ uint32_t type:2;
};
uint32_t u32all;
};
#endif /* PM4_MES_HEADER_DEFINED */
-/* --------------------MES_SET_RESOURCES-------------------- */
-
-#ifndef PM4_MES_SET_RESOURCES_DEFINED
-#define PM4_MES_SET_RESOURCES_DEFINED
-enum set_resources_queue_type_enum {
- queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
- queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
- queue_type__mes_set_resources__hsa_debug_interface_queue = 4
-};
-
-struct pm4_set_resources {
- union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
- };
-
- union {
- struct {
- uint32_t vmid_mask:16;
- uint32_t unmap_latency:8;
- uint32_t reserved1:5;
- enum set_resources_queue_type_enum queue_type:3;
- } bitfields2;
- uint32_t ordinal2;
- };
-
- uint32_t queue_mask_lo;
- uint32_t queue_mask_hi;
- uint32_t gws_mask_lo;
- uint32_t gws_mask_hi;
-
- union {
- struct {
- uint32_t oac_mask:16;
- uint32_t reserved2:16;
- } bitfields7;
- uint32_t ordinal7;
- };
-
- union {
- struct {
- uint32_t gds_heap_base:6;
- uint32_t reserved3:5;
- uint32_t gds_heap_size:6;
- uint32_t reserved4:15;
- } bitfields8;
- uint32_t ordinal8;
- };
-
-};
-#endif
-
-/*--------------------MES_RUN_LIST-------------------- */
-
-#ifndef PM4_MES_RUN_LIST_DEFINED
-#define PM4_MES_RUN_LIST_DEFINED
-
-struct pm4_runlist {
- union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
- };
-
- union {
- struct {
- uint32_t reserved1:2;
- uint32_t ib_base_lo:30;
- } bitfields2;
- uint32_t ordinal2;
- };
-
- union {
- struct {
- uint32_t ib_base_hi:16;
- uint32_t reserved2:16;
- } bitfields3;
- uint32_t ordinal3;
- };
-
- union {
- struct {
- uint32_t ib_size:20;
- uint32_t chain:1;
- uint32_t offload_polling:1;
- uint32_t reserved3:1;
- uint32_t valid:1;
- uint32_t reserved4:8;
- } bitfields4;
- uint32_t ordinal4;
- };
-
-};
-#endif
/*--------------------MES_MAP_PROCESS-------------------- */
@@ -186,217 +93,58 @@ struct pm4_map_process {
};
#endif
-/*--------------------MES_MAP_QUEUES--------------------*/
-
-#ifndef PM4_MES_MAP_QUEUES_DEFINED
-#define PM4_MES_MAP_QUEUES_DEFINED
-enum map_queues_queue_sel_enum {
- queue_sel__mes_map_queues__map_to_specified_queue_slots = 0,
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots = 1,
- queue_sel__mes_map_queues__enable_process_queues = 2
-};
+#ifndef PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH
+#define PM4_MES_MAP_PROCESS_DEFINED_KV_SCRATCH
-enum map_queues_vidmem_enum {
- vidmem__mes_map_queues__uses_no_video_memory = 0,
- vidmem__mes_map_queues__uses_video_memory = 1
-};
-
-enum map_queues_alloc_format_enum {
- alloc_format__mes_map_queues__one_per_pipe = 0,
- alloc_format__mes_map_queues__all_on_one_pipe = 1
-};
-
-enum map_queues_engine_sel_enum {
- engine_sel__mes_map_queues__compute = 0,
- engine_sel__mes_map_queues__sdma0 = 2,
- engine_sel__mes_map_queues__sdma1 = 3
-};
-
-struct pm4_map_queues {
+struct pm4_map_process_scratch_kv {
union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
- };
-
- union {
- struct {
- uint32_t reserved1:4;
- enum map_queues_queue_sel_enum queue_sel:2;
- uint32_t reserved2:2;
- uint32_t vmid:4;
- uint32_t reserved3:4;
- enum map_queues_vidmem_enum vidmem:2;
- uint32_t reserved4:6;
- enum map_queues_alloc_format_enum alloc_format:2;
- enum map_queues_engine_sel_enum engine_sel:3;
- uint32_t num_queues:3;
- } bitfields2;
- uint32_t ordinal2;
- };
-
- struct {
- union {
- struct {
- uint32_t is_static:1;
- uint32_t reserved5:1;
- uint32_t doorbell_offset:21;
- uint32_t reserved6:3;
- uint32_t queue:6;
- } bitfields3;
- uint32_t ordinal3;
- };
-
- uint32_t mqd_addr_lo;
- uint32_t mqd_addr_hi;
- uint32_t wptr_addr_lo;
- uint32_t wptr_addr_hi;
-
- } mes_map_queues_ordinals[1]; /* 1..N of these ordinal groups */
-
-};
-#endif
-
-/*--------------------MES_QUERY_STATUS--------------------*/
-
-#ifndef PM4_MES_QUERY_STATUS_DEFINED
-#define PM4_MES_QUERY_STATUS_DEFINED
-enum query_status_interrupt_sel_enum {
- interrupt_sel__mes_query_status__completion_status = 0,
- interrupt_sel__mes_query_status__process_status = 1,
- interrupt_sel__mes_query_status__queue_status = 2
-};
-
-enum query_status_command_enum {
- command__mes_query_status__interrupt_only = 0,
- command__mes_query_status__fence_only_immediate = 1,
- command__mes_query_status__fence_only_after_write_ack = 2,
- command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
-};
-
-enum query_status_engine_sel_enum {
- engine_sel__mes_query_status__compute = 0,
- engine_sel__mes_query_status__sdma0_queue = 2,
- engine_sel__mes_query_status__sdma1_queue = 3
-};
-
-struct pm4_query_status {
- union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
- };
-
- union {
- struct {
- uint32_t context_id:28;
- enum query_status_interrupt_sel_enum interrupt_sel:2;
- enum query_status_command_enum command:2;
- } bitfields2;
- uint32_t ordinal2;
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
};
union {
struct {
uint32_t pasid:16;
- uint32_t reserved1:16;
- } bitfields3a;
- struct {
- uint32_t reserved2:2;
- uint32_t doorbell_offset:21;
- uint32_t reserved3:3;
- enum query_status_engine_sel_enum engine_sel:3;
- uint32_t reserved4:3;
- } bitfields3b;
- uint32_t ordinal3;
- };
-
- uint32_t addr_lo;
- uint32_t addr_hi;
- uint32_t data_lo;
- uint32_t data_hi;
-};
-#endif
-
-/*--------------------MES_UNMAP_QUEUES--------------------*/
-
-#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
-#define PM4_MES_UNMAP_QUEUES_DEFINED
-enum unmap_queues_action_enum {
- action__mes_unmap_queues__preempt_queues = 0,
- action__mes_unmap_queues__reset_queues = 1,
- action__mes_unmap_queues__disable_process_queues = 2
-};
-
-enum unmap_queues_queue_sel_enum {
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
- queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
- queue_sel__mes_unmap_queues__perform_request_on_all_active_queues = 2,
- queue_sel__mes_unmap_queues__perform_request_on_dynamic_queues_only = 3
-};
-
-enum unmap_queues_engine_sel_enum {
- engine_sel__mes_unmap_queues__compute = 0,
- engine_sel__mes_unmap_queues__sdma0 = 2,
- engine_sel__mes_unmap_queues__sdma1 = 3
-};
-
-struct pm4_unmap_queues {
- union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
- };
-
- union {
- struct {
- enum unmap_queues_action_enum action:2;
- uint32_t reserved1:2;
- enum unmap_queues_queue_sel_enum queue_sel:2;
- uint32_t reserved2:20;
- enum unmap_queues_engine_sel_enum engine_sel:3;
- uint32_t num_queues:3;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
} bitfields2;
uint32_t ordinal2;
};
union {
struct {
- uint32_t pasid:16;
- uint32_t reserved3:16;
- } bitfields3a;
- struct {
- uint32_t reserved4:2;
- uint32_t doorbell_offset0:21;
- uint32_t reserved5:9;
- } bitfields3b;
+ uint32_t page_table_base:28;
+ uint32_t reserved2:4;
+ } bitfields3;
uint32_t ordinal3;
};
- union {
- struct {
- uint32_t reserved6:2;
- uint32_t doorbell_offset1:21;
- uint32_t reserved7:9;
- } bitfields4;
- uint32_t ordinal4;
- };
-
- union {
- struct {
- uint32_t reserved8:2;
- uint32_t doorbell_offset2:21;
- uint32_t reserved9:9;
- } bitfields5;
- uint32_t ordinal5;
- };
+ uint32_t reserved3;
+ uint32_t sh_mem_bases;
+ uint32_t sh_mem_config;
+ uint32_t sh_mem_ape1_base;
+ uint32_t sh_mem_ape1_limit;
+ uint32_t sh_hidden_private_base_vmid;
+ uint32_t reserved4;
+ uint32_t reserved5;
+ uint32_t gds_addr_lo;
+ uint32_t gds_addr_hi;
union {
struct {
- uint32_t reserved10:2;
- uint32_t doorbell_offset3:21;
- uint32_t reserved11:9;
- } bitfields6;
- uint32_t ordinal6;
+ uint32_t num_gws:6;
+ uint32_t reserved6:2;
+ uint32_t num_oac:4;
+ uint32_t reserved7:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields14;
+ uint32_t ordinal14;
};
+ uint32_t completion_signal_lo32;
+uint32_t completion_signal_hi32;
};
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
index 08c721922812..7c8d9b357749 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h
@@ -30,10 +30,12 @@ union PM4_MES_TYPE_3_HEADER {
struct {
uint32_t reserved1 : 8; /* < reserved */
uint32_t opcode : 8; /* < IT opcode */
- uint32_t count : 14;/* < number of DWORDs - 1 in the
- information body. */
- uint32_t type : 2; /* < packet identifier.
- It should be 3 for type 3 packets */
+ uint32_t count : 14;/* < Number of DWORDS - 1 in the
+ * information body
+ */
+ uint32_t type : 2; /* < packet identifier
+ * It should be 3 for type 3 packets
+ */
};
uint32_t u32All;
};
@@ -124,9 +126,10 @@ struct pm4_mes_runlist {
uint32_t ib_size:20;
uint32_t chain:1;
uint32_t offload_polling:1;
- uint32_t reserved3:1;
+ uint32_t reserved2:1;
uint32_t valid:1;
- uint32_t reserved4:8;
+ uint32_t process_cnt:4;
+ uint32_t reserved3:4;
} bitfields4;
uint32_t ordinal4;
};
@@ -141,8 +144,8 @@ struct pm4_mes_runlist {
struct pm4_mes_map_process {
union {
- union PM4_MES_TYPE_3_HEADER header; /* header */
- uint32_t ordinal1;
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
};
union {
@@ -153,36 +156,48 @@ struct pm4_mes_map_process {
uint32_t process_quantum:7;
} bitfields2;
uint32_t ordinal2;
-};
+ };
union {
struct {
uint32_t page_table_base:28;
- uint32_t reserved2:4;
+ uint32_t reserved3:4;
} bitfields3;
uint32_t ordinal3;
};
+ uint32_t reserved;
+
uint32_t sh_mem_bases;
+ uint32_t sh_mem_config;
uint32_t sh_mem_ape1_base;
uint32_t sh_mem_ape1_limit;
- uint32_t sh_mem_config;
+
+ uint32_t sh_hidden_private_base_vmid;
+
+ uint32_t reserved2;
+ uint32_t reserved3;
+
uint32_t gds_addr_lo;
uint32_t gds_addr_hi;
union {
struct {
uint32_t num_gws:6;
- uint32_t reserved3:2;
+ uint32_t reserved4:2;
uint32_t num_oac:4;
- uint32_t reserved4:4;
+ uint32_t reserved5:4;
uint32_t gds_size:6;
uint32_t num_queues:10;
} bitfields10;
uint32_t ordinal10;
};
+ uint32_t completion_signal_lo;
+ uint32_t completion_signal_hi;
+
};
+
#endif
/*--------------------MES_MAP_QUEUES--------------------*/
@@ -335,7 +350,7 @@ enum mes_unmap_queues_engine_sel_enum {
engine_sel__mes_unmap_queues__sdmal = 3
};
-struct PM4_MES_UNMAP_QUEUES {
+struct pm4_mes_unmap_queues {
union {
union PM4_MES_TYPE_3_HEADER header; /* header */
uint32_t ordinal1;
@@ -395,4 +410,101 @@ struct PM4_MES_UNMAP_QUEUES {
};
#endif
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+enum RELEASE_MEM_event_index_enum {
+ event_index___release_mem__end_of_pipe = 5,
+ event_index___release_mem__shader_done = 6
+};
+
+enum RELEASE_MEM_cache_policy_enum {
+ cache_policy___release_mem__lru = 0,
+ cache_policy___release_mem__stream = 1,
+ cache_policy___release_mem__bypass = 2
+};
+
+enum RELEASE_MEM_dst_sel_enum {
+ dst_sel___release_mem__memory_controller = 0,
+ dst_sel___release_mem__tc_l2 = 1,
+ dst_sel___release_mem__queue_write_pointer_register = 2,
+ dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum RELEASE_MEM_int_sel_enum {
+ int_sel___release_mem__none = 0,
+ int_sel___release_mem__send_interrupt_only = 1,
+ int_sel___release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel___release_mem__send_data_after_write_confirm = 3
+};
+
+enum RELEASE_MEM_data_sel_enum {
+ data_sel___release_mem__none = 0,
+ data_sel___release_mem__send_32_bit_low = 1,
+ data_sel___release_mem__send_64_bit_data = 2,
+ data_sel___release_mem__send_gpu_clock_counter = 3,
+ data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel___release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum RELEASE_MEM_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ unsigned int reserved3:6;
+ unsigned int atc:1;
+ enum RELEASE_MEM_cache_policy_enum cache_policy:2;
+ unsigned int reserved4:5;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ unsigned int reserved5:16;
+ enum RELEASE_MEM_dst_sel_enum dst_sel:2;
+ unsigned int reserved6:6;
+ enum RELEASE_MEM_int_sel_enum int_sel:3;
+ unsigned int reserved7:2;
+ enum RELEASE_MEM_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ unsigned int reserved8:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ unsigned int reserved9:3;
+ unsigned int address_lo_64b:29;
+ } bitfields5;
+ unsigned int ordinal4;
+ };
+
+ unsigned int address_hi;
+
+ unsigned int data_lo;
+
+ unsigned int data_hi;
+};
+#endif
+
+enum {
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+
#endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4750cabe4252..b397ec726400 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -239,11 +239,6 @@ enum kfd_preempt_type_filter {
KFD_PREEMPT_TYPE_FILTER_BY_PASID
};
-enum kfd_preempt_type {
- KFD_PREEMPT_TYPE_WAVEFRONT,
- KFD_PREEMPT_TYPE_WAVEFRONT_RESET
-};
-
/**
* enum kfd_queue_type
*
@@ -294,13 +289,13 @@ enum kfd_queue_format {
* @write_ptr: Defines the number of dwords written to the ring buffer.
*
* @doorbell_ptr: This field aim is to notify the H/W of new packet written to
- * the queue ring buffer. This field should be similar to write_ptr and the user
- * should update this field after he updated the write_ptr.
+ * the queue ring buffer. This field should be similar to write_ptr and the
+ * user should update this field after he updated the write_ptr.
*
* @doorbell_off: The doorbell offset in the doorbell pci-bar.
*
- * @is_interop: Defines if this is a interop queue. Interop queue means that the
- * queue can access both graphics and compute resources.
+ * @is_interop: Defines if this is a interop queue. Interop queue means that
+ * the queue can access both graphics and compute resources.
*
* @is_active: Defines if the queue is active or not.
*
@@ -352,9 +347,10 @@ struct queue_properties {
* @properties: The queue properties.
*
* @mec: Used only in no cp scheduling mode and identifies to micro engine id
- * that the queue should be execute on.
+ * that the queue should be execute on.
*
- * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id.
+ * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
+ * id.
*
* @queue: Used only in no cp scheduliong mode and identifies the queue's slot.
*
@@ -436,6 +432,7 @@ struct qcm_process_device {
uint32_t gds_size;
uint32_t num_gws;
uint32_t num_oac;
+ uint32_t sh_hidden_private_base;
};
/* Data that is per-process-per device. */
@@ -520,8 +517,8 @@ struct kfd_process {
struct mutex event_mutex;
/* All events in process hashed by ID, linked on kfd_event.events. */
DECLARE_HASHTABLE(events, 4);
- struct list_head signal_event_pages; /* struct slot_page_header.
- event_pages */
+ /* struct slot_page_header.event_pages */
+ struct list_head signal_event_pages;
u32 next_nonsignal_event_id;
size_t signal_event_count;
};
@@ -559,8 +556,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
/* Process device data iterator */
-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p);
-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+struct kfd_process_device *kfd_get_first_process_device_data(
+ struct kfd_process *p);
+struct kfd_process_device *kfd_get_next_process_device_data(
+ struct kfd_process *p,
struct kfd_process_device *pdd);
bool kfd_has_process_device_data(struct kfd_process *p);
@@ -573,7 +572,8 @@ unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);
/* Doorbells */
-void kfd_doorbell_init(struct kfd_dev *kfd);
+int kfd_doorbell_init(struct kfd_dev *kfd);
+void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 035bbc98a63d..c74cf22a1ed9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -79,9 +79,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
{
struct kfd_process *process;
- BUG_ON(!kfd_process_wq);
-
- if (thread->mm == NULL)
+ if (!thread->mm)
return ERR_PTR(-EINVAL);
/* Only the pthreads threading model is supported. */
@@ -101,7 +99,7 @@ struct kfd_process *kfd_create_process(const struct task_struct *thread)
/* A prior open of /dev/kfd could have already created the process. */
process = find_process(thread);
if (process)
- pr_debug("kfd: process already found\n");
+ pr_debug("Process already found\n");
if (!process)
process = create_process(thread);
@@ -117,7 +115,7 @@ struct kfd_process *kfd_get_process(const struct task_struct *thread)
{
struct kfd_process *process;
- if (thread->mm == NULL)
+ if (!thread->mm)
return ERR_PTR(-EINVAL);
/* Only the pthreads threading model is supported. */
@@ -202,10 +200,8 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
struct kfd_process_release_work *work;
struct kfd_process *p;
- BUG_ON(!kfd_process_wq);
-
p = container_of(rcu, struct kfd_process, rcu);
- BUG_ON(atomic_read(&p->mm->mm_count) <= 0);
+ WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
mmdrop(p->mm);
@@ -229,7 +225,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
* mmu_notifier srcu is read locked
*/
p = container_of(mn, struct kfd_process, mmu_notifier);
- BUG_ON(p->mm != mm);
+ if (WARN_ON(p->mm != mm))
+ return;
mutex_lock(&kfd_processes_mutex);
hash_del_rcu(&p->kfd_processes);
@@ -250,7 +247,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
if (pdd->reset_wavefronts) {
- pr_warn("amdkfd: Resetting all wave fronts\n");
+ pr_warn("Resetting all wave fronts\n");
dbgdev_wave_reset_wavefronts(pdd->dev, p);
pdd->reset_wavefronts = false;
}
@@ -407,8 +404,6 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
struct kfd_process *p;
struct kfd_process_device *pdd;
- BUG_ON(dev == NULL);
-
/*
* Look for the process that matches the pasid. If there is no such
* process, we either released it in amdkfd's own notifier, or there
@@ -449,14 +444,16 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
mutex_unlock(&p->mutex);
}
-struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
+struct kfd_process_device *kfd_get_first_process_device_data(
+ struct kfd_process *p)
{
return list_first_entry(&p->per_device_data,
struct kfd_process_device,
per_device_list);
}
-struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p,
+struct kfd_process_device *kfd_get_next_process_device_data(
+ struct kfd_process *p,
struct kfd_process_device *pdd)
{
if (list_is_last(&pdd->per_device_list, &p->per_device_data))
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 32cdf2b483db..1cae95e2b13a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -32,12 +32,9 @@ static inline struct process_queue_node *get_queue_by_qid(
{
struct process_queue_node *pqn;
- BUG_ON(!pqm);
-
list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
- if (pqn->q && pqn->q->properties.queue_id == qid)
- return pqn;
- if (pqn->kq && pqn->kq->queue->properties.queue_id == qid)
+ if ((pqn->q && pqn->q->properties.queue_id == qid) ||
+ (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
return pqn;
}
@@ -49,17 +46,13 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
{
unsigned long found;
- BUG_ON(!pqm || !qid);
-
- pr_debug("kfd: in %s\n", __func__);
-
found = find_first_zero_bit(pqm->queue_slot_bitmap,
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
- pr_debug("kfd: the new slot id %lu\n", found);
+ pr_debug("The new slot id %lu\n", found);
if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
- pr_info("amdkfd: Can not open more queues for process with pasid %d\n",
+ pr_info("Cannot open more queues for process with pasid %d\n",
pqm->process->pasid);
return -ENOMEM;
}
@@ -72,13 +65,11 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
- BUG_ON(!pqm);
-
INIT_LIST_HEAD(&pqm->queues);
pqm->queue_slot_bitmap =
kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
BITS_PER_BYTE), GFP_KERNEL);
- if (pqm->queue_slot_bitmap == NULL)
+ if (!pqm->queue_slot_bitmap)
return -ENOMEM;
pqm->process = p;
@@ -90,10 +81,6 @@ void pqm_uninit(struct process_queue_manager *pqm)
int retval;
struct process_queue_node *pqn, *next;
- BUG_ON(!pqm);
-
- pr_debug("In func %s\n", __func__);
-
list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
retval = pqm_destroy_queue(
pqm,
@@ -102,7 +89,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
pqn->kq->queue->properties.queue_id);
if (retval != 0) {
- pr_err("kfd: failed to destroy queue\n");
+ pr_err("failed to destroy queue\n");
return;
}
}
@@ -117,8 +104,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
{
int retval;
- retval = 0;
-
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
@@ -131,16 +116,13 @@ static int create_cp_queue(struct process_queue_manager *pqm,
retval = init_queue(q, q_properties);
if (retval != 0)
- goto err_init_queue;
+ return retval;
(*q)->device = dev;
(*q)->process = pqm->process;
- pr_debug("kfd: PQM After init queue");
-
- return retval;
+ pr_debug("PQM After init queue");
-err_init_queue:
return retval;
}
@@ -161,8 +143,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
int num_queues = 0;
struct queue *cur;
- BUG_ON(!pqm || !dev || !properties || !qid);
-
memset(&q_properties, 0, sizeof(struct queue_properties));
memcpy(&q_properties, properties, sizeof(struct queue_properties));
q = NULL;
@@ -185,7 +165,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
list_for_each_entry(cur, &pdd->qpd.queues_list, list)
num_queues++;
if (num_queues >= dev->device_info->max_no_of_hqd/2)
- return (-ENOSPC);
+ return -ENOSPC;
}
retval = find_available_queue_slot(pqm, qid);
@@ -197,7 +177,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
}
- pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
+ pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
if (!pqn) {
retval = -ENOMEM;
goto err_allocate_pqn;
@@ -210,7 +190,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
- pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
+ pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
retval = -EPERM;
goto err_create_queue;
}
@@ -227,7 +207,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
break;
case KFD_QUEUE_TYPE_DIQ:
kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
- if (kq == NULL) {
+ if (!kq) {
retval = -ENOMEM;
goto err_create_queue;
}
@@ -238,22 +218,22 @@ int pqm_create_queue(struct process_queue_manager *pqm,
kq, &pdd->qpd);
break;
default:
- BUG();
- break;
+ WARN(1, "Invalid queue type %d", type);
+ retval = -EINVAL;
}
if (retval != 0) {
- pr_debug("Error dqm create queue\n");
+ pr_err("DQM create queue failed\n");
goto err_create_queue;
}
- pr_debug("kfd: PQM After DQM create queue\n");
+ pr_debug("PQM After DQM create queue\n");
list_add(&pqn->process_queue_list, &pqm->queues);
if (q) {
*properties = q->properties;
- pr_debug("kfd: PQM done creating queue\n");
+ pr_debug("PQM done creating queue\n");
print_queue_properties(properties);
}
@@ -279,14 +259,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dqm = NULL;
- BUG_ON(!pqm);
retval = 0;
- pr_debug("kfd: In Func %s\n", __func__);
-
pqn = get_queue_by_qid(pqm, qid);
- if (pqn == NULL) {
- pr_err("kfd: queue id does not match any known queue\n");
+ if (!pqn) {
+ pr_err("Queue id does not match any known queue\n");
return -EINVAL;
}
@@ -295,7 +272,8 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dev = pqn->kq->dev;
if (pqn->q)
dev = pqn->q->device;
- BUG_ON(!dev);
+ if (WARN_ON(!dev))
+ return -ENODEV;
pdd = kfd_get_process_device_data(dev, pqm->process);
if (!pdd) {
@@ -335,12 +313,9 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
int retval;
struct process_queue_node *pqn;
- BUG_ON(!pqm);
-
pqn = get_queue_by_qid(pqm, qid);
if (!pqn) {
- pr_debug("amdkfd: No queue %d exists for update operation\n",
- qid);
+ pr_debug("No queue %d exists for update operation\n", qid);
return -EFAULT;
}
@@ -363,8 +338,6 @@ struct kernel_queue *pqm_get_kernel_queue(
{
struct process_queue_node *pqn;
- BUG_ON(!pqm);
-
pqn = get_queue_by_qid(pqm, qid);
if (pqn && pqn->kq)
return pqn->kq;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index 0ab197077f2d..a5315d4f1c95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -65,17 +65,15 @@ void print_queue(struct queue *q)
int init_queue(struct queue **q, const struct queue_properties *properties)
{
- struct queue *tmp;
+ struct queue *tmp_q;
- BUG_ON(!q);
-
- tmp = kzalloc(sizeof(struct queue), GFP_KERNEL);
- if (!tmp)
+ tmp_q = kzalloc(sizeof(*tmp_q), GFP_KERNEL);
+ if (!tmp_q)
return -ENOMEM;
- memcpy(&tmp->properties, properties, sizeof(struct queue_properties));
+ memcpy(&tmp_q->properties, properties, sizeof(*properties));
- *q = tmp;
+ *q = tmp_q;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 1e5064749959..19ce59028d6b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -108,9 +108,6 @@ static int kfd_topology_get_crat_acpi(void *crat_image, size_t *size)
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
{
- BUG_ON(!dev);
- BUG_ON(!cu);
-
dev->node_props.cpu_cores_count = cu->num_cpu_cores;
dev->node_props.cpu_core_id_base = cu->processor_id_low;
if (cu->hsa_capability & CRAT_CU_FLAGS_IOMMU_PRESENT)
@@ -123,9 +120,6 @@ static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
static void kfd_populated_cu_info_gpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
{
- BUG_ON(!dev);
- BUG_ON(!cu);
-
dev->node_props.simd_id_base = cu->processor_id_low;
dev->node_props.simd_count = cu->num_simd_cores;
dev->node_props.lds_size_in_kb = cu->lds_size_in_kb;
@@ -148,8 +142,6 @@ static int kfd_parse_subtype_cu(struct crat_subtype_computeunit *cu)
struct kfd_topology_device *dev;
int i = 0;
- BUG_ON(!cu);
-
pr_info("Found CU entry in CRAT table with proximity_domain=%d caps=%x\n",
cu->proximity_domain, cu->hsa_capability);
list_for_each_entry(dev, &topology_device_list, list) {
@@ -177,8 +169,6 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem)
struct kfd_topology_device *dev;
int i = 0;
- BUG_ON(!mem);
-
pr_info("Found memory entry in CRAT table with proximity_domain=%d\n",
mem->promixity_domain);
list_for_each_entry(dev, &topology_device_list, list) {
@@ -223,8 +213,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache)
struct kfd_topology_device *dev;
uint32_t id;
- BUG_ON(!cache);
-
id = cache->processor_id_low;
pr_info("Found cache entry in CRAT table with processor_id=%d\n", id);
@@ -274,8 +262,6 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink)
uint32_t id_from;
uint32_t id_to;
- BUG_ON(!iolink);
-
id_from = iolink->proximity_domain_from;
id_to = iolink->proximity_domain_to;
@@ -323,8 +309,6 @@ static int kfd_parse_subtype(struct crat_subtype_generic *sub_type_hdr)
struct crat_subtype_iolink *iolink;
int ret = 0;
- BUG_ON(!sub_type_hdr);
-
switch (sub_type_hdr->type) {
case CRAT_SUBTYPE_COMPUTEUNIT_AFFINITY:
cu = (struct crat_subtype_computeunit *)sub_type_hdr;
@@ -368,8 +352,6 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev)
struct kfd_cache_properties *cache;
struct kfd_iolink_properties *iolink;
- BUG_ON(!dev);
-
list_del(&dev->list);
while (dev->mem_props.next != &dev->mem_props) {
@@ -416,7 +398,7 @@ static struct kfd_topology_device *kfd_create_topology_device(void)
struct kfd_topology_device *dev;
dev = kfd_alloc_struct(dev);
- if (dev == NULL) {
+ if (!dev) {
pr_err("No memory to allocate a topology device");
return NULL;
}
@@ -666,7 +648,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
dev->node_props.simd_count);
if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
- pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
+ pr_info_once("mem_banks_count truncated from %d to %d\n",
dev->node_props.mem_banks_count,
dev->mem_bank_count);
sysfs_show_32bit_prop(buffer, "mem_banks_count",
@@ -763,8 +745,6 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev)
struct kfd_cache_properties *cache;
struct kfd_mem_properties *mem;
- BUG_ON(!dev);
-
if (dev->kobj_iolink) {
list_for_each_entry(iolink, &dev->io_link_props, list)
if (iolink->kobj) {
@@ -819,12 +799,12 @@ static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev,
int ret;
uint32_t i;
- BUG_ON(!dev);
+ if (WARN_ON(dev->kobj_node))
+ return -EEXIST;
/*
* Creating the sysfs folders
*/
- BUG_ON(dev->kobj_node);
dev->kobj_node = kfd_alloc_struct(dev->kobj_node);
if (!dev->kobj_node)
return -ENOMEM;
@@ -957,7 +937,7 @@ static int kfd_topology_update_sysfs(void)
int ret;
pr_info("Creating topology SYSFS entries\n");
- if (sys_props.kobj_topology == NULL) {
+ if (!sys_props.kobj_topology) {
sys_props.kobj_topology =
kfd_alloc_struct(sys_props.kobj_topology);
if (!sys_props.kobj_topology)
@@ -1117,10 +1097,8 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
struct kfd_topology_device *dev;
struct kfd_topology_device *out_dev = NULL;
- BUG_ON(!gpu);
-
list_for_each_entry(dev, &topology_device_list, list)
- if (dev->gpu == NULL && dev->node_props.simd_count > 0) {
+ if (!dev->gpu && (dev->node_props.simd_count > 0)) {
dev->gpu = gpu;
out_dev = dev;
break;
@@ -1143,11 +1121,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
struct kfd_topology_device *dev;
int res;
- BUG_ON(!gpu);
-
gpu_id = kfd_generate_gpu_id(gpu);
- pr_debug("kfd: Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
down_write(&topology_lock);
/*
@@ -1170,8 +1146,8 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
* GPU vBIOS
*/
- /*
- * Update the SYSFS tree, since we added another topology device
+ /* Update the SYSFS tree, since we added another topology
+ * device
*/
if (kfd_topology_update_sysfs() < 0)
kfd_topology_release_sysfs();
@@ -1190,7 +1166,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
if (dev->gpu->device_info->asic_family == CHIP_CARRIZO) {
dev->node_props.capability |= HSA_CAP_DOORBELL_PACKET_TYPE;
- pr_info("amdkfd: adding doorbell packet type capability\n");
+ pr_info("Adding doorbell packet type capability\n");
}
res = 0;
@@ -1210,8 +1186,6 @@ int kfd_topology_remove_device(struct kfd_dev *gpu)
uint32_t gpu_id;
int res = -ENODEV;
- BUG_ON(!gpu);
-
down_write(&topology_lock);
list_for_each_entry(dev, &topology_device_list, list)