From 733fa1f7428c362b17b3de3a1c691e21fa803239 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 20 Sep 2017 18:10:14 -0400
Subject: drm/amdkfd: Fix suspend/resume issue on Carrizo v2

When we do suspend/resume through "sudo pm-suspend" while there is
HSA activity running, upon resume we will encounter HWS hanging, which
is caused by memory read/write failures. The root cause is that when
suspend, we neglected to unbind pasid from kfd device.

Another major change is that the bind/unbinding is changed to be
performed on a per process basis, instead of whether there are queues
in dqm.

v2:
- free IOMMU device if kfd_bind_processes_to_device fails in kfd_resume
- add comments to kfd_bind/unbind_processes_to/from_device
- minor cleanups

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 634083e340d1..c71160422348 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -432,6 +432,13 @@ struct qcm_process_device {
 	uint32_t sh_hidden_private_base;
 };
 
+
+enum kfd_pdd_bound {
+	PDD_UNBOUND = 0,
+	PDD_BOUND,
+	PDD_BOUND_SUSPENDED,
+};
+
 /* Data that is per-process-per device. */
 struct kfd_process_device {
 	/*
@@ -456,7 +463,7 @@ struct kfd_process_device {
 	uint64_t scratch_limit;
 
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
-	bool bound;
+	enum kfd_pdd_bound bound;
 
 	/* This flag tells if we should reset all
 	 * wavefronts on process termination
@@ -547,8 +554,10 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
 
 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
-							struct kfd_process *p);
-void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid);
+						struct kfd_process *p);
+int kfd_bind_processes_to_device(struct kfd_dev *dev);
+void kfd_unbind_processes_from_device(struct kfd_dev *dev);
+void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid);
 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
 							struct kfd_process *p);
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
-- 
cgit v1.2.3


From 8c72c3d7dfa86f7e84c5397975eb9c803e4de7b6 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 20 Sep 2017 18:10:15 -0400
Subject: drm/amdkfd: Rectify the jiffies calculation error with milliseconds
 v2

The timeout in milliseconds should not be regarded as jiffies. This
commit fixed that.

v2:
- use msecs_to_jiffies
- change timeout_ms parameter to unsigned int to match msecs_to_jiffies

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 6 +++---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 5db82b877deb..87961fe669e1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -835,12 +835,12 @@ out:
 
 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 				unsigned int fence_value,
-				unsigned long timeout)
+				unsigned int timeout_ms)
 {
-	timeout += jiffies;
+	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
 
 	while (*fence_addr != fence_value) {
-		if (time_after(jiffies, timeout)) {
+		if (time_after(jiffies, end_jiffies)) {
 			pr_err("qcm fence wait loop timeout expired\n");
 			return -ETIME;
 		}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c71160422348..f37fbfd0e917 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -672,7 +672,7 @@ struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 
 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 				unsigned int fence_value,
-				unsigned long timeout);
+				unsigned int timeout_ms);
 
 /* Packet Manager */
 
-- 
cgit v1.2.3


From b90e3fbecc9030efb8a6aed1d54a79d0c3d0820a Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 20 Sep 2017 18:10:16 -0400
Subject: drm/amdkfd: Adjust dequeue latencies and timeouts

Adjust latencies and timeouts for dequeueing with HWS and consolidate
them in one place. Make them longer to allow long running waves to
complete without causing a timeout. The timeout is twice as long as the
latency plus some buffer to make sure we don't detect a timeout
prematurely.

Change timeouts for dequeueing HQDs without HWS. KFD_UNMAP_LATENCY is
more consistent with what the HWS does for user queues.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 4 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c         | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c       | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 | 3 ---
 5 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 87961fe669e1..dd60c6eec962 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -323,7 +323,7 @@ static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 
 	retval = mqd->destroy_mqd(mqd, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
-				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
+				KFD_UNMAP_LATENCY_MS,
 				q->pipe, q->queue);
 
 	if (retval)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index faf820a06400..99e23053f59d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -29,7 +29,9 @@
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 
-#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
+#define KFD_UNMAP_LATENCY_MS			(4000)
+#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)
+
 #define CIK_VMID_NUM				(8)
 #define KFD_VMID_START_OFFSET			(8)
 #define VMID_PER_DEVICE				CIK_VMID_NUM
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index ed71ad40e8f7..d53d32a91a16 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -185,7 +185,7 @@ static void uninitialize(struct kernel_queue *kq)
 		kq->mqd->destroy_mqd(kq->mqd,
 					kq->queue->mqd,
 					KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
-					QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
+					KFD_UNMAP_LATENCY_MS,
 					kq->queue->pipe,
 					kq->queue->queue);
 	else if (kq->queue->properties.type == KFD_QUEUE_TYPE_DIQ)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 1d312603de9f..9eda884d8d48 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -376,7 +376,7 @@ int pm_send_set_resources(struct packet_manager *pm,
 	packet->bitfields2.queue_type =
 			queue_type__mes_set_resources__hsa_interface_queue_hiq;
 	packet->bitfields2.vmid_mask = res->vmid_mask;
-	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY;
+	packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
 	packet->bitfields7.oac_mask = res->oac_mask;
 	packet->bitfields8.gds_heap_base = res->gds_heap_base;
 	packet->bitfields8.gds_heap_size = res->gds_heap_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f37fbfd0e917..8883416634d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -676,11 +676,8 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 
 /* Packet Manager */
 
-#define KFD_HIQ_TIMEOUT (500)
-
 #define KFD_FENCE_COMPLETED (100)
 #define KFD_FENCE_INIT   (10)
-#define KFD_UNMAP_LATENCY (150)
 
 struct packet_manager {
 	struct device_queue_manager *dqm;
-- 
cgit v1.2.3


From 44008d7a871ce5a487cbcc4c412a5149145ea442 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 20 Sep 2017 18:10:18 -0400
Subject: drm/amdkfd: Use VMID bitmap from KGD v2

The hard-coded values related to VMID were removed in KFD, as those
values can be calculated in the KFD initialization function.

v2: remove unnecessary local variable

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c                |  9 ++-------
 drivers/gpu/drm/amd/amdkfd/kfd_device.c                |  5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 13 ++++++-------
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  4 ----
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  |  7 +++++++
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  2 +-
 6 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 0aa021aa0aa1..7d5635fdd6e7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -769,13 +769,8 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
 	union GRBM_GFX_INDEX_BITS reg_gfx_index;
 	struct kfd_process_device *pdd;
 	struct dbg_wave_control_info wac_info;
-	int temp;
-	int first_vmid_to_scan = 8;
-	int last_vmid_to_scan = 15;
-
-	first_vmid_to_scan = ffs(dev->shared_resources.compute_vmid_bitmap) - 1;
-	temp = dev->shared_resources.compute_vmid_bitmap >> first_vmid_to_scan;
-	last_vmid_to_scan = first_vmid_to_scan + ffz(temp);
+	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
+	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
 
 	reg_sq_cmd.u32All = 0;
 	status = 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 80c90f3619b7..46049f005b02 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -219,6 +219,11 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	kfd->shared_resources = *gpu_resources;
 
+	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
+	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
+	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
+			- kfd->vm_info.first_vmid_kfd + 1;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index dd60c6eec962..87f8742dcd8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -113,11 +113,11 @@ static int allocate_vmid(struct device_queue_manager *dqm,
 	if (dqm->vmid_bitmap == 0)
 		return -ENOMEM;
 
-	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
+	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap,
+				dqm->dev->vm_info.vmid_num_kfd);
 	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
 
-	/* Kaveri kfd vmid's starts from vmid 8 */
-	allocated_vmid = bit + KFD_VMID_START_OFFSET;
+	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
 	pr_debug("vmid allocation %d\n", allocated_vmid);
 	qpd->vmid = allocated_vmid;
 	q->properties.vmid = allocated_vmid;
@@ -132,7 +132,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
-	int bit = qpd->vmid - KFD_VMID_START_OFFSET;
+	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
 
 	/* Release the vmid mapping */
 	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
@@ -507,7 +507,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 				dqm->allocated_queues[pipe] |= 1 << queue;
 	}
 
-	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
+	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
 
 	return 0;
@@ -613,8 +613,7 @@ static int set_sched_resources(struct device_queue_manager *dqm)
 	int i, mec;
 	struct scheduling_resources res;
 
-	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
-	res.vmid_mask <<= KFD_VMID_START_OFFSET;
+	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
 
 	res.queue_mask = 0;
 	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 99e23053f59d..60d46ce0b8a7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -32,10 +32,6 @@
 #define KFD_UNMAP_LATENCY_MS			(4000)
 #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (2 * KFD_UNMAP_LATENCY_MS + 1000)
 
-#define CIK_VMID_NUM				(8)
-#define KFD_VMID_START_OFFSET			(8)
-#define VMID_PER_DEVICE				CIK_VMID_NUM
-#define KFD_DQM_FIRST_PIPE			(0)
 #define CIK_SDMA_QUEUES				(4)
 #define CIK_SDMA_QUEUES_PER_ENGINE		(2)
 #define CIK_SDMA_ENGINE_NUM			(2)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8883416634d3..f0ca60f27d9a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -141,6 +141,12 @@ struct kfd_mem_obj {
 	uint32_t *cpu_ptr;
 };
 
+struct kfd_vmid_info {
+	uint32_t first_vmid_kfd;
+	uint32_t last_vmid_kfd;
+	uint32_t vmid_num_kfd;
+};
+
 struct kfd_dev {
 	struct kgd_dev *kgd;
 
@@ -162,6 +168,7 @@ struct kfd_dev {
 					   */
 
 	struct kgd2kfd_shared_resources shared_resources;
+	struct kfd_vmid_info vm_info;
 
 	const struct kfd2kgd_calls *kfd2kgd;
 	struct mutex doorbell_mutex;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 03bec765b03d..68fe0d99f6c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -187,7 +187,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	case KFD_QUEUE_TYPE_COMPUTE:
 		/* check if there is over subscription */
 		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
-		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
+		((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
 		(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
 			pr_err("Over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
 			retval = -EPERM;
-- 
cgit v1.2.3


From e596b90338126d08a8d90f08e79bb644b8c9f2b6 Mon Sep 17 00:00:00 2001
From: Yong Zhao <Yong.Zhao@amd.com>
Date: Wed, 20 Sep 2017 18:10:19 -0400
Subject: drm/amdkfd: Reuse CHIP_* from amdgpu v2

There are already CHIP_* definitions under amd_shared.h file on amdgpu
side, so KFD should reuse them rather than defining new ones.

Using enum for asic type requires default cases on switch statements
to prevent compiler warnings. WARN on unsupported ASICs. It should never
get there because KFD should not be initialized on unsupported devices.

v2: Replace BUG() with WARN and error return

Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c |  4 ++++
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c         | 18 ++++++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c          |  3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 |  9 +++------
 4 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 87f8742dcd8c..fe0f0de0ef86 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1130,6 +1130,10 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 	case CHIP_KAVERI:
 		device_queue_manager_init_cik(&dqm->ops_asic_specific);
 		break;
+	default:
+		WARN(1, "Unexpected ASIC family %u",
+		     dev->device_info->asic_family);
+		goto out_free;
 	}
 
 	if (!dqm->ops.initialize(dqm))
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index d53d32a91a16..8b0c0645d7c0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -303,14 +303,20 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
 	case CHIP_KAVERI:
 		kernel_queue_init_cik(&kq->ops_asic_specific);
 		break;
+	default:
+		WARN(1, "Unexpected ASIC family %u",
+		     dev->device_info->asic_family);
+		goto out_free;
 	}
 
-	if (!kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) {
-		pr_err("Failed to init kernel queue\n");
-		kfree(kq);
-		return NULL;
-	}
-	return kq;
+	if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+		return kq;
+
+	pr_err("Failed to init kernel queue\n");
+
+out_free:
+	kfree(kq);
+	return NULL;
 }
 
 void kernel_queue_uninit(struct kernel_queue *kq)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index b1ef1368c3bb..dfd260ef81ff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -31,6 +31,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
 		return mqd_manager_init_cik(type, dev);
 	case CHIP_CARRIZO:
 		return mqd_manager_init_vi(type, dev);
+	default:
+		WARN(1, "Unexpected ASIC family %u",
+		     dev->device_info->asic_family);
 	}
 
 	return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f0ca60f27d9a..5ebe565fecb3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -33,6 +33,8 @@
 #include <linux/kfd_ioctl.h>
 #include <kgd_kfd_interface.h>
 
+#include "amd_shared.h"
+
 #define KFD_SYSFS_FILE_MODE 0444
 
 #define KFD_MMAP_DOORBELL_MASK 0x8000000000000
@@ -112,11 +114,6 @@ enum cache_policy {
 	cache_policy_noncoherent
 };
 
-enum asic_family_type {
-	CHIP_KAVERI = 0,
-	CHIP_CARRIZO
-};
-
 struct kfd_event_interrupt_class {
 	bool (*interrupt_isr)(struct kfd_dev *dev,
 				const uint32_t *ih_ring_entry);
@@ -125,7 +122,7 @@ struct kfd_event_interrupt_class {
 };
 
 struct kfd_device_info {
-	unsigned int asic_family;
+	enum amd_asic_type asic_family;
 	const struct kfd_event_interrupt_class *event_interrupt_class;
 	unsigned int max_pasid_bits;
 	unsigned int max_no_of_hqd;
-- 
cgit v1.2.3


From 7da2bcf87617fb00386ce61024e1c84d045b4e4f Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 27 Sep 2017 00:09:48 -0400
Subject: drm/amdkfd: Avoid name confusion involved in queue unmapping

When unmapping the queues from HW scheduler, there are two actions:
reset and preempt. So naming the variables with only preempt is
inapproriate.

For functions such as destroy_queues_cpsch, what they do actually is to
unmap the queues on HW scheduler rather than to destroy them. Change the
name to reflect that fact. On the other hand, there is already a function
called destroy_queue_cpsch() which exactly destroys a queue, and the name
is very close to destroy_queues_cpsch(), resulting in confusion.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 32 +++++++++++-----------
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    | 18 ++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              | 20 +++++++-------
 3 files changed, 35 insertions(+), 35 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 471b34e58225..1995e0aa7fa3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -45,8 +45,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
-static int destroy_queues_cpsch(struct device_queue_manager *dqm,
-				bool preempt_static_queues, bool lock);
+static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+				bool static_queues_included, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
@@ -707,7 +707,7 @@ fail_packet_manager_init:
 
 static int stop_cpsch(struct device_queue_manager *dqm)
 {
-	destroy_queues_cpsch(dqm, true, true);
+	unmap_queues_cpsch(dqm, true, true);
 
 	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
 	pm_uninit(&dqm->packets);
@@ -750,7 +750,7 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
 {
 	mutex_lock(&dqm->lock);
 	/* here we actually preempt the DIQ */
-	destroy_queues_cpsch(dqm, true, false);
+	unmap_queues_cpsch(dqm, true, false);
 	list_del(&kq->list);
 	dqm->queue_count--;
 	qpd->is_debug = false;
@@ -849,19 +849,19 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
 	return 0;
 }
 
-static int destroy_sdma_queues(struct device_queue_manager *dqm,
+static int unmap_sdma_queues(struct device_queue_manager *dqm,
 				unsigned int sdma_engine)
 {
 	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
-			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
 			sdma_engine);
 }
 
-static int destroy_queues_cpsch(struct device_queue_manager *dqm,
-				bool preempt_static_queues, bool lock)
+static int unmap_queues_cpsch(struct device_queue_manager *dqm,
+				bool static_queues_included, bool lock)
 {
 	int retval;
-	enum kfd_preempt_type_filter preempt_type;
+	enum kfd_unmap_queues_filter filter;
 	struct kfd_process_device *pdd;
 
 	retval = 0;
@@ -875,16 +875,16 @@ static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 		dqm->sdma_queue_count);
 
 	if (dqm->sdma_queue_count > 0) {
-		destroy_sdma_queues(dqm, 0);
-		destroy_sdma_queues(dqm, 1);
+		unmap_sdma_queues(dqm, 0);
+		unmap_sdma_queues(dqm, 1);
 	}
 
-	preempt_type = preempt_static_queues ?
-			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
-			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;
+	filter = static_queues_included ?
+			KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
 
 	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
-			preempt_type, 0, false, 0);
+			filter, 0, false, 0);
 	if (retval)
 		goto out;
 
@@ -916,7 +916,7 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
 	if (lock)
 		mutex_lock(&dqm->lock);
 
-	retval = destroy_queues_cpsch(dqm, false, false);
+	retval = unmap_queues_cpsch(dqm, false, false);
 	if (retval) {
 		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
 		goto out;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 9eda884d8d48..e5a15babb4f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -476,7 +476,7 @@ fail_acquire_packet_buffer:
 }
 
 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
-			enum kfd_preempt_type_filter mode,
+			enum kfd_unmap_queues_filter filter,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine)
 {
@@ -494,8 +494,8 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 
 	packet = (struct pm4_mes_unmap_queues *)buffer;
 	memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
-	pr_debug("static_queue: unmapping queues: mode is %d , reset is %d , type is %d\n",
-		mode, reset, type);
+	pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
+		filter, reset, type);
 	packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
 					sizeof(struct pm4_mes_unmap_queues));
 	switch (type) {
@@ -521,29 +521,29 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
 		packet->bitfields2.action =
 				action__mes_unmap_queues__preempt_queues;
 
-	switch (mode) {
-	case KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE:
+	switch (filter) {
+	case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
 		packet->bitfields2.queue_sel =
 				queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
 		packet->bitfields2.num_queues = 1;
 		packet->bitfields3b.doorbell_offset0 = filter_param;
 		break;
-	case KFD_PREEMPT_TYPE_FILTER_BY_PASID:
+	case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
 		packet->bitfields2.queue_sel =
 				queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
 		packet->bitfields3a.pasid = filter_param;
 		break;
-	case KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES:
+	case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
 		packet->bitfields2.queue_sel =
 				queue_sel__mes_unmap_queues__unmap_all_queues;
 		break;
-	case KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES:
+	case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
 		/* in this case, we do not preempt static queues */
 		packet->bitfields2.queue_sel =
 				queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
 		break;
 	default:
-		WARN(1, "filter %d", mode);
+		WARN(1, "filter %d", filter);
 		retval = -EINVAL;
 		goto err_invalid;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 5ebe565fecb3..8af0d6fe257a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -222,22 +222,22 @@ void kfd_chardev_exit(void);
 struct device *kfd_chardev(void);
 
 /**
- * enum kfd_preempt_type_filter
+ * enum kfd_unmap_queues_filter
  *
- * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue.
+ * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts single queue.
  *
- * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the
+ * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
  *						running queues list.
  *
- * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to
+ * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belongs to
  *						specific process.
  *
  */
-enum kfd_preempt_type_filter {
-	KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE,
-	KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES,
-	KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES,
-	KFD_PREEMPT_TYPE_FILTER_BY_PASID
+enum kfd_unmap_queues_filter {
+	KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
+	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
+	KFD_UNMAP_QUEUES_FILTER_BY_PASID
 };
 
 /**
@@ -700,7 +700,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
 				uint32_t fence_value);
 
 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
-			enum kfd_preempt_type_filter mode,
+			enum kfd_unmap_queues_filter mode,
 			uint32_t filter_param, bool reset,
 			unsigned int sdma_engine);
 
-- 
cgit v1.2.3


From 9fd3f1bfae6c6c75f0c8aedb5d499d74cdb52eb9 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 27 Sep 2017 00:09:52 -0400
Subject: drm/amdkfd: Improve process termination handling

Separate device queue termination from process queue manager
termination. Unmap all queues at once instead of one at a time.
Unmap device queues before the PASID is unbound, in the
kfd_process_iommu_unbind_callback.

When resetting wavefronts in non-HWS mode, do it before the VMID is
released.

Signed-off-by: Ben Goz <ben.goz@amd.com>
Signed-off-by: shaoyun liu <shaoyun.liu@amd.com>
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Yong Zhao <Yong.Zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 185 ++++++++++++++++-----
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |   5 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  18 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c           |  36 ++--
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  37 +++--
 5 files changed, 200 insertions(+), 81 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f5a5988257c8..dd7e44593cfe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -296,65 +296,73 @@ out_deallocate_hqd:
 	return retval;
 }
 
-static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
+ * to avoid asynchronized access
+ */
+static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 				struct qcm_process_device *qpd,
 				struct queue *q)
 {
 	int retval;
 	struct mqd_manager *mqd;
 
-	retval = 0;
-
-	mutex_lock(&dqm->lock);
+	mqd = dqm->ops.get_mqd_manager(dqm,
+		get_mqd_type_from_queue_type(q->properties.type));
+	if (!mqd)
+		return -ENOMEM;
 
 	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
 		deallocate_hqd(dqm, q);
 	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
-		if (mqd == NULL) {
-			retval = -ENOMEM;
-			goto out;
-		}
 		dqm->sdma_queue_count--;
 		deallocate_sdma_queue(dqm, q->sdma_id);
 	} else {
 		pr_debug("q->properties.type %d is invalid\n",
 				q->properties.type);
-		retval = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
+	dqm->total_queue_count--;
 
 	retval = mqd->destroy_mqd(mqd, q->mqd,
 				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 				KFD_UNMAP_LATENCY_MS,
 				q->pipe, q->queue);
-
-	if (retval)
-		goto out;
+	if (retval == -ETIME)
+		qpd->reset_wavefronts = true;
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
 	list_del(&q->list);
-	if (list_empty(&qpd->queues_list))
+	if (list_empty(&qpd->queues_list)) {
+		if (qpd->reset_wavefronts) {
+			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
+					dqm->dev);
+			/* dbgdev_wave_reset_wavefronts has to be called before
+			 * deallocate_vmid(), i.e. when vmid is still in use.
+			 */
+			dbgdev_wave_reset_wavefronts(dqm->dev,
+					qpd->pqm->process);
+			qpd->reset_wavefronts = false;
+		}
+
 		deallocate_vmid(dqm, qpd, q);
+	}
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	/*
-	 * Unconditionally decrement this counter, regardless of the queue's
-	 * type
-	 */
-	dqm->total_queue_count--;
-	pr_debug("Total of %d queues are accountable so far\n",
-			dqm->total_queue_count);
+	return retval;
+}
 
-out:
+static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
+				struct qcm_process_device *qpd,
+				struct queue *q)
+{
+	int retval;
+
+	mutex_lock(&dqm->lock);
+	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
 	mutex_unlock(&dqm->lock);
+
 	return retval;
 }
 
@@ -921,10 +929,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
 				uint32_t filter_param)
 {
-	int retval;
-	struct kfd_process_device *pdd;
-
-	retval = 0;
+	int retval = 0;
 
 	if (!dqm->active_runlist)
 		return retval;
@@ -948,12 +953,9 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 	/* should be timed out */
 	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
 				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
-	if (retval) {
-		pdd = kfd_get_process_device_data(dqm->dev,
-				kfd_get_process(current));
-		pdd->reset_wavefronts = true;
+	if (retval)
 		return retval;
-	}
+
 	pm_release_ib(&dqm->packets);
 	dqm->active_runlist = false;
 
@@ -1015,7 +1017,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
-	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	retval = execute_queues_cpsch(dqm,
+				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	if (retval == -ETIME)
+		qpd->reset_wavefronts = true;
 
 	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
@@ -1105,6 +1110,108 @@ out:
 	return retval;
 }
 
+static int process_termination_nocpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	struct queue *q, *next;
+	struct device_process_node *cur, *next_dpn;
+	int retval = 0;
+
+	mutex_lock(&dqm->lock);
+
+	/* Clear all user mode queues */
+	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+		int ret;
+
+		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
+		if (ret)
+			retval = ret;
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			break;
+		}
+	}
+
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
+
+static int process_termination_cpsch(struct device_queue_manager *dqm,
+		struct qcm_process_device *qpd)
+{
+	int retval;
+	struct queue *q, *next;
+	struct kernel_queue *kq, *kq_next;
+	struct mqd_manager *mqd;
+	struct device_process_node *cur, *next_dpn;
+	enum kfd_unmap_queues_filter filter =
+		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
+
+	retval = 0;
+
+	mutex_lock(&dqm->lock);
+
+	/* Clean all kernel queues */
+	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
+		list_del(&kq->list);
+		dqm->queue_count--;
+		qpd->is_debug = false;
+		dqm->total_queue_count--;
+		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
+	}
+
+	/* Clear all user mode queues */
+	list_for_each_entry(q, &qpd->queues_list, list) {
+		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+			dqm->sdma_queue_count--;
+
+		if (q->properties.is_active)
+			dqm->queue_count--;
+
+		dqm->total_queue_count--;
+	}
+
+	/* Unregister process */
+	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
+		if (qpd == cur->qpd) {
+			list_del(&cur->list);
+			kfree(cur);
+			dqm->processes_count--;
+			break;
+		}
+	}
+
+	retval = execute_queues_cpsch(dqm, filter, 0);
+	if (retval || qpd->reset_wavefronts) {
+		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
+		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
+		qpd->reset_wavefronts = false;
+	}
+
+	/* lastly, free mqd resources */
+	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
+		mqd = dqm->ops.get_mqd_manager(dqm,
+			get_mqd_type_from_queue_type(q->properties.type));
+		if (!mqd) {
+			retval = -ENOMEM;
+			goto out;
+		}
+		list_del(&q->list);
+		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+	}
+
+out:
+	mutex_unlock(&dqm->lock);
+	return retval;
+}
+
 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 {
 	struct device_queue_manager *dqm;
@@ -1133,6 +1240,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
 		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_cpsch;
 		break;
 	case KFD_SCHED_POLICY_NO_HWS:
 		/* initialize dqm for no cp scheduling */
@@ -1147,6 +1255,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
 		dqm->ops.initialize = initialize_nocpsch;
 		dqm->ops.uninitialize = uninitialize;
 		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
+		dqm->ops.process_termination = process_termination_nocpsch;
 		break;
 	default:
 		pr_err("Invalid scheduling policy %d\n", sched_policy);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 60d46ce0b8a7..31c2b1f9d320 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -77,6 +77,8 @@ struct device_process_node {
  * @set_cache_memory_policy: Sets memory policy (cached/ non cached) for the
  * memory apertures.
  *
+ * @process_termination: Clears all process queues belongs to that device.
+ *
  */
 
 struct device_queue_manager_ops {
@@ -120,6 +122,9 @@ struct device_queue_manager_ops {
 					   enum cache_policy alternate_policy,
 					   void __user *alternate_aperture_base,
 					   uint64_t alternate_aperture_size);
+
+	int (*process_termination)(struct device_queue_manager *dqm,
+			struct qcm_process_device *qpd);
 };
 
 struct device_queue_manager_asic_ops {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 8af0d6fe257a..2e13f5d46d25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -421,6 +421,12 @@ struct qcm_process_device {
 	unsigned int queue_count;
 	unsigned int vmid;
 	bool is_debug;
+
+	/* This flag tells if we should reset all wavefronts on
+	 * process termination
+	 */
+	bool reset_wavefronts;
+
 	/*
 	 * All the memory management data should be here too
 	 */
@@ -454,6 +460,8 @@ struct kfd_process_device {
 	/* The device that owns this data. */
 	struct kfd_dev *dev;
 
+	/* The process that owns this kfd_process_device. */
+	struct kfd_process *process;
 
 	/* per-process-per device QCM data structure */
 	struct qcm_process_device qpd;
@@ -469,10 +477,12 @@ struct kfd_process_device {
 	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
 	enum kfd_pdd_bound bound;
 
-	/* This flag tells if we should reset all
-	 * wavefronts on process termination
+	/* Flag used to tell the pdd has dequeued from the dqm.
+	 * This is used to prevent dev->dqm->ops.process_termination() from
+	 * being called twice when it is already called in IOMMU callback
+	 * function.
 	 */
-	bool reset_wavefronts;
+	bool already_dequeued;
 };
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
@@ -659,6 +669,8 @@ struct process_queue_node {
 	struct list_head process_queue_list;
 };
 
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
 void pqm_uninit(struct process_queue_manager *pqm);
 int pqm_create_queue(struct process_queue_manager *pqm,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1325f88591ae..3ccb3b53216e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -171,9 +171,6 @@ static void kfd_process_wq_release(struct work_struct *work)
 		pr_debug("Releasing pdd (topology id %d) for process (pasid %d) in workqueue\n",
 				pdd->dev->id, p->pasid);
 
-		if (pdd->reset_wavefronts)
-			dbgdev_wave_reset_wavefronts(pdd->dev, p);
-
 		if (pdd->bound == PDD_BOUND)
 			amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid);
 
@@ -237,24 +234,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 
 	mutex_lock(&p->mutex);
 
-	/* In case our notifier is called before IOMMU notifier */
+	kfd_process_dequeue_from_all_devices(p);
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should delete debug managers and reset all wavefronts
+	 * if we should delete debug managers
 	 */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
 		if ((pdd->dev->dbgmgr) &&
 				(pdd->dev->dbgmgr->pasid == p->pasid))
 			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
 
-		if (pdd->reset_wavefronts) {
-			pr_warn("Resetting all wave fronts\n");
-			dbgdev_wave_reset_wavefronts(pdd->dev, p);
-			pdd->reset_wavefronts = false;
-		}
-	}
-
 	mutex_unlock(&p->mutex);
 
 	/*
@@ -368,8 +358,9 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 		INIT_LIST_HEAD(&pdd->qpd.queues_list);
 		INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
 		pdd->qpd.dqm = dev->dqm;
-		pdd->reset_wavefronts = false;
+		pdd->process = p;
 		pdd->bound = PDD_UNBOUND;
+		pdd->already_dequeued = false;
 		list_add(&pdd->per_device_list, &p->per_device_data);
 	}
 
@@ -498,19 +489,12 @@ void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid)
 	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
 		kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
-
 	pdd = kfd_get_process_device_data(dev, p);
-
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
-
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+	if (pdd)
+		/* For GPU relying on IOMMU, we need to dequeue here
+		 * when PASID is still bound.
+		 */
+		kfd_process_dequeue_from_device(pdd);
 
 	mutex_unlock(&p->mutex);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 68fe0d99f6c2..31ec3ca9d46d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -63,6 +63,25 @@ static int find_available_queue_slot(struct process_queue_manager *pqm,
 	return 0;
 }
 
+void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
+{
+	struct kfd_dev *dev = pdd->dev;
+
+	if (pdd->already_dequeued)
+		return;
+
+	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
+	pdd->already_dequeued = true;
+}
+
+void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
+{
+	struct kfd_process_device *pdd;
+
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+		kfd_process_dequeue_from_device(pdd);
+}
+
 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 {
 	INIT_LIST_HEAD(&pqm->queues);
@@ -78,21 +97,14 @@ int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
 
 void pqm_uninit(struct process_queue_manager *pqm)
 {
-	int retval;
 	struct process_queue_node *pqn, *next;
 
 	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
-		retval = pqm_destroy_queue(
-				pqm,
-				(pqn->q != NULL) ?
-					pqn->q->properties.queue_id :
-					pqn->kq->queue->properties.queue_id);
-
-		if (retval != 0) {
-			pr_err("failed to destroy queue\n");
-			return;
-		}
+		uninit_queue(pqn->q);
+		list_del(&pqn->process_queue_list);
+		kfree(pqn);
 	}
+
 	kfree(pqm->queue_slot_bitmap);
 	pqm->queue_slot_bitmap = NULL;
 }
@@ -290,9 +302,6 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
 	if (pqn->q) {
 		dqm = pqn->q->device->dqm;
 		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
-		if (retval != 0)
-			return retval;
-
 		uninit_queue(pqn->q);
 	}
 
-- 
cgit v1.2.3


From e6f791b1b068b168c6f5203f29040b972d7fbc20 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 27 Sep 2017 00:09:53 -0400
Subject: drm/amdkfd: Compress unnecessary function parameters

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c               |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c                |  5 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  |  2 --
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 10 +++-------
 4 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 660b3fbade41..0ef82b229754 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -282,8 +282,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 			p->pasid,
 			dev->id);
 
-	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties,
-				0, q_properties.type, &queue_id);
+	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
 	if (err != 0)
 		goto err_create_queue;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 7d5635fdd6e7..c407f6bd9956 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -184,9 +184,10 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
 	struct kernel_queue *kq = NULL;
 	int status;
 
+	properties.type = KFD_QUEUE_TYPE_DIQ;
+
 	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,
-				&properties, 0, KFD_QUEUE_TYPE_DIQ,
-				&qid);
+				&properties, &qid);
 
 	if (status) {
 		pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2e13f5d46d25..7f141730de41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -677,8 +677,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int flags,
-			    enum kfd_queue_type type,
 			    unsigned int *qid);
 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 31ec3ca9d46d..63c569b62cee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -142,20 +142,17 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			    struct kfd_dev *dev,
 			    struct file *f,
 			    struct queue_properties *properties,
-			    unsigned int flags,
-			    enum kfd_queue_type type,
 			    unsigned int *qid)
 {
 	int retval;
 	struct kfd_process_device *pdd;
-	struct queue_properties q_properties;
 	struct queue *q;
 	struct process_queue_node *pqn;
 	struct kernel_queue *kq;
 	int num_queues = 0;
 	struct queue *cur;
+	enum kfd_queue_type type = properties->type;
 
-	memcpy(&q_properties, properties, sizeof(struct queue_properties));
 	q = NULL;
 	kq = NULL;
 
@@ -206,7 +203,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 			goto err_create_queue;
 		}
 
-		retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
+		retval = create_cp_queue(pqm, dev, &q, properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
@@ -243,9 +240,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	list_add(&pqn->process_queue_list, &pqm->queues);
 
 	if (q) {
-		*properties = q->properties;
 		pr_debug("PQM done creating queue\n");
-		print_queue_properties(properties);
+		print_queue_properties(&q->properties);
 	}
 
 	return retval;
-- 
cgit v1.2.3


From bc920fd4f4350a2e3094c165a77798d721f39e7b Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 27 Sep 2017 00:09:54 -0400
Subject: drm/amdkfd: Clean up process queue management

Removed unused num_concurrent_processes.

Implemented counting of queues in QPD. This makes counting the queue
list repeatedly in several places unnecessary.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 5 +++++
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c        | 7 +------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 9 ++-------
 4 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index dd7e44593cfe..c0685cd64d12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -189,6 +189,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 	}
 
 	list_add(&q->list, &qpd->queues_list);
+	qpd->queue_count++;
 	if (q->properties.is_active)
 		dqm->queue_count++;
 
@@ -347,6 +348,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 
 		deallocate_vmid(dqm, qpd, q);
 	}
+	qpd->queue_count--;
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
@@ -856,6 +858,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 		goto out;
 
 	list_add(&q->list, &qpd->queues_list);
+	qpd->queue_count++;
 	if (q->properties.is_active) {
 		dqm->queue_count++;
 		retval = execute_queues_cpsch(dqm,
@@ -1014,6 +1017,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 		dqm->sdma_queue_count--;
 
 	list_del(&q->list);
+	qpd->queue_count--;
 	if (q->properties.is_active)
 		dqm->queue_count--;
 
@@ -1204,6 +1208,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 			goto out;
 		}
 		list_del(&q->list);
+		qpd->queue_count--;
 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index e5a15babb4f3..5d1770e577e9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -140,8 +140,6 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 				struct qcm_process_device *qpd)
 {
 	struct pm4_mes_map_process *packet;
-	struct queue *cur;
-	uint32_t num_queues;
 
 	packet = (struct pm4_mes_map_process *)buffer;
 
@@ -156,10 +154,7 @@ static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
 	packet->bitfields10.gds_size = qpd->gds_size;
 	packet->bitfields10.num_gws = qpd->num_gws;
 	packet->bitfields10.num_oac = qpd->num_oac;
-	num_queues = 0;
-	list_for_each_entry(cur, &qpd->queues_list, list)
-		num_queues++;
-	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : num_queues;
+	packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
 
 	packet->sh_mem_config = qpd->sh_mem_config;
 	packet->sh_mem_bases = qpd->sh_mem_bases;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7f141730de41..7d86ec9790d3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -405,7 +405,6 @@ struct scheduling_resources {
 struct process_queue_manager {
 	/* data */
 	struct kfd_process	*process;
-	unsigned int		num_concurrent_processes;
 	struct list_head	queues;
 	unsigned long		*queue_slot_bitmap;
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 63c569b62cee..88ad178bffb6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -149,8 +149,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	struct queue *q;
 	struct process_queue_node *pqn;
 	struct kernel_queue *kq;
-	int num_queues = 0;
-	struct queue *cur;
 	enum kfd_queue_type type = properties->type;
 
 	q = NULL;
@@ -168,11 +166,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	 * If we are just about to create DIQ, the is_debug flag is not set yet
 	 * Hence we also check the type as well
 	 */
-	if ((pdd->qpd.is_debug) ||
-		(type == KFD_QUEUE_TYPE_DIQ)) {
-		list_for_each_entry(cur, &pdd->qpd.queues_list, list)
-			num_queues++;
-		if (num_queues >= dev->device_info->max_no_of_hqd/2)
+	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ)) {
+		if (pdd->qpd.queue_count >= dev->device_info->max_no_of_hqd/2)
 			return -ENOSPC;
 	}
 
-- 
cgit v1.2.3


From 9b56bb115460cee92a80bf85232b4b7da2f080e6 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2017 19:35:19 -0400
Subject: drm/amdkfd: Don't dereference kfd_process.mm

The kfd_process doesn't own a reference to the mm_struct, so it can
disappear without warning even while the kfd_process still exists.

Therefore, avoid dereferencing the kfd_process.mm pointer and make
it opaque. Use get_task_mm to get a temporary reference to the mm
when it's needed.

v2: removed unnecessary WARN_ON

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 19 +++++++++++++++----
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  7 ++++++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  1 -
 3 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 944abfad39c1..61ce5476c055 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -24,8 +24,8 @@
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/sched/signal.h>
+#include <linux/sched/mm.h>
 #include <linux/uaccess.h>
-#include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/memory.h>
 #include "kfd_priv.h"
@@ -904,14 +904,24 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 	 * running so the lookup function returns a locked process.
 	 */
 	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
+	struct mm_struct *mm;
 
 	if (!p)
 		return; /* Presumably process exited. */
 
+	/* Take a safe reference to the mm_struct, which may otherwise
+	 * disappear even while the kfd_process is still referenced.
+	 */
+	mm = get_task_mm(p->lead_thread);
+	if (!mm) {
+		mutex_unlock(&p->mutex);
+		return; /* Process is exiting */
+	}
+
 	memset(&memory_exception_data, 0, sizeof(memory_exception_data));
 
-	down_read(&p->mm->mmap_sem);
-	vma = find_vma(p->mm, address);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
 
 	memory_exception_data.gpu_id = dev->id;
 	memory_exception_data.va = address;
@@ -937,7 +947,8 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid,
 		}
 	}
 
-	up_read(&p->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
+	mmput(mm);
 
 	mutex_lock(&p->event_mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7d86ec9790d3..1a483a7ecd4e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -494,7 +494,12 @@ struct kfd_process {
 	 */
 	struct hlist_node kfd_processes;
 
-	struct mm_struct *mm;
+	/*
+	 * Opaque pointer to mm_struct. We don't hold a reference to
+	 * it so it should never be dereferenced from here. This is
+	 * only used for looking up processes by their mm.
+	 */
+	void *mm;
 
 	struct mutex mutex;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 3ccb3b53216e..695fa2ae8e5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -200,7 +200,6 @@ static void kfd_process_destroy_delayed(struct rcu_head *rcu)
 	struct kfd_process *p;
 
 	p = container_of(rcu, struct kfd_process, rcu);
-	WARN_ON(atomic_read(&p->mm->mm_count) <= 0);
 
 	mmdrop(p->mm);
 
-- 
cgit v1.2.3


From fdf0c8332a0309ac619e22e82b6014c77b2a3518 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2017 19:35:22 -0400
Subject: drm/amdkfd: Clean up kfd_wait_on_events

Cleaned up the code while resolving some potential bugs and
inconsistencies in the process.

Clean-ups:
* Remove enum kfd_event_wait_result, which duplicates
  KFD_IOC_EVENT_RESULT definitions
* alloc_event_waiters can be called without holding p->event_mutex
* Return an error code from copy_signaled_event_data instead of bool
* Clean up error handling code paths to minimize duplication in
  kfd_wait_on_events

Fixes:
* Consistently return an error code from kfd_wait_on_events and set
  wait_result to KFD_IOC_WAIT_RESULT_FAIL in all failure cases.
* Always call free_waiters while holding p->event_mutex
* copy_signaled_event_data might sleep. Don't call it while the task state
  is TASK_INTERRUPTIBLE.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  5 +--
 drivers/gpu/drm/amd/amdkfd/kfd_events.c  | 71 ++++++++++++++------------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  8 +---
 3 files changed, 32 insertions(+), 52 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 0ef82b229754..a25321ff448f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -835,15 +835,12 @@ static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
 				void *data)
 {
 	struct kfd_ioctl_wait_events_args *args = data;
-	enum kfd_event_wait_result wait_result;
 	int err;
 
 	err = kfd_wait_on_events(p, args->num_events,
 			(void __user *)args->events_ptr,
 			(args->wait_for_all != 0),
-			args->timeout, &wait_result);
-
-	args->wait_result = wait_result;
+			args->timeout, &args->wait_result);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 1efd6a8c2a40..33cafbb96520 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -668,7 +668,7 @@ static bool test_event_condition(bool all, uint32_t num_events,
  * Copy event specific data, if defined.
  * Currently only memory exception events have additional data to copy to user
  */
-static bool copy_signaled_event_data(uint32_t num_events,
+static int copy_signaled_event_data(uint32_t num_events,
 		struct kfd_event_waiter *event_waiters,
 		struct kfd_event_data __user *data)
 {
@@ -686,11 +686,11 @@ static bool copy_signaled_event_data(uint32_t num_events,
 			src = &event->memory_exception_data;
 			if (copy_to_user(dst, src,
 				sizeof(struct kfd_hsa_memory_exception_data)))
-				return false;
+				return -EFAULT;
 		}
 	}
 
-	return true;
+	return 0;
 
 }
 
@@ -727,7 +727,7 @@ static void free_waiters(uint32_t num_events, struct kfd_event_waiter *waiters)
 int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t num_events, void __user *data,
 		       bool all, uint32_t user_timeout_ms,
-		       enum kfd_event_wait_result *wait_result)
+		       uint32_t *wait_result)
 {
 	struct kfd_event_data __user *events =
 			(struct kfd_event_data __user *) data;
@@ -737,18 +737,18 @@ int kfd_wait_on_events(struct kfd_process *p,
 	struct kfd_event_waiter *event_waiters = NULL;
 	long timeout = user_timeout_to_jiffies(user_timeout_ms);
 
+	event_waiters = alloc_event_waiters(num_events);
+	if (!event_waiters) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	mutex_lock(&p->event_mutex);
 
 	/* Set to something unreasonable - this is really
 	 * just a bool for now.
 	 */
-	*wait_result = KFD_WAIT_TIMEOUT;
-
-	event_waiters = alloc_event_waiters(num_events);
-	if (!event_waiters) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	*wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
 
 	for (i = 0; i < num_events; i++) {
 		struct kfd_event_data event_data;
@@ -756,23 +756,21 @@ int kfd_wait_on_events(struct kfd_process *p,
 		if (copy_from_user(&event_data, &events[i],
 				sizeof(struct kfd_event_data))) {
 			ret = -EFAULT;
-			goto fail;
+			goto out_unlock;
 		}
 
 		ret = init_event_waiter_get_status(p, &event_waiters[i],
 				event_data.event_id, i);
 		if (ret)
-			goto fail;
+			goto out_unlock;
 	}
 
 	/* Check condition once. */
 	if (test_event_condition(all, num_events, event_waiters)) {
-		if (copy_signaled_event_data(num_events,
-				event_waiters, events))
-			*wait_result = KFD_WAIT_COMPLETE;
-		else
-			*wait_result = KFD_WAIT_ERROR;
-		free_waiters(num_events, event_waiters);
+		*wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+		goto out_unlock;
 	} else {
 		/* Add to wait lists if we need to wait. */
 		for (i = 0; i < num_events; i++)
@@ -781,12 +779,6 @@ int kfd_wait_on_events(struct kfd_process *p,
 
 	mutex_unlock(&p->event_mutex);
 
-	/* Return if all waits were already satisfied. */
-	if (*wait_result != KFD_WAIT_TIMEOUT) {
-		__set_current_state(TASK_RUNNING);
-		return ret;
-	}
-
 	while (true) {
 		if (fatal_signal_pending(current)) {
 			ret = -EINTR;
@@ -818,16 +810,12 @@ int kfd_wait_on_events(struct kfd_process *p,
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		if (test_event_condition(all, num_events, event_waiters)) {
-			if (copy_signaled_event_data(num_events,
-					event_waiters, events))
-				*wait_result = KFD_WAIT_COMPLETE;
-			else
-				*wait_result = KFD_WAIT_ERROR;
+			*wait_result = KFD_IOC_WAIT_RESULT_COMPLETE;
 			break;
 		}
 
 		if (timeout <= 0) {
-			*wait_result = KFD_WAIT_TIMEOUT;
+			*wait_result = KFD_IOC_WAIT_RESULT_TIMEOUT;
 			break;
 		}
 
@@ -835,19 +823,20 @@ int kfd_wait_on_events(struct kfd_process *p,
 	}
 	__set_current_state(TASK_RUNNING);
 
+	/* copy_signaled_event_data may sleep. So this has to happen
+	 * after the task state is set back to RUNNING.
+	 */
+	if (!ret && *wait_result == KFD_IOC_WAIT_RESULT_COMPLETE)
+		ret = copy_signaled_event_data(num_events,
+					       event_waiters, events);
+
 	mutex_lock(&p->event_mutex);
+out_unlock:
 	free_waiters(num_events, event_waiters);
 	mutex_unlock(&p->event_mutex);
-
-	return ret;
-
-fail:
-	if (event_waiters)
-		free_waiters(num_events, event_waiters);
-
-	mutex_unlock(&p->event_mutex);
-
-	*wait_result = KFD_WAIT_ERROR;
+out:
+	if (ret)
+		*wait_result = KFD_IOC_WAIT_RESULT_FAIL;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1a483a7ecd4e..d3cf53a50ced 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -726,19 +726,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
 extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
 extern const struct kfd_device_global_init_class device_global_init_class_cik;
 
-enum kfd_event_wait_result {
-	KFD_WAIT_COMPLETE,
-	KFD_WAIT_TIMEOUT,
-	KFD_WAIT_ERROR
-};
-
 void kfd_event_init_process(struct kfd_process *p);
 void kfd_event_free_process(struct kfd_process *p);
 int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
 int kfd_wait_on_events(struct kfd_process *p,
 		       uint32_t num_events, void __user *data,
 		       bool all, uint32_t user_timeout_ms,
-		       enum kfd_event_wait_result *wait_result);
+		       uint32_t *wait_result);
 void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 				uint32_t valid_id_bits);
 void kfd_signal_iommu_event(struct kfd_dev *dev,
-- 
cgit v1.2.3


From 50cb7dd94cb43a6204813376e1be1d21780b71fb Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2017 19:35:26 -0400
Subject: drm/amdkfd: Simplify events page allocator

The first event page is always big enough to handle all events.
Handling of multiple events pages is not supported by user mode, and
not necessary.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 197 +++++++++++---------------------
 drivers/gpu/drm/amd/amdkfd/kfd_events.h |   1 -
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   4 +-
 3 files changed, 70 insertions(+), 132 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 7dae26f0e11a..7cc17107f448 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -41,6 +41,9 @@ struct kfd_event_waiter {
 	bool activated;		 /* Becomes true when event is signaled */
 };
 
+#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
+#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
+
 /*
  * Over-complicated pooled allocator for event notification slots.
  *
@@ -51,132 +54,98 @@ struct kfd_event_waiter {
  * Individual signal events are then allocated a slot in a page.
  */
 
-struct signal_page {
-	struct list_head event_pages;	/* kfd_process.signal_event_pages */
+struct kfd_signal_page {
 	uint64_t *kernel_address;
 	uint64_t __user *user_address;
-	uint32_t page_index;		/* Index into the mmap aperture. */
 	unsigned int free_slots;
-	unsigned long used_slot_bitmap[0];
+	unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
 };
 
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_SIZE BITS_TO_LONGS(SLOTS_PER_PAGE)
-#define BITS_PER_PAGE (ilog2(SLOTS_PER_PAGE)+1)
-#define SIGNAL_PAGE_SIZE (sizeof(struct signal_page) + \
-				SLOT_BITMAP_SIZE * sizeof(long))
-
 /*
  * For signal events, the event ID is used as the interrupt user data.
  * For SQ s_sendmsg interrupts, this is limited to 8 bits.
  */
 
 #define INTERRUPT_DATA_BITS 8
-#define SIGNAL_EVENT_ID_SLOT_SHIFT 0
 
-static uint64_t *page_slots(struct signal_page *page)
+static uint64_t *page_slots(struct kfd_signal_page *page)
 {
 	return page->kernel_address;
 }
 
 static bool allocate_free_slot(struct kfd_process *process,
-				struct signal_page **out_page,
-				unsigned int *out_slot_index)
+			       unsigned int *out_slot_index)
 {
-	struct signal_page *page;
+	struct kfd_signal_page *page = process->signal_page;
+	unsigned int slot;
 
-	list_for_each_entry(page, &process->signal_event_pages, event_pages) {
-		if (page->free_slots > 0) {
-			unsigned int slot =
-				find_first_zero_bit(page->used_slot_bitmap,
-							SLOTS_PER_PAGE);
+	if (!page || page->free_slots == 0) {
+		pr_debug("No free event signal slots were found for process %p\n",
+			 process);
 
-			__set_bit(slot, page->used_slot_bitmap);
-			page->free_slots--;
+		return false;
+	}
 
-			page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
+	slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
 
-			*out_page = page;
-			*out_slot_index = slot;
+	__set_bit(slot, page->used_slot_bitmap);
+	page->free_slots--;
 
-			pr_debug("Allocated event signal slot in page %p, slot %d\n",
-					page, slot);
+	page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
 
-			return true;
-		}
-	}
+	*out_slot_index = slot;
 
-	pr_debug("No free event signal slots were found for process %p\n",
-			process);
+	pr_debug("Allocated event signal slot in page %p, slot %d\n",
+		 page, slot);
 
-	return false;
+	return true;
 }
 
-#define list_tail_entry(head, type, member) \
-	list_entry((head)->prev, type, member)
-
-static bool allocate_signal_page(struct file *devkfd, struct kfd_process *p)
+static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 {
 	void *backing_store;
-	struct signal_page *page;
+	struct kfd_signal_page *page;
 
-	page = kzalloc(SIGNAL_PAGE_SIZE, GFP_KERNEL);
+	page = kzalloc(sizeof(*page), GFP_KERNEL);
 	if (!page)
-		goto fail_alloc_signal_page;
+		return NULL;
 
 	page->free_slots = SLOTS_PER_PAGE;
 
-	backing_store = (void *) __get_free_pages(GFP_KERNEL | __GFP_ZERO,
+	backing_store = (void *) __get_free_pages(GFP_KERNEL,
 					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 	if (!backing_store)
 		goto fail_alloc_signal_store;
 
-	/* prevent user-mode info leaks */
+	/* Initialize all events to unsignaled */
 	memset(backing_store, (uint8_t) UNSIGNALED_EVENT_SLOT,
-		KFD_SIGNAL_EVENT_LIMIT * 8);
+	       KFD_SIGNAL_EVENT_LIMIT * 8);
 
 	page->kernel_address = backing_store;
-
-	if (list_empty(&p->signal_event_pages))
-		page->page_index = 0;
-	else
-		page->page_index = list_tail_entry(&p->signal_event_pages,
-						   struct signal_page,
-						   event_pages)->page_index + 1;
-
 	pr_debug("Allocated new event signal page at %p, for process %p\n",
 			page, p);
-	pr_debug("Page index is %d\n", page->page_index);
-
-	list_add(&page->event_pages, &p->signal_event_pages);
 
-	return true;
+	return page;
 
 fail_alloc_signal_store:
 	kfree(page);
-fail_alloc_signal_page:
-	return false;
+	return NULL;
 }
 
-static bool allocate_event_notification_slot(struct file *devkfd,
-					struct kfd_process *p,
-					struct signal_page **page,
-					unsigned int *signal_slot_index)
+static bool allocate_event_notification_slot(struct kfd_process *p,
+					     unsigned int *signal_slot_index)
 {
-	bool ret;
-
-	ret = allocate_free_slot(p, page, signal_slot_index);
-	if (!ret) {
-		ret = allocate_signal_page(devkfd, p);
-		if (ret)
-			ret = allocate_free_slot(p, page, signal_slot_index);
+	if (!p->signal_page) {
+		p->signal_page = allocate_signal_page(p);
+		if (!p->signal_page)
+			return false;
 	}
 
-	return ret;
+	return allocate_free_slot(p, signal_slot_index);
 }
 
 /* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct signal_page *page,
+static void release_event_notification_slot(struct kfd_signal_page *page,
 						size_t slot_index)
 {
 	__clear_bit(slot_index, page->used_slot_bitmap);
@@ -187,22 +156,6 @@ static void release_event_notification_slot(struct signal_page *page,
 	 */
 }
 
-static struct signal_page *lookup_signal_page_by_index(struct kfd_process *p,
-						unsigned int page_index)
-{
-	struct signal_page *page;
-
-	/*
-	 * This is safe because we don't delete signal pages until the
-	 * process exits.
-	 */
-	list_for_each_entry(page, &p->signal_event_pages, event_pages)
-		if (page->page_index == page_index)
-			return page;
-
-	return NULL;
-}
-
 /*
  * Assumes that p->event_mutex is held and of course that p is not going
  * away (current or locked).
@@ -218,13 +171,6 @@ static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 	return NULL;
 }
 
-static u32 make_signal_event_id(struct signal_page *page,
-					 unsigned int signal_slot_index)
-{
-	return page->page_index |
-			(signal_slot_index << SIGNAL_EVENT_ID_SLOT_SHIFT);
-}
-
 /*
  * Produce a kfd event id for a nonsignal event.
  * These are arbitrary numbers, so we do a sequential search through
@@ -270,10 +216,9 @@ static u32 make_nonsignal_event_id(struct kfd_process *p)
 }
 
 static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
-						struct signal_page *page,
 						unsigned int signal_slot)
 {
-	return lookup_event_by_id(p, make_signal_event_id(page, signal_slot));
+	return lookup_event_by_id(p, signal_slot);
 }
 
 static int create_signal_event(struct file *devkfd,
@@ -288,8 +233,7 @@ static int create_signal_event(struct file *devkfd,
 		return -ENOMEM;
 	}
 
-	if (!allocate_event_notification_slot(devkfd, p, &ev->signal_page,
-						&ev->signal_slot_index)) {
+	if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
 		pr_warn("Signal event wasn't created because out of kernel memory\n");
 		return -ENOMEM;
 	}
@@ -297,10 +241,9 @@ static int create_signal_event(struct file *devkfd,
 	p->signal_event_count++;
 
 	ev->user_signal_address =
-			&ev->signal_page->user_address[ev->signal_slot_index];
+			&p->signal_page->user_address[ev->signal_slot_index];
 
-	ev->event_id = make_signal_event_id(ev->signal_page,
-						ev->signal_slot_index);
+	ev->event_id = ev->signal_slot_index;
 
 	pr_debug("Signal event number %zu created with id %d, address %p\n",
 			p->signal_event_count, ev->event_id,
@@ -327,7 +270,7 @@ void kfd_event_init_process(struct kfd_process *p)
 {
 	mutex_init(&p->event_mutex);
 	hash_init(p->events);
-	INIT_LIST_HEAD(&p->signal_event_pages);
+	p->signal_page = NULL;
 	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
 	p->signal_event_count = 0;
 }
@@ -341,8 +284,9 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 		waiter->event = NULL;
 	wake_up_all(&ev->wq);
 
-	if (ev->signal_page) {
-		release_event_notification_slot(ev->signal_page,
+	if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
+	     ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
+		release_event_notification_slot(p->signal_page,
 						ev->signal_slot_index);
 		p->signal_event_count--;
 	}
@@ -365,12 +309,11 @@ static void destroy_events(struct kfd_process *p)
  * We assume that the process is being destroyed and there is no need to
  * unmap the pages or keep bookkeeping data in order.
  */
-static void shutdown_signal_pages(struct kfd_process *p)
+static void shutdown_signal_page(struct kfd_process *p)
 {
-	struct signal_page *page, *tmp;
+	struct kfd_signal_page *page = p->signal_page;
 
-	list_for_each_entry_safe(page, tmp, &p->signal_event_pages,
-					event_pages) {
+	if (page) {
 		free_pages((unsigned long)page->kernel_address,
 				get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 		kfree(page);
@@ -380,7 +323,7 @@ static void shutdown_signal_pages(struct kfd_process *p)
 void kfd_event_free_process(struct kfd_process *p)
 {
 	destroy_events(p);
-	shutdown_signal_pages(p);
+	shutdown_signal_page(p);
 }
 
 static bool event_can_be_gpu_signaled(const struct kfd_event *ev)
@@ -420,8 +363,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	case KFD_EVENT_TYPE_DEBUG:
 		ret = create_signal_event(devkfd, p, ev);
 		if (!ret) {
-			*event_page_offset = (ev->signal_page->page_index |
-					KFD_MMAP_EVENTS_MASK);
+			*event_page_offset = KFD_MMAP_EVENTS_MASK;
 			*event_page_offset <<= PAGE_SHIFT;
 			*event_slot_index = ev->signal_slot_index;
 		}
@@ -527,13 +469,17 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
 
 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 {
-	page_slots(ev->signal_page)[ev->signal_slot_index] =
+	page_slots(p->signal_page)[ev->signal_slot_index] =
 						UNSIGNALED_EVENT_SLOT;
 }
 
-static bool is_slot_signaled(struct signal_page *page, unsigned int index)
+static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
 {
-	return page_slots(page)[index] != UNSIGNALED_EVENT_SLOT;
+	if (!p->signal_page)
+		return false;
+	else
+		return page_slots(p->signal_page)[index] !=
+			UNSIGNALED_EVENT_SLOT;
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
@@ -566,22 +512,19 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 		/* Partial ID is a full ID. */
 		ev = lookup_event_by_id(p, partial_id);
 		set_event_from_interrupt(p, ev);
-	} else {
+	} else if (p->signal_page) {
 		/*
 		 * Partial ID is in fact partial. For now we completely
 		 * ignore it, but we could use any bits we did receive to
 		 * search faster.
 		 */
-		struct signal_page *page;
 		unsigned int i;
 
-		list_for_each_entry(page, &p->signal_event_pages, event_pages)
-			for (i = 0; i < SLOTS_PER_PAGE; i++)
-				if (is_slot_signaled(page, i)) {
-					ev = lookup_event_by_page_slot(p,
-								page, i);
-					set_event_from_interrupt(p, ev);
-				}
+		for (i = 0; i < SLOTS_PER_PAGE; i++)
+			if (is_slot_signaled(p, i)) {
+				ev = lookup_event_by_page_slot(p, i);
+				set_event_from_interrupt(p, ev);
+			}
 	}
 
 	mutex_unlock(&p->event_mutex);
@@ -846,9 +789,8 @@ out:
 int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 {
 
-	unsigned int page_index;
 	unsigned long pfn;
-	struct signal_page *page;
+	struct kfd_signal_page *page;
 
 	/* check required size is logical */
 	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
@@ -857,13 +799,10 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	page_index = vma->vm_pgoff;
-
-	page = lookup_signal_page_by_index(p, page_index);
+	page = p->signal_page;
 	if (!page) {
 		/* Probably KFD bug, but mmap is user-accessible. */
-		pr_debug("Signal page could not be found for page_index %u\n",
-				page_index);
+		pr_debug("Signal page could not be found\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index 96f9122805fa..f85fcee4414b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -60,7 +60,6 @@ struct kfd_event {
 	wait_queue_head_t wq; /* List of event waiters. */
 
 	/* Only for signal events. */
-	struct signal_page *signal_page;
 	unsigned int signal_slot_index;
 	uint64_t __user *user_signal_address;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index d3cf53a50ced..c1b3ee22faed 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -540,8 +540,8 @@ struct kfd_process {
 	struct mutex event_mutex;
 	/* All events in process hashed by ID, linked on kfd_event.events. */
 	DECLARE_HASHTABLE(events, 4);
-	/* struct slot_page_header.event_pages */
-	struct list_head signal_event_pages;
+	/* Event page */
+	struct kfd_signal_page *signal_page;
 	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
-- 
cgit v1.2.3


From 482f07775cf559c82cb3d086e3c4fad91582e4cb Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2017 19:35:27 -0400
Subject: drm/amdkfd: Simplify event ID and signal slot management

Signal slots are identical to event IDs.

Replace the used_slot_bitmap and events hash table with an IDR to
allocate and lookup event IDs and signal slots more efficiently.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 230 ++++++++++----------------------
 drivers/gpu/drm/amd/amdkfd/kfd_events.h |  14 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |   6 +-
 3 files changed, 80 insertions(+), 170 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 7cc17107f448..41580e0638f3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -41,24 +41,16 @@ struct kfd_event_waiter {
 	bool activated;		 /* Becomes true when event is signaled */
 };
 
-#define SLOTS_PER_PAGE KFD_SIGNAL_EVENT_LIMIT
-#define SLOT_BITMAP_LONGS BITS_TO_LONGS(SLOTS_PER_PAGE)
-
 /*
- * Over-complicated pooled allocator for event notification slots.
- *
  * Each signal event needs a 64-bit signal slot where the signaler will write
- * a 1 before sending an interrupt.l (This is needed because some interrupts
+ * a 1 before sending an interrupt. (This is needed because some interrupts
  * do not contain enough spare data bits to identify an event.)
- * We get whole pages from vmalloc and map them to the process VA.
- * Individual signal events are then allocated a slot in a page.
+ * We get whole pages and map them to the process VA.
+ * Individual signal events use their event_id as slot index.
  */
-
 struct kfd_signal_page {
 	uint64_t *kernel_address;
 	uint64_t __user *user_address;
-	unsigned int free_slots;
-	unsigned long used_slot_bitmap[SLOT_BITMAP_LONGS];
 };
 
 /*
@@ -73,34 +65,6 @@ static uint64_t *page_slots(struct kfd_signal_page *page)
 	return page->kernel_address;
 }
 
-static bool allocate_free_slot(struct kfd_process *process,
-			       unsigned int *out_slot_index)
-{
-	struct kfd_signal_page *page = process->signal_page;
-	unsigned int slot;
-
-	if (!page || page->free_slots == 0) {
-		pr_debug("No free event signal slots were found for process %p\n",
-			 process);
-
-		return false;
-	}
-
-	slot = find_first_zero_bit(page->used_slot_bitmap, SLOTS_PER_PAGE);
-
-	__set_bit(slot, page->used_slot_bitmap);
-	page->free_slots--;
-
-	page_slots(page)[slot] = UNSIGNALED_EVENT_SLOT;
-
-	*out_slot_index = slot;
-
-	pr_debug("Allocated event signal slot in page %p, slot %d\n",
-		 page, slot);
-
-	return true;
-}
-
 static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 {
 	void *backing_store;
@@ -110,8 +74,6 @@ static struct kfd_signal_page *allocate_signal_page(struct kfd_process *p)
 	if (!page)
 		return NULL;
 
-	page->free_slots = SLOTS_PER_PAGE;
-
 	backing_store = (void *) __get_free_pages(GFP_KERNEL,
 					get_order(KFD_SIGNAL_EVENT_LIMIT * 8));
 	if (!backing_store)
@@ -132,28 +94,26 @@ fail_alloc_signal_store:
 	return NULL;
 }
 
-static bool allocate_event_notification_slot(struct kfd_process *p,
-					     unsigned int *signal_slot_index)
+static int allocate_event_notification_slot(struct kfd_process *p,
+					    struct kfd_event *ev)
 {
+	int id;
+
 	if (!p->signal_page) {
 		p->signal_page = allocate_signal_page(p);
 		if (!p->signal_page)
-			return false;
+			return -ENOMEM;
 	}
 
-	return allocate_free_slot(p, signal_slot_index);
-}
+	id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
+		       GFP_KERNEL);
+	if (id < 0)
+		return id;
 
-/* Assumes that the process's event_mutex is locked. */
-static void release_event_notification_slot(struct kfd_signal_page *page,
-						size_t slot_index)
-{
-	__clear_bit(slot_index, page->used_slot_bitmap);
-	page->free_slots++;
+	ev->event_id = id;
+	page_slots(p->signal_page)[id] = UNSIGNALED_EVENT_SLOT;
 
-	/* We don't free signal pages, they are retained by the process
-	 * and reused until it exits.
-	 */
+	return 0;
 }
 
 /*
@@ -162,89 +122,32 @@ static void release_event_notification_slot(struct kfd_signal_page *page,
  */
 static struct kfd_event *lookup_event_by_id(struct kfd_process *p, uint32_t id)
 {
-	struct kfd_event *ev;
-
-	hash_for_each_possible(p->events, ev, events, id)
-		if (ev->event_id == id)
-			return ev;
-
-	return NULL;
-}
-
-/*
- * Produce a kfd event id for a nonsignal event.
- * These are arbitrary numbers, so we do a sequential search through
- * the hash table for an unused number.
- */
-static u32 make_nonsignal_event_id(struct kfd_process *p)
-{
-	u32 id;
-
-	for (id = p->next_nonsignal_event_id;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
-
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-
-		/*
-		 * What if id == LAST_NONSIGNAL_EVENT_ID - 1?
-		 * Then next_nonsignal_event_id = LAST_NONSIGNAL_EVENT_ID so
-		 * the first loop fails immediately and we proceed with the
-		 * wraparound loop below.
-		 */
-		p->next_nonsignal_event_id = id + 1;
-
-		return id;
-	}
-
-	for (id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-		id < KFD_LAST_NONSIGNAL_EVENT_ID &&
-		lookup_event_by_id(p, id);
-		id++)
-		;
-
-
-	if (id < KFD_LAST_NONSIGNAL_EVENT_ID) {
-		p->next_nonsignal_event_id = id + 1;
-		return id;
-	}
-
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
-	return 0;
-}
-
-static struct kfd_event *lookup_event_by_page_slot(struct kfd_process *p,
-						unsigned int signal_slot)
-{
-	return lookup_event_by_id(p, signal_slot);
+	return idr_find(&p->event_idr, id);
 }
 
 static int create_signal_event(struct file *devkfd,
 				struct kfd_process *p,
 				struct kfd_event *ev)
 {
+	int ret;
+
 	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
 		if (!p->signal_event_limit_reached) {
 			pr_warn("Signal event wasn't created because limit was reached\n");
 			p->signal_event_limit_reached = true;
 		}
-		return -ENOMEM;
+		return -ENOSPC;
 	}
 
-	if (!allocate_event_notification_slot(p, &ev->signal_slot_index)) {
+	ret = allocate_event_notification_slot(p, ev);
+	if (ret) {
 		pr_warn("Signal event wasn't created because out of kernel memory\n");
-		return -ENOMEM;
+		return ret;
 	}
 
 	p->signal_event_count++;
 
-	ev->user_signal_address =
-			&p->signal_page->user_address[ev->signal_slot_index];
-
-	ev->event_id = ev->signal_slot_index;
-
+	ev->user_signal_address = &p->signal_page->user_address[ev->event_id];
 	pr_debug("Signal event number %zu created with id %d, address %p\n",
 			p->signal_event_count, ev->event_id,
 			ev->user_signal_address);
@@ -252,16 +155,20 @@ static int create_signal_event(struct file *devkfd,
 	return 0;
 }
 
-/*
- * No non-signal events are supported yet.
- * We create them as events that never signal.
- * Set event calls from user-mode are failed.
- */
 static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 {
-	ev->event_id = make_nonsignal_event_id(p);
-	if (ev->event_id == 0)
-		return -ENOMEM;
+	/* Cast KFD_LAST_NONSIGNAL_EVENT to uint32_t. This allows an
+	 * intentional integer overflow to -1 without a compiler
+	 * warning. idr_alloc treats a negative value as "maximum
+	 * signed integer".
+	 */
+	int id = idr_alloc(&p->event_idr, ev, KFD_FIRST_NONSIGNAL_EVENT_ID,
+			   (uint32_t)KFD_LAST_NONSIGNAL_EVENT_ID + 1,
+			   GFP_KERNEL);
+
+	if (id < 0)
+		return id;
+	ev->event_id = id;
 
 	return 0;
 }
@@ -269,9 +176,8 @@ static int create_other_event(struct kfd_process *p, struct kfd_event *ev)
 void kfd_event_init_process(struct kfd_process *p)
 {
 	mutex_init(&p->event_mutex);
-	hash_init(p->events);
+	idr_init(&p->event_idr);
 	p->signal_page = NULL;
-	p->next_nonsignal_event_id = KFD_FIRST_NONSIGNAL_EVENT_ID;
 	p->signal_event_count = 0;
 }
 
@@ -284,25 +190,22 @@ static void destroy_event(struct kfd_process *p, struct kfd_event *ev)
 		waiter->event = NULL;
 	wake_up_all(&ev->wq);
 
-	if ((ev->type == KFD_EVENT_TYPE_SIGNAL ||
-	     ev->type == KFD_EVENT_TYPE_DEBUG) && p->signal_page) {
-		release_event_notification_slot(p->signal_page,
-						ev->signal_slot_index);
+	if (ev->type == KFD_EVENT_TYPE_SIGNAL ||
+	    ev->type == KFD_EVENT_TYPE_DEBUG)
 		p->signal_event_count--;
-	}
 
-	hash_del(&ev->events);
+	idr_remove(&p->event_idr, ev->event_id);
 	kfree(ev);
 }
 
 static void destroy_events(struct kfd_process *p)
 {
 	struct kfd_event *ev;
-	struct hlist_node *tmp;
-	unsigned int hash_bkt;
+	uint32_t id;
 
-	hash_for_each_safe(p->events, hash_bkt, tmp, ev, events)
+	idr_for_each_entry(&p->event_idr, ev, id)
 		destroy_event(p, ev);
+	idr_destroy(&p->event_idr);
 }
 
 /*
@@ -365,7 +268,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 		if (!ret) {
 			*event_page_offset = KFD_MMAP_EVENTS_MASK;
 			*event_page_offset <<= PAGE_SHIFT;
-			*event_slot_index = ev->signal_slot_index;
+			*event_slot_index = ev->event_id;
 		}
 		break;
 	default:
@@ -374,8 +277,6 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
 	}
 
 	if (!ret) {
-		hash_add(p->events, &ev->events, ev->event_id);
-
 		*event_id = ev->event_id;
 		*event_trigger_data = ev->event_id;
 	} else {
@@ -469,17 +370,7 @@ int kfd_reset_event(struct kfd_process *p, uint32_t event_id)
 
 static void acknowledge_signal(struct kfd_process *p, struct kfd_event *ev)
 {
-	page_slots(p->signal_page)[ev->signal_slot_index] =
-						UNSIGNALED_EVENT_SLOT;
-}
-
-static bool is_slot_signaled(struct kfd_process *p, unsigned int index)
-{
-	if (!p->signal_page)
-		return false;
-	else
-		return page_slots(p->signal_page)[index] !=
-			UNSIGNALED_EVENT_SLOT;
+	page_slots(p->signal_page)[ev->event_id] = UNSIGNALED_EVENT_SLOT;
 }
 
 static void set_event_from_interrupt(struct kfd_process *p,
@@ -518,13 +409,31 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
 		 * ignore it, but we could use any bits we did receive to
 		 * search faster.
 		 */
-		unsigned int i;
+		uint64_t *slots = page_slots(p->signal_page);
+		uint32_t id;
+
+		if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+			/* With relatively few events, it's faster to
+			 * iterate over the event IDR
+			 */
+			idr_for_each_entry(&p->event_idr, ev, id) {
+				if (id >= KFD_SIGNAL_EVENT_LIMIT)
+					break;
 
-		for (i = 0; i < SLOTS_PER_PAGE; i++)
-			if (is_slot_signaled(p, i)) {
-				ev = lookup_event_by_page_slot(p, i);
-				set_event_from_interrupt(p, ev);
+				if (slots[id] != UNSIGNALED_EVENT_SLOT)
+					set_event_from_interrupt(p, ev);
 			}
+		} else {
+			/* With relatively many events, it's faster to
+			 * iterate over the signal slots and lookup
+			 * only signaled events from the IDR.
+			 */
+			for (id = 0; id < KFD_SIGNAL_EVENT_LIMIT; id++)
+				if (slots[id] != UNSIGNALED_EVENT_SLOT) {
+					ev = lookup_event_by_id(p, id);
+					set_event_from_interrupt(p, ev);
+				}
+		}
 	}
 
 	mutex_unlock(&p->event_mutex);
@@ -836,12 +745,13 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p,
 {
 	struct kfd_hsa_memory_exception_data *ev_data;
 	struct kfd_event *ev;
-	int bkt;
+	uint32_t id;
 	bool send_signal = true;
 
 	ev_data = (struct kfd_hsa_memory_exception_data *) event_data;
 
-	hash_for_each(p->events, bkt, ev, events)
+	id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+	idr_for_each_entry_continue(&p->event_idr, ev, id)
 		if (ev->type == type) {
 			send_signal = false;
 			dev_dbg(kfd_device,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
index f85fcee4414b..abca5bfebbff 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.h
@@ -31,9 +31,13 @@
 #include "kfd_priv.h"
 #include <uapi/linux/kfd_ioctl.h>
 
-#define KFD_EVENT_ID_NONSIGNAL_MASK 0x80000000U
-#define KFD_FIRST_NONSIGNAL_EVENT_ID KFD_EVENT_ID_NONSIGNAL_MASK
-#define KFD_LAST_NONSIGNAL_EVENT_ID UINT_MAX
+/*
+ * IDR supports non-negative integer IDs. Small IDs are used for
+ * signal events to match their signal slot. Use the upper half of the
+ * ID space for non-signal events.
+ */
+#define KFD_FIRST_NONSIGNAL_EVENT_ID ((INT_MAX >> 1) + 1)
+#define KFD_LAST_NONSIGNAL_EVENT_ID INT_MAX
 
 /*
  * Written into kfd_signal_slot_t to indicate that the event is not signaled.
@@ -47,9 +51,6 @@ struct kfd_event_waiter;
 struct signal_page;
 
 struct kfd_event {
-	/* All events in process, rooted at kfd_process.events. */
-	struct hlist_node events;
-
 	u32 event_id;
 
 	bool signaled;
@@ -60,7 +61,6 @@ struct kfd_event {
 	wait_queue_head_t wq; /* List of event waiters. */
 
 	/* Only for signal events. */
-	unsigned int signal_slot_index;
 	uint64_t __user *user_signal_address;
 
 	/* type specific data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index c1b3ee22faed..ebae8e1891d2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -31,6 +31,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/kfd_ioctl.h>
+#include <linux/idr.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
@@ -538,11 +539,10 @@ struct kfd_process {
 
 	/* Event-related data */
 	struct mutex event_mutex;
-	/* All events in process hashed by ID, linked on kfd_event.events. */
-	DECLARE_HASHTABLE(events, 4);
+	/* Event ID allocator and lookup */
+	struct idr event_idr;
 	/* Event page */
 	struct kfd_signal_page *signal_page;
-	u32 next_nonsignal_event_id;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
 };
-- 
cgit v1.2.3


From b9a5d0a5db802277c93667c6af93e699bb8c773e Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 27 Oct 2017 19:35:29 -0400
Subject: drm/amdkfd: Make event limit dependent on user mode mapping size

This allows increasing the KFD_SIGNAL_EVENT_LIMIT in kfd_ioctl.h
without breaking processes built with older kfd_ioctl.h versions.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 25 +++++++++++++++++++------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  1 +
 2 files changed, 20 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 26e8045aa760..cb92d4b72400 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -97,9 +97,17 @@ static int allocate_event_notification_slot(struct kfd_process *p,
 		p->signal_page = allocate_signal_page(p);
 		if (!p->signal_page)
 			return -ENOMEM;
+		/* Oldest user mode expects 256 event slots */
+		p->signal_mapped_size = 256*8;
 	}
 
-	id = idr_alloc(&p->event_idr, ev, 0, KFD_SIGNAL_EVENT_LIMIT,
+	/*
+	 * Compatibility with old user mode: Only use signal slots
+	 * user mode has mapped, may be less than
+	 * KFD_SIGNAL_EVENT_LIMIT. This also allows future increase
+	 * of the event limit without breaking user mode.
+	 */
+	id = idr_alloc(&p->event_idr, ev, 0, p->signal_mapped_size / 8,
 		       GFP_KERNEL);
 	if (id < 0)
 		return id;
@@ -173,7 +181,8 @@ static int create_signal_event(struct file *devkfd,
 {
 	int ret;
 
-	if (p->signal_event_count == KFD_SIGNAL_EVENT_LIMIT) {
+	if (p->signal_mapped_size &&
+	    p->signal_event_count == p->signal_mapped_size / 8) {
 		if (!p->signal_event_limit_reached) {
 			pr_warn("Signal event wasn't created because limit was reached\n");
 			p->signal_event_limit_reached = true;
@@ -744,12 +753,12 @@ out:
 
 int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 {
-
 	unsigned long pfn;
 	struct kfd_signal_page *page;
+	int ret;
 
-	/* check required size is logical */
-	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) !=
+	/* check required size doesn't exceed the allocated size */
+	if (get_order(KFD_SIGNAL_EVENT_LIMIT * 8) <
 			get_order(vma->vm_end - vma->vm_start)) {
 		pr_err("Event page mmap requested illegal size\n");
 		return -EINVAL;
@@ -779,8 +788,12 @@ int kfd_event_mmap(struct kfd_process *p, struct vm_area_struct *vma)
 	page->user_address = (uint64_t __user *)vma->vm_start;
 
 	/* mapping the page to user process */
-	return remap_pfn_range(vma, vma->vm_start, pfn,
+	ret = remap_pfn_range(vma, vma->vm_start, pfn,
 			vma->vm_end - vma->vm_start, vma->vm_page_prot);
+	if (!ret)
+		p->signal_mapped_size = vma->vm_end - vma->vm_start;
+
+	return ret;
 }
 
 /*
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ebae8e1891d2..ba26da81d8ec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -543,6 +543,7 @@ struct kfd_process {
 	struct idr event_idr;
 	/* Event page */
 	struct kfd_signal_page *signal_page;
+	size_t signal_mapped_size;
 	size_t signal_event_count;
 	bool signal_event_limit_reached;
 };
-- 
cgit v1.2.3


From 04ad47bd14a0f7a268124c4fc468e964e457a702 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andres.rodriguez@amd.com>
Date: Fri, 27 Oct 2017 19:35:31 -0400
Subject: drm/amdkfd: use standard kernel kfifo for IH

Replace our implementation of a lockless ring buffer with the standard
linux kernel kfifo.

We shouldn't maintain our own version of a standard data structure.

Signed-off-by: Andres Rodriguez <andres.rodriguez@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 78 ++++++++++--------------------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      |  6 +--
 2 files changed, 27 insertions(+), 57 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 70b3a99cffc2..ffbb91aa9bbf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -42,25 +42,24 @@
 
 #include <linux/slab.h>
 #include <linux/device.h>
+#include <linux/kfifo.h>
 #include "kfd_priv.h"
 
-#define KFD_INTERRUPT_RING_SIZE 1024
+#define KFD_IH_NUM_ENTRIES 1024
 
 static void interrupt_wq(struct work_struct *);
 
 int kfd_interrupt_init(struct kfd_dev *kfd)
 {
-	void *interrupt_ring = kmalloc_array(KFD_INTERRUPT_RING_SIZE,
-					kfd->device_info->ih_ring_entry_size,
-					GFP_KERNEL);
-	if (!interrupt_ring)
-		return -ENOMEM;
-
-	kfd->interrupt_ring = interrupt_ring;
-	kfd->interrupt_ring_size =
-		KFD_INTERRUPT_RING_SIZE * kfd->device_info->ih_ring_entry_size;
-	atomic_set(&kfd->interrupt_ring_wptr, 0);
-	atomic_set(&kfd->interrupt_ring_rptr, 0);
+	int r;
+
+	r = kfifo_alloc(&kfd->ih_fifo,
+		KFD_IH_NUM_ENTRIES * kfd->device_info->ih_ring_entry_size,
+		GFP_KERNEL);
+	if (r) {
+		dev_err(kfd_chardev(), "Failed to allocate IH fifo\n");
+		return r;
+	}
 
 	spin_lock_init(&kfd->interrupt_lock);
 
@@ -98,68 +97,41 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
 	 */
 	flush_scheduled_work();
 
-	kfree(kfd->interrupt_ring);
+	kfifo_free(&kfd->ih_fifo);
 }
 
 /*
- * This assumes that it can't be called concurrently with itself
- * but only with dequeue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
  */
 bool enqueue_ih_ring_entry(struct kfd_dev *kfd,	const void *ih_ring_entry)
 {
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
+	int count;
 
-	if ((rptr - wptr) % kfd->interrupt_ring_size ==
-					kfd->device_info->ih_ring_entry_size) {
-		/* This is very bad, the system is likely to hang. */
+	count = kfifo_in(&kfd->ih_fifo, ih_ring_entry,
+				kfd->device_info->ih_ring_entry_size);
+	if (count != kfd->device_info->ih_ring_entry_size) {
 		dev_err_ratelimited(kfd_chardev(),
-			"Interrupt ring overflow, dropping interrupt.\n");
+			"Interrupt ring overflow, dropping interrupt %d\n",
+			count);
 		return false;
 	}
 
-	memcpy(kfd->interrupt_ring + wptr, ih_ring_entry,
-			kfd->device_info->ih_ring_entry_size);
-
-	wptr = (wptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
-	smp_wmb(); /* Ensure memcpy'd data is visible before wptr update. */
-	atomic_set(&kfd->interrupt_ring_wptr, wptr);
-
 	return true;
 }
 
 /*
- * This assumes that it can't be called concurrently with itself
- * but only with enqueue_ih_ring_entry.
+ * Assumption: single reader/writer. This function is not re-entrant
  */
 static bool dequeue_ih_ring_entry(struct kfd_dev *kfd, void *ih_ring_entry)
 {
-	/*
-	 * Assume that wait queues have an implicit barrier, i.e. anything that
-	 * happened in the ISR before it queued work is visible.
-	 */
-
-	unsigned int wptr = atomic_read(&kfd->interrupt_ring_wptr);
-	unsigned int rptr = atomic_read(&kfd->interrupt_ring_rptr);
+	int count;
 
-	if (rptr == wptr)
-		return false;
-
-	memcpy(ih_ring_entry, kfd->interrupt_ring + rptr,
-			kfd->device_info->ih_ring_entry_size);
-
-	rptr = (rptr + kfd->device_info->ih_ring_entry_size) %
-			kfd->interrupt_ring_size;
+	count = kfifo_out(&kfd->ih_fifo, ih_ring_entry,
+				kfd->device_info->ih_ring_entry_size);
 
-	/*
-	 * Ensure the rptr write update is not visible until
-	 * memcpy has finished reading.
-	 */
-	smp_mb();
-	atomic_set(&kfd->interrupt_ring_rptr, rptr);
+	WARN_ON(count && count != kfd->device_info->ih_ring_entry_size);
 
-	return true;
+	return count == kfd->device_info->ih_ring_entry_size;
 }
 
 static void interrupt_wq(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index ba26da81d8ec..0aec5ca8a964 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -32,6 +32,7 @@
 #include <linux/spinlock.h>
 #include <linux/kfd_ioctl.h>
 #include <linux/idr.h>
+#include <linux/kfifo.h>
 #include <kgd_kfd_interface.h>
 
 #include "amd_shared.h"
@@ -182,10 +183,7 @@ struct kfd_dev {
 	unsigned int gtt_sa_num_of_chunks;
 
 	/* Interrupts */
-	void *interrupt_ring;
-	size_t interrupt_ring_size;
-	atomic_t interrupt_ring_rptr;
-	atomic_t interrupt_ring_wptr;
+	struct kfifo ih_fifo;
 	struct work_struct interrupt_work;
 	spinlock_t interrupt_lock;
 
-- 
cgit v1.2.3


From 48e876a20e79566f1736413d4f42dc66f3ab2f16 Mon Sep 17 00:00:00 2001
From: Andres Rodriguez <andres.rodriguez@amd.com>
Date: Fri, 27 Oct 2017 19:35:34 -0400
Subject: drm/amdkfd: use a high priority workqueue for IH work

In systems under heavy load the IH work may experience significant
scheduling delays.

Under load + system workqueue:
    Max Latency: 7.023695 ms
    Avg Latency: 0.263994 ms

Under load + high priority workqueue:
    Max Latency: 1.162568 ms
    Avg Latency: 0.163213 ms

Further work is required to measure the impact of per-cpu settings on IH
performance.

Signed-off-by: Andres Rodriguez <andres.rodriguez@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c    | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 3 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h      | 1 +
 3 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 46049f005b02..621a3b53a038 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -403,7 +403,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 	if (kfd->interrupts_active
 	    && interrupt_is_wanted(kfd, ih_ring_entry)
 	    && enqueue_ih_ring_entry(kfd, ih_ring_entry))
-		schedule_work(&kfd->interrupt_work);
+		queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
 	spin_unlock(&kfd->interrupt_lock);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 9c08d4670b7f..035c351f47c5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -61,6 +61,7 @@ int kfd_interrupt_init(struct kfd_dev *kfd)
 		return r;
 	}
 
+	kfd->ih_wq = alloc_workqueue("KFD IH", WQ_HIGHPRI, 1);
 	spin_lock_init(&kfd->interrupt_lock);
 
 	INIT_WORK(&kfd->interrupt_work, interrupt_wq);
@@ -95,7 +96,7 @@ void kfd_interrupt_exit(struct kfd_dev *kfd)
 	 * work-queue items that will access interrupt_ring. New work items
 	 * can't be created because we stopped interrupt handling above.
 	 */
-	flush_work(&kfd->interrupt_work);
+	flush_workqueue(kfd->ih_wq);
 
 	kfifo_free(&kfd->ih_fifo);
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 0aec5ca8a964..6a91a60c64cf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -184,6 +184,7 @@ struct kfd_dev {
 
 	/* Interrupts */
 	struct kfifo ih_fifo;
+	struct workqueue_struct *ih_wq;
 	struct work_struct interrupt_work;
 	spinlock_t interrupt_lock;
 
-- 
cgit v1.2.3


From ab40cba30333cc264cf2731626565c3e1f29e4d1 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Wed, 1 Nov 2017 19:21:26 -0400
Subject: drm/amdkfd: Clean up the data structure in kfd_process

A list of per-process queues is maintained in the
kfd_process_queue_manager, so the queues array in kfd_process is
redundant and in fact unused.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ------
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 18 ------------------
 2 files changed, 24 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 6a91a60c64cf..78b5d61780cb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -527,12 +527,6 @@ struct kfd_process {
 
 	struct process_queue_manager pqm;
 
-	/* The process's queues. */
-	size_t queue_array_size;
-
-	/* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */
-	struct kfd_queue **queues;
-
 	/*Is the user space process 32 bit?*/
 	bool is_32bit_user_mode;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 695fa2ae8e5b..946f4d68947f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -34,13 +34,6 @@ struct mm_struct;
 #include "kfd_priv.h"
 #include "kfd_dbgmgr.h"
 
-/*
- * Initial size for the array of queues.
- * The allocated size is doubled each time
- * it is exceeded up to MAX_PROCESS_QUEUES.
- */
-#define INITIAL_QUEUE_ARRAY_SIZE 16
-
 /*
  * List of struct kfd_process (field kfd_process).
  * Unique/indexed by mm_struct*
@@ -187,8 +180,6 @@ static void kfd_process_wq_release(struct work_struct *work)
 
 	mutex_destroy(&p->mutex);
 
-	kfree(p->queues);
-
 	kfree(p);
 
 	kfree(work);
@@ -270,11 +261,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 	if (!process)
 		goto err_alloc_process;
 
-	process->queues = kmalloc_array(INITIAL_QUEUE_ARRAY_SIZE,
-					sizeof(process->queues[0]), GFP_KERNEL);
-	if (!process->queues)
-		goto err_alloc_queues;
-
 	process->pasid = kfd_pasid_alloc();
 	if (process->pasid == 0)
 		goto err_alloc_pasid;
@@ -297,8 +283,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
 
 	process->lead_thread = thread->group_leader;
 
-	process->queue_array_size = INITIAL_QUEUE_ARRAY_SIZE;
-
 	INIT_LIST_HEAD(&process->per_device_data);
 
 	kfd_event_init_process(process);
@@ -327,8 +311,6 @@ err_mmu_notifier:
 err_alloc_doorbells:
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
-	kfree(process->queues);
-err_alloc_queues:
 	kfree(process);
 err_alloc_process:
 	return ERR_PTR(err);
-- 
cgit v1.2.3


From 894a8293aaa702a5aef758bc069162a671ca7a07 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Wed, 1 Nov 2017 19:21:33 -0400
Subject: drm/amdkfd: Minor cleanups

These were missed previously when rebasing changes for upstreaming.

v2: Remove redundant sched_policy conditions

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++++------
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h                 |  2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c              |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdkfd/kfd_priv.h')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index e2fc4c5d42cd..e202921c150e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -389,12 +389,11 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS) {
 		retval = unmap_queues_cpsch(dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-		if (retval != 0) {
+		if (retval) {
 			pr_err("unmap queue failed\n");
 			goto out_unlock;
 		}
-	} else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-		   prev_active &&
+	} else if (prev_active &&
 		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
 		retval = mqd->destroy_mqd(mqd, q->mqd,
@@ -421,8 +420,7 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 
 	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
 		retval = map_queues_cpsch(dqm);
-	else if (sched_policy == KFD_SCHED_POLICY_NO_HWS &&
-		 q->properties.is_active &&
+	else if (q->properties.is_active &&
 		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 		  q->properties.type == KFD_QUEUE_TYPE_SDMA))
 		retval = mqd->load_mqd(mqd, q->mqd, q->pipe, q->queue,
@@ -832,7 +830,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
 
 	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
 		retval = allocate_sdma_queue(dqm, &q->sdma_id);
-		if (retval != 0)
+		if (retval)
 			goto out;
 		q->properties.sdma_queue_id =
 			q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 78b5d61780cb..9e4134c5b481 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -507,6 +507,8 @@ struct kfd_process {
 	 * In any process, the thread that started main() is the lead
 	 * thread and outlives the rest.
 	 * It is here because amd_iommu_bind_pasid wants a task_struct.
+	 * It can also be used for safely getting a reference to the
+	 * mm_struct of the process.
 	 */
 	struct task_struct *lead_thread;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index db08f8f53d4b..1f5ccd28bd41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -416,7 +416,7 @@ int kfd_bind_processes_to_device(struct kfd_dev *dev)
 		err = amd_iommu_bind_pasid(dev->pdev, p->pasid,
 				p->lead_thread);
 		if (err < 0) {
-			pr_err("unexpected pasid %d binding failure\n",
+			pr_err("Unexpected pasid %d binding failure\n",
 					p->pasid);
 			mutex_unlock(&p->mutex);
 			break;
-- 
cgit v1.2.3