summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-04-06 19:03:31 +0300
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 19:35:58 +0300
commit44b87bb0836c65d1b9d21b01503eb6e9b9297771 (patch)
treeb6cc3c68fa63ea1bff4ac649f2705b31514eec8f /drivers/gpu/drm/amd/amdkfd/kfd_debug.c
parent69a8c3ae2dea84a6d571e4c1aad306f630f3ccfd (diff)
downloadlinux-44b87bb0836c65d1b9d21b01503eb6e9b9297771.tar.xz
drm/amdkfd: add raise exception event function
Exception events can be generated from interrupts or queue activitity. The raise event function will save exception status of a queue, device or process then notify the debugger of the status change by writing to a debugger polled file descriptor that the debugger provides during debug attach. For memory violation exceptions, extra exception data will be saved. The debugger will be able to query the saved exception states by query operation that will be provided by follow up patches. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c104
1 files changed, 104 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 5e2ee2d1acc4..dccb27fc764b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -24,6 +24,107 @@
#include "kfd_device_queue_manager.h"
#include <linux/file.h>
+void debug_event_write_work_handler(struct work_struct *work)
+{
+ struct kfd_process *process;
+
+ static const char write_data = '.';
+ loff_t pos = 0;
+
+ process = container_of(work,
+ struct kfd_process,
+ debug_event_workarea);
+
+ kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
+}
+
+/* update process/device/queue exception status, write to descriptor
+ * only if exception_status is enabled.
+ */
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+ struct kfd_process *process, struct kfd_node *dev,
+ unsigned int source_id, bool use_worker,
+ void *exception_data, size_t exception_data_size)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+ static const char write_data = '.';
+ loff_t pos = 0;
+ bool is_subscribed = true;
+
+ if (!(process && process->debug_trap_enabled))
+ return false;
+
+ mutex_lock(&process->event_mutex);
+
+ if (event_mask & KFD_EC_MASK_DEVICE) {
+ for (i = 0; i < process->n_pdds; i++) {
+ struct kfd_process_device *pdd = process->pdds[i];
+
+ if (pdd->dev != dev)
+ continue;
+
+ pdd->exception_status |= event_mask & KFD_EC_MASK_DEVICE;
+
+ if (event_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ if (!pdd->vm_fault_exc_data) {
+ pdd->vm_fault_exc_data = kmemdup(
+ exception_data,
+ exception_data_size,
+ GFP_KERNEL);
+ if (!pdd->vm_fault_exc_data)
+ pr_debug("Failed to allocate exception data memory");
+ } else {
+ pr_debug("Debugger exception data not saved\n");
+ print_hex_dump_bytes("exception data: ",
+ DUMP_PREFIX_OFFSET,
+ exception_data,
+ exception_data_size);
+ }
+ }
+ break;
+ }
+ } else if (event_mask & KFD_EC_MASK_PROCESS) {
+ process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
+ } else {
+ pqm = &process->pqm;
+ list_for_each_entry(pqn, &pqm->queues,
+ process_queue_list) {
+ int target_id;
+
+ if (!pqn->q)
+ continue;
+
+ target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
+ pqn->q->properties.queue_id :
+ pqn->q->doorbell_id;
+
+ if (pqn->q->device != dev || target_id != source_id)
+ continue;
+
+ pqn->q->properties.exception_status |= event_mask;
+ break;
+ }
+ }
+
+ if (process->exception_enable_mask & event_mask) {
+ if (use_worker)
+ schedule_work(&process->debug_event_workarea);
+ else
+ kernel_write(process->dbg_ev_file,
+ &write_data,
+ 1,
+ &pos);
+ } else {
+ is_subscribed = false;
+ }
+
+ mutex_unlock(&process->event_mutex);
+
+ return is_subscribed;
+}
+
static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
{
struct mqd_update_info minfo = {0};
@@ -99,6 +200,9 @@ static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int
{
int i;
+ if (!unwind)
+ cancel_work_sync(&target->debug_event_workarea);
+
for (i = 0; i < target->n_pdds; i++) {
struct kfd_process_device *pdd = target->pdds[i];