diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_devcoredump.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_devcoredump.c | 156 |
1 files changed, 95 insertions, 61 deletions
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c index 81dc7795c065..203e3038cc81 100644 --- a/drivers/gpu/drm/xe/xe_devcoredump.c +++ b/drivers/gpu/drm/xe/xe_devcoredump.c @@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q) return &q->gt->uc.guc; } -static ssize_t __xe_devcoredump_read(char *buffer, size_t count, +static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count, + ssize_t start, struct xe_devcoredump *coredump) { struct xe_device *xe; @@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, ss = &coredump->snapshot; iter.data = buffer; - iter.start = 0; + iter.start = start; iter.remain = count; p = drm_coredump_printer(&iter); @@ -119,11 +120,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count, drm_puts(&p, "\n**** GuC CT ****\n"); xe_guc_ct_snapshot_print(ss->guc.ct, &p); - /* - * Don't add a new section header here because the mesa debug decoder - * tool expects the context information to be in the 'GuC CT' section. - */ - /* drm_puts(&p, "\n**** Contexts ****\n"); */ + drm_puts(&p, "\n**** Contexts ****\n"); xe_guc_exec_queue_snapshot_print(ss->ge, &p); drm_puts(&p, "\n**** Job ****\n"); @@ -172,12 +169,34 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss) ss->vm = NULL; } +#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G) + +/** + * xe_devcoredump_read() - Read data from the Xe device coredump snapshot + * @buffer: Destination buffer to copy the coredump data into + * @offset: Offset in the coredump data to start reading from + * @count: Number of bytes to read + * @data: Pointer to the xe_devcoredump structure + * @datalen: Length of the data (unused) + * + * Reads a chunk of the coredump snapshot data into the provided buffer. + * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX), + * it is read directly from a pre-written buffer. For larger devcoredumps, + * the pre-written buffer must be periodically repopulated from the snapshot + * state due to kmalloc size limitations. + * + * Return: Number of bytes copied on success, or a negative error code on failure. + */ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) { struct xe_devcoredump *coredump = data; struct xe_devcoredump_snapshot *ss; - ssize_t byte_copied; + ssize_t byte_copied = 0; + u32 chunk_offset; + ssize_t new_chunk_position; + bool pm_needed = false; + int ret = 0; if (!coredump) return -ENODEV; @@ -187,25 +206,45 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset, /* Ensure delayed work is captured before continuing */ flush_work(&ss->work); + pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX; + if (pm_needed) + xe_pm_runtime_get(gt_to_xe(ss->gt)); + mutex_lock(&coredump->lock); if (!ss->read.buffer) { - mutex_unlock(&coredump->lock); - return -ENODEV; + ret = -ENODEV; + goto unlock; } - if (offset >= ss->read.size) { - mutex_unlock(&coredump->lock); - return 0; + if (offset >= ss->read.size) + goto unlock; + + new_chunk_position = div_u64_rem(offset, + XE_DEVCOREDUMP_CHUNK_MAX, + &chunk_offset); + + if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX || + offset < ss->read.chunk_position) { + ss->read.chunk_position = new_chunk_position * + XE_DEVCOREDUMP_CHUNK_MAX; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + ss->read.chunk_position, coredump); } byte_copied = count < ss->read.size - offset ? count : ss->read.size - offset; - memcpy(buffer, ss->read.buffer + offset, byte_copied); + memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied); +unlock: mutex_unlock(&coredump->lock); - return byte_copied; + if (pm_needed) + xe_pm_runtime_put(gt_to_xe(ss->gt)); + + return byte_copied ? byte_copied : ret; } static void xe_devcoredump_free(void *data) @@ -241,7 +280,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) /* * NB: Despite passing a GFP_ flags parameter here, more allocations are done - * internally using GFP_KERNEL expliictly. Hence this call must be in the worker + * internally using GFP_KERNEL explicitly. Hence this call must be in the worker * thread and not in the initial capture call. */ dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL, @@ -258,17 +297,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work) xe_guc_exec_queue_snapshot_capture_delayed(ss->ge); xe_force_wake_put(gt_to_fw(ss->gt), fw_ref); - xe_pm_runtime_put(xe); + ss->read.chunk_position = 0; /* Calculate devcoredump size */ - ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump); - - ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); - if (!ss->read.buffer) - return; + ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump); + + if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) { + ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX, + GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, + XE_DEVCOREDUMP_CHUNK_MAX, + 0, coredump); + } else { + ss->read.buffer = kvmalloc(ss->read.size, GFP_USER); + if (!ss->read.buffer) + goto put_pm; + + __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0, + coredump); + xe_devcoredump_snapshot_free(ss); + } - __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump); - xe_devcoredump_snapshot_free(ss); +put_pm: + xe_pm_runtime_put(xe); } static void devcoredump_snapshot(struct xe_devcoredump *coredump, @@ -277,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, { struct xe_devcoredump_snapshot *ss = &coredump->snapshot; struct xe_guc *guc = exec_queue_to_guc(q); - u32 adj_logical_mask = q->logical_mask; - u32 width_mask = (0x1 << q->width) - 1; const char *process_name = "no process"; - unsigned int fw_ref; bool cookie; - int i; ss->snapshot_time = ktime_get_real(); ss->boot_time = ktime_get_boottime(); @@ -299,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump, INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work); cookie = dma_fence_begin_signalling(); - for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) { - if (adj_logical_mask & BIT(i)) { - adj_logical_mask |= width_mask << i; - i += q->width; - } else { - ++i; - } - } /* keep going if fw fails as we still want to save the memory and SW data */ fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL); @@ -395,51 +437,43 @@ int xe_devcoredump_init(struct xe_device *xe) /** * xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85 * - * The output is split to multiple lines because some print targets, e.g. dmesg - * cannot handle arbitrarily long lines. Note also that printing to dmesg in - * piece-meal fashion is not possible, each separate call to drm_puts() has a - * line-feed automatically added! Therefore, the entire output line must be - * constructed in a local buffer first, then printed in one atomic output call. + * The output is split into multiple calls to drm_puts() because some print + * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may + * add newlines, as is the case with dmesg: each drm_puts() call creates a + * separate line. * * There is also a scheduler yield call to prevent the 'task has been stuck for * 120s' kernel hang check feature from firing when printing to a slow target * such as dmesg over a serial port. * - * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down. - * * @p: the printer object to output to * @prefix: optional prefix to add to output string + * @suffix: optional suffix to add at the end. 0 disables it and is + * not added to the output, which is useful when using multiple calls + * to dump data to @p * @blob: the Binary Large OBject to dump out * @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32) * @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32) */ -void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, +void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix, const void *blob, size_t offset, size_t size) { const u32 *blob32 = (const u32 *)blob; char buff[ASCII85_BUFSZ], *line_buff; size_t line_pos = 0; - /* - * Splitting blobs across multiple lines is not compatible with the mesa - * debug decoder tool. Note that even dropping the explicit '\n' below - * doesn't help because the GuC log is so big some underlying implementation - * still splits the lines at 512K characters. So just bail completely for - * the moment. - */ - return; - #define DMESG_MAX_LINE_LEN 800 -#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */ + /* Always leave space for the suffix char and the \0 */ +#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */ if (size & 3) drm_printf(p, "Size not word aligned: %zu", size); if (offset & 3) - drm_printf(p, "Offset not word aligned: %zu", size); + drm_printf(p, "Offset not word aligned: %zu", offset); - line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL); - if (IS_ERR_OR_NULL(line_buff)) { - drm_printf(p, "Failed to allocate line buffer: %pe", line_buff); + line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC); + if (!line_buff) { + drm_printf(p, "Failed to allocate line buffer\n"); return; } @@ -462,7 +496,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, line_pos += strlen(line_buff + line_pos); if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; drm_puts(p, line_buff); @@ -474,10 +507,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, } } + if (suffix) + line_buff[line_pos++] = suffix; + if (line_pos) { - line_buff[line_pos++] = '\n'; line_buff[line_pos++] = 0; - drm_puts(p, line_buff); } |