summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_devcoredump.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_devcoredump.c')
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c156
1 files changed, 95 insertions, 61 deletions
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 81dc7795c065..203e3038cc81 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -80,7 +80,8 @@ static struct xe_guc *exec_queue_to_guc(struct xe_exec_queue *q)
return &q->gt->uc.guc;
}
-static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
+static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
+ ssize_t start,
struct xe_devcoredump *coredump)
{
struct xe_device *xe;
@@ -94,7 +95,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
ss = &coredump->snapshot;
iter.data = buffer;
- iter.start = 0;
+ iter.start = start;
iter.remain = count;
p = drm_coredump_printer(&iter);
@@ -119,11 +120,7 @@ static ssize_t __xe_devcoredump_read(char *buffer, size_t count,
drm_puts(&p, "\n**** GuC CT ****\n");
xe_guc_ct_snapshot_print(ss->guc.ct, &p);
- /*
- * Don't add a new section header here because the mesa debug decoder
- * tool expects the context information to be in the 'GuC CT' section.
- */
- /* drm_puts(&p, "\n**** Contexts ****\n"); */
+ drm_puts(&p, "\n**** Contexts ****\n");
xe_guc_exec_queue_snapshot_print(ss->ge, &p);
drm_puts(&p, "\n**** Job ****\n");
@@ -172,12 +169,34 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
ss->vm = NULL;
}
+#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
+
+/**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
+ u32 chunk_offset;
+ ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;
if (!coredump)
return -ENODEV;
@@ -187,25 +206,45 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
+ xe_pm_runtime_get(gt_to_xe(ss->gt));
+
mutex_lock(&coredump->lock);
if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
}
- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
+ if (offset >= ss->read.size)
+ goto unlock;
+
+ new_chunk_position = div_u64_rem(offset,
+ XE_DEVCOREDUMP_CHUNK_MAX,
+ &chunk_offset);
+
+ if (offset >= ss->read.chunk_position + XE_DEVCOREDUMP_CHUNK_MAX ||
+ offset < ss->read.chunk_position) {
+ ss->read.chunk_position = new_chunk_position *
+ XE_DEVCOREDUMP_CHUNK_MAX;
+
+ __xe_devcoredump_read(ss->read.buffer,
+ XE_DEVCOREDUMP_CHUNK_MAX,
+ ss->read.chunk_position, coredump);
}
byte_copied = count < ss->read.size - offset ? count :
ss->read.size - offset;
- memcpy(buffer, ss->read.buffer + offset, byte_copied);
+ memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);
+unlock:
mutex_unlock(&coredump->lock);
- return byte_copied;
+ if (pm_needed)
+ xe_pm_runtime_put(gt_to_xe(ss->gt));
+
+ return byte_copied ? byte_copied : ret;
}
static void xe_devcoredump_free(void *data)
@@ -241,7 +280,7 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
/*
* NB: Despite passing a GFP_ flags parameter here, more allocations are done
- * internally using GFP_KERNEL expliictly. Hence this call must be in the worker
+ * internally using GFP_KERNEL explicitly. Hence this call must be in the worker
* thread and not in the initial capture call.
*/
dev_coredumpm_timeout(gt_to_xe(ss->gt)->drm.dev, THIS_MODULE, coredump, 0, GFP_KERNEL,
@@ -258,17 +297,32 @@ static void xe_devcoredump_deferred_snap_work(struct work_struct *work)
xe_guc_exec_queue_snapshot_capture_delayed(ss->ge);
xe_force_wake_put(gt_to_fw(ss->gt), fw_ref);
- xe_pm_runtime_put(xe);
+ ss->read.chunk_position = 0;
/* Calculate devcoredump size */
- ss->read.size = __xe_devcoredump_read(NULL, INT_MAX, coredump);
-
- ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
- if (!ss->read.buffer)
- return;
+ ss->read.size = __xe_devcoredump_read(NULL, LONG_MAX, 0, coredump);
+
+ if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX) {
+ ss->read.buffer = kvmalloc(XE_DEVCOREDUMP_CHUNK_MAX,
+ GFP_USER);
+ if (!ss->read.buffer)
+ goto put_pm;
+
+ __xe_devcoredump_read(ss->read.buffer,
+ XE_DEVCOREDUMP_CHUNK_MAX,
+ 0, coredump);
+ } else {
+ ss->read.buffer = kvmalloc(ss->read.size, GFP_USER);
+ if (!ss->read.buffer)
+ goto put_pm;
+
+ __xe_devcoredump_read(ss->read.buffer, ss->read.size, 0,
+ coredump);
+ xe_devcoredump_snapshot_free(ss);
+ }
- __xe_devcoredump_read(ss->read.buffer, ss->read.size, coredump);
- xe_devcoredump_snapshot_free(ss);
+put_pm:
+ xe_pm_runtime_put(xe);
}
static void devcoredump_snapshot(struct xe_devcoredump *coredump,
@@ -277,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_guc *guc = exec_queue_to_guc(q);
- u32 adj_logical_mask = q->logical_mask;
- u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
-
unsigned int fw_ref;
bool cookie;
- int i;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -299,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
- for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
- if (adj_logical_mask & BIT(i)) {
- adj_logical_mask |= width_mask << i;
- i += q->width;
- } else {
- ++i;
- }
- }
/* keep going if fw fails as we still want to save the memory and SW data */
fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
@@ -395,51 +437,43 @@ int xe_devcoredump_init(struct xe_device *xe)
/**
* xe_print_blob_ascii85 - print a BLOB to some useful location in ASCII85
*
- * The output is split to multiple lines because some print targets, e.g. dmesg
- * cannot handle arbitrarily long lines. Note also that printing to dmesg in
- * piece-meal fashion is not possible, each separate call to drm_puts() has a
- * line-feed automatically added! Therefore, the entire output line must be
- * constructed in a local buffer first, then printed in one atomic output call.
+ * The output is split into multiple calls to drm_puts() because some print
+ * targets, e.g. dmesg, cannot handle arbitrarily long lines. These targets may
+ * add newlines, as is the case with dmesg: each drm_puts() call creates a
+ * separate line.
*
* There is also a scheduler yield call to prevent the 'task has been stuck for
* 120s' kernel hang check feature from firing when printing to a slow target
* such as dmesg over a serial port.
*
- * TODO: Add compression prior to the ASCII85 encoding to shrink huge buffers down.
- *
* @p: the printer object to output to
* @prefix: optional prefix to add to output string
+ * @suffix: optional suffix to add at the end. 0 disables it and is
+ * not added to the output, which is useful when using multiple calls
+ * to dump data to @p
* @blob: the Binary Large OBject to dump out
* @offset: offset in bytes to skip from the front of the BLOB, must be a multiple of sizeof(u32)
* @size: the size in bytes of the BLOB, must be a multiple of sizeof(u32)
*/
-void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
+void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix, char suffix,
const void *blob, size_t offset, size_t size)
{
const u32 *blob32 = (const u32 *)blob;
char buff[ASCII85_BUFSZ], *line_buff;
size_t line_pos = 0;
- /*
- * Splitting blobs across multiple lines is not compatible with the mesa
- * debug decoder tool. Note that even dropping the explicit '\n' below
- * doesn't help because the GuC log is so big some underlying implementation
- * still splits the lines at 512K characters. So just bail completely for
- * the moment.
- */
- return;
-
#define DMESG_MAX_LINE_LEN 800
-#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "\n\0" */
+ /* Always leave space for the suffix char and the \0 */
+#define MIN_SPACE (ASCII85_BUFSZ + 2) /* 85 + "<suffix>\0" */
if (size & 3)
drm_printf(p, "Size not word aligned: %zu", size);
if (offset & 3)
- drm_printf(p, "Offset not word aligned: %zu", size);
+ drm_printf(p, "Offset not word aligned: %zu", offset);
- line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_KERNEL);
- if (IS_ERR_OR_NULL(line_buff)) {
- drm_printf(p, "Failed to allocate line buffer: %pe", line_buff);
+ line_buff = kzalloc(DMESG_MAX_LINE_LEN, GFP_ATOMIC);
+ if (!line_buff) {
+ drm_printf(p, "Failed to allocate line buffer\n");
return;
}
@@ -462,7 +496,6 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
line_pos += strlen(line_buff + line_pos);
if ((line_pos + MIN_SPACE) >= DMESG_MAX_LINE_LEN) {
- line_buff[line_pos++] = '\n';
line_buff[line_pos++] = 0;
drm_puts(p, line_buff);
@@ -474,10 +507,11 @@ void xe_print_blob_ascii85(struct drm_printer *p, const char *prefix,
}
}
+ if (suffix)
+ line_buff[line_pos++] = suffix;
+
if (line_pos) {
- line_buff[line_pos++] = '\n';
line_buff[line_pos++] = 0;
-
drm_puts(p, line_buff);
}