drm/i915: Rely on spinlock protection for GPU error capture

Trust that we now have adequate protection over the low level structures via the engine->active.lock to allow ourselves to capture the GPU error state without the heavy hammer of stop_machine(). Sadly this does mean that we have to forgo some of the lesser used information (not derived from the active state) that is not controlled by the active locks. This includes the list of buffers in the ppGTT and pinned globally in the GGTT. Originally this was used to manually verify relocations, but hasn't been required for sometime and modern mesa now has the habit of ensuring that all interesting buffers within a batch are captured in their entirety (that are the auxiliary state buffers, but not the textures). A useful side-effect is that this allows us to restore error capturing for Braswell and Broxton. v2: Use pagevec for a typical arbitrary number of preallocated pages Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20190722222847.24178-1-chris@chris-wilson.co.uk
author: Chris Wilson <chris@chris-wilson.co.uk> 2019-07-23 01:28:47 +0300
committer: Chris Wilson <chris@chris-wilson.co.uk> 2019-07-23 02:22:20 +0300
commit: 3bdd4f8485310687affe117f1b1a6e9012e897f6 (patch)
tree: f79e6e2ebf06caff04816e9fe3e1004756ee1773 /drivers/gpu/drm/i915/i915_gpu_error.c
parent: df8cf31e749788d8de8c3028e530e6fd9ef8bc79 (diff)
download: linux-3bdd4f8485310687affe117f1b1a6e9012e897f6.tar.xz
1 files changed, 193 insertions, 296 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index c5b89bf4d616..2193687eac72 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -29,8 +29,8 @@
 
 #include <linux/ascii85.h>
 #include <linux/nmi.h>
+#include <linux/pagevec.h>
 #include <linux/scatterlist.h>
-#include <linux/stop_machine.h>
 #include <linux/utsname.h>
 #include <linux/zlib.h>
 
@@ -46,6 +46,9 @@
 #include "i915_scatterlist.h"
 #include "intel_csr.h"
 
+#define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+#define ATOMIC_MAYFAIL (GFP_ATOMIC | __GFP_NOWARN)
+
 static inline const struct intel_engine_cs *
 engine_lookup(const struct drm_i915_private *i915, unsigned int id)
 {
@@ -67,26 +70,6 @@ engine_name(const struct drm_i915_private *i915, unsigned int id)
 	return __engine_name(engine_lookup(i915, id));
 }
 
-static const char *tiling_flag(int tiling)
-{
-	switch (tiling) {
-	default:
-	case I915_TILING_NONE: return "";
-	case I915_TILING_X: return " X";
-	case I915_TILING_Y: return " Y";
-	}
-}
-
-static const char *dirty_flag(int dirty)
-{
-	return dirty ? " dirty" : "";
-}
-
-static const char *purgeable_flag(int purgeable)
-{
-	return purgeable ? " purgeable" : "";
-}
-
 static void __sg_set_buf(struct scatterlist *sg,
 			 void *addr, unsigned int len, loff_t it)
 {
@@ -114,7 +97,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 	if (e->cur == e->end) {
 		struct scatterlist *sgl;
 
-		sgl = (typeof(sgl))__get_free_page(GFP_KERNEL);
+		sgl = (typeof(sgl))__get_free_page(ALLOW_FAIL);
 		if (!sgl) {
 			e->err = -ENOMEM;
 			return false;
@@ -134,7 +117,7 @@ static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len)
 	}
 
 	e->size = ALIGN(len + 1, SZ_64K);
-	e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	e->buf = kmalloc(e->size, ALLOW_FAIL);
 	if (!e->buf) {
 		e->size = PAGE_ALIGN(len + 1);
 		e->buf = kmalloc(e->size, GFP_KERNEL);
@@ -211,47 +194,115 @@ i915_error_printer(struct drm_i915_error_state_buf *e)
 	return p;
 }
 
+/* single threaded page allocator with a reserved stash for emergencies */
+static void pool_fini(struct pagevec *pv)
+{
+	pagevec_release(pv);
+}
+
+static int pool_refill(struct pagevec *pv, gfp_t gfp)
+{
+	while (pagevec_space(pv)) {
+		struct page *p;
+
+		p = alloc_page(gfp);
+		if (!p)
+			return -ENOMEM;
+
+		pagevec_add(pv, p);
+	}
+
+	return 0;
+}
+
+static int pool_init(struct pagevec *pv, gfp_t gfp)
+{
+	int err;
+
+	pagevec_init(pv);
+
+	err = pool_refill(pv, gfp);
+	if (err)
+		pool_fini(pv);
+
+	return err;
+}
+
+static void *pool_alloc(struct pagevec *pv, gfp_t gfp)
+{
+	struct page *p;
+
+	p = alloc_page(gfp);
+	if (!p && pagevec_count(pv))
+		p = pv->pages[--pv->nr];
+
+	return p ? page_address(p) : NULL;
+}
+
+static void pool_free(struct pagevec *pv, void *addr)
+{
+	struct page *p = virt_to_page(addr);
+
+	if (pagevec_space(pv))
+		pagevec_add(pv, p);
+	else
+		__free_page(p);
+}
+
 #ifdef CONFIG_DRM_I915_COMPRESS_ERROR
 
 struct compress {
+	struct pagevec pool;
 	struct z_stream_s zstream;
 	void *tmp;
 };
 
 static bool compress_init(struct compress *c)
 {
-	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));
+	struct z_stream_s *zstream = &c->zstream;
 
-	zstream->workspace =
-		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
-			GFP_ATOMIC | __GFP_NOWARN);
-	if (!zstream->workspace)
+	if (pool_init(&c->pool, ALLOW_FAIL))
 		return false;
 
-	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
-		kfree(zstream->workspace);
+	zstream->workspace =
+		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
+			ALLOW_FAIL);
+	if (!zstream->workspace) {
+		pool_fini(&c->pool);
 		return false;
 	}
 
 	c->tmp = NULL;
 	if (i915_has_memcpy_from_wc())
-		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+		c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);
 
 	return true;
 }
 
-static void *compress_next_page(struct drm_i915_error_object *dst)
+static bool compress_start(struct compress *c)
 {
-	unsigned long page;
+	struct z_stream_s *zstream = &c->zstream;
+	void *workspace = zstream->workspace;
+
+	memset(zstream, 0, sizeof(*zstream));
+	zstream->workspace = workspace;
+
+	return zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) == Z_OK;
+}
+
+static void *compress_next_page(struct compress *c,
+				struct drm_i915_error_object *dst)
+{
+	void *page;
 
 	if (dst->page_count >= dst->num_pages)
 		return ERR_PTR(-ENOSPC);
 
-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
+	page = pool_alloc(&c->pool, ATOMIC_MAYFAIL);
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	return dst->pages[dst->page_count++] = (void *)page;
+	return dst->pages[dst->page_count++] = page;
 }
 
 static int compress_page(struct compress *c,
@@ -267,7 +318,7 @@ static int compress_page(struct compress *c,
 
 	do {
 		if (zstream->avail_out == 0) {
-			zstream->next_out = compress_next_page(dst);
+			zstream->next_out = compress_next_page(c, dst);
 			if (IS_ERR(zstream->next_out))
 				return PTR_ERR(zstream->next_out);
 
@@ -295,7 +346,7 @@ static int compress_flush(struct compress *c,
 	do {
 		switch (zlib_deflate(zstream, Z_FINISH)) {
 		case Z_OK: /* more space requested */
-			zstream->next_out = compress_next_page(dst);
+			zstream->next_out = compress_next_page(c, dst);
 			if (IS_ERR(zstream->next_out))
 				return PTR_ERR(zstream->next_out);
 
@@ -316,15 +367,17 @@ end:
 	return 0;
 }
 
-static void compress_fini(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static void compress_finish(struct compress *c)
 {
-	struct z_stream_s *zstream = &c->zstream;
+	zlib_deflateEnd(&c->zstream);
+}
 
-	zlib_deflateEnd(zstream);
-	kfree(zstream->workspace);
+static void compress_fini(struct compress *c)
+{
+	kfree(c->zstream.workspace);
 	if (c->tmp)
-		free_page((unsigned long)c->tmp);
+		pool_free(&c->pool, c->tmp);
+	pool_fini(&c->pool);
 }
 
 static void err_compression_marker(struct drm_i915_error_state_buf *m)
@@ -335,10 +388,16 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 #else
 
 struct compress {
+	struct pagevec pool;
 };
 
 static bool compress_init(struct compress *c)
 {
+	return pool_init(&c->pool, ALLOW_FAIL) == 0;
+}
+
+static bool compress_start(struct compress *c)
+{
 	return true;
 }
 
@@ -346,14 +405,12 @@ static int compress_page(struct compress *c,
 			 void *src,
 			 struct drm_i915_error_object *dst)
 {
-	unsigned long page;
 	void *ptr;
 
-	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
-	if (!page)
+	ptr = pool_alloc(&c->pool, ATOMIC_MAYFAIL);
+	if (!ptr)
 		return -ENOMEM;
 
-	ptr = (void *)page;
 	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
@@ -367,11 +424,15 @@ static int compress_flush(struct compress *c,
 	return 0;
 }
 
-static void compress_fini(struct compress *c,
-			  struct drm_i915_error_object *dst)
+static void compress_finish(struct compress *c)
 {
 }
 
+static void compress_fini(struct compress *c)
+{
+	pool_fini(&c->pool);
+}
+
 static void err_compression_marker(struct drm_i915_error_state_buf *m)
 {
 	err_puts(m, "~");
@@ -379,36 +440,6 @@ static void err_compression_marker(struct drm_i915_error_state_buf *m)
 
 #endif
 
-static void print_error_buffers(struct drm_i915_error_state_buf *m,
-				const char *name,
-				struct drm_i915_error_buffer *err,
-				int count)
-{
-	err_printf(m, "%s [%d]:\n", name, count);
-
-	while (count--) {
-		err_printf(m, "    %08x_%08x %8u %02x %02x",
-			   upper_32_bits(err->gtt_offset),
-			   lower_32_bits(err->gtt_offset),
-			   err->size,
-			   err->read_domains,
-			   err->write_domain);
-		err_puts(m, tiling_flag(err->tiling));
-		err_puts(m, dirty_flag(err->dirty));
-		err_puts(m, purgeable_flag(err->purgeable));
-		err_puts(m, err->userptr ? " userptr" : "");
-		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));
-
-		if (err->name)
-			err_printf(m, " (name: %d)", err->name);
-		if (err->fence_reg != I915_FENCE_REG_NONE)
-			err_printf(m, " (fence: %d)", err->fence_reg);
-
-		err_puts(m, "\n");
-		err++;
-	}
-}
-
 static void error_print_instdone(struct drm_i915_error_state_buf *m,
 				 const struct drm_i915_error_engine *ee)
 {
@@ -734,33 +765,6 @@ static void __err_print_to_sgl(struct drm_i915_error_state_buf *m,
 			error_print_engine(m, &error->engine[i], error->epoch);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
-		char buf[128];
-		int len, first = 1;
-
-		if (!error->active_vm[i])
-			break;
-
-		len = scnprintf(buf, sizeof(buf), "Active (");
-		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
-			if (error->engine[j].vm != error->active_vm[i])
-				continue;
-
-			len += scnprintf(buf + len, sizeof(buf), "%s%s",
-					 first ? "" : ", ",
-					 m->i915->engine[j]->name);
-			first = 0;
-		}
-		scnprintf(buf + len, sizeof(buf), ")");
-		print_error_buffers(m, buf,
-				    error->active_bo[i],
-				    error->active_bo_count[i]);
-	}
-
-	print_error_buffers(m, "Pinned (global)",
-			    error->pinned_bo,
-			    error->pinned_bo_count);
-
 	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
 		const struct drm_i915_error_engine *ee = &error->engine[i];
 
@@ -974,10 +978,6 @@ void __i915_gpu_state_free(struct kref *error_ref)
 		kfree(ee->requests);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
-		kfree(error->active_bo[i]);
-	kfree(error->pinned_bo);
-
 	kfree(error->overlay);
 	kfree(error->display);
 
@@ -990,12 +990,12 @@ void __i915_gpu_state_free(struct kref *error_ref)
 
 static struct drm_i915_error_object *
 i915_error_object_create(struct drm_i915_private *i915,
-			 struct i915_vma *vma)
+			 struct i915_vma *vma,
+			 struct compress *compress)
 {
 	struct i915_ggtt *ggtt = &i915->ggtt;
 	const u64 slot = ggtt->error_capture.start;
 	struct drm_i915_error_object *dst;
-	struct compress compress;
 	unsigned long num_pages;
 	struct sgt_iter iter;
 	dma_addr_t dma;
@@ -1006,22 +1006,21 @@ i915_error_object_create(struct drm_i915_private *i915,
 
 	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
 	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
-	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
-		      GFP_ATOMIC | __GFP_NOWARN);
+	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), ATOMIC_MAYFAIL);
 	if (!dst)
 		return NULL;
 
+	if (!compress_start(compress)) {
+		kfree(dst);
+		return NULL;
+	}
+
 	dst->gtt_offset = vma->node.start;
 	dst->gtt_size = vma->node.size;
 	dst->num_pages = num_pages;
 	dst->page_count = 0;
 	dst->unused = 0;
 
-	if (!compress_init(&compress)) {
-		kfree(dst);
-		return NULL;
-	}
-
 	ret = -EINVAL;
 	for_each_sgt_dma(dma, iter, vma->pages) {
 		void __iomem *s;
@@ -1029,69 +1028,23 @@ i915_error_object_create(struct drm_i915_private *i915,
 		ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);
 
 		s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
-		ret = compress_page(&compress, (void  __force *)s, dst);
+		ret = compress_page(compress, (void  __force *)s, dst);
 		io_mapping_unmap_atomic(s);
 		if (ret)
 			break;
 	}
 
-	if (ret || compress_flush(&compress, dst)) {
+	if (ret || compress_flush(compress, dst)) {
 		while (dst->page_count--)
-			free_page((unsigned long)dst->pages[dst->page_count]);
+			pool_free(&compress->pool, dst->pages[dst->page_count]);
 		kfree(dst);
 		dst = NULL;
 	}
+	compress_finish(compress);
 
-	compress_fini(&compress, dst);
 	return dst;
 }
 
-static void capture_bo(struct drm_i915_error_buffer *err,
-		       struct i915_vma *vma)
-{
-	struct drm_i915_gem_object *obj = vma->obj;
-
-	err->size = obj->base.size;
-	err->name = obj->base.name;
-
-	err->gtt_offset = vma->node.start;
-	err->read_domains = obj->read_domains;
-	err->write_domain = obj->write_domain;
-	err->fence_reg = vma->fence ? vma->fence->id : -1;
-	err->tiling = i915_gem_object_get_tiling(obj);
-	err->dirty = obj->mm.dirty;
-	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
-	err->userptr = obj->userptr.mm != NULL;
-	err->cache_level = obj->cache_level;
-}
-
-static u32 capture_error_bo(struct drm_i915_error_buffer *err,
-			    int count, struct list_head *head,
-			    unsigned int flags)
-#define ACTIVE_ONLY BIT(0)
-#define PINNED_ONLY BIT(1)
-{
-	struct i915_vma *vma;
-	int i = 0;
-
-	list_for_each_entry(vma, head, vm_link) {
-		if (!vma->obj)
-			continue;
-
-		if (flags & ACTIVE_ONLY && !i915_vma_is_active(vma))
-			continue;
-
-		if (flags & PINNED_ONLY && !i915_vma_is_pinned(vma))
-			continue;
-
-		capture_bo(err++, vma);
-		if (++i == count)
-			break;
-	}
-
-	return i;
-}
-
 /*
  * Generate a semi-unique error code. The code is not meant to have meaning, The
  * code's only purpose is to try to prevent false duplicated bug reports by
@@ -1281,7 +1234,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 	if (!count)
 		return;
 
-	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
+	ee->requests = kcalloc(count, sizeof(*ee->requests), ATOMIC_MAYFAIL);
 	if (!ee->requests)
 		return;
 
@@ -1349,8 +1302,10 @@ static void record_context(struct drm_i915_error_context *e,
 	e->active = atomic_read(&ctx->active_count);
 }
 
-static void request_record_user_bo(struct i915_request *request,
-				   struct drm_i915_error_engine *ee)
+static void
+request_record_user_bo(struct i915_request *request,
+		       struct drm_i915_error_engine *ee,
+		       struct compress *compress)
 {
 	struct i915_capture_list *c;
 	struct drm_i915_error_object **bo;
@@ -1362,18 +1317,20 @@ static void request_record_user_bo(struct i915_request *request,
 	if (!max)
 		return;
 
-	bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+	bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
 	if (!bo) {
 		/* If we can't capture everything, try to capture something. */
 		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
-		bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
+		bo = kmalloc_array(max, sizeof(*bo), ATOMIC_MAYFAIL);
 	}
 	if (!bo)
 		return;
 
 	count = 0;
 	for (c = request->capture_list; c; c = c->next) {
-		bo[count] = i915_error_object_create(request->i915, c->vma);
+		bo[count] = i915_error_object_create(request->i915,
+						     c->vma,
+						     compress);
 		if (!bo[count])
 			break;
 		if (++count == max)
@@ -1386,7 +1343,8 @@ static void request_record_user_bo(struct i915_request *request,
 
 static struct drm_i915_error_object *
 capture_object(struct drm_i915_private *dev_priv,
-	       struct drm_i915_gem_object *obj)
+	       struct drm_i915_gem_object *obj,
+	       struct compress *compress)
 {
 	if (obj && i915_gem_object_has_pages(obj)) {
 		struct i915_vma fake = {
@@ -1396,13 +1354,14 @@ capture_object(struct drm_i915_private *dev_priv,
 			.obj = obj,
 		};
 
-		return i915_error_object_create(dev_priv, &fake);
+		return i915_error_object_create(dev_priv, &fake, compress);
 	} else {
 		return NULL;
 	}
 }
 
-static void gem_record_rings(struct i915_gpu_state *error)
+static void
+gem_record_rings(struct i915_gpu_state *error, struct compress *compress)
 {
 	struct drm_i915_private *i915 = error->i915;
 	int i;
@@ -1420,6 +1379,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 		ee->engine_id = i;
 
+		/* Refill our page pool before entering atomic section */
+		pool_refill(&compress->pool, ALLOW_FAIL);
+
 		error_record_engine_registers(error, engine, ee);
 		error_record_engine_execlists(engine, ee);
 
@@ -1429,8 +1391,6 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			struct i915_gem_context *ctx = request->gem_context;
 			struct intel_ring *ring = request->ring;
 
-			ee->vm = ctx->vm ?: &engine->gt->ggtt->vm;
-
 			record_context(&ee->context, ctx);
 
 			/* We need to copy these to an anonymous buffer
@@ -1438,17 +1398,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			 * by userspace.
 			 */
 			ee->batchbuffer =
-				i915_error_object_create(i915, request->batch);
+				i915_error_object_create(i915,
+							 request->batch,
+							 compress);
 
 			if (HAS_BROKEN_CS_TLB(i915))
 				ee->wa_batchbuffer =
 				  i915_error_object_create(i915,
-							   engine->gt->scratch);
-			request_record_user_bo(request, ee);
+							   engine->gt->scratch,
+							   compress);
+			request_record_user_bo(request, ee, compress);
 
 			ee->ctx =
 				i915_error_object_create(i915,
-							 request->hw_context->state);
+							 request->hw_context->state,
+							 compress);
 
 			error->simulated |=
 				i915_gem_context_no_error_capture(ctx);
@@ -1460,7 +1424,9 @@ static void gem_record_rings(struct i915_gpu_state *error)
 			ee->cpu_ring_head = ring->head;
 			ee->cpu_ring_tail = ring->tail;
 			ee->ringbuffer =
-				i915_error_object_create(i915, ring->vma);
+				i915_error_object_create(i915,
+							 ring->vma,
+							 compress);
 
 			engine_record_requests(engine, request, ee);
 		}
@@ -1468,89 +1434,21 @@ static void gem_record_rings(struct i915_gpu_state *error)
 
 		ee->hws_page =
 			i915_error_object_create(i915,
-						 engine->status_page.vma);
-
-		ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);
-
-		ee->default_state = capture_object(i915, engine->default_state);
-	}
-}
-
-static void gem_capture_vm(struct i915_gpu_state *error,
-			   struct i915_address_space *vm,
-			   int idx)
-{
-	struct drm_i915_error_buffer *active_bo;
-	struct i915_vma *vma;
-	int count;
-
-	count = 0;
-	list_for_each_entry(vma, &vm->bound_list, vm_link)
-		if (i915_vma_is_active(vma))
-			count++;
-
-	active_bo = NULL;
-	if (count)
-		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
-	if (active_bo)
-		count = capture_error_bo(active_bo,
-					 count, &vm->bound_list,
-					 ACTIVE_ONLY);
-	else
-		count = 0;
-
-	error->active_vm[idx] = vm;
-	error->active_bo[idx] = active_bo;
-	error->active_bo_count[idx] = count;
-}
-
-static void capture_active_buffers(struct i915_gpu_state *error)
-{
-	int cnt = 0, i, j;
-
-	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
-	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
-	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));
-
-	/* Scan each engine looking for unique active contexts/vm */
-	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
-		struct drm_i915_error_engine *ee = &error->engine[i];
-		bool found;
+						 engine->status_page.vma,
+						 compress);
 
-		if (!ee->vm)
-			continue;
+		ee->wa_ctx =
+			i915_error_object_create(i915,
+						 engine->wa_ctx.vma,
+						 compress);
 
-		found = false;
-		for (j = 0; j < i && !found; j++)
-			found = error->engine[j].vm == ee->vm;
-		if (!found)
-			gem_capture_vm(error, ee->vm, cnt++);
+		ee->default_state =
+			capture_object(i915, engine->default_state, compress);
 	}
 }
 
-static void capture_pinned_buffers(struct i915_gpu_state *error)
-{
-	struct i915_address_space *vm = &error->i915->ggtt.vm;
-	struct drm_i915_error_buffer *bo;
-	struct i915_vma *vma;
-	int count;
-
-	count = 0;
-	list_for_each_entry(vma, &vm->bound_list, vm_link)
-		count++;
-
-	bo = NULL;
-	if (count)
-		bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
-	if (!bo)
-		return;
-
-	error->pinned_bo_count =
-		capture_error_bo(bo, count, &vm->bound_list, PINNED_ONLY);
-	error->pinned_bo = bo;
-}
-
-static void capture_uc_state(struct i915_gpu_state *error)
+static void
+capture_uc_state(struct i915_gpu_state *error, struct compress *compress)
 {
 	struct drm_i915_private *i915 = error->i915;
 	struct i915_error_uc *error_uc = &error->uc;
@@ -1567,9 +1465,11 @@ static void capture_uc_state(struct i915_gpu_state *error)
 	 * As modparams are generally accesible from the userspace make
 	 * explicit copies of the firmware paths.
 	 */
-	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, GFP_ATOMIC);
-	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, GFP_ATOMIC);
-	error_uc->guc_log = i915_error_object_create(i915, uc->guc.log.vma);
+	error_uc->guc_fw.path = kstrdup(uc->guc.fw.path, ALLOW_FAIL);
+	error_uc->huc_fw.path = kstrdup(uc->huc.fw.path, ALLOW_FAIL);
+	error_uc->guc_log = i915_error_object_create(i915,
+						     uc->guc.log.vma,
+						     compress);
 }
 
 /* Capture all registers which don't fit into another category. */
@@ -1753,56 +1653,53 @@ static void capture_finish(struct i915_gpu_state *error)
 	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
 }
 
-static int capture(void *data)
-{
-	struct i915_gpu_state *error = data;
-
-	error->time = ktime_get_real();
-	error->boottime = ktime_get_boottime();
-	error->uptime = ktime_sub(ktime_get(),
-				  error->i915->gt.last_init_time);
-	error->capture = jiffies;
-
-	capture_params(error);
-	capture_gen_state(error);
-	capture_uc_state(error);
-	capture_reg_state(error);
-	gem_record_fences(error);
-	gem_record_rings(error);
-	capture_active_buffers(error);
-	capture_pinned_buffers(error);
-
-	error->overlay = intel_overlay_capture_error_state(error->i915);
-	error->display = intel_display_capture_error_state(error->i915);
-
-	error->epoch = capture_find_epoch(error);
-
-	capture_finish(error);
-	return 0;
-}
-
 #define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))
 
 struct i915_gpu_state *
 i915_capture_gpu_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_state *error;
+	struct compress compress;
 
 	/* Check if GPU capture has been disabled */
 	error = READ_ONCE(i915->gpu_error.first_error);
 	if (IS_ERR(error))
 		return error;
 
-	error = kzalloc(sizeof(*error), GFP_ATOMIC);
+	error = kzalloc(sizeof(*error), ALLOW_FAIL);
 	if (!error) {
 		i915_disable_error_state(i915, -ENOMEM);
 		return ERR_PTR(-ENOMEM);
 	}
 
+	if (!compress_init(&compress)) {
+		kfree(error);
+		i915_disable_error_state(i915, -ENOMEM);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	kref_init(&error->ref);
 	error->i915 = i915;
 
-	stop_machine(capture, error, NULL);
+	error->time = ktime_get_real();
+	error->boottime = ktime_get_boottime();
+	error->uptime = ktime_sub(ktime_get(), i915->gt.last_init_time);
+	error->capture = jiffies;
+
+	capture_params(error);
+	capture_gen_state(error);
+	capture_uc_state(error, &compress);
+	capture_reg_state(error);
+	gem_record_fences(error);
+	gem_record_rings(error, &compress);
+
+	error->overlay = intel_overlay_capture_error_state(i915);
+	error->display = intel_display_capture_error_state(i915);
+
+	error->epoch = capture_find_epoch(error);
+
+	capture_finish(error);
+	compress_fini(&compress);
 
 	return error;
 }
author	Chris Wilson <chris@chris-wilson.co.uk>	2019-07-23 01:28:47 +0300
committer	Chris Wilson <chris@chris-wilson.co.uk>	2019-07-23 02:22:20 +0300
commit	3bdd4f8485310687affe117f1b1a6e9012e897f6 (patch)
tree	f79e6e2ebf06caff04816e9fe3e1004756ee1773 /drivers/gpu/drm/i915/i915_gpu_error.c
parent	df8cf31e749788d8de8c3028e530e6fd9ef8bc79 (diff)
download	linux-3bdd4f8485310687affe117f1b1a6e9012e897f6.tar.xz