summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorBen Skeggs <bskeggs@redhat.com>2017-01-18 08:37:24 +0300
committerBen Skeggs <bskeggs@redhat.com>2017-02-17 10:38:15 +0300
commit3ebef76a1d46cd7e45aee6ad3efff9683b3a0f07 (patch)
tree37ada711f09322e53a934961e66e88a4de89e570 /drivers/gpu/drm
parent03f16f5f278af9ef477aa847fda4b09ed87dc382 (diff)
downloadlinux-3ebef76a1d46cd7e45aee6ad3efff9683b3a0f07.tar.xz
drm/nouveau/fifo/gk104-: trigger mmu fault before attempting engine recovery
Greatly improves the chances of recovering the GPU from a CTXSW_TIMEOUT. Tested with piglit's arb_shader_image_load_store-atomicity, which causes GR to hang in such a way that recovery failed (CTXSW_TIMEOUT continually re-triggers). Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c41
1 files changed, 41 insertions, 0 deletions
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index e4e68f6fb138..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -280,10 +280,13 @@ gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
static void
gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
{
+ struct nvkm_engine *engine = fifo->engine[engn].engine;
struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+ struct nvkm_device *device = subdev->device;
const u32 runl = fifo->engine[engn].runl;
const u32 engm = BIT(engn);
struct gk104_fifo_engine_status status;
+ int mmui = -1;
assert_spin_locked(&fifo->base.lock);
if (fifo->recover.engm & engm)
@@ -300,6 +303,44 @@ gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
gk104_fifo_recover_chan(&fifo->base, status.chan->id);
}
+ /* Determine MMU fault ID for the engine, if we're not being
+ * called from the fault handler already.
+ */
+ if (!status.faulted && engine) {
+ mmui = nvkm_top_fault_id(device, engine->subdev.index);
+ if (mmui < 0) {
+ const struct nvkm_enum *en = fifo->func->fault.engine;
+ for (; en && en->name; en++) {
+ if (en->data2 == engine->subdev.index) {
+ mmui = en->value;
+ break;
+ }
+ }
+ }
+ WARN_ON(mmui < 0);
+ }
+
+ /* Trigger a MMU fault for the engine.
+ *
+ * No good idea why this is needed, but nvgpu does something similar,
+ * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+ */
+ if (mmui >= 0) {
+ nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+ /* Wait for fault to trigger. */
+ nvkm_msec(device, 2000,
+ gk104_fifo_engine_status(fifo, engn, &status);
+ if (status.faulted)
+ break;
+ );
+
+ /* Release MMU fault trigger, and ACK the fault. */
+ nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+ nvkm_wr32(device, 0x00259c, BIT(mmui));
+ nvkm_wr32(device, 0x002100, 0x10000000);
+ }
+
/* Schedule recovery. */
nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
schedule_work(&fifo->recover.work);