Diffstat (limited to 'drivers/gpu/drm/nouveau/nvkm/engine/gr')
60 files changed, 5817 insertions, 4795 deletions
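Most of this series is a mechanical port of the GR code away from the old per-object register accessors: pokes that used to go through the engine object itself (nv_wr32(priv, ...), nv_mask(priv, ...)) now fetch the struct nvkm_device behind the engine and use nvkm_wr32()/nvkm_rd32()/nvkm_mask(). The sketch below is not part of the series; the helper name is invented, but the device lookup, the accessor shapes and the register offsets are copied from the hunks that follow.

/* Sketch only: example_gr_mask_bits is an invented helper; the accessors,
 * the device lookup and the offsets are the ones used throughout this diff. */
static void
example_gr_mask_bits(struct gf100_gr *gr)
{
        struct nvkm_device *device = gr->base.engine.subdev.device;

        /* old style, removed by this series:
         *   nv_wr32(priv, 0x404154, 0x00000400);
         *   nv_mask(priv, 0x418800, 0x00200000, 0x00200000);
         */
        nvkm_wr32(device, 0x404154, 0x00000400);
        nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
}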
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 2e1b92f71d9e..9ad0d0e78a96 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -1,21 +1,8 @@
-nvkm-y += nvkm/engine/gr/ctxnv40.o
-nvkm-y += nvkm/engine/gr/ctxnv50.o
-nvkm-y += nvkm/engine/gr/ctxgf100.o
-nvkm-y += nvkm/engine/gr/ctxgf108.o
-nvkm-y += nvkm/engine/gr/ctxgf104.o
-nvkm-y += nvkm/engine/gr/ctxgf110.o
-nvkm-y += nvkm/engine/gr/ctxgf117.o
-nvkm-y += nvkm/engine/gr/ctxgf119.o
-nvkm-y += nvkm/engine/gr/ctxgk104.o
-nvkm-y += nvkm/engine/gr/ctxgk20a.o
-nvkm-y += nvkm/engine/gr/ctxgk110.o
-nvkm-y += nvkm/engine/gr/ctxgk110b.o
-nvkm-y += nvkm/engine/gr/ctxgk208.o
-nvkm-y += nvkm/engine/gr/ctxgm107.o
-nvkm-y += nvkm/engine/gr/ctxgm204.o
-nvkm-y += nvkm/engine/gr/ctxgm206.o
+nvkm-y += nvkm/engine/gr/base.o
 nvkm-y += nvkm/engine/gr/nv04.o
 nvkm-y += nvkm/engine/gr/nv10.o
+nvkm-y += nvkm/engine/gr/nv15.o
+nvkm-y += nvkm/engine/gr/nv17.o
 nvkm-y += nvkm/engine/gr/nv20.o
 nvkm-y += nvkm/engine/gr/nv25.o
 nvkm-y += nvkm/engine/gr/nv2a.o
@@ -23,18 +10,43 @@ nvkm-y += nvkm/engine/gr/nv30.o
 nvkm-y += nvkm/engine/gr/nv34.o
 nvkm-y += nvkm/engine/gr/nv35.o
 nvkm-y += nvkm/engine/gr/nv40.o
+nvkm-y += nvkm/engine/gr/nv44.o
 nvkm-y += nvkm/engine/gr/nv50.o
+nvkm-y += nvkm/engine/gr/g84.o
+nvkm-y += nvkm/engine/gr/gt200.o
+nvkm-y += nvkm/engine/gr/mcp79.o
+nvkm-y += nvkm/engine/gr/gt215.o
+nvkm-y += nvkm/engine/gr/mcp89.o
 nvkm-y += nvkm/engine/gr/gf100.o
-nvkm-y += nvkm/engine/gr/gf108.o
 nvkm-y += nvkm/engine/gr/gf104.o
+nvkm-y += nvkm/engine/gr/gf108.o
 nvkm-y += nvkm/engine/gr/gf110.o
 nvkm-y += nvkm/engine/gr/gf117.o
 nvkm-y += nvkm/engine/gr/gf119.o
 nvkm-y += nvkm/engine/gr/gk104.o
-nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gk110.o
 nvkm-y += nvkm/engine/gr/gk110b.o
 nvkm-y += nvkm/engine/gr/gk208.o
+nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gm107.o
 nvkm-y += nvkm/engine/gr/gm204.o
 nvkm-y += nvkm/engine/gr/gm206.o
+nvkm-y += nvkm/engine/gr/gm20b.o
+
+nvkm-y += nvkm/engine/gr/ctxnv40.o
+nvkm-y += nvkm/engine/gr/ctxnv50.o
+nvkm-y += nvkm/engine/gr/ctxgf100.o
+nvkm-y += nvkm/engine/gr/ctxgf104.o
+nvkm-y += nvkm/engine/gr/ctxgf108.o
+nvkm-y += nvkm/engine/gr/ctxgf110.o
+nvkm-y += nvkm/engine/gr/ctxgf117.o
+nvkm-y += nvkm/engine/gr/ctxgf119.o
+nvkm-y += nvkm/engine/gr/ctxgk104.o
+nvkm-y += nvkm/engine/gr/ctxgk110.o
+nvkm-y += nvkm/engine/gr/ctxgk110b.o
+nvkm-y += nvkm/engine/gr/ctxgk208.o
+nvkm-y += nvkm/engine/gr/ctxgk20a.o
+nvkm-y += nvkm/engine/gr/ctxgm107.o
+nvkm-y += nvkm/engine/gr/ctxgm204.o
+nvkm-y += nvkm/engine/gr/ctxgm206.o
+nvkm-y += nvkm/engine/gr/ctxgm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
new file mode 100644
index 000000000000..090765ff070d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+#include <engine/fifo.h>
+
+static void
+nvkm_gr_tile(struct nvkm_engine *engine, int region, struct nvkm_fb_tile *tile)
+{
+        struct nvkm_gr *gr = nvkm_gr(engine);
+        if (gr->func->tile)
+                gr->func->tile(gr, region, tile);
+}
+
+u64
+nvkm_gr_units(struct nvkm_gr *gr)
+{
+        if (gr->func->units)
+                return gr->func->units(gr);
+        return 0;
+}
+
+int
+nvkm_gr_tlb_flush(struct nvkm_gr *gr)
+{
+        if (gr->func->tlb_flush)
+                return gr->func->tlb_flush(gr);
+        return -ENODEV;
+}
+
+static int
+nvkm_gr_oclass_get(struct nvkm_oclass *oclass, int index)
+{
+        struct nvkm_gr *gr = nvkm_gr(oclass->engine);
+        int c = 0;
+
+        if (gr->func->object_get) {
+                int ret = gr->func->object_get(gr, index, &oclass->base);
+                if (oclass->base.oclass)
+                        return index;
+                return ret;
+        }
+
+        while (gr->func->sclass[c].oclass) {
+                if (c++ == index) {
+                        oclass->base = gr->func->sclass[index];
+                        return index;
+                }
+        }
+
+        return c;
+}
+
+static int
+nvkm_gr_cclass_new(struct nvkm_fifo_chan *chan,
+                   const struct nvkm_oclass *oclass,
+                   struct nvkm_object **pobject)
+{
+        struct nvkm_gr *gr = nvkm_gr(oclass->engine);
+        if (gr->func->chan_new)
+                return gr->func->chan_new(gr, chan, oclass, pobject);
+        return 0;
+}
+
+static void
+nvkm_gr_intr(struct nvkm_engine *engine)
+{
+        struct nvkm_gr *gr = nvkm_gr(engine);
+        gr->func->intr(gr);
+}
+
+static int
+nvkm_gr_oneinit(struct nvkm_engine *engine)
+{
+        struct nvkm_gr *gr = nvkm_gr(engine);
+        if (gr->func->oneinit)
+                return gr->func->oneinit(gr);
+        return 0;
+}
+
+static int
+nvkm_gr_init(struct nvkm_engine *engine)
+{
+        struct nvkm_gr *gr = nvkm_gr(engine);
+        return gr->func->init(gr);
+}
+
+static void *
+nvkm_gr_dtor(struct nvkm_engine *engine)
+{
+        struct nvkm_gr *gr = nvkm_gr(engine);
+        if (gr->func->dtor)
+                return gr->func->dtor(gr);
+        return gr;
+}
+
+static const struct nvkm_engine_func
+nvkm_gr = {
+        .dtor = nvkm_gr_dtor,
+        .oneinit = nvkm_gr_oneinit,
+        .init = nvkm_gr_init,
+        .intr = nvkm_gr_intr,
+        .tile = nvkm_gr_tile,
+        .fifo.cclass = nvkm_gr_cclass_new,
+        .fifo.sclass = nvkm_gr_oclass_get,
+};
+
+int
+nvkm_gr_ctor(const struct nvkm_gr_func *func, struct nvkm_device *device,
+             int index, u32 pmc_enable, bool enable, struct nvkm_gr *gr)
+{
+        gr->func = func;
+        return nvkm_engine_ctor(&nvkm_gr, device, index, pmc_enable,
+                                enable, &gr->engine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
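The new base.c above is the common engine wrapper: each chip driver fills in a struct nvkm_gr_func and hands it to nvkm_gr_ctor(), which wires the shared nvkm_engine_func table (nvkm_gr) into the generic engine. A minimal, hypothetical user of that constructor might look like the sketch below; the ctor signature and the hook names are the ones visible in base.c, while everything prefixed example_ (and the PMC enable mask value) is invented for illustration.

#include "priv.h"

static void
example_gr_intr(struct nvkm_gr *gr)
{
        /* chip-specific PGRAPH interrupt handling would go here */
}

static int
example_gr_init(struct nvkm_gr *gr)
{
        /* chip-specific PGRAPH bring-up would go here */
        return 0;
}

static const struct nvkm_gr_func
example_gr = {
        .init = example_gr_init,  /* required: nvkm_gr_init() calls it unconditionally */
        .intr = example_gr_intr,  /* required: nvkm_gr_intr() does not check for NULL */
        /* .dtor, .oneinit, .tile, .units, .tlb_flush, .chan_new and
         * .object_get/.sclass are optional hooks dispatched by base.c */
};

int
example_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
        struct nvkm_gr *gr;

        if (!(gr = *pgr = kzalloc(sizeof(*gr), GFP_KERNEL)))
                return -ENOMEM;

        /* 0x00001000 is shown purely as a placeholder PMC_ENABLE mask */
        return nvkm_gr_ctor(&example_gr, device, index, 0x00001000, true, gr);
}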
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index 57e2c5b13123..56f392d3d4fd 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -23,7 +23,6 @@ */ #include "ctxgf100.h" -#include <subdev/bar.h> #include <subdev/fb.h> #include <subdev/mc.h> #include <subdev/timer.h> @@ -1005,6 +1004,7 @@ void gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, int shift, int buffer) { + struct nvkm_device *device = info->gr->base.engine.subdev.device; if (info->data) { if (shift >= 0) { info->mmio->addr = addr; @@ -1021,29 +1021,29 @@ gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data, return; } - nv_wr32(info->priv, addr, data); + nvkm_wr32(device, addr, data); } void gf100_grctx_generate_bundle(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); + const struct gf100_grctx_func *grctx = info->gr->func->grctx; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); } void gf100_grctx_generate_pagepool(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); + const struct gf100_grctx_func *grctx = info->gr->func->grctx; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -1053,13 +1053,13 @@ gf100_grctx_generate_pagepool(struct gf100_grctx *info) void gf100_grctx_generate_attrib(struct gf100_grctx *info) { - struct gf100_gr_priv *priv = info->priv; - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(priv); - const u32 attrib = impl->attrib_nr; - const u32 size = 0x20 * (impl->attrib_nr_max + impl->alpha_nr_max); + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 attrib = grctx->attrib_nr; + const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * priv->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); int gpc, tpc; u32 bo = 0; @@ -1067,91 +1067,95 @@ gf100_grctx_generate_attrib(struct gf100_grctx *info) mmio_refn(info, 0x419848, 0x10000000, s, b); mmio_wr32(info, 0x405830, (attrib << 16)); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 o = TPC_UNIT(gpc, tpc, 0x0520); mmio_skip(info, o, (attrib << 16) | ++bo); mmio_wr32(info, o, (attrib << 16) | --bo); - bo += impl->attrib_nr_max; + bo += grctx->attrib_nr_max; } } } void -gf100_grctx_generate_unkn(struct gf100_gr_priv *priv) +gf100_grctx_generate_unkn(struct gf100_gr *gr) { } void 
-gf100_grctx_generate_tpcid(struct gf100_gr_priv *priv) +gf100_grctx_generate_tpcid(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; int gpc, tpc, id; for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - if (tpc < priv->tpc_nr[gpc]) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x4e8), id); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + if (tpc < gr->tpc_nr[gpc]) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x4e8), id); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); id++; } - nv_wr32(priv, GPC_UNIT(gpc, 0x0c08), priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c8c), priv->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); } } } void -gf100_grctx_generate_r406028(struct gf100_gr_priv *priv) +gf100_grctx_generate_r406028(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; u32 tmp[GPC_MAX / 8] = {}, i = 0; - for (i = 0; i < priv->gpc_nr; i++) - tmp[i / 8] |= priv->tpc_nr[i] << ((i % 8) * 4); + for (i = 0; i < gr->gpc_nr; i++) + tmp[i / 8] |= gr->tpc_nr[i] << ((i % 8) * 4); for (i = 0; i < 4; i++) { - nv_wr32(priv, 0x406028 + (i * 4), tmp[i]); - nv_wr32(priv, 0x405870 + (i * 4), tmp[i]); + nvkm_wr32(device, 0x406028 + (i * 4), tmp[i]); + nvkm_wr32(device, 0x405870 + (i * 4), tmp[i]); } } void -gf100_grctx_generate_r4060a8(struct gf100_gr_priv *priv) +gf100_grctx_generate_r4060a8(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; u8 tpcnr[GPC_MAX], data[TPC_MAX]; int gpc, tpc, i; - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); memset(data, 0x1f, sizeof(data)); gpc = -1; - for (tpc = 0; tpc < priv->tpc_total; tpc++) { + for (tpc = 0; tpc < gr->tpc_total; tpc++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); tpcnr[gpc]--; data[tpc] = gpc; } for (i = 0; i < 4; i++) - nv_wr32(priv, 0x4060a8 + (i * 4), ((u32 *)data)[i]); + nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]); } void -gf100_grctx_generate_r418bb8(struct gf100_gr_priv *priv) +gf100_grctx_generate_r418bb8(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; int gpc, tpc, i; /* calculate first set of magics */ - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); gpc = -1; - for (tpc = 0; tpc < priv->tpc_total; tpc++) { + for (tpc = 0; tpc < gr->tpc_total; tpc++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); tpcnr[gpc]--; @@ -1163,7 +1167,7 @@ gf100_grctx_generate_r418bb8(struct gf100_gr_priv *priv) /* and the second... 
*/ shift = 0; - ntpcv = priv->tpc_total; + ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { ntpcv <<= 1; shift++; @@ -1176,202 +1180,211 @@ gf100_grctx_generate_r418bb8(struct gf100_gr_priv *priv) data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); /* GPC_BROADCAST */ - nv_wr32(priv, 0x418bb8, (priv->tpc_total << 8) | - priv->magic_not_rop_nr); + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->magic_not_rop_nr); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x418b08 + (i * 4), data[i]); + nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); /* GPC_BROADCAST.TP_BROADCAST */ - nv_wr32(priv, 0x419bd0, (priv->tpc_total << 8) | - priv->magic_not_rop_nr | data2[0]); - nv_wr32(priv, 0x419be4, data2[1]); + nvkm_wr32(device, 0x419bd0, (gr->tpc_total << 8) | + gr->magic_not_rop_nr | data2[0]); + nvkm_wr32(device, 0x419be4, data2[1]); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x419b00 + (i * 4), data[i]); + nvkm_wr32(device, 0x419b00 + (i * 4), data[i]); /* UNK78xx */ - nv_wr32(priv, 0x4078bc, (priv->tpc_total << 8) | - priv->magic_not_rop_nr); + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->magic_not_rop_nr); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x40780c + (i * 4), data[i]); + nvkm_wr32(device, 0x40780c + (i * 4), data[i]); } void -gf100_grctx_generate_r406800(struct gf100_gr_priv *priv) +gf100_grctx_generate_r406800(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; u64 tpc_mask = 0, tpc_set = 0; u8 tpcnr[GPC_MAX]; int gpc, tpc; int i, a, b; - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) - tpc_mask |= ((1ULL << priv->tpc_nr[gpc]) - 1) << (gpc * 8); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) + tpc_mask |= ((1ULL << gr->tpc_nr[gpc]) - 1) << (gpc * 8); for (i = 0, gpc = -1, b = -1; i < 32; i++) { - a = (i * (priv->tpc_total - 1)) / 32; + a = (i * (gr->tpc_total - 1)) / 32; if (a != b) { b = a; do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; tpc_set |= 1ULL << ((gpc * 8) + tpc); } - nv_wr32(priv, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); - nv_wr32(priv, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); - if (priv->gpc_nr > 4) { - nv_wr32(priv, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); - nv_wr32(priv, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); + nvkm_wr32(device, 0x406800 + (i * 0x20), lower_32_bits(tpc_set)); + nvkm_wr32(device, 0x406c00 + (i * 0x20), lower_32_bits(tpc_set ^ tpc_mask)); + if (gr->gpc_nr > 4) { + nvkm_wr32(device, 0x406804 + (i * 0x20), upper_32_bits(tpc_set)); + nvkm_wr32(device, 0x406c04 + (i * 0x20), upper_32_bits(tpc_set ^ tpc_mask)); } } } void -gf100_grctx_generate_main(struct gf100_gr_priv *priv, struct gf100_grctx *info) +gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; - - nvkm_mc(priv)->unk260(nvkm_mc(priv), 0); - - gf100_gr_mmio(priv, oclass->hub); - gf100_gr_mmio(priv, oclass->gpc); - gf100_gr_mmio(priv, oclass->zcull); - gf100_gr_mmio(priv, oclass->tpc); - gf100_gr_mmio(priv, oclass->ppc); - - nv_wr32(priv, 0x404154, 0x00000000); - - oclass->bundle(info); - oclass->pagepool(info); - oclass->attrib(info); - oclass->unkn(priv); - - gf100_grctx_generate_tpcid(priv); - gf100_grctx_generate_r406028(priv); - gf100_grctx_generate_r4060a8(priv); - 
gf100_grctx_generate_r418bb8(priv); - gf100_grctx_generate_r406800(priv); - - gf100_gr_icmd(priv, oclass->icmd); - nv_wr32(priv, 0x404154, 0x00000400); - gf100_gr_mthd(priv, oclass->mthd); - nvkm_mc(priv)->unk260(nvkm_mc(priv), 1); + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; + + nvkm_mc_unk260(device->mc, 0); + + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); + + nvkm_wr32(device, 0x404154, 0x00000000); + + grctx->bundle(info); + grctx->pagepool(info); + grctx->attrib(info); + grctx->unkn(gr); + + gf100_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gf100_grctx_generate_r4060a8(gr); + gf100_grctx_generate_r418bb8(gr); + gf100_grctx_generate_r406800(gr); + + gf100_gr_icmd(gr, grctx->icmd); + nvkm_wr32(device, 0x404154, 0x00000400); + gf100_gr_mthd(gr, grctx->mthd); + nvkm_mc_unk260(device->mc, 1); } int -gf100_grctx_generate(struct gf100_gr_priv *priv) +gf100_grctx_generate(struct gf100_gr *gr) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; - struct nvkm_bar *bar = nvkm_bar(priv); - struct nvkm_gpuobj *chan; + const struct gf100_grctx_func *grctx = gr->func->grctx; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_memory *chan; struct gf100_grctx info; int ret, i; + u64 addr; /* allocate memory to for a "channel", which we'll use to generate * the default context values */ - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x80000 + priv->size, - 0x1000, NVOBJ_FLAG_ZERO_ALLOC, &chan); + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x80000 + gr->size, + 0x1000, true, &chan); if (ret) { - nv_error(priv, "failed to allocate channel memory, %d\n", ret); + nvkm_error(subdev, "failed to allocate chan memory, %d\n", ret); return ret; } + addr = nvkm_memory_addr(chan); + /* PGD pointer */ - nv_wo32(chan, 0x0200, lower_32_bits(chan->addr + 0x1000)); - nv_wo32(chan, 0x0204, upper_32_bits(chan->addr + 0x1000)); - nv_wo32(chan, 0x0208, 0xffffffff); - nv_wo32(chan, 0x020c, 0x000000ff); + nvkm_kmap(chan); + nvkm_wo32(chan, 0x0200, lower_32_bits(addr + 0x1000)); + nvkm_wo32(chan, 0x0204, upper_32_bits(addr + 0x1000)); + nvkm_wo32(chan, 0x0208, 0xffffffff); + nvkm_wo32(chan, 0x020c, 0x000000ff); /* PGT[0] pointer */ - nv_wo32(chan, 0x1000, 0x00000000); - nv_wo32(chan, 0x1004, 0x00000001 | (chan->addr + 0x2000) >> 8); + nvkm_wo32(chan, 0x1000, 0x00000000); + nvkm_wo32(chan, 0x1004, 0x00000001 | (addr + 0x2000) >> 8); /* identity-map the whole "channel" into its own vm */ - for (i = 0; i < chan->size / 4096; i++) { - u64 addr = ((chan->addr + (i * 4096)) >> 8) | 1; - nv_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); - nv_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); + for (i = 0; i < nvkm_memory_size(chan) / 4096; i++) { + u64 addr = ((nvkm_memory_addr(chan) + (i * 4096)) >> 8) | 1; + nvkm_wo32(chan, 0x2000 + (i * 8), lower_32_bits(addr)); + nvkm_wo32(chan, 0x2004 + (i * 8), upper_32_bits(addr)); } /* context pointer (virt) */ - nv_wo32(chan, 0x0210, 0x00080004); - nv_wo32(chan, 0x0214, 0x00000000); + nvkm_wo32(chan, 0x0210, 0x00080004); + nvkm_wo32(chan, 0x0214, 0x00000000); + nvkm_done(chan); - bar->flush(bar); - - nv_wr32(priv, 0x100cb8, (chan->addr + 0x1000) >> 8); - nv_wr32(priv, 0x100cbc, 0x80000001); - nv_wait(priv, 0x100c80, 0x00008000, 0x00008000); + nvkm_wr32(device, 0x100cb8, (addr + 
0x1000) >> 8); + nvkm_wr32(device, 0x100cbc, 0x80000001); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x100c80) & 0x00008000) + break; + ); /* setup default state for mmio list construction */ - info.priv = priv; - info.data = priv->mmio_data; - info.mmio = priv->mmio_list; + info.gr = gr; + info.data = gr->mmio_data; + info.mmio = gr->mmio_list; info.addr = 0x2000 + (i * 8); info.buffer_nr = 0; /* make channel current */ - if (priv->firmware) { - nv_wr32(priv, 0x409840, 0x00000030); - nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12); - nv_wr32(priv, 0x409504, 0x00000003); - if (!nv_wait(priv, 0x409800, 0x00000010, 0x00000010)) - nv_error(priv, "load_ctx timeout\n"); - - nv_wo32(chan, 0x8001c, 1); - nv_wo32(chan, 0x80020, 0); - nv_wo32(chan, 0x80028, 0); - nv_wo32(chan, 0x8002c, 0); - bar->flush(bar); + if (gr->firmware) { + nvkm_wr32(device, 0x409840, 0x00000030); + nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409504, 0x00000003); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) & 0x00000010) + break; + ); + + nvkm_kmap(chan); + nvkm_wo32(chan, 0x8001c, 1); + nvkm_wo32(chan, 0x80020, 0); + nvkm_wo32(chan, 0x80028, 0); + nvkm_wo32(chan, 0x8002c, 0); + nvkm_done(chan); } else { - nv_wr32(priv, 0x409840, 0x80000000); - nv_wr32(priv, 0x409500, 0x80000000 | chan->addr >> 12); - nv_wr32(priv, 0x409504, 0x00000001); - if (!nv_wait(priv, 0x409800, 0x80000000, 0x80000000)) - nv_error(priv, "HUB_SET_CHAN timeout\n"); + nvkm_wr32(device, 0x409840, 0x80000000); + nvkm_wr32(device, 0x409500, 0x80000000 | addr >> 12); + nvkm_wr32(device, 0x409504, 0x00000001); + nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) & 0x80000000) + break; + ); } - oclass->main(priv, &info); + grctx->main(gr, &info); /* trigger a context unload by unsetting the "next channel valid" bit * and faking a context switch interrupt */ - nv_mask(priv, 0x409b04, 0x80000000, 0x00000000); - nv_wr32(priv, 0x409000, 0x00000100); - if (!nv_wait(priv, 0x409b00, 0x80000000, 0x00000000)) { - nv_error(priv, "grctx template channel unload timeout\n"); + nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000); + nvkm_wr32(device, 0x409000, 0x00000100); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x409b00) & 0x80000000)) + break; + ) < 0) { ret = -EBUSY; goto done; } - priv->data = kmalloc(priv->size, GFP_KERNEL); - if (priv->data) { - for (i = 0; i < priv->size; i += 4) - priv->data[i / 4] = nv_ro32(chan, 0x80000 + i); + gr->data = kmalloc(gr->size, GFP_KERNEL); + if (gr->data) { + nvkm_kmap(chan); + for (i = 0; i < gr->size; i += 4) + gr->data[i / 4] = nvkm_ro32(chan, 0x80000 + i); + nvkm_done(chan); ret = 0; } else { ret = -ENOMEM; } done: - nvkm_gpuobj_ref(NULL, &chan); + nvkm_memory_del(&chan); return ret; } -struct nvkm_oclass * -gf100_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xc0), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf100_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, @@ -1387,4 +1400,4 @@ gf100_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h 
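Two conversions repeated throughout ctxgf100.c above are worth noting: writes to instance memory are now bracketed by nvkm_kmap()/nvkm_done() on a struct nvkm_memory instead of nv_wo32() followed by a BAR flush, and register busy-waits use the nvkm_msec() macro, which the converted code treats as returning a negative value on timeout. Below is a condensed, hypothetical sketch of both idioms; the function name is invented, the offsets and bits are copied from gf100_grctx_generate() above.

static int
example_poke_and_wait(struct nvkm_device *device, struct nvkm_memory *chan)
{
        u64 addr = nvkm_memory_addr(chan);

        /* old: nv_wo32(chan, ...) followed by bar->flush(bar);
         * new: bracket instance-memory writes with nvkm_kmap()/nvkm_done() */
        nvkm_kmap(chan);
        nvkm_wo32(chan, 0x0200, lower_32_bits(addr + 0x1000));
        nvkm_wo32(chan, 0x0204, upper_32_bits(addr + 0x1000));
        nvkm_done(chan);

        /* old: if (!nv_wait(priv, 0x409800, 0x00000010, 0x00000010)) ...
         * new: poll via nvkm_msec(); a negative result means the 2000ms
         * budget ran out before the break condition was reached */
        if (nvkm_msec(device, 2000,
                if (nvkm_rd32(device, 0x409800) & 0x00000010)
                        break;
        ) < 0)
                return -EBUSY;

        return 0;
}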
b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 3676a3342bc5..3c64040ec5a2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -3,7 +3,7 @@ #include "gf100.h" struct gf100_grctx { - struct gf100_gr_priv *priv; + struct gf100_gr *gr; struct gf100_gr_data *data; struct gf100_gr_mmio *mmio; int buffer_nr; @@ -19,12 +19,11 @@ void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int) #define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1) #define mmio_wr32(a,b,c) mmio_refn((a), (b), (c), 0, -1) -struct gf100_grctx_oclass { - struct nvkm_oclass base; +struct gf100_grctx_func { /* main context generation function */ - void (*main)(struct gf100_gr_priv *, struct gf100_grctx *); + void (*main)(struct gf100_gr *, struct gf100_grctx *); /* context-specific modify-on-first-load list generation function */ - void (*unkn)(struct gf100_gr_priv *); + void (*unkn)(struct gf100_gr *); /* mmio context data */ const struct gf100_gr_pack *hub; const struct gf100_gr_pack *gpc; @@ -50,60 +49,61 @@ struct gf100_grctx_oclass { u32 alpha_nr; }; -static inline const struct gf100_grctx_oclass * -gf100_grctx_impl(struct gf100_gr_priv *priv) -{ - return (void *)nv_engine(priv)->cclass; -} - -extern struct nvkm_oclass *gf100_grctx_oclass; -int gf100_grctx_generate(struct gf100_gr_priv *); -void gf100_grctx_generate_main(struct gf100_gr_priv *, struct gf100_grctx *); +extern const struct gf100_grctx_func gf100_grctx; +int gf100_grctx_generate(struct gf100_gr *); +void gf100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); void gf100_grctx_generate_bundle(struct gf100_grctx *); void gf100_grctx_generate_pagepool(struct gf100_grctx *); void gf100_grctx_generate_attrib(struct gf100_grctx *); -void gf100_grctx_generate_unkn(struct gf100_gr_priv *); -void gf100_grctx_generate_tpcid(struct gf100_gr_priv *); -void gf100_grctx_generate_r406028(struct gf100_gr_priv *); -void gf100_grctx_generate_r4060a8(struct gf100_gr_priv *); -void gf100_grctx_generate_r418bb8(struct gf100_gr_priv *); -void gf100_grctx_generate_r406800(struct gf100_gr_priv *); - -extern struct nvkm_oclass *gf108_grctx_oclass; +void gf100_grctx_generate_unkn(struct gf100_gr *); +void gf100_grctx_generate_tpcid(struct gf100_gr *); +void gf100_grctx_generate_r406028(struct gf100_gr *); +void gf100_grctx_generate_r4060a8(struct gf100_gr *); +void gf100_grctx_generate_r418bb8(struct gf100_gr *); +void gf100_grctx_generate_r406800(struct gf100_gr *); + +extern const struct gf100_grctx_func gf108_grctx; void gf108_grctx_generate_attrib(struct gf100_grctx *); -void gf108_grctx_generate_unkn(struct gf100_gr_priv *); +void gf108_grctx_generate_unkn(struct gf100_gr *); -extern struct nvkm_oclass *gf104_grctx_oclass; -extern struct nvkm_oclass *gf110_grctx_oclass; +extern const struct gf100_grctx_func gf104_grctx; +extern const struct gf100_grctx_func gf110_grctx; -extern struct nvkm_oclass *gf117_grctx_oclass; +extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); -extern struct nvkm_oclass *gf119_grctx_oclass; +extern const struct gf100_grctx_func gf119_grctx; -extern struct nvkm_oclass *gk104_grctx_oclass; -extern struct nvkm_oclass *gk20a_grctx_oclass; -void gk104_grctx_generate_main(struct gf100_gr_priv *, struct gf100_grctx *); +extern const struct gf100_grctx_func gk104_grctx; +extern const struct gf100_grctx_func gk20a_grctx; +void gk104_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); 
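The header rework above drops the gf100_grctx_oclass wrapper and the gf100_grctx_impl() cast through nv_engine()->cclass; per-chip context parameters now live in plain const struct gf100_grctx_func tables reached via gr->func->grctx. A generator now finds its parameters with a direct pointer chase, roughly as in the hypothetical sketch below (the function name is invented, the pointer chain, the mmio_wr32() helper and the bundle_size field are as declared above).

static void
example_grctx_generate_foo(struct gf100_grctx *info)
{
        struct gf100_gr *gr = info->gr;                  /* was info->priv */
        const struct gf100_grctx_func *grctx = gr->func->grctx;
        /* was: const struct gf100_grctx_oclass *impl = gf100_grctx_impl(priv); */
        const int s = 8;

        /* same mmio_wr32() helper as before; the size now comes from grctx */
        mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s));
}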
void gk104_grctx_generate_bundle(struct gf100_grctx *); void gk104_grctx_generate_pagepool(struct gf100_grctx *); -void gk104_grctx_generate_unkn(struct gf100_gr_priv *); -void gk104_grctx_generate_r418bb8(struct gf100_gr_priv *); -void gk104_grctx_generate_rop_active_fbps(struct gf100_gr_priv *); +void gk104_grctx_generate_unkn(struct gf100_gr *); +void gk104_grctx_generate_r418bb8(struct gf100_gr *); +void gk104_grctx_generate_rop_active_fbps(struct gf100_gr *); + +void gm107_grctx_generate_bundle(struct gf100_grctx *); +void gm107_grctx_generate_pagepool(struct gf100_grctx *); +void gm107_grctx_generate_attrib(struct gf100_grctx *); -extern struct nvkm_oclass *gk110_grctx_oclass; -extern struct nvkm_oclass *gk110b_grctx_oclass; -extern struct nvkm_oclass *gk208_grctx_oclass; +extern const struct gf100_grctx_func gk110_grctx; +extern const struct gf100_grctx_func gk110b_grctx; +extern const struct gf100_grctx_func gk208_grctx; -extern struct nvkm_oclass *gm107_grctx_oclass; +extern const struct gf100_grctx_func gm107_grctx; void gm107_grctx_generate_bundle(struct gf100_grctx *); void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); -extern struct nvkm_oclass *gm204_grctx_oclass; -void gm204_grctx_generate_main(struct gf100_gr_priv *, struct gf100_grctx *); +extern const struct gf100_grctx_func gm204_grctx; +void gm204_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *); +void gm204_grctx_generate_tpcid(struct gf100_gr *); +void gm204_grctx_generate_405b60(struct gf100_gr *); -extern struct nvkm_oclass *gm206_grctx_oclass; +extern const struct gf100_grctx_func gm206_grctx; +extern const struct gf100_grctx_func gm20b_grctx; /* context init value lists */ diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c index c5a8d55e2cac..54fd74e9cca0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c @@ -79,17 +79,8 @@ gf104_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gf104_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xc3), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf104_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, @@ -105,4 +96,4 @@ gf104_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c index 87c844a5f34b..505cdcbfc085 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c @@ -730,18 +730,18 @@ gf108_grctx_pack_tpc[] = { void gf108_grctx_generate_attrib(struct gf100_grctx *info) { - struct gf100_gr_priv *priv = info->priv; - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(priv); - const u32 alpha = impl->alpha_nr; - const u32 beta = impl->attrib_nr; - const u32 size = 0x20 * (impl->attrib_nr_max + impl->alpha_nr_max); + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = 
gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 beta = grctx->attrib_nr; + const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * priv->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; - u32 ao = bo + impl->attrib_nr_max * priv->tpc_total; + u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, tpc; mmio_refn(info, 0x418810, 0x80000000, s, b); @@ -749,43 +749,35 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x405830, (beta << 16) | alpha); mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { const u32 a = alpha; const u32 b = beta; const u32 t = timeslice_mode; const u32 o = TPC_UNIT(gpc, tpc, 0x500); mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo); mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo); - bo += impl->attrib_nr_max; + bo += grctx->attrib_nr_max; mmio_wr32(info, o + 0x44, (a << 16) | ao); - ao += impl->alpha_nr_max; + ao += grctx->alpha_nr_max; } } } void -gf108_grctx_generate_unkn(struct gf100_gr_priv *priv) +gf108_grctx_generate_unkn(struct gf100_gr *gr) { - nv_mask(priv, 0x418c6c, 0x00000001, 0x00000001); - nv_mask(priv, 0x41980c, 0x00000010, 0x00000010); - nv_mask(priv, 0x419814, 0x00000004, 0x00000004); - nv_mask(priv, 0x4064c0, 0x80000000, 0x80000000); - nv_mask(priv, 0x405800, 0x08000000, 0x08000000); - nv_mask(priv, 0x419c00, 0x00000008, 0x00000008); + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x418c6c, 0x00000001, 0x00000001); + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x419814, 0x00000004, 0x00000004); + nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000); + nvkm_mask(device, 0x405800, 0x08000000, 0x08000000); + nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); } -struct nvkm_oclass * -gf108_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xc1), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf108_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf108_grctx_pack_hub, @@ -803,4 +795,4 @@ gf108_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c index b3acd931b978..7df398b53f8f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c @@ -330,17 +330,8 @@ gf110_grctx_pack_gpc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gf110_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xc8), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 
= _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf110_grctx = { .main = gf100_grctx_generate_main, .unkn = gf100_grctx_generate_unkn, .hub = gf100_grctx_pack_hub, @@ -356,4 +347,4 @@ gf110_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib = gf100_grctx_generate_attrib, .attrib_nr_max = 0x324, .attrib_nr = 0x218, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 9bbe2c97552e..b5b875928aba 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -182,18 +182,18 @@ gf117_grctx_pack_ppc[] = { void gf117_grctx_generate_attrib(struct gf100_grctx *info) { - struct gf100_gr_priv *priv = info->priv; - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(priv); - const u32 alpha = impl->alpha_nr; - const u32 beta = impl->attrib_nr; - const u32 size = 0x20 * (impl->attrib_nr_max + impl->alpha_nr_max); + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 beta = grctx->attrib_nr; + const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * priv->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); const int timeslice_mode = 1; const int max_batches = 0xffff; u32 bo = 0; - u32 ao = bo + impl->attrib_nr_max * priv->tpc_total; + u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc; mmio_refn(info, 0x418810, 0x80000000, s, b); @@ -201,68 +201,60 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x405830, (beta << 16) | alpha); mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++) { - const u32 a = alpha * priv->ppc_tpc_nr[gpc][ppc]; - const u32 b = beta * priv->ppc_tpc_nr[gpc][ppc]; + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + const u32 a = alpha * gr->ppc_tpc_nr[gpc][ppc]; + const u32 b = beta * gr->ppc_tpc_nr[gpc][ppc]; const u32 t = timeslice_mode; const u32 o = PPC_UNIT(gpc, ppc, 0); mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo); mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo); - bo += impl->attrib_nr_max * priv->ppc_tpc_nr[gpc][ppc]; + bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; mmio_wr32(info, o + 0xe4, (a << 16) | ao); - ao += impl->alpha_nr_max * priv->ppc_tpc_nr[gpc][ppc]; + ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; } } } void -gf117_grctx_generate_main(struct gf100_gr_priv *priv, struct gf100_grctx *info) +gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; int i; - nvkm_mc(priv)->unk260(nvkm_mc(priv), 0); + nvkm_mc_unk260(device->mc, 0); - gf100_gr_mmio(priv, oclass->hub); - gf100_gr_mmio(priv, oclass->gpc); - gf100_gr_mmio(priv, oclass->zcull); - gf100_gr_mmio(priv, oclass->tpc); - gf100_gr_mmio(priv, oclass->ppc); + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); - nv_wr32(priv, 0x404154, 0x00000000); + 
nvkm_wr32(device, 0x404154, 0x00000000); - oclass->bundle(info); - oclass->pagepool(info); - oclass->attrib(info); - oclass->unkn(priv); + grctx->bundle(info); + grctx->pagepool(info); + grctx->attrib(info); + grctx->unkn(gr); - gf100_grctx_generate_tpcid(priv); - gf100_grctx_generate_r406028(priv); - gf100_grctx_generate_r4060a8(priv); - gk104_grctx_generate_r418bb8(priv); - gf100_grctx_generate_r406800(priv); + gf100_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gf100_grctx_generate_r4060a8(gr); + gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_r406800(gr); for (i = 0; i < 8; i++) - nv_wr32(priv, 0x4064d0 + (i * 0x04), 0x00000000); + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - gf100_gr_icmd(priv, oclass->icmd); - nv_wr32(priv, 0x404154, 0x00000400); - gf100_gr_mthd(priv, oclass->mthd); - nvkm_mc(priv)->unk260(nvkm_mc(priv), 1); + gf100_gr_icmd(gr, grctx->icmd); + nvkm_wr32(device, 0x404154, 0x00000400); + gf100_gr_mthd(gr, grctx->mthd); + nvkm_mc_unk260(device->mc, 1); } -struct nvkm_oclass * -gf117_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xd7), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf117_grctx = { .main = gf117_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gf117_grctx_pack_hub, @@ -281,4 +273,4 @@ gf117_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x324, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c index 8d8761443809..605185b078be 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c @@ -498,17 +498,8 @@ gf119_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gf119_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xd9), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gf119_grctx = { .main = gf100_grctx_generate_main, .unkn = gf108_grctx_generate_unkn, .hub = gf119_grctx_pack_hub, @@ -526,4 +517,4 @@ gf119_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x324, .alpha_nr = 0x218, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index b12f6a9fd926..a843e3689c3c 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -843,27 +843,27 @@ gk104_grctx_pack_ppc[] = { void gk104_grctx_generate_bundle(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); - const u32 state_limit = min(impl->bundle_min_gpm_fifo_depth, - impl->bundle_size / 0x20); - const u32 token_limit = impl->bundle_token_limit; + const struct gf100_grctx_func *grctx = info->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, + grctx->bundle_size / 0x20); + const u32 token_limit = 
grctx->bundle_token_limit; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418808, 0x00000000, s, b); - mmio_wr32(info, 0x41880c, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s)); mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); } void gk104_grctx_generate_pagepool(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); + const struct gf100_grctx_func *grctx = info->gr->func->grctx; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -872,31 +872,33 @@ gk104_grctx_generate_pagepool(struct gf100_grctx *info) } void -gk104_grctx_generate_unkn(struct gf100_gr_priv *priv) +gk104_grctx_generate_unkn(struct gf100_gr *gr) { - nv_mask(priv, 0x418c6c, 0x00000001, 0x00000001); - nv_mask(priv, 0x41980c, 0x00000010, 0x00000010); - nv_mask(priv, 0x41be08, 0x00000004, 0x00000004); - nv_mask(priv, 0x4064c0, 0x80000000, 0x80000000); - nv_mask(priv, 0x405800, 0x08000000, 0x08000000); - nv_mask(priv, 0x419c00, 0x00000008, 0x00000008); + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_mask(device, 0x418c6c, 0x00000001, 0x00000001); + nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010); + nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004); + nvkm_mask(device, 0x4064c0, 0x80000000, 0x80000000); + nvkm_mask(device, 0x405800, 0x08000000, 0x08000000); + nvkm_mask(device, 0x419c00, 0x00000008, 0x00000008); } void -gk104_grctx_generate_r418bb8(struct gf100_gr_priv *priv) +gk104_grctx_generate_r418bb8(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; u32 data[6] = {}, data2[2] = {}; u8 tpcnr[GPC_MAX]; u8 shift, ntpcv; int gpc, tpc, i; /* calculate first set of magics */ - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); gpc = -1; - for (tpc = 0; tpc < priv->tpc_total; tpc++) { + for (tpc = 0; tpc < gr->tpc_total; tpc++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); tpcnr[gpc]--; @@ -908,7 +910,7 @@ gk104_grctx_generate_r418bb8(struct gf100_gr_priv *priv) /* and the second... 
*/ shift = 0; - ntpcv = priv->tpc_total; + ntpcv = gr->tpc_total; while (!(ntpcv & (1 << 4))) { ntpcv <<= 1; shift++; @@ -921,86 +923,79 @@ gk104_grctx_generate_r418bb8(struct gf100_gr_priv *priv) data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5); /* GPC_BROADCAST */ - nv_wr32(priv, 0x418bb8, (priv->tpc_total << 8) | - priv->magic_not_rop_nr); + nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) | + gr->magic_not_rop_nr); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x418b08 + (i * 4), data[i]); + nvkm_wr32(device, 0x418b08 + (i * 4), data[i]); /* GPC_BROADCAST.TP_BROADCAST */ - nv_wr32(priv, 0x41bfd0, (priv->tpc_total << 8) | - priv->magic_not_rop_nr | data2[0]); - nv_wr32(priv, 0x41bfe4, data2[1]); + nvkm_wr32(device, 0x41bfd0, (gr->tpc_total << 8) | + gr->magic_not_rop_nr | data2[0]); + nvkm_wr32(device, 0x41bfe4, data2[1]); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x41bf00 + (i * 4), data[i]); + nvkm_wr32(device, 0x41bf00 + (i * 4), data[i]); /* UNK78xx */ - nv_wr32(priv, 0x4078bc, (priv->tpc_total << 8) | - priv->magic_not_rop_nr); + nvkm_wr32(device, 0x4078bc, (gr->tpc_total << 8) | + gr->magic_not_rop_nr); for (i = 0; i < 6; i++) - nv_wr32(priv, 0x40780c + (i * 4), data[i]); + nvkm_wr32(device, 0x40780c + (i * 4), data[i]); } void -gk104_grctx_generate_rop_active_fbps(struct gf100_gr_priv *priv) +gk104_grctx_generate_rop_active_fbps(struct gf100_gr *gr) { - const u32 fbp_count = nv_rd32(priv, 0x120074); - nv_mask(priv, 0x408850, 0x0000000f, fbp_count); /* zrop */ - nv_mask(priv, 0x408958, 0x0000000f, fbp_count); /* crop */ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 fbp_count = nvkm_rd32(device, 0x120074); + nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ + nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } void -gk104_grctx_generate_main(struct gf100_gr_priv *priv, struct gf100_grctx *info) +gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; int i; - nvkm_mc(priv)->unk260(nvkm_mc(priv), 0); + nvkm_mc_unk260(device->mc, 0); - gf100_gr_mmio(priv, oclass->hub); - gf100_gr_mmio(priv, oclass->gpc); - gf100_gr_mmio(priv, oclass->zcull); - gf100_gr_mmio(priv, oclass->tpc); - gf100_gr_mmio(priv, oclass->ppc); + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); - nv_wr32(priv, 0x404154, 0x00000000); + nvkm_wr32(device, 0x404154, 0x00000000); - oclass->bundle(info); - oclass->pagepool(info); - oclass->attrib(info); - oclass->unkn(priv); + grctx->bundle(info); + grctx->pagepool(info); + grctx->attrib(info); + grctx->unkn(gr); - gf100_grctx_generate_tpcid(priv); - gf100_grctx_generate_r406028(priv); - gk104_grctx_generate_r418bb8(priv); - gf100_grctx_generate_r406800(priv); + gf100_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_r406800(gr); for (i = 0; i < 8; i++) - nv_wr32(priv, 0x4064d0 + (i * 0x04), 0x00000000); + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nv_wr32(priv, 0x405b00, (priv->tpc_total << 8) | priv->gpc_nr); - gk104_grctx_generate_rop_active_fbps(priv); - nv_mask(priv, 0x419f78, 0x00000001, 0x00000000); + nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); + gk104_grctx_generate_rop_active_fbps(gr); + 
nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); - gf100_gr_icmd(priv, oclass->icmd); - nv_wr32(priv, 0x404154, 0x00000400); - gf100_gr_mthd(priv, oclass->mthd); - nvkm_mc(priv)->unk260(nvkm_mc(priv), 1); + gf100_gr_icmd(gr, grctx->icmd); + nvkm_wr32(device, 0x404154, 0x00000400); + gf100_gr_mthd(gr, grctx->mthd); + nvkm_mc_unk260(device->mc, 1); - nv_mask(priv, 0x418800, 0x00200000, 0x00200000); - nv_mask(priv, 0x41be10, 0x00800000, 0x00800000); + nvkm_mask(device, 0x418800, 0x00200000, 0x00200000); + nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000); } -struct nvkm_oclass * -gk104_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xe4), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gk104_grctx = { .main = gk104_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk104_grctx_pack_hub, @@ -1021,4 +1016,4 @@ gk104_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index b3f58be04e9c..7b95ec2fe453 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -808,17 +808,8 @@ gk110_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gk110_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xf0), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gk110_grctx = { .main = gk104_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, @@ -839,4 +830,4 @@ gk110_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index b11c26794fde..048b1152da44 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -69,17 +69,8 @@ gk110b_grctx_pack_tpc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gk110b_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xf1), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gk110b_grctx = { .main = gk104_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk110_grctx_pack_hub, @@ -100,4 +91,4 @@ gk110b_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index 6e8ce9fc311a..67b7a1b43617 100644 --- 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -530,17 +530,8 @@ gk208_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ -struct nvkm_oclass * -gk208_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0x08), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gk208_grctx = { .main = gk104_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gk208_grctx_pack_hub, @@ -561,4 +552,4 @@ gk208_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x218, .alpha_nr_max = 0x7ff, .alpha_nr = 0x648, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c index 2f241f6f0f0a..ddaa16a71c84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -20,34 +20,60 @@ * DEALINGS IN THE SOFTWARE. */ #include "ctxgf100.h" +#include "gf100.h" -static const struct gf100_gr_pack -gk20a_grctx_pack_mthd[] = { - { gk104_grctx_init_a097_0, 0xa297 }, - { gf100_grctx_init_902d_0, 0x902d }, - {} -}; +#include <subdev/mc.h> + +static void +gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; + int idle_timeout_save; + int i; + + gf100_gr_mmio(gr, gr->fuc_sw_ctx); + + gf100_gr_wait_idle(gr); + + idle_timeout_save = nvkm_rd32(device, 0x404154); + nvkm_wr32(device, 0x404154, 0x00000000); + + grctx->attrib(info); + + grctx->unkn(gr); + + gf100_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_r406800(gr); + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); -struct nvkm_oclass * -gk20a_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0xea), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, - .main = gk104_grctx_generate_main, + nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); + + gk104_grctx_generate_rop_active_fbps(gr); + + nvkm_mask(device, 0x5044b0, 0x8000000, 0x8000000); + + gf100_gr_wait_idle(gr); + + nvkm_wr32(device, 0x404154, idle_timeout_save); + gf100_gr_wait_idle(gr); + + gf100_gr_mthd(gr, gr->fuc_method); + gf100_gr_wait_idle(gr); + + gf100_gr_icmd(gr, gr->fuc_bundle); + grctx->pagepool(info); + grctx->bundle(info); +} + +const struct gf100_grctx_func +gk20a_grctx = { + .main = gk20a_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, - .hub = gk104_grctx_pack_hub, - .gpc = gk104_grctx_pack_gpc, - .zcull = gf100_grctx_pack_zcull, - .tpc = gk104_grctx_pack_tpc, - .ppc = gk104_grctx_pack_ppc, - .icmd 
= gk104_grctx_pack_icmd, - .mthd = gk20a_grctx_pack_mthd, .bundle = gk104_grctx_generate_bundle, .bundle_size = 0x1800, .bundle_min_gpm_fifo_depth = 0x62, @@ -59,4 +85,4 @@ gk20a_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x240, .alpha_nr_max = 0x648 + (0x648 / 2), .alpha_nr = 0x648, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index fbeaae3ae6ce..95f59e3169f2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -863,27 +863,27 @@ gm107_grctx_pack_ppc[] = { void gm107_grctx_generate_bundle(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); - const u32 state_limit = min(impl->bundle_min_gpm_fifo_depth, - impl->bundle_size / 0x20); - const u32 token_limit = impl->bundle_token_limit; + const struct gf100_grctx_func *grctx = info->gr->func->grctx; + const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, + grctx->bundle_size / 0x20); + const u32 token_limit = grctx->bundle_token_limit; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->bundle_size, (1 << s), access); + const int b = mmio_vram(info, grctx->bundle_size, (1 << s), access); mmio_refn(info, 0x408004, 0x00000000, s, b); - mmio_wr32(info, 0x408008, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s)); mmio_refn(info, 0x418e24, 0x00000000, s, b); - mmio_wr32(info, 0x418e28, 0x80000000 | (impl->bundle_size >> s)); + mmio_wr32(info, 0x418e28, 0x80000000 | (grctx->bundle_size >> s)); mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit); } void gm107_grctx_generate_pagepool(struct gf100_grctx *info) { - const struct gf100_grctx_oclass *impl = gf100_grctx_impl(info->priv); + const struct gf100_grctx_func *grctx = info->gr->func->grctx; const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS; const int s = 8; - const int b = mmio_vram(info, impl->pagepool_size, (1 << s), access); + const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access); mmio_refn(info, 0x40800c, 0x00000000, s, b); mmio_wr32(info, 0x408010, 0x80000000); mmio_refn(info, 0x419004, 0x00000000, s, b); @@ -895,17 +895,17 @@ gm107_grctx_generate_pagepool(struct gf100_grctx *info) void gm107_grctx_generate_attrib(struct gf100_grctx *info) { - struct gf100_gr_priv *priv = info->priv; - const struct gf100_grctx_oclass *impl = (void *)gf100_grctx_impl(priv); - const u32 alpha = impl->alpha_nr; - const u32 attrib = impl->attrib_nr; - const u32 size = 0x20 * (impl->attrib_nr_max + impl->alpha_nr_max); + struct gf100_gr *gr = info->gr; + const struct gf100_grctx_func *grctx = gr->func->grctx; + const u32 alpha = grctx->alpha_nr; + const u32 attrib = grctx->attrib_nr; + const u32 size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max); const u32 access = NV_MEM_ACCESS_RW; const int s = 12; - const int b = mmio_vram(info, size * priv->tpc_total, (1 << s), access); + const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), access); const int max_batches = 0xffff; u32 bo = 0; - u32 ao = bo + impl->attrib_nr_max * priv->tpc_total; + u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total; int gpc, ppc, n = 0; mmio_refn(info, 0x418810, 0x80000000, s, b); @@ -914,97 +914,90 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info) mmio_wr32(info, 0x405830, (attrib << 16) | alpha); mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | 
max_batches); - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++, n++) { - const u32 as = alpha * priv->ppc_tpc_nr[gpc][ppc]; - const u32 bs = attrib * priv->ppc_tpc_nr[gpc][ppc]; + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) { + const u32 as = alpha * gr->ppc_tpc_nr[gpc][ppc]; + const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc]; const u32 u = 0x418ea0 + (n * 0x04); const u32 o = PPC_UNIT(gpc, ppc, 0); mmio_wr32(info, o + 0xc0, bs); mmio_wr32(info, o + 0xf4, bo); - bo += impl->attrib_nr_max * priv->ppc_tpc_nr[gpc][ppc]; + bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc]; mmio_wr32(info, o + 0xe4, as); mmio_wr32(info, o + 0xf8, ao); - ao += impl->alpha_nr_max * priv->ppc_tpc_nr[gpc][ppc]; + ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc]; mmio_wr32(info, u, ((bs / 3 /*XXX*/) << 16) | bs); } } } -static void -gm107_grctx_generate_tpcid(struct gf100_gr_priv *priv) +void +gm107_grctx_generate_tpcid(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; int gpc, tpc, id; for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - if (tpc < priv->tpc_nr[gpc]) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + if (tpc < gr->tpc_nr[gpc]) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); id++; } - nv_wr32(priv, GPC_UNIT(gpc, 0x0c08), priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c8c), priv->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c08), gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c8c), gr->tpc_nr[gpc]); } } } static void -gm107_grctx_generate_main(struct gf100_gr_priv *priv, struct gf100_grctx *info) +gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; int i; - gf100_gr_mmio(priv, oclass->hub); - gf100_gr_mmio(priv, oclass->gpc); - gf100_gr_mmio(priv, oclass->zcull); - gf100_gr_mmio(priv, oclass->tpc); - gf100_gr_mmio(priv, oclass->ppc); + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); - nv_wr32(priv, 0x404154, 0x00000000); + nvkm_wr32(device, 0x404154, 0x00000000); - oclass->bundle(info); - oclass->pagepool(info); - oclass->attrib(info); - oclass->unkn(priv); + grctx->bundle(info); + grctx->pagepool(info); + grctx->attrib(info); + grctx->unkn(gr); - gm107_grctx_generate_tpcid(priv); - gf100_grctx_generate_r406028(priv); - gk104_grctx_generate_r418bb8(priv); - gf100_grctx_generate_r406800(priv); + gm107_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gk104_grctx_generate_r418bb8(gr); + gf100_grctx_generate_r406800(gr); - nv_wr32(priv, 0x4064d0, 0x00000001); + nvkm_wr32(device, 0x4064d0, 0x00000001); for (i = 1; i < 8; i++) - nv_wr32(priv, 0x4064d0 + (i * 0x04), 0x00000000); - nv_wr32(priv, 0x406500, 0x00000001); + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); + nvkm_wr32(device, 0x406500, 0x00000001); - nv_wr32(priv, 0x405b00, (priv->tpc_total << 8) | priv->gpc_nr); + nvkm_wr32(device, 0x405b00, (gr->tpc_total << 
8) | gr->gpc_nr); - gk104_grctx_generate_rop_active_fbps(priv); + gk104_grctx_generate_rop_active_fbps(gr); - gf100_gr_icmd(priv, oclass->icmd); - nv_wr32(priv, 0x404154, 0x00000400); - gf100_gr_mthd(priv, oclass->mthd); + gf100_gr_icmd(gr, grctx->icmd); + nvkm_wr32(device, 0x404154, 0x00000400); + gf100_gr_mthd(gr, grctx->mthd); - nv_mask(priv, 0x419e00, 0x00808080, 0x00808080); - nv_mask(priv, 0x419ccc, 0x80000000, 0x80000000); - nv_mask(priv, 0x419f80, 0x80000000, 0x80000000); - nv_mask(priv, 0x419f88, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419e00, 0x00808080, 0x00808080); + nvkm_mask(device, 0x419ccc, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f80, 0x80000000, 0x80000000); + nvkm_mask(device, 0x419f88, 0x80000000, 0x80000000); } -struct nvkm_oclass * -gm107_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0x08), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gm107_grctx = { .main = gm107_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gm107_grctx_pack_hub, @@ -1025,4 +1018,4 @@ gm107_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0xaa0, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c index ea8e66151aa8..170cbfdbe1ae 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm204.c @@ -918,17 +918,18 @@ gm204_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ -static void -gm204_grctx_generate_tpcid(struct gf100_gr_priv *priv) +void +gm204_grctx_generate_tpcid(struct gf100_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; int gpc, tpc, id; for (tpc = 0, id = 0; tpc < 4; tpc++) { - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - if (tpc < priv->tpc_nr[gpc]) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x698), id); - nv_wr32(priv, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x088), id); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + if (tpc < gr->tpc_nr[gpc]) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x698), id); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), id); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), id); id++; } } @@ -936,101 +937,95 @@ gm204_grctx_generate_tpcid(struct gf100_gr_priv *priv) } static void -gm204_grctx_generate_rop_active_fbps(struct gf100_gr_priv *priv) +gm204_grctx_generate_rop_active_fbps(struct gf100_gr *gr) { - const u32 fbp_count = nv_rd32(priv, 0x12006c); - nv_mask(priv, 0x408850, 0x0000000f, fbp_count); /* zrop */ - nv_mask(priv, 0x408958, 0x0000000f, fbp_count); /* crop */ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 fbp_count = nvkm_rd32(device, 0x12006c); + nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */ + nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */ } -static void -gm204_grctx_generate_405b60(struct gf100_gr_priv *priv) +void +gm204_grctx_generate_405b60(struct gf100_gr *gr) { - const u32 dist_nr = DIV_ROUND_UP(priv->tpc_total, 4); - u32 dist[TPC_MAX] = {}; + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4); + u32 dist[TPC_MAX / 4] = {}; 
u32 gpcs[GPC_MAX] = {}; u8 tpcnr[GPC_MAX]; int tpc, gpc, i; - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); /* won't result in the same distribution as the binary driver where * some of the gpcs have more tpcs than others, but this shall do * for the moment. the code for earlier gpus has this issue too. */ - for (gpc = -1, i = 0; i < priv->tpc_total; i++) { + for (gpc = -1, i = 0; i < gr->tpc_total; i++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while(!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8); gpcs[gpc] |= i << (tpc * 8); } for (i = 0; i < dist_nr; i++) - nv_wr32(priv, 0x405b60 + (i * 4), dist[i]); - for (i = 0; i < priv->gpc_nr; i++) - nv_wr32(priv, 0x405ba0 + (i * 4), gpcs[i]); + nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]); + for (i = 0; i < gr->gpc_nr; i++) + nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]); } void -gm204_grctx_generate_main(struct gf100_gr_priv *priv, struct gf100_grctx *info) +gm204_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) { - struct gf100_grctx_oclass *oclass = (void *)nv_engine(priv)->cclass; + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; u32 tmp; int i; - gf100_gr_mmio(priv, oclass->hub); - gf100_gr_mmio(priv, oclass->gpc); - gf100_gr_mmio(priv, oclass->zcull); - gf100_gr_mmio(priv, oclass->tpc); - gf100_gr_mmio(priv, oclass->ppc); + gf100_gr_mmio(gr, grctx->hub); + gf100_gr_mmio(gr, grctx->gpc); + gf100_gr_mmio(gr, grctx->zcull); + gf100_gr_mmio(gr, grctx->tpc); + gf100_gr_mmio(gr, grctx->ppc); - nv_wr32(priv, 0x404154, 0x00000000); + nvkm_wr32(device, 0x404154, 0x00000000); - oclass->bundle(info); - oclass->pagepool(info); - oclass->attrib(info); - oclass->unkn(priv); + grctx->bundle(info); + grctx->pagepool(info); + grctx->attrib(info); + grctx->unkn(gr); - gm204_grctx_generate_tpcid(priv); - gf100_grctx_generate_r406028(priv); - gk104_grctx_generate_r418bb8(priv); + gm204_grctx_generate_tpcid(gr); + gf100_grctx_generate_r406028(gr); + gk104_grctx_generate_r418bb8(gr); for (i = 0; i < 8; i++) - nv_wr32(priv, 0x4064d0 + (i * 0x04), 0x00000000); - nv_wr32(priv, 0x406500, 0x00000000); + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); + nvkm_wr32(device, 0x406500, 0x00000000); - nv_wr32(priv, 0x405b00, (priv->tpc_total << 8) | priv->gpc_nr); + nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); - gm204_grctx_generate_rop_active_fbps(priv); + gm204_grctx_generate_rop_active_fbps(gr); - for (tmp = 0, i = 0; i < priv->gpc_nr; i++) - tmp |= ((1 << priv->tpc_nr[i]) - 1) << (i * 4); - nv_wr32(priv, 0x4041c4, tmp); + for (tmp = 0, i = 0; i < gr->gpc_nr; i++) + tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); + nvkm_wr32(device, 0x4041c4, tmp); - gm204_grctx_generate_405b60(priv); + gm204_grctx_generate_405b60(gr); - gf100_gr_icmd(priv, oclass->icmd); - nv_wr32(priv, 0x404154, 0x00000800); - gf100_gr_mthd(priv, oclass->mthd); + gf100_gr_icmd(gr, grctx->icmd); + nvkm_wr32(device, 0x404154, 0x00000800); + gf100_gr_mthd(gr, grctx->mthd); - nv_mask(priv, 0x418e94, 0xffffffff, 0xc4230000); - nv_mask(priv, 0x418e4c, 0xffffffff, 0x70000000); + nvkm_mask(device, 0x418e94, 0xffffffff, 0xc4230000); + nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); } -struct nvkm_oclass * -gm204_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0x24), - .base.ofuncs = 
&(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gm204_grctx = { .main = gm204_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gm204_grctx_pack_hub, @@ -1051,4 +1046,4 @@ gm204_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x400, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c index 91ec41617943..d6be6034c2c2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm206.c @@ -49,17 +49,8 @@ gm206_grctx_pack_gpc[] = { {} }; -struct nvkm_oclass * -gm206_grctx_oclass = &(struct gf100_grctx_oclass) { - .base.handle = NV_ENGCTX(GR, 0x26), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_context_ctor, - .dtor = gf100_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +const struct gf100_grctx_func +gm206_grctx = { .main = gm204_grctx_generate_main, .unkn = gk104_grctx_generate_unkn, .hub = gm204_grctx_pack_hub, @@ -80,4 +71,4 @@ gm206_grctx_oclass = &(struct gf100_grctx_oclass) { .attrib_nr = 0x400, .alpha_nr_max = 0x1800, .alpha_nr = 0x1000, -}.base; +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c new file mode 100644 index 000000000000..670260402538 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "ctxgf100.h" + +static void +gm20b_grctx_generate_r406028(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 tpc_per_gpc = 0; + int i; + + for (i = 0; i < gr->gpc_nr; i++) + tpc_per_gpc |= gr->tpc_nr[i] << (4 * i); + + nvkm_wr32(device, 0x406028, tpc_per_gpc); + nvkm_wr32(device, 0x405870, tpc_per_gpc); +} + +static void +gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const struct gf100_grctx_func *grctx = gr->func->grctx; + int idle_timeout_save; + int i, tmp; + + gf100_gr_mmio(gr, gr->fuc_sw_ctx); + + gf100_gr_wait_idle(gr); + + idle_timeout_save = nvkm_rd32(device, 0x404154); + nvkm_wr32(device, 0x404154, 0x00000000); + + grctx->attrib(info); + + grctx->unkn(gr); + + gm204_grctx_generate_tpcid(gr); + gm20b_grctx_generate_r406028(gr); + gk104_grctx_generate_r418bb8(gr); + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); + + nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); + + gk104_grctx_generate_rop_active_fbps(gr); + nvkm_wr32(device, 0x408908, nvkm_rd32(device, 0x410108) | 0x80000000); + + for (tmp = 0, i = 0; i < gr->gpc_nr; i++) + tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 4); + nvkm_wr32(device, 0x4041c4, tmp); + + gm204_grctx_generate_405b60(gr); + + gf100_gr_wait_idle(gr); + + nvkm_wr32(device, 0x404154, idle_timeout_save); + gf100_gr_wait_idle(gr); + + gf100_gr_mthd(gr, gr->fuc_method); + gf100_gr_wait_idle(gr); + + gf100_gr_icmd(gr, gr->fuc_bundle); + grctx->pagepool(info); + grctx->bundle(info); +} + +const struct gf100_grctx_func +gm20b_grctx = { + .main = gm20b_grctx_generate_main, + .unkn = gk104_grctx_generate_unkn, + .bundle = gm107_grctx_generate_bundle, + .bundle_size = 0x1800, + .bundle_min_gpm_fifo_depth = 0x182, + .bundle_token_limit = 0x1c0, + .pagepool = gm107_grctx_generate_pagepool, + .pagepool_size = 0x8000, + .attrib = gm107_grctx_generate_attrib, + .attrib_nr_max = 0x600, + .attrib_nr = 0x400, + .alpha_nr_max = 0xc00, + .alpha_nr = 0x800, +}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c index dc31462afe65..80a6b017af64 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.c @@ -111,7 +111,6 @@ #include "ctxnv40.h" #include "nv40.h" -#include <core/device.h> /* TODO: * - get vs count from 0x1540 @@ -583,13 +582,13 @@ nv40_gr_construct_shader(struct nvkm_grctx *ctx) offset += 0x0280/4; for (i = 0; i < 16; i++, offset += 2) - nv_wo32(obj, offset * 4, 0x3f800000); + nvkm_wo32(obj, offset * 4, 0x3f800000); for (vs = 0; vs < vs_nr; vs++, offset += vs_len) { for (i = 0; i < vs_nr_b0 * 6; i += 6) - nv_wo32(obj, (offset + b0_offset + i) * 4, 0x00000001); + nvkm_wo32(obj, (offset + b0_offset + i) * 4, 0x00000001); for (i = 0; i < vs_nr_b1 * 4; i += 4) - nv_wo32(obj, (offset + b1_offset + i) * 4, 0x3f800000); + nvkm_wo32(obj, (offset + b1_offset + i) * 4, 0x3f800000); } } @@ -675,7 +674,7 @@ nv40_grctx_init(struct nvkm_device *device, u32 *size) struct nvkm_grctx ctx = { .device = device, .mode = NVKM_GRCTX_PROG, - .data = ctxprog, + .ucode = ctxprog, .ctxprog_max = 256, }; @@ -684,9 +683,9 @@ nv40_grctx_init(struct nvkm_device *device, u32 *size) nv40_grctx_generate(&ctx); - nv_wr32(device, 0x400324, 0); + nvkm_wr32(device, 0x400324, 0); for (i = 0; i < ctx.ctxprog_len; i++) - nv_wr32(device, 0x400328, ctxprog[i]); + nvkm_wr32(device, 0x400328, 
ctxprog[i]); *size = ctx.ctxvals_pos * 4; kfree(ctxprog); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h index 8a89961956af..50e808e9f926 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv40.h @@ -9,7 +9,8 @@ struct nvkm_grctx { NVKM_GRCTX_PROG, NVKM_GRCTX_VALS } mode; - void *data; + u32 *ucode; + struct nvkm_gpuobj *data; u32 ctxprog_max; u32 ctxprog_len; @@ -22,7 +23,7 @@ struct nvkm_grctx { static inline void cp_out(struct nvkm_grctx *ctx, u32 inst) { - u32 *ctxprog = ctx->data; + u32 *ctxprog = ctx->ucode; if (ctx->mode != NVKM_GRCTX_PROG) return; @@ -56,7 +57,7 @@ cp_ctx(struct nvkm_grctx *ctx, u32 reg, u32 length) static inline void cp_name(struct nvkm_grctx *ctx, int name) { - u32 *ctxprog = ctx->data; + u32 *ctxprog = ctx->ucode; int i; if (ctx->mode != NVKM_GRCTX_PROG) @@ -124,6 +125,6 @@ gr_def(struct nvkm_grctx *ctx, u32 reg, u32 val) reg = (reg - 0x00400000) / 4; reg = (reg - ctx->ctxprog_reg) + ctx->ctxvals_base; - nv_wo32(ctx->data, reg * 4, val); + nvkm_wo32(ctx->data, reg * 4, val); } #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c index 9c9528d2cd90..1e13278cf306 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c @@ -107,7 +107,6 @@ #include "ctxnv40.h" -#include <core/device.h> #include <subdev/fb.h> #define IS_NVA3F(x) (((x) > 0xa0 && (x) < 0xaa) || (x) == 0xaf) @@ -269,7 +268,7 @@ nv50_grctx_init(struct nvkm_device *device, u32 *size) struct nvkm_grctx ctx = { .device = device, .mode = NVKM_GRCTX_PROG, - .data = ctxprog, + .ucode = ctxprog, .ctxprog_max = 512, }; @@ -277,9 +276,9 @@ nv50_grctx_init(struct nvkm_device *device, u32 *size) return -ENOMEM; nv50_grctx_generate(&ctx); - nv_wr32(device, 0x400324, 0); + nvkm_wr32(device, 0x400324, 0); for (i = 0; i < ctx.ctxprog_len; i++) - nv_wr32(device, 0x400328, ctxprog[i]); + nvkm_wr32(device, 0x400328, ctxprog[i]); *size = ctx.ctxvals_pos * 4; kfree(ctxprog); return 0; @@ -299,7 +298,7 @@ nv50_gr_construct_mmio(struct nvkm_grctx *ctx) struct nvkm_device *device = ctx->device; int i, j; int offset, base; - u32 units = nv_rd32 (ctx->device, 0x1540); + u32 units = nvkm_rd32(device, 0x1540); /* 0800: DISPATCH */ cp_ctx(ctx, 0x400808, 7); @@ -570,7 +569,7 @@ nv50_gr_construct_mmio(struct nvkm_grctx *ctx) else if (device->chipset < 0xa0) gr_def(ctx, 0x407d08, 0x00390040); else { - if (nvkm_fb(device)->ram->type != NV_MEM_TYPE_GDDR5) + if (device->fb->ram->type != NVKM_RAM_TYPE_GDDR5) gr_def(ctx, 0x407d08, 0x003d0040); else gr_def(ctx, 0x407d08, 0x003c0040); @@ -784,9 +783,10 @@ nv50_gr_construct_mmio(struct nvkm_grctx *ctx) static void dd_emit(struct nvkm_grctx *ctx, int num, u32 val) { int i; - if (val && ctx->mode == NVKM_GRCTX_VALS) + if (val && ctx->mode == NVKM_GRCTX_VALS) { for (i = 0; i < num; i++) - nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + i), val); + nvkm_wo32(ctx->data, 4 * (ctx->ctxvals_pos + i), val); + } ctx->ctxvals_pos += num; } @@ -1156,9 +1156,10 @@ nv50_gr_construct_mmio_ddata(struct nvkm_grctx *ctx) static void xf_emit(struct nvkm_grctx *ctx, int num, u32 val) { int i; - if (val && ctx->mode == NVKM_GRCTX_VALS) + if (val && ctx->mode == NVKM_GRCTX_VALS) { for (i = 0; i < num; i++) - nv_wo32(ctx->data, 4 * (ctx->ctxvals_pos + (i << 3)), val); + nvkm_wo32(ctx->data, 4 * (ctx->ctxvals_pos + (i << 3)), val); + } ctx->ctxvals_pos += num << 3; } @@ 
-1190,7 +1191,7 @@ nv50_gr_construct_xfer1(struct nvkm_grctx *ctx) int i; int offset; int size = 0; - u32 units = nv_rd32 (ctx->device, 0x1540); + u32 units = nvkm_rd32(device, 0x1540); offset = (ctx->ctxvals_pos+0x3f)&~0x3f; ctx->ctxvals_base = offset; @@ -3273,7 +3274,7 @@ nv50_gr_construct_xfer2(struct nvkm_grctx *ctx) struct nvkm_device *device = ctx->device; int i; u32 offset; - u32 units = nv_rd32 (ctx->device, 0x1540); + u32 units = nvkm_rd32(device, 0x1540); int size = 0; offset = (ctx->ctxvals_pos+0x3f)&~0x3f; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c new file mode 100644 index 000000000000..ce913300539f --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c @@ -0,0 +1,196 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + */ +#include "nv50.h" + +#include <subdev/timer.h> + +static const struct nvkm_bitfield nv50_gr_status[] = { + { 0x00000001, "BUSY" }, /* set when any bit is set */ + { 0x00000002, "DISPATCH" }, + { 0x00000004, "UNK2" }, + { 0x00000008, "UNK3" }, + { 0x00000010, "UNK4" }, + { 0x00000020, "UNK5" }, + { 0x00000040, "M2MF" }, + { 0x00000080, "UNK7" }, + { 0x00000100, "CTXPROG" }, + { 0x00000200, "VFETCH" }, + { 0x00000400, "CCACHE_PREGEOM" }, + { 0x00000800, "STRMOUT_VATTR_POSTGEOM" }, + { 0x00001000, "VCLIP" }, + { 0x00002000, "RATTR_APLANE" }, + { 0x00004000, "TRAST" }, + { 0x00008000, "CLIPID" }, + { 0x00010000, "ZCULL" }, + { 0x00020000, "ENG2D" }, + { 0x00040000, "RMASK" }, + { 0x00080000, "TPC_RAST" }, + { 0x00100000, "TPC_PROP" }, + { 0x00200000, "TPC_TEX" }, + { 0x00400000, "TPC_GEOM" }, + { 0x00800000, "TPC_MP" }, + { 0x01000000, "ROP" }, + {} +}; + +static const struct nvkm_bitfield +nv50_gr_vstatus_0[] = { + { 0x01, "VFETCH" }, + { 0x02, "CCACHE" }, + { 0x04, "PREGEOM" }, + { 0x08, "POSTGEOM" }, + { 0x10, "VATTR" }, + { 0x20, "STRMOUT" }, + { 0x40, "VCLIP" }, + {} +}; + +static const struct nvkm_bitfield +nv50_gr_vstatus_1[] = { + { 0x01, "TPC_RAST" }, + { 0x02, "TPC_PROP" }, + { 0x04, "TPC_TEX" }, + { 0x08, "TPC_GEOM" }, + { 0x10, "TPC_MP" }, + {} +}; + +static const struct nvkm_bitfield +nv50_gr_vstatus_2[] = { + { 0x01, "RATTR" }, + { 0x02, "APLANE" }, + { 0x04, "TRAST" }, + { 0x08, "CLIPID" }, + { 0x10, "ZCULL" }, + { 0x20, "ENG2D" }, + { 0x40, "RMASK" }, + { 0x80, "ROP" }, + {} +}; + +static void +nvkm_gr_vstatus_print(struct nv50_gr *gr, int r, + const struct nvkm_bitfield *units, u32 status) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + u32 stat = status; + u8 mask = 0x00; + char msg[64]; + int i; + + for (i = 0; units[i].name && status; i++) { + if ((status & 7) == 1) + mask |= (1 << i); + status >>= 3; + } + + nvkm_snprintbf(msg, sizeof(msg), units, mask); + nvkm_error(subdev, "PGRAPH_VSTATUS%d: %08x [%s]\n", r, stat, msg); +} + +int +g84_gr_tlb_flush(struct nvkm_gr *base) +{ + struct nv50_gr *gr = nv50_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_timer *tmr = device->timer; + bool idle, timeout = false; + unsigned long flags; + char status[128]; + u64 start; + u32 tmp; + + spin_lock_irqsave(&gr->lock, flags); + nvkm_mask(device, 0x400500, 0x00000001, 0x00000000); + + start = nvkm_timer_read(tmr); + do { + idle = true; + + for (tmp = nvkm_rd32(device, 0x400380); tmp && idle; tmp >>= 3) { + if ((tmp & 7) == 1) + idle = false; + } + + for (tmp = nvkm_rd32(device, 0x400384); tmp && idle; tmp >>= 3) { + if ((tmp & 7) == 1) + idle = false; + } + + for (tmp = nvkm_rd32(device, 0x400388); tmp && idle; tmp >>= 3) { + if ((tmp & 7) == 1) + idle = false; + } + } while (!idle && + !(timeout = nvkm_timer_read(tmr) - start > 2000000000)); + + if (timeout) { + nvkm_error(subdev, "PGRAPH TLB flush idle timeout fail\n"); + + tmp = nvkm_rd32(device, 0x400700); + nvkm_snprintbf(status, sizeof(status), nv50_gr_status, tmp); + nvkm_error(subdev, "PGRAPH_STATUS %08x [%s]\n", tmp, status); + + nvkm_gr_vstatus_print(gr, 0, nv50_gr_vstatus_0, + nvkm_rd32(device, 0x400380)); + nvkm_gr_vstatus_print(gr, 1, nv50_gr_vstatus_1, + nvkm_rd32(device, 0x400384)); + nvkm_gr_vstatus_print(gr, 2, nv50_gr_vstatus_2, + nvkm_rd32(device, 0x400388)); + } + + + nvkm_wr32(device, 0x100c80, 0x00000001); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x100c80) & 0x00000001)) + break; + 
); + nvkm_mask(device, 0x400500, 0x00000001, 0x00000001); + spin_unlock_irqrestore(&gr->lock, flags); + return timeout ? -EBUSY : 0; +} + +static const struct nvkm_gr_func +g84_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .tlb_flush = g84_gr_tlb_flush, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, 0x8297, &nv50_gr_object }, + {} + } +}; + +int +g84_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(&g84_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index 5606c25e5d02..f1358a564e3e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -26,13 +26,12 @@ #include "fuc/os.h" #include <core/client.h> -#include <core/device.h> -#include <core/handle.h> #include <core/option.h> -#include <engine/fifo.h> #include <subdev/fb.h> #include <subdev/mc.h> +#include <subdev/pmu.h> #include <subdev/timer.h> +#include <engine/fifo.h> #include <nvif/class.h> #include <nvif/unpack.h> @@ -42,35 +41,36 @@ ******************************************************************************/ static void -gf100_gr_zbc_clear_color(struct gf100_gr_priv *priv, int zbc) +gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc) { - if (priv->zbc_color[zbc].format) { - nv_wr32(priv, 0x405804, priv->zbc_color[zbc].ds[0]); - nv_wr32(priv, 0x405808, priv->zbc_color[zbc].ds[1]); - nv_wr32(priv, 0x40580c, priv->zbc_color[zbc].ds[2]); - nv_wr32(priv, 0x405810, priv->zbc_color[zbc].ds[3]); - } - nv_wr32(priv, 0x405814, priv->zbc_color[zbc].format); - nv_wr32(priv, 0x405820, zbc); - nv_wr32(priv, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */ + struct nvkm_device *device = gr->base.engine.subdev.device; + if (gr->zbc_color[zbc].format) { + nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]); + nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]); + nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]); + nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]); + } + nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format); + nvkm_wr32(device, 0x405820, zbc); + nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */ } static int -gf100_gr_zbc_color_get(struct gf100_gr_priv *priv, int format, +gf100_gr_zbc_color_get(struct gf100_gr *gr, int format, const u32 ds[4], const u32 l2[4]) { - struct nvkm_ltc *ltc = nvkm_ltc(priv); + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { - if (priv->zbc_color[i].format) { - if (priv->zbc_color[i].format != format) + if (gr->zbc_color[i].format) { + if (gr->zbc_color[i].format != format) continue; - if (memcmp(priv->zbc_color[i].ds, ds, sizeof( - priv->zbc_color[i].ds))) + if (memcmp(gr->zbc_color[i].ds, ds, sizeof( + gr->zbc_color[i].ds))) continue; - if (memcmp(priv->zbc_color[i].l2, l2, sizeof( - priv->zbc_color[i].l2))) { + if (memcmp(gr->zbc_color[i].l2, l2, sizeof( + gr->zbc_color[i].l2))) { WARN_ON(1); return -EINVAL; } @@ -83,38 +83,39 @@ gf100_gr_zbc_color_get(struct gf100_gr_priv *priv, int format, if (zbc < 0) return zbc; - memcpy(priv->zbc_color[zbc].ds, ds, sizeof(priv->zbc_color[zbc].ds)); - memcpy(priv->zbc_color[zbc].l2, l2, sizeof(priv->zbc_color[zbc].l2)); - priv->zbc_color[zbc].format = 
format; - ltc->zbc_color_get(ltc, zbc, l2); - gf100_gr_zbc_clear_color(priv, zbc); + memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds)); + memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2)); + gr->zbc_color[zbc].format = format; + nvkm_ltc_zbc_color_get(ltc, zbc, l2); + gf100_gr_zbc_clear_color(gr, zbc); return zbc; } static void -gf100_gr_zbc_clear_depth(struct gf100_gr_priv *priv, int zbc) +gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc) { - if (priv->zbc_depth[zbc].format) - nv_wr32(priv, 0x405818, priv->zbc_depth[zbc].ds); - nv_wr32(priv, 0x40581c, priv->zbc_depth[zbc].format); - nv_wr32(priv, 0x405820, zbc); - nv_wr32(priv, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */ + struct nvkm_device *device = gr->base.engine.subdev.device; + if (gr->zbc_depth[zbc].format) + nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds); + nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format); + nvkm_wr32(device, 0x405820, zbc); + nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */ } static int -gf100_gr_zbc_depth_get(struct gf100_gr_priv *priv, int format, +gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format, const u32 ds, const u32 l2) { - struct nvkm_ltc *ltc = nvkm_ltc(priv); + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int zbc = -ENOSPC, i; for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) { - if (priv->zbc_depth[i].format) { - if (priv->zbc_depth[i].format != format) + if (gr->zbc_depth[i].format) { + if (gr->zbc_depth[i].format != format) continue; - if (priv->zbc_depth[i].ds != ds) + if (gr->zbc_depth[i].ds != ds) continue; - if (priv->zbc_depth[i].l2 != l2) { + if (gr->zbc_depth[i].l2 != l2) { WARN_ON(1); return -EINVAL; } @@ -127,11 +128,11 @@ gf100_gr_zbc_depth_get(struct gf100_gr_priv *priv, int format, if (zbc < 0) return zbc; - priv->zbc_depth[zbc].format = format; - priv->zbc_depth[zbc].ds = ds; - priv->zbc_depth[zbc].l2 = l2; - ltc->zbc_depth_get(ltc, zbc, l2); - gf100_gr_zbc_clear_depth(priv, zbc); + gr->zbc_depth[zbc].format = format; + gr->zbc_depth[zbc].ds = ds; + gr->zbc_depth[zbc].l2 = l2; + nvkm_ltc_zbc_depth_get(ltc, zbc, l2); + gf100_gr_zbc_clear_depth(gr, zbc); return zbc; } @@ -142,7 +143,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr_priv *priv, int format, static int gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size) { - struct gf100_gr_priv *priv = (void *)object->engine; + struct gf100_gr *gr = (void *)object->engine; union { struct fermi_a_zbc_color_v0 v0; } *args = data; @@ -169,7 +170,7 @@ gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size) case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8: case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10: case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11: - ret = gf100_gr_zbc_color_get(priv, args->v0.format, + ret = gf100_gr_zbc_color_get(gr, args->v0.format, args->v0.ds, args->v0.l2); if (ret >= 0) { @@ -188,7 +189,7 @@ gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size) static int gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size) { - struct gf100_gr_priv *priv = (void *)object->engine; + struct gf100_gr *gr = (void *)object->engine; union { struct fermi_a_zbc_depth_v0 v0; } *args = data; @@ -197,7 +198,7 @@ gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size) if (nvif_unpack(args->v0, 0, 0, false)) { switch (args->v0.format) { case FERMI_A_ZBC_DEPTH_V0_FMT_FP32: - ret = gf100_gr_zbc_depth_get(priv, args->v0.format, + ret = gf100_gr_zbc_depth_get(gr, 
args->v0.format, args->v0.ds, args->v0.l2); return (ret >= 0) ? 0 : -ENOSPC; @@ -223,106 +224,176 @@ gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size) return -EINVAL; } -struct nvkm_ofuncs -gf100_fermi_ofuncs = { - .ctor = _nvkm_object_ctor, - .dtor = nvkm_object_destroy, - .init = nvkm_object_init, - .fini = nvkm_object_fini, +const struct nvkm_object_func +gf100_fermi = { .mthd = gf100_fermi_mthd, }; -static int -gf100_gr_set_shader_exceptions(struct nvkm_object *object, u32 mthd, - void *pdata, u32 size) +static void +gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data) { - struct gf100_gr_priv *priv = (void *)object->engine; - if (size >= sizeof(u32)) { - u32 data = *(u32 *)pdata ? 0xffffffff : 0x00000000; - nv_wr32(priv, 0x419e44, data); - nv_wr32(priv, 0x419e4c, data); - return 0; + nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000); + nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000); +} + +static bool +gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data) +{ + switch (class & 0x00ff) { + case 0x97: + case 0xc0: + switch (mthd) { + case 0x1528: + gf100_gr_mthd_set_shader_exceptions(device, data); + return true; + default: + break; + } + break; + default: + break; } - return -EINVAL; + return false; } -struct nvkm_omthds -gf100_gr_9097_omthds[] = { - { 0x1528, 0x1528, gf100_gr_set_shader_exceptions }, - {} -}; +static int +gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass) +{ + struct gf100_gr *gr = gf100_gr(base); + int c = 0; -struct nvkm_omthds -gf100_gr_90c0_omthds[] = { - { 0x1528, 0x1528, gf100_gr_set_shader_exceptions }, - {} -}; + while (gr->func->sclass[c].oclass) { + if (c++ == index) { + *sclass = gr->func->sclass[index]; + return index; + } + } -struct nvkm_oclass -gf100_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { FERMI_MEMORY_TO_MEMORY_FORMAT_A, &nvkm_object_ofuncs }, - { FERMI_A, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { FERMI_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; + return c; +} /******************************************************************************* * PGRAPH context ******************************************************************************/ -int -gf100_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *args, u32 size, - struct nvkm_object **pobject) +static int +gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) { - struct nvkm_vm *vm = nvkm_client(parent)->vm; - struct gf100_gr_priv *priv = (void *)engine; - struct gf100_gr_data *data = priv->mmio_data; - struct gf100_gr_mmio *mmio = priv->mmio_list; - struct gf100_gr_chan *chan; + struct gf100_gr_chan *chan = gf100_gr_chan(object); + struct gf100_gr *gr = chan->gr; int ret, i; - /* allocate memory for context, and fill with default values */ - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, - priv->size, 0x100, - NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size, + align, false, parent, pgpuobj); if (ret) return ret; + nvkm_kmap(*pgpuobj); + for (i = 0; i < gr->size; i += 4) + nvkm_wo32(*pgpuobj, i, gr->data[i / 4]); + + if (!gr->firmware) { + nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2); + nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8); + } else { + nvkm_wo32(*pgpuobj, 0xf4, 0); + nvkm_wo32(*pgpuobj, 0xf8, 0); + 
nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2); + nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset)); + nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset)); + nvkm_wo32(*pgpuobj, 0x1c, 1); + nvkm_wo32(*pgpuobj, 0x20, 0); + nvkm_wo32(*pgpuobj, 0x28, 0); + nvkm_wo32(*pgpuobj, 0x2c, 0); + } + nvkm_done(*pgpuobj); + return 0; +} + +static void * +gf100_gr_chan_dtor(struct nvkm_object *object) +{ + struct gf100_gr_chan *chan = gf100_gr_chan(object); + int i; + + for (i = 0; i < ARRAY_SIZE(chan->data); i++) { + if (chan->data[i].vma.node) { + nvkm_vm_unmap(&chan->data[i].vma); + nvkm_vm_put(&chan->data[i].vma); + } + nvkm_memory_del(&chan->data[i].mem); + } + + if (chan->mmio_vma.node) { + nvkm_vm_unmap(&chan->mmio_vma); + nvkm_vm_put(&chan->mmio_vma); + } + nvkm_memory_del(&chan->mmio); + return chan; +} + +static const struct nvkm_object_func +gf100_gr_chan = { + .dtor = gf100_gr_chan_dtor, + .bind = gf100_gr_chan_bind, +}; + +static int +gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, + struct nvkm_object **pobject) +{ + struct gf100_gr *gr = gf100_gr(base); + struct gf100_gr_data *data = gr->mmio_data; + struct gf100_gr_mmio *mmio = gr->mmio_list; + struct gf100_gr_chan *chan; + struct nvkm_device *device = gr->base.engine.subdev.device; + int ret, i; + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object); + chan->gr = gr; + *pobject = &chan->object; + /* allocate memory for a "mmio list" buffer that's used by the HUB * fuc to modify some per-context register settings on first load * of the context. */ - ret = nvkm_gpuobj_new(nv_object(chan), NULL, 0x1000, 0x100, 0, - &chan->mmio); + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100, + false, &chan->mmio); if (ret) return ret; - ret = nvkm_gpuobj_map_vm(nv_gpuobj(chan->mmio), vm, - NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS, - &chan->mmio_vma); + ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW | + NV_MEM_ACCESS_SYS, &chan->mmio_vma); if (ret) return ret; + nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0); + /* allocate buffers referenced by mmio list */ - for (i = 0; data->size && i < ARRAY_SIZE(priv->mmio_data); i++) { - ret = nvkm_gpuobj_new(nv_object(chan), NULL, data->size, - data->align, 0, &chan->data[i].mem); + for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) { + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, + data->size, data->align, false, + &chan->data[i].mem); if (ret) return ret; - ret = nvkm_gpuobj_map_vm(chan->data[i].mem, vm, data->access, - &chan->data[i].vma); + ret = nvkm_vm_get(fifoch->vm, + nvkm_memory_size(chan->data[i].mem), 12, + data->access, &chan->data[i].vma); if (ret) return ret; + nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0); data++; } /* finally, fill in the mmio list and point the context at it */ - for (i = 0; mmio->addr && i < ARRAY_SIZE(priv->mmio_list); i++) { + nvkm_kmap(chan->mmio); + for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) { u32 addr = mmio->addr; u32 data = mmio->data; @@ -331,49 +402,14 @@ gf100_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, data |= info >> mmio->shift; } - nv_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); - nv_wo32(chan->mmio, chan->mmio_nr++ * 4, data); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr); + nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data); mmio++; } - - for (i = 0; i < priv->size; i += 4) - nv_wo32(chan, i, 
priv->data[i / 4]); - - if (!priv->firmware) { - nv_wo32(chan, 0x00, chan->mmio_nr / 2); - nv_wo32(chan, 0x04, chan->mmio_vma.offset >> 8); - } else { - nv_wo32(chan, 0xf4, 0); - nv_wo32(chan, 0xf8, 0); - nv_wo32(chan, 0x10, chan->mmio_nr / 2); - nv_wo32(chan, 0x14, lower_32_bits(chan->mmio_vma.offset)); - nv_wo32(chan, 0x18, upper_32_bits(chan->mmio_vma.offset)); - nv_wo32(chan, 0x1c, 1); - nv_wo32(chan, 0x20, 0); - nv_wo32(chan, 0x28, 0); - nv_wo32(chan, 0x2c, 0); - } - + nvkm_done(chan->mmio); return 0; } -void -gf100_gr_context_dtor(struct nvkm_object *object) -{ - struct gf100_gr_chan *chan = (void *)object; - int i; - - for (i = 0; i < ARRAY_SIZE(chan->data); i++) { - nvkm_gpuobj_unmap(&chan->data[i].vma); - nvkm_gpuobj_ref(NULL, &chan->data[i].mem); - } - - nvkm_gpuobj_unmap(&chan->mmio_vma); - nvkm_gpuobj_ref(NULL, &chan->mmio); - - nvkm_gr_context_destroy(&chan->base); -} - /******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -635,7 +671,7 @@ gf100_gr_pack_mmio[] = { ******************************************************************************/ void -gf100_gr_zbc_init(struct gf100_gr_priv *priv) +gf100_gr_zbc_init(struct gf100_gr *gr) { const u32 zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; @@ -645,27 +681,62 @@ gf100_gr_zbc_init(struct gf100_gr_priv *priv) 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 }; - struct nvkm_ltc *ltc = nvkm_ltc(priv); + struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc; int index; - if (!priv->zbc_color[0].format) { - gf100_gr_zbc_color_get(priv, 1, & zero[0], &zero[4]); - gf100_gr_zbc_color_get(priv, 2, & one[0], &one[4]); - gf100_gr_zbc_color_get(priv, 4, &f32_0[0], &f32_0[4]); - gf100_gr_zbc_color_get(priv, 4, &f32_1[0], &f32_1[4]); - gf100_gr_zbc_depth_get(priv, 1, 0x00000000, 0x00000000); - gf100_gr_zbc_depth_get(priv, 1, 0x3f800000, 0x3f800000); + if (!gr->zbc_color[0].format) { + gf100_gr_zbc_color_get(gr, 1, & zero[0], &zero[4]); + gf100_gr_zbc_color_get(gr, 2, & one[0], &one[4]); + gf100_gr_zbc_color_get(gr, 4, &f32_0[0], &f32_0[4]); + gf100_gr_zbc_color_get(gr, 4, &f32_1[0], &f32_1[4]); + gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000); + gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000); } for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_color(priv, index); + gf100_gr_zbc_clear_color(gr, index); for (index = ltc->zbc_min; index <= ltc->zbc_max; index++) - gf100_gr_zbc_clear_depth(priv, index); + gf100_gr_zbc_clear_depth(gr, index); +} + +/** + * Wait until GR goes idle. GR is considered idle if it is disabled by the + * MC (0x200) register, or GR is not busy and a context switch is not in + * progress. 
+ */ +int +gf100_gr_wait_idle(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000); + bool gr_enabled, ctxsw_active, gr_busy; + + do { + /* + * required to make sure FIFO_ENGINE_STATUS (0x2640) is + * up-to-date + */ + nvkm_rd32(device, 0x400700); + + gr_enabled = nvkm_rd32(device, 0x200) & 0x1000; + ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000; + gr_busy = nvkm_rd32(device, 0x40060c) & 0x1; + + if (!gr_enabled || (!gr_busy && !ctxsw_active)) + return 0; + } while (time_before(jiffies, end_jiffies)); + + nvkm_error(subdev, + "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n", + gr_enabled, ctxsw_active, gr_busy); + return -EAGAIN; } void -gf100_gr_mmio(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) +gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p) { + struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *pack; const struct gf100_gr_init *init; @@ -673,43 +744,54 @@ gf100_gr_mmio(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) u32 next = init->addr + init->count * init->pitch; u32 addr = init->addr; while (addr < next) { - nv_wr32(priv, addr, init->data); + nvkm_wr32(device, addr, init->data); addr += init->pitch; } } } void -gf100_gr_icmd(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) +gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p) { + struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *pack; const struct gf100_gr_init *init; u32 data = 0; - nv_wr32(priv, 0x400208, 0x80000000); + nvkm_wr32(device, 0x400208, 0x80000000); pack_for_each_init(init, pack, p) { u32 next = init->addr + init->count * init->pitch; u32 addr = init->addr; if ((pack == p && init == p->init) || data != init->data) { - nv_wr32(priv, 0x400204, init->data); + nvkm_wr32(device, 0x400204, init->data); data = init->data; } while (addr < next) { - nv_wr32(priv, 0x400200, addr); - nv_wait(priv, 0x400700, 0x00000002, 0x00000000); + nvkm_wr32(device, 0x400200, addr); + /** + * Wait for GR to go idle after submitting a + * GO_IDLE bundle + */ + if ((addr & 0xffff) == 0xe100) + gf100_gr_wait_idle(gr); + nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x400700) & 0x00000004)) + break; + ); addr += init->pitch; } } - nv_wr32(priv, 0x400208, 0x00000000); + nvkm_wr32(device, 0x400208, 0x00000000); } void -gf100_gr_mthd(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) +gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p) { + struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *pack; const struct gf100_gr_init *init; u32 data = 0; @@ -720,79 +802,75 @@ gf100_gr_mthd(struct gf100_gr_priv *priv, const struct gf100_gr_pack *p) u32 addr = init->addr; if ((pack == p && init == p->init) || data != init->data) { - nv_wr32(priv, 0x40448c, init->data); + nvkm_wr32(device, 0x40448c, init->data); data = init->data; } while (addr < next) { - nv_wr32(priv, 0x404488, ctrl | (addr << 14)); + nvkm_wr32(device, 0x404488, ctrl | (addr << 14)); addr += init->pitch; } } } u64 -gf100_gr_units(struct nvkm_gr *gr) +gf100_gr_units(struct nvkm_gr *base) { - struct gf100_gr_priv *priv = (void *)gr; + struct gf100_gr *gr = gf100_gr(base); u64 cfg; - cfg = (u32)priv->gpc_nr; - cfg |= (u32)priv->tpc_total << 8; - cfg |= (u64)priv->rop_nr << 32; + cfg = (u32)gr->gpc_nr; + cfg |= (u32)gr->tpc_total << 8; + cfg |= 
(u64)gr->rop_nr << 32; return cfg; } -static const struct nvkm_enum gk104_sked_error[] = { - { 7, "CONSTANT_BUFFER_SIZE" }, - { 9, "LOCAL_MEMORY_SIZE_POS" }, - { 10, "LOCAL_MEMORY_SIZE_NEG" }, - { 11, "WARP_CSTACK_SIZE" }, - { 12, "TOTAL_TEMP_SIZE" }, - { 13, "REGISTER_COUNT" }, - { 18, "TOTAL_THREADS" }, - { 20, "PROGRAM_OFFSET" }, - { 21, "SHARED_MEMORY_SIZE" }, - { 25, "SHARED_CONFIG_TOO_SMALL" }, - { 26, "TOTAL_REGISTER_COUNT" }, +static const struct nvkm_bitfield gk104_sked_error[] = { + { 0x00000080, "CONSTANT_BUFFER_SIZE" }, + { 0x00000200, "LOCAL_MEMORY_SIZE_POS" }, + { 0x00000400, "LOCAL_MEMORY_SIZE_NEG" }, + { 0x00000800, "WARP_CSTACK_SIZE" }, + { 0x00001000, "TOTAL_TEMP_SIZE" }, + { 0x00002000, "REGISTER_COUNT" }, + { 0x00040000, "TOTAL_THREADS" }, + { 0x00100000, "PROGRAM_OFFSET" }, + { 0x00200000, "SHARED_MEMORY_SIZE" }, + { 0x02000000, "SHARED_CONFIG_TOO_SMALL" }, + { 0x04000000, "TOTAL_REGISTER_COUNT" }, {} }; -static const struct nvkm_enum gf100_gpc_rop_error[] = { - { 1, "RT_PITCH_OVERRUN" }, - { 4, "RT_WIDTH_OVERRUN" }, - { 5, "RT_HEIGHT_OVERRUN" }, - { 7, "ZETA_STORAGE_TYPE_MISMATCH" }, - { 8, "RT_STORAGE_TYPE_MISMATCH" }, - { 10, "RT_LINEAR_MISMATCH" }, +static const struct nvkm_bitfield gf100_gpc_rop_error[] = { + { 0x00000002, "RT_PITCH_OVERRUN" }, + { 0x00000010, "RT_WIDTH_OVERRUN" }, + { 0x00000020, "RT_HEIGHT_OVERRUN" }, + { 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" }, + { 0x00000100, "RT_STORAGE_TYPE_MISMATCH" }, + { 0x00000400, "RT_LINEAR_MISMATCH" }, {} }; static void -gf100_gr_trap_gpc_rop(struct gf100_gr_priv *priv, int gpc) +gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc) { + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + char error[128]; u32 trap[4]; - int i; - trap[0] = nv_rd32(priv, GPC_UNIT(gpc, 0x0420)); - trap[1] = nv_rd32(priv, GPC_UNIT(gpc, 0x0434)); - trap[2] = nv_rd32(priv, GPC_UNIT(gpc, 0x0438)); - trap[3] = nv_rd32(priv, GPC_UNIT(gpc, 0x043c)); + trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff; + trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434)); + trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438)); + trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c)); - nv_error(priv, "GPC%d/PROP trap:", gpc); - for (i = 0; i <= 29; ++i) { - if (!(trap[0] & (1 << i))) - continue; - pr_cont(" "); - nvkm_enum_print(gf100_gpc_rop_error, i); - } - pr_cont("\n"); + nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]); - nv_error(priv, "x = %u, y = %u, format = %x, storage type = %x\n", - trap[1] & 0xffff, trap[1] >> 16, (trap[2] >> 8) & 0x3f, - trap[3] & 0xff); - nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); + nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, " + "format = %x, storage type = %x\n", + gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16, + (trap[2] >> 8) & 0x3f, trap[3] & 0xff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); } static const struct nvkm_enum gf100_mp_warp_error[] = { @@ -815,401 +893,418 @@ static const struct nvkm_bitfield gf100_mp_global_error[] = { }; static void -gf100_gr_trap_mp(struct gf100_gr_priv *priv, int gpc, int tpc) +gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc) { - u32 werr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x648)); - u32 gerr = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x650)); - - nv_error(priv, "GPC%i/TPC%i/MP trap:", gpc, tpc); - nvkm_bitfield_print(gf100_mp_global_error, gerr); - if (werr) { - pr_cont(" "); - nvkm_enum_print(gf100_mp_warp_error, werr & 0xffff); - } - pr_cont("\n"); - - nv_wr32(priv, 
TPC_UNIT(gpc, tpc, 0x648), 0x00000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x650), gerr); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648)); + u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650)); + const struct nvkm_enum *warp; + char glob[128]; + + nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr); + warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff); + + nvkm_error(subdev, "GPC%i/TPC%i/MP trap: " + "global %08x [%s] warp %04x [%s]\n", + gpc, tpc, gerr, glob, werr, warp ? warp->name : ""); + + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr); } static void -gf100_gr_trap_tpc(struct gf100_gr_priv *priv, int gpc, int tpc) +gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc) { - u32 stat = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x0508)); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508)); if (stat & 0x00000001) { - u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x0224)); - nv_error(priv, "GPC%d/TPC%d/TEX: 0x%08x\n", gpc, tpc, trap); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000); + u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224)); + nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000); stat &= ~0x00000001; } if (stat & 0x00000002) { - gf100_gr_trap_mp(priv, gpc, tpc); + gf100_gr_trap_mp(gr, gpc, tpc); stat &= ~0x00000002; } if (stat & 0x00000004) { - u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x0084)); - nv_error(priv, "GPC%d/TPC%d/POLY: 0x%08x\n", gpc, tpc, trap); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000); + u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084)); + nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000); stat &= ~0x00000004; } if (stat & 0x00000008) { - u32 trap = nv_rd32(priv, TPC_UNIT(gpc, tpc, 0x048c)); - nv_error(priv, "GPC%d/TPC%d/L1C: 0x%08x\n", gpc, tpc, trap); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000); + u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c)); + nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000); stat &= ~0x00000008; } if (stat) { - nv_error(priv, "GPC%d/TPC%d/0x%08x: unknown\n", gpc, tpc, stat); + nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat); } } static void -gf100_gr_trap_gpc(struct gf100_gr_priv *priv, int gpc) +gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc) { - u32 stat = nv_rd32(priv, GPC_UNIT(gpc, 0x2c90)); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90)); int tpc; if (stat & 0x00000001) { - gf100_gr_trap_gpc_rop(priv, gpc); + gf100_gr_trap_gpc_rop(gr, gpc); stat &= ~0x00000001; } if (stat & 0x00000002) { - u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0900)); - nv_error(priv, "GPC%d/ZCULL: 0x%08x\n", gpc, trap); - nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); + u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900)); + nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); stat &= ~0x00000002; } if (stat & 0x00000004) { - u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x1028)); - nv_error(priv, "GPC%d/CCACHE: 0x%08x\n", 
gpc, trap); - nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); + u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028)); + nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap); + nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); stat &= ~0x00000004; } if (stat & 0x00000008) { - u32 trap = nv_rd32(priv, GPC_UNIT(gpc, 0x0824)); - nv_error(priv, "GPC%d/ESETUP: 0x%08x\n", gpc, trap); - nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); + u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824)); + nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); stat &= ~0x00000009; } - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { u32 mask = 0x00010000 << tpc; if (stat & mask) { - gf100_gr_trap_tpc(priv, gpc, tpc); - nv_wr32(priv, GPC_UNIT(gpc, 0x2c90), mask); + gf100_gr_trap_tpc(gr, gpc, tpc); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask); stat &= ~mask; } } if (stat) { - nv_error(priv, "GPC%d/0x%08x: unknown\n", gpc, stat); + nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat); } } static void -gf100_gr_trap_intr(struct gf100_gr_priv *priv) +gf100_gr_trap_intr(struct gf100_gr *gr) { - u32 trap = nv_rd32(priv, 0x400108); - int rop, gpc, i; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 trap = nvkm_rd32(device, 0x400108); + int rop, gpc; if (trap & 0x00000001) { - u32 stat = nv_rd32(priv, 0x404000); - nv_error(priv, "DISPATCH 0x%08x\n", stat); - nv_wr32(priv, 0x404000, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000001); + u32 stat = nvkm_rd32(device, 0x404000); + nvkm_error(subdev, "DISPATCH %08x\n", stat); + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000001); trap &= ~0x00000001; } if (trap & 0x00000002) { - u32 stat = nv_rd32(priv, 0x404600); - nv_error(priv, "M2MF 0x%08x\n", stat); - nv_wr32(priv, 0x404600, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000002); + u32 stat = nvkm_rd32(device, 0x404600); + nvkm_error(subdev, "M2MF %08x\n", stat); + nvkm_wr32(device, 0x404600, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000002); trap &= ~0x00000002; } if (trap & 0x00000008) { - u32 stat = nv_rd32(priv, 0x408030); - nv_error(priv, "CCACHE 0x%08x\n", stat); - nv_wr32(priv, 0x408030, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000008); + u32 stat = nvkm_rd32(device, 0x408030); + nvkm_error(subdev, "CCACHE %08x\n", stat); + nvkm_wr32(device, 0x408030, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000008); trap &= ~0x00000008; } if (trap & 0x00000010) { - u32 stat = nv_rd32(priv, 0x405840); - nv_error(priv, "SHADER 0x%08x\n", stat); - nv_wr32(priv, 0x405840, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000010); + u32 stat = nvkm_rd32(device, 0x405840); + nvkm_error(subdev, "SHADER %08x\n", stat); + nvkm_wr32(device, 0x405840, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000010); trap &= ~0x00000010; } if (trap & 0x00000040) { - u32 stat = nv_rd32(priv, 0x40601c); - nv_error(priv, "UNK6 0x%08x\n", stat); - nv_wr32(priv, 0x40601c, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000040); + u32 stat = nvkm_rd32(device, 0x40601c); + nvkm_error(subdev, "UNK6 %08x\n", stat); + nvkm_wr32(device, 0x40601c, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000040); trap &= ~0x00000040; } if (trap & 0x00000080) { - u32 stat = nv_rd32(priv, 0x404490); - nv_error(priv, "MACRO 0x%08x\n", stat); - nv_wr32(priv, 0x404490, 0xc0000000); - nv_wr32(priv, 0x400108, 0x00000080); + u32 stat = nvkm_rd32(device, 0x404490); + 
nvkm_error(subdev, "MACRO %08x\n", stat); + nvkm_wr32(device, 0x404490, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x00000080); trap &= ~0x00000080; } if (trap & 0x00000100) { - u32 stat = nv_rd32(priv, 0x407020); + u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff; + char sked[128]; - nv_error(priv, "SKED:"); - for (i = 0; i <= 29; ++i) { - if (!(stat & (1 << i))) - continue; - pr_cont(" "); - nvkm_enum_print(gk104_sked_error, i); - } - pr_cont("\n"); + nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat); + nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked); - if (stat & 0x3fffffff) - nv_wr32(priv, 0x407020, 0x40000000); - nv_wr32(priv, 0x400108, 0x00000100); + if (stat) + nvkm_wr32(device, 0x407020, 0x40000000); + nvkm_wr32(device, 0x400108, 0x00000100); trap &= ~0x00000100; } if (trap & 0x01000000) { - u32 stat = nv_rd32(priv, 0x400118); - for (gpc = 0; stat && gpc < priv->gpc_nr; gpc++) { + u32 stat = nvkm_rd32(device, 0x400118); + for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) { u32 mask = 0x00000001 << gpc; if (stat & mask) { - gf100_gr_trap_gpc(priv, gpc); - nv_wr32(priv, 0x400118, mask); + gf100_gr_trap_gpc(gr, gpc); + nvkm_wr32(device, 0x400118, mask); stat &= ~mask; } } - nv_wr32(priv, 0x400108, 0x01000000); + nvkm_wr32(device, 0x400108, 0x01000000); trap &= ~0x01000000; } if (trap & 0x02000000) { - for (rop = 0; rop < priv->rop_nr; rop++) { - u32 statz = nv_rd32(priv, ROP_UNIT(rop, 0x070)); - u32 statc = nv_rd32(priv, ROP_UNIT(rop, 0x144)); - nv_error(priv, "ROP%d 0x%08x 0x%08x\n", + for (rop = 0; rop < gr->rop_nr; rop++) { + u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070)); + u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144)); + nvkm_error(subdev, "ROP%d %08x %08x\n", rop, statz, statc); - nv_wr32(priv, ROP_UNIT(rop, 0x070), 0xc0000000); - nv_wr32(priv, ROP_UNIT(rop, 0x144), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); } - nv_wr32(priv, 0x400108, 0x02000000); + nvkm_wr32(device, 0x400108, 0x02000000); trap &= ~0x02000000; } if (trap) { - nv_error(priv, "TRAP UNHANDLED 0x%08x\n", trap); - nv_wr32(priv, 0x400108, trap); + nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap); + nvkm_wr32(device, 0x400108, trap); } } static void -gf100_gr_ctxctl_debug_unit(struct gf100_gr_priv *priv, u32 base) +gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base) { - nv_error(priv, "%06x - done 0x%08x\n", base, - nv_rd32(priv, base + 0x400)); - nv_error(priv, "%06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base, - nv_rd32(priv, base + 0x800), nv_rd32(priv, base + 0x804), - nv_rd32(priv, base + 0x808), nv_rd32(priv, base + 0x80c)); - nv_error(priv, "%06x - stat 0x%08x 0x%08x 0x%08x 0x%08x\n", base, - nv_rd32(priv, base + 0x810), nv_rd32(priv, base + 0x814), - nv_rd32(priv, base + 0x818), nv_rd32(priv, base + 0x81c)); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + nvkm_error(subdev, "%06x - done %08x\n", base, + nvkm_rd32(device, base + 0x400)); + nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base, + nvkm_rd32(device, base + 0x800), + nvkm_rd32(device, base + 0x804), + nvkm_rd32(device, base + 0x808), + nvkm_rd32(device, base + 0x80c)); + nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base, + nvkm_rd32(device, base + 0x810), + nvkm_rd32(device, base + 0x814), + nvkm_rd32(device, base + 0x818), + nvkm_rd32(device, base + 0x81c)); } void -gf100_gr_ctxctl_debug(struct gf100_gr_priv *priv) +gf100_gr_ctxctl_debug(struct gf100_gr *gr) { - 
u32 gpcnr = nv_rd32(priv, 0x409604) & 0xffff; + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff; u32 gpc; - gf100_gr_ctxctl_debug_unit(priv, 0x409000); + gf100_gr_ctxctl_debug_unit(gr, 0x409000); for (gpc = 0; gpc < gpcnr; gpc++) - gf100_gr_ctxctl_debug_unit(priv, 0x502000 + (gpc * 0x8000)); + gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000)); } static void -gf100_gr_ctxctl_isr(struct gf100_gr_priv *priv) +gf100_gr_ctxctl_isr(struct gf100_gr *gr) { - u32 stat = nv_rd32(priv, 0x409c18); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, 0x409c18); if (stat & 0x00000001) { - u32 code = nv_rd32(priv, 0x409814); + u32 code = nvkm_rd32(device, 0x409814); if (code == E_BAD_FWMTHD) { - u32 class = nv_rd32(priv, 0x409808); - u32 addr = nv_rd32(priv, 0x40980c); + u32 class = nvkm_rd32(device, 0x409808); + u32 addr = nvkm_rd32(device, 0x40980c); u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00003ffc); - u32 data = nv_rd32(priv, 0x409810); + u32 data = nvkm_rd32(device, 0x409810); - nv_error(priv, "FECS MTHD subc %d class 0x%04x " - "mthd 0x%04x data 0x%08x\n", - subc, class, mthd, data); + nvkm_error(subdev, "FECS MTHD subc %d class %04x " + "mthd %04x data %08x\n", + subc, class, mthd, data); - nv_wr32(priv, 0x409c20, 0x00000001); + nvkm_wr32(device, 0x409c20, 0x00000001); stat &= ~0x00000001; } else { - nv_error(priv, "FECS ucode error %d\n", code); + nvkm_error(subdev, "FECS ucode error %d\n", code); } } if (stat & 0x00080000) { - nv_error(priv, "FECS watchdog timeout\n"); - gf100_gr_ctxctl_debug(priv); - nv_wr32(priv, 0x409c20, 0x00080000); + nvkm_error(subdev, "FECS watchdog timeout\n"); + gf100_gr_ctxctl_debug(gr); + nvkm_wr32(device, 0x409c20, 0x00080000); stat &= ~0x00080000; } if (stat) { - nv_error(priv, "FECS 0x%08x\n", stat); - gf100_gr_ctxctl_debug(priv); - nv_wr32(priv, 0x409c20, stat); + nvkm_error(subdev, "FECS %08x\n", stat); + gf100_gr_ctxctl_debug(gr); + nvkm_wr32(device, 0x409c20, stat); } } static void -gf100_gr_intr(struct nvkm_subdev *subdev) +gf100_gr_intr(struct nvkm_gr *base) { - struct nvkm_fifo *pfifo = nvkm_fifo(subdev); - struct nvkm_engine *engine = nv_engine(subdev); - struct nvkm_object *engctx; - struct nvkm_handle *handle; - struct gf100_gr_priv *priv = (void *)subdev; - u64 inst = nv_rd32(priv, 0x409b00) & 0x0fffffff; - u32 stat = nv_rd32(priv, 0x400100); - u32 addr = nv_rd32(priv, 0x400704); + struct gf100_gr *gr = gf100_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_fifo_chan *chan; + unsigned long flags; + u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff; + u32 stat = nvkm_rd32(device, 0x400100); + u32 addr = nvkm_rd32(device, 0x400704); u32 mthd = (addr & 0x00003ffc); u32 subc = (addr & 0x00070000) >> 16; - u32 data = nv_rd32(priv, 0x400708); - u32 code = nv_rd32(priv, 0x400110); + u32 data = nvkm_rd32(device, 0x400708); + u32 code = nvkm_rd32(device, 0x400110); u32 class; - int chid; + const char *name = "unknown"; + int chid = -1; - if (nv_device(priv)->card_type < NV_E0 || subc < 4) - class = nv_rd32(priv, 0x404200 + (subc * 4)); + chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + if (chan) { + name = chan->object.client->name; + chid = chan->chid; + } + + if (device->card_type < NV_E0 || subc < 4) + class = nvkm_rd32(device, 0x404200 + (subc * 4)); else class = 0x0000; - engctx = 
nvkm_engctx_get(engine, inst); - chid = pfifo->chid(pfifo, engctx); - if (stat & 0x00000001) { /* * notifier interrupt, only needed for cyclestats * can be safely ignored */ - nv_wr32(priv, 0x400100, 0x00000001); + nvkm_wr32(device, 0x400100, 0x00000001); stat &= ~0x00000001; } if (stat & 0x00000010) { - handle = nvkm_handle_get_class(engctx, class); - if (!handle || nv_call(handle->object, mthd, data)) { - nv_error(priv, - "ILLEGAL_MTHD ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, inst << 12, nvkm_client_name(engctx), - subc, class, mthd, data); + if (!gf100_gr_mthd_sw(device, class, mthd, data)) { + nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] " + "subc %d class %04x mthd %04x data %08x\n", + chid, inst << 12, name, subc, + class, mthd, data); } - nvkm_handle_put(handle); - nv_wr32(priv, 0x400100, 0x00000010); + nvkm_wr32(device, 0x400100, 0x00000010); stat &= ~0x00000010; } if (stat & 0x00000020) { - nv_error(priv, - "ILLEGAL_CLASS ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, inst << 12, nvkm_client_name(engctx), subc, - class, mthd, data); - nv_wr32(priv, 0x400100, 0x00000020); + nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] " + "subc %d class %04x mthd %04x data %08x\n", + chid, inst << 12, name, subc, class, mthd, data); + nvkm_wr32(device, 0x400100, 0x00000020); stat &= ~0x00000020; } if (stat & 0x00100000) { - nv_error(priv, "DATA_ERROR ["); - nvkm_enum_print(nv50_data_error_names, code); - pr_cont("] ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, inst << 12, nvkm_client_name(engctx), subc, - class, mthd, data); - nv_wr32(priv, 0x400100, 0x00100000); + const struct nvkm_enum *en = + nvkm_enum_find(nv50_data_error_names, code); + nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] " + "subc %d class %04x mthd %04x data %08x\n", + code, en ? 
en->name : "", chid, inst << 12, + name, subc, class, mthd, data); + nvkm_wr32(device, 0x400100, 0x00100000); stat &= ~0x00100000; } if (stat & 0x00200000) { - nv_error(priv, "TRAP ch %d [0x%010llx %s]\n", chid, inst << 12, - nvkm_client_name(engctx)); - gf100_gr_trap_intr(priv); - nv_wr32(priv, 0x400100, 0x00200000); + nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n", + chid, inst << 12, name); + gf100_gr_trap_intr(gr); + nvkm_wr32(device, 0x400100, 0x00200000); stat &= ~0x00200000; } if (stat & 0x00080000) { - gf100_gr_ctxctl_isr(priv); - nv_wr32(priv, 0x400100, 0x00080000); + gf100_gr_ctxctl_isr(gr); + nvkm_wr32(device, 0x400100, 0x00080000); stat &= ~0x00080000; } if (stat) { - nv_error(priv, "unknown stat 0x%08x\n", stat); - nv_wr32(priv, 0x400100, stat); + nvkm_error(subdev, "intr %08x\n", stat); + nvkm_wr32(device, 0x400100, stat); } - nv_wr32(priv, 0x400500, 0x00010001); - nvkm_engctx_put(engctx); + nvkm_wr32(device, 0x400500, 0x00010001); + nvkm_fifo_chan_put(device->fifo, flags, &chan); } void -gf100_gr_init_fw(struct gf100_gr_priv *priv, u32 fuc_base, +gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base, struct gf100_gr_fuc *code, struct gf100_gr_fuc *data) { + struct nvkm_device *device = gr->base.engine.subdev.device; int i; - nv_wr32(priv, fuc_base + 0x01c0, 0x01000000); + nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000); for (i = 0; i < data->size / 4; i++) - nv_wr32(priv, fuc_base + 0x01c4, data->data[i]); + nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]); - nv_wr32(priv, fuc_base + 0x0180, 0x01000000); + nvkm_wr32(device, fuc_base + 0x0180, 0x01000000); for (i = 0; i < code->size / 4; i++) { if ((i & 0x3f) == 0) - nv_wr32(priv, fuc_base + 0x0188, i >> 6); - nv_wr32(priv, fuc_base + 0x0184, code->data[i]); + nvkm_wr32(device, fuc_base + 0x0188, i >> 6); + nvkm_wr32(device, fuc_base + 0x0184, code->data[i]); } /* code must be padded to 0x40 words */ for (; i & 0x3f; i++) - nv_wr32(priv, fuc_base + 0x0184, 0); + nvkm_wr32(device, fuc_base + 0x0184, 0); } static void -gf100_gr_init_csdata(struct gf100_gr_priv *priv, +gf100_gr_init_csdata(struct gf100_gr *gr, const struct gf100_gr_pack *pack, u32 falcon, u32 starstar, u32 base) { + struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_gr_pack *iter; const struct gf100_gr_init *init; u32 addr = ~0, prev = ~0, xfer = 0; u32 star, temp; - nv_wr32(priv, falcon + 0x01c0, 0x02000000 + starstar); - star = nv_rd32(priv, falcon + 0x01c4); - temp = nv_rd32(priv, falcon + 0x01c4); + nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar); + star = nvkm_rd32(device, falcon + 0x01c4); + temp = nvkm_rd32(device, falcon + 0x01c4); if (temp > star) star = temp; - nv_wr32(priv, falcon + 0x01c0, 0x01000000 + star); + nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star); pack_for_each_init(init, iter, pack) { u32 head = init->addr - base; @@ -1218,7 +1313,7 @@ gf100_gr_init_csdata(struct gf100_gr_priv *priv, if (head != prev + 4 || xfer >= 32) { if (xfer) { u32 data = ((--xfer << 26) | addr); - nv_wr32(priv, falcon + 0x01c4, data); + nvkm_wr32(device, falcon + 0x01c4, data); star += 4; } addr = head; @@ -1230,157 +1325,166 @@ gf100_gr_init_csdata(struct gf100_gr_priv *priv, } } - nv_wr32(priv, falcon + 0x01c4, (--xfer << 26) | addr); - nv_wr32(priv, falcon + 0x01c0, 0x01000004 + starstar); - nv_wr32(priv, falcon + 0x01c4, star + 4); + nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr); + nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar); + nvkm_wr32(device, falcon + 0x01c4, star + 4); } int 
-gf100_gr_init_ctxctl(struct gf100_gr_priv *priv) +gf100_gr_init_ctxctl(struct gf100_gr *gr) { - struct gf100_gr_oclass *oclass = (void *)nv_object(priv)->oclass; - struct gf100_grctx_oclass *cclass = (void *)nv_engine(priv)->cclass; + const struct gf100_grctx_func *grctx = gr->func->grctx; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; int i; - if (priv->firmware) { + if (gr->firmware) { /* load fuc microcode */ - nvkm_mc(priv)->unk260(nvkm_mc(priv), 0); - gf100_gr_init_fw(priv, 0x409000, &priv->fuc409c, - &priv->fuc409d); - gf100_gr_init_fw(priv, 0x41a000, &priv->fuc41ac, - &priv->fuc41ad); - nvkm_mc(priv)->unk260(nvkm_mc(priv), 1); + nvkm_mc_unk260(device->mc, 0); + gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d); + gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad); + nvkm_mc_unk260(device->mc, 1); /* start both of them running */ - nv_wr32(priv, 0x409840, 0xffffffff); - nv_wr32(priv, 0x41a10c, 0x00000000); - nv_wr32(priv, 0x40910c, 0x00000000); - nv_wr32(priv, 0x41a100, 0x00000002); - nv_wr32(priv, 0x409100, 0x00000002); - if (!nv_wait(priv, 0x409800, 0x00000001, 0x00000001)) - nv_warn(priv, "0x409800 wait failed\n"); - - nv_wr32(priv, 0x409840, 0xffffffff); - nv_wr32(priv, 0x409500, 0x7fffffff); - nv_wr32(priv, 0x409504, 0x00000021); - - nv_wr32(priv, 0x409840, 0xffffffff); - nv_wr32(priv, 0x409500, 0x00000000); - nv_wr32(priv, 0x409504, 0x00000010); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x10 timeout\n"); + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x41a10c, 0x00000000); + nvkm_wr32(device, 0x40910c, 0x00000000); + nvkm_wr32(device, 0x41a100, 0x00000002); + nvkm_wr32(device, 0x409100, 0x00000002); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) & 0x00000001) + break; + ) < 0) return -EBUSY; - } - priv->size = nv_rd32(priv, 0x409800); - nv_wr32(priv, 0x409840, 0xffffffff); - nv_wr32(priv, 0x409500, 0x00000000); - nv_wr32(priv, 0x409504, 0x00000016); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x16 timeout\n"); + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x7fffffff); + nvkm_wr32(device, 0x409504, 0x00000021); + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000010); + if (nvkm_msec(device, 2000, + if ((gr->size = nvkm_rd32(device, 0x409800))) + break; + ) < 0) return -EBUSY; - } - nv_wr32(priv, 0x409840, 0xffffffff); - nv_wr32(priv, 0x409500, 0x00000000); - nv_wr32(priv, 0x409504, 0x00000025); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x25 timeout\n"); + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000016); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800)) + break; + ) < 0) + return -EBUSY; + + nvkm_wr32(device, 0x409840, 0xffffffff); + nvkm_wr32(device, 0x409500, 0x00000000); + nvkm_wr32(device, 0x409504, 0x00000025); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800)) + break; + ) < 0) return -EBUSY; - } - if (nv_device(priv)->chipset >= 0xe0) { - nv_wr32(priv, 0x409800, 0x00000000); - nv_wr32(priv, 0x409500, 0x00000001); - nv_wr32(priv, 0x409504, 0x00000030); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x30 timeout\n"); + if (device->chipset >= 0xe0) { + nvkm_wr32(device, 0x409800, 
0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000030); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800)) + break; + ) < 0) return -EBUSY; - } - nv_wr32(priv, 0x409810, 0xb00095c8); - nv_wr32(priv, 0x409800, 0x00000000); - nv_wr32(priv, 0x409500, 0x00000001); - nv_wr32(priv, 0x409504, 0x00000031); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x31 timeout\n"); + nvkm_wr32(device, 0x409810, 0xb00095c8); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000031); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800)) + break; + ) < 0) return -EBUSY; - } - nv_wr32(priv, 0x409810, 0x00080420); - nv_wr32(priv, 0x409800, 0x00000000); - nv_wr32(priv, 0x409500, 0x00000001); - nv_wr32(priv, 0x409504, 0x00000032); - if (!nv_wait_ne(priv, 0x409800, 0xffffffff, 0x00000000)) { - nv_error(priv, "fuc09 req 0x32 timeout\n"); + nvkm_wr32(device, 0x409810, 0x00080420); + nvkm_wr32(device, 0x409800, 0x00000000); + nvkm_wr32(device, 0x409500, 0x00000001); + nvkm_wr32(device, 0x409504, 0x00000032); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800)) + break; + ) < 0) return -EBUSY; - } - nv_wr32(priv, 0x409614, 0x00000070); - nv_wr32(priv, 0x409614, 0x00000770); - nv_wr32(priv, 0x40802c, 0x00000001); + nvkm_wr32(device, 0x409614, 0x00000070); + nvkm_wr32(device, 0x409614, 0x00000770); + nvkm_wr32(device, 0x40802c, 0x00000001); } - if (priv->data == NULL) { - int ret = gf100_grctx_generate(priv); + if (gr->data == NULL) { + int ret = gf100_grctx_generate(gr); if (ret) { - nv_error(priv, "failed to construct context\n"); + nvkm_error(subdev, "failed to construct context\n"); return ret; } } return 0; } else - if (!oclass->fecs.ucode) { + if (!gr->func->fecs.ucode) { return -ENOSYS; } /* load HUB microcode */ - nvkm_mc(priv)->unk260(nvkm_mc(priv), 0); - nv_wr32(priv, 0x4091c0, 0x01000000); - for (i = 0; i < oclass->fecs.ucode->data.size / 4; i++) - nv_wr32(priv, 0x4091c4, oclass->fecs.ucode->data.data[i]); + nvkm_mc_unk260(device->mc, 0); + nvkm_wr32(device, 0x4091c0, 0x01000000); + for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++) + nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]); - nv_wr32(priv, 0x409180, 0x01000000); - for (i = 0; i < oclass->fecs.ucode->code.size / 4; i++) { + nvkm_wr32(device, 0x409180, 0x01000000); + for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) { if ((i & 0x3f) == 0) - nv_wr32(priv, 0x409188, i >> 6); - nv_wr32(priv, 0x409184, oclass->fecs.ucode->code.data[i]); + nvkm_wr32(device, 0x409188, i >> 6); + nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]); } /* load GPC microcode */ - nv_wr32(priv, 0x41a1c0, 0x01000000); - for (i = 0; i < oclass->gpccs.ucode->data.size / 4; i++) - nv_wr32(priv, 0x41a1c4, oclass->gpccs.ucode->data.data[i]); + nvkm_wr32(device, 0x41a1c0, 0x01000000); + for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++) + nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]); - nv_wr32(priv, 0x41a180, 0x01000000); - for (i = 0; i < oclass->gpccs.ucode->code.size / 4; i++) { + nvkm_wr32(device, 0x41a180, 0x01000000); + for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) { if ((i & 0x3f) == 0) - nv_wr32(priv, 0x41a188, i >> 6); - nv_wr32(priv, 0x41a184, oclass->gpccs.ucode->code.data[i]); + nvkm_wr32(device, 0x41a188, i >> 6); + nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]); } - 
nvkm_mc(priv)->unk260(nvkm_mc(priv), 1); + nvkm_mc_unk260(device->mc, 1); /* load register lists */ - gf100_gr_init_csdata(priv, cclass->hub, 0x409000, 0x000, 0x000000); - gf100_gr_init_csdata(priv, cclass->gpc, 0x41a000, 0x000, 0x418000); - gf100_gr_init_csdata(priv, cclass->tpc, 0x41a000, 0x004, 0x419800); - gf100_gr_init_csdata(priv, cclass->ppc, 0x41a000, 0x008, 0x41be00); + gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000); + gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000); + gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800); + gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00); /* start HUB ucode running, it'll init the GPCs */ - nv_wr32(priv, 0x40910c, 0x00000000); - nv_wr32(priv, 0x409100, 0x00000002); - if (!nv_wait(priv, 0x409800, 0x80000000, 0x80000000)) { - nv_error(priv, "HUB_INIT timed out\n"); - gf100_gr_ctxctl_debug(priv); + nvkm_wr32(device, 0x40910c, 0x00000000); + nvkm_wr32(device, 0x409100, 0x00000002); + if (nvkm_msec(device, 2000, + if (nvkm_rd32(device, 0x409800) & 0x80000000) + break; + ) < 0) { + gf100_gr_ctxctl_debug(gr); return -EBUSY; } - priv->size = nv_rd32(priv, 0x409804); - if (priv->data == NULL) { - int ret = gf100_grctx_generate(priv); + gr->size = nvkm_rd32(device, 0x409804); + if (gr->data == NULL) { + int ret = gf100_grctx_generate(gr); if (ret) { - nv_error(priv, "failed to construct context\n"); + nvkm_error(subdev, "failed to construct context\n"); return ret; } } @@ -1388,143 +1492,160 @@ gf100_gr_init_ctxctl(struct gf100_gr_priv *priv) return 0; } -int -gf100_gr_init(struct nvkm_object *object) +static int +gf100_gr_oneinit(struct nvkm_gr *base) { - struct gf100_gr_oclass *oclass = (void *)object->oclass; - struct gf100_gr_priv *priv = (void *)object; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total); - u32 data[TPC_MAX / 8] = {}; - u8 tpcnr[GPC_MAX]; - int gpc, tpc, rop; - int ret, i; + struct gf100_gr *gr = gf100_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + int ret, i, j; + + nvkm_pmu_pgob(device->pmu, false); - ret = nvkm_gr_init(&priv->base); + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false, + &gr->unk4188b4); if (ret) return ret; - nv_wr32(priv, GPC_BCAST(0x0880), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x08a4), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0888), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x088c), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0890), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0894), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x08b4), priv->unk4188b4->addr >> 8); - nv_wr32(priv, GPC_BCAST(0x08b8), priv->unk4188b8->addr >> 8); - - gf100_gr_mmio(priv, oclass->mmio); - - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); - for (i = 0, gpc = -1; i < priv->tpc_total; i++) { - do { - gpc = (gpc + 1) % priv->gpc_nr; - } while (!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; - - data[i / 8] |= tpc << ((i % 8) * 4); - } - - nv_wr32(priv, GPC_BCAST(0x0980), data[0]); - nv_wr32(priv, GPC_BCAST(0x0984), data[1]); - nv_wr32(priv, GPC_BCAST(0x0988), data[2]); - nv_wr32(priv, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x0914), - priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 | - priv->tpc_total); - nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); - } + ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false, + &gr->unk4188b8); + if (ret) + return ret; - if (nv_device(priv)->chipset 
!= 0xd7) - nv_wr32(priv, GPC_BCAST(0x1bd4), magicgpc918); - else - nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); - - nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); - - nv_wr32(priv, 0x400500, 0x00010001); - - nv_wr32(priv, 0x400100, 0xffffffff); - nv_wr32(priv, 0x40013c, 0xffffffff); - - nv_wr32(priv, 0x409c24, 0x000f0000); - nv_wr32(priv, 0x404000, 0xc0000000); - nv_wr32(priv, 0x404600, 0xc0000000); - nv_wr32(priv, 0x408030, 0xc0000000); - nv_wr32(priv, 0x40601c, 0xc0000000); - nv_wr32(priv, 0x404490, 0xc0000000); - nv_wr32(priv, 0x406018, 0xc0000000); - nv_wr32(priv, 0x405840, 0xc0000000); - nv_wr32(priv, 0x405844, 0x00ffffff); - nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); - nv_mask(priv, 0x419eb4, 0x00001000, 0x00001000); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); + nvkm_kmap(gr->unk4188b4); + for (i = 0; i < 0x1000; i += 4) + nvkm_wo32(gr->unk4188b4, i, 0x00000010); + nvkm_done(gr->unk4188b4); + + nvkm_kmap(gr->unk4188b8); + for (i = 0; i < 0x1000; i += 4) + nvkm_wo32(gr->unk4188b8, i, 0x00000010); + nvkm_done(gr->unk4188b8); + + gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16; + gr->gpc_nr = nvkm_rd32(device, 0x409604) & 0x0000001f; + for (i = 0; i < gr->gpc_nr; i++) { + gr->tpc_nr[i] = nvkm_rd32(device, GPC_UNIT(i, 0x2608)); + gr->tpc_total += gr->tpc_nr[i]; + gr->ppc_nr[i] = gr->func->ppc_nr; + for (j = 0; j < gr->ppc_nr[i]; j++) { + u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4))); + gr->ppc_tpc_nr[i][j] = hweight8(mask); } - nv_wr32(priv, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nv_wr32(priv, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - for (rop = 0; rop < priv->rop_nr; rop++) { - nv_wr32(priv, ROP_UNIT(rop, 0x144), 0xc0000000); - nv_wr32(priv, ROP_UNIT(rop, 0x070), 0xc0000000); - nv_wr32(priv, ROP_UNIT(rop, 0x204), 0xffffffff); - nv_wr32(priv, ROP_UNIT(rop, 0x208), 0xffffffff); + /*XXX: these need figuring out... 
though it might not even matter */ + switch (device->chipset) { + case 0xc0: + if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */ + gr->magic_not_rop_nr = 0x07; + } else + if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */ + gr->magic_not_rop_nr = 0x05; + } else + if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */ + gr->magic_not_rop_nr = 0x06; + } + break; + case 0xc3: /* 450, 4/0/0/0, 2 */ + gr->magic_not_rop_nr = 0x03; + break; + case 0xc4: /* 460, 3/4/0/0, 4 */ + gr->magic_not_rop_nr = 0x01; + break; + case 0xc1: /* 2/0/0/0, 1 */ + gr->magic_not_rop_nr = 0x01; + break; + case 0xc8: /* 4/4/3/4, 5 */ + gr->magic_not_rop_nr = 0x06; + break; + case 0xce: /* 4/4/0/0, 4 */ + gr->magic_not_rop_nr = 0x03; + break; + case 0xcf: /* 4/0/0/0, 3 */ + gr->magic_not_rop_nr = 0x03; + break; + case 0xd7: + case 0xd9: /* 1/0/0/0, 1 */ + case 0xea: /* gk20a */ + case 0x12b: /* gm20b */ + gr->magic_not_rop_nr = 0x01; + break; } - nv_wr32(priv, 0x400108, 0xffffffff); - nv_wr32(priv, 0x400138, 0xffffffff); - nv_wr32(priv, 0x400118, 0xffffffff); - nv_wr32(priv, 0x400130, 0xffffffff); - nv_wr32(priv, 0x40011c, 0xffffffff); - nv_wr32(priv, 0x400134, 0xffffffff); - - nv_wr32(priv, 0x400054, 0x34ce3464); - - gf100_gr_zbc_init(priv); + return 0; +} - return gf100_gr_init_ctxctl(priv); +int +gf100_gr_init_(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false); + return gr->func->init(gr); } -static void +void gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc) { kfree(fuc->data); fuc->data = NULL; } +void * +gf100_gr_dtor(struct nvkm_gr *base) +{ + struct gf100_gr *gr = gf100_gr(base); + + if (gr->func->dtor) + gr->func->dtor(gr); + kfree(gr->data); + + gf100_gr_dtor_fw(&gr->fuc409c); + gf100_gr_dtor_fw(&gr->fuc409d); + gf100_gr_dtor_fw(&gr->fuc41ac); + gf100_gr_dtor_fw(&gr->fuc41ad); + + nvkm_memory_del(&gr->unk4188b8); + nvkm_memory_del(&gr->unk4188b4); + return gr; +} + +static const struct nvkm_gr_func +gf100_gr_ = { + .dtor = gf100_gr_dtor, + .oneinit = gf100_gr_oneinit, + .init = gf100_gr_init_, + .intr = gf100_gr_intr, + .units = gf100_gr_units, + .chan_new = gf100_gr_chan_new, + .object_get = gf100_gr_object_get, +}; + int -gf100_gr_ctor_fw(struct gf100_gr_priv *priv, const char *fwname, +gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname, struct gf100_gr_fuc *fuc) { - struct nvkm_device *device = nv_device(priv); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; const struct firmware *fw; - char f[32]; + char f[64]; + char cname[16]; int ret; + int i; - snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname); - ret = request_firmware(&fw, f, nv_device_base(device)); + /* Convert device name to lowercase */ + strncpy(cname, device->chip->name, sizeof(cname)); + cname[sizeof(cname) - 1] = '\0'; + i = strlen(cname); + while (i) { + --i; + cname[i] = tolower(cname[i]); + } + + snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname); + ret = request_firmware(&fw, f, device->dev); if (ret) { - snprintf(f, sizeof(f), "nouveau/%s", fwname); - ret = request_firmware(&fw, f, nv_device_base(device)); - if (ret) { - nv_error(priv, "failed to load %s\n", fwname); - return ret; - } + nvkm_error(subdev, "failed to load %s\n", fwname); + return ret; } fuc->size = fw->size; @@ -1533,126 +1654,150 @@ gf100_gr_ctor_fw(struct gf100_gr_priv *priv, const char *fwname, return (fuc->data != NULL) ? 
0 : -ENOMEM; } -void -gf100_gr_dtor(struct nvkm_object *object) +int +gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device, + int index, struct gf100_gr *gr) { - struct gf100_gr_priv *priv = (void *)object; + int ret; - kfree(priv->data); + gr->func = func; + gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW", + func->fecs.ucode == NULL); - gf100_gr_dtor_fw(&priv->fuc409c); - gf100_gr_dtor_fw(&priv->fuc409d); - gf100_gr_dtor_fw(&priv->fuc41ac); - gf100_gr_dtor_fw(&priv->fuc41ad); + ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000, + gr->firmware || func->fecs.ucode != NULL, + &gr->base); + if (ret) + return ret; - nvkm_gpuobj_ref(NULL, &priv->unk4188b8); - nvkm_gpuobj_ref(NULL, &priv->unk4188b4); + if (gr->firmware) { + nvkm_info(&gr->base.engine.subdev, "using external firmware\n"); + if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) || + gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) || + gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) || + gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)) + return -ENODEV; + } - nvkm_gr_destroy(&priv->base); + return 0; } int -gf100_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *bclass, void *data, u32 size, - struct nvkm_object **pobject) +gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) { - struct gf100_gr_oclass *oclass = (void *)bclass; - struct nvkm_device *device = nv_device(parent); - struct gf100_gr_priv *priv; - bool use_ext_fw, enable; - int ret, i, j; + struct gf100_gr *gr; + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + return gf100_gr_ctor(func, device, index, gr); +} - use_ext_fw = nvkm_boolopt(device->cfgopt, "NvGrUseFW", - oclass->fecs.ucode == NULL); - enable = use_ext_fw || oclass->fecs.ucode != NULL; +int +gf100_gr_init(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + u32 data[TPC_MAX / 8] = {}; + u8 tpcnr[GPC_MAX]; + int gpc, tpc, rop; + int i; - ret = nvkm_gr_create(parent, engine, bclass, enable, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; + nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8); - nv_subdev(priv)->unit = 0x08001000; - nv_subdev(priv)->intr = gf100_gr_intr; + gf100_gr_mmio(gr, gr->func->mmio); - priv->base.units = gf100_gr_units; + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (i = 0, gpc = -1; i < gr->tpc_total; i++) { + do { + gpc = (gpc + 1) % gr->gpc_nr; + } while (!tpcnr[gpc]); + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; - if (use_ext_fw) { - nv_info(priv, "using external firmware\n"); - if (gf100_gr_ctor_fw(priv, "fuc409c", &priv->fuc409c) || - gf100_gr_ctor_fw(priv, "fuc409d", &priv->fuc409d) || - gf100_gr_ctor_fw(priv, "fuc41ac", &priv->fuc41ac) || - gf100_gr_ctor_fw(priv, "fuc41ad", &priv->fuc41ad)) - return -ENODEV; - priv->firmware = true; + data[i / 8] |= tpc << ((i % 8) * 4); } - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 256, 0, - &priv->unk4188b4); - if (ret) - return ret; - - ret = 
nvkm_gpuobj_new(nv_object(priv), NULL, 0x1000, 256, 0, - &priv->unk4188b8); - if (ret) - return ret; + nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); + nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); + nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); + nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); - for (i = 0; i < 0x1000; i += 4) { - nv_wo32(priv->unk4188b4, i, 0x00000010); - nv_wo32(priv->unk4188b8, i, 0x00000010); + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - priv->rop_nr = (nv_rd32(priv, 0x409604) & 0x001f0000) >> 16; - priv->gpc_nr = nv_rd32(priv, 0x409604) & 0x0000001f; - for (i = 0; i < priv->gpc_nr; i++) { - priv->tpc_nr[i] = nv_rd32(priv, GPC_UNIT(i, 0x2608)); - priv->tpc_total += priv->tpc_nr[i]; - priv->ppc_nr[i] = oclass->ppc_nr; - for (j = 0; j < priv->ppc_nr[i]; j++) { - u8 mask = nv_rd32(priv, GPC_UNIT(i, 0x0c30 + (j * 4))); - priv->ppc_tpc_nr[i][j] = hweight8(mask); + if (device->chipset != 0xd7) + nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918); + else + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + + nvkm_wr32(device, 0x400500, 0x00010001); + + nvkm_wr32(device, 0x400100, 0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + + nvkm_wr32(device, 0x409c24, 0x000f0000); + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x404600, 0xc0000000); + nvkm_wr32(device, 0x408030, 0xc0000000); + nvkm_wr32(device, 0x40601c, 0xc0000000); + nvkm_wr32(device, 0x404490, 0xc0000000); + nvkm_wr32(device, 0x406018, 0xc0000000); + nvkm_wr32(device, 0x405840, 0xc0000000); + nvkm_wr32(device, 0x405844, 0x00ffffff); + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); } + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - /*XXX: these need figuring out... 
though it might not even matter */ - switch (nv_device(priv)->chipset) { - case 0xc0: - if (priv->tpc_total == 11) { /* 465, 3/4/4/0, 4 */ - priv->magic_not_rop_nr = 0x07; - } else - if (priv->tpc_total == 14) { /* 470, 3/3/4/4, 5 */ - priv->magic_not_rop_nr = 0x05; - } else - if (priv->tpc_total == 15) { /* 480, 3/4/4/4, 6 */ - priv->magic_not_rop_nr = 0x06; - } - break; - case 0xc3: /* 450, 4/0/0/0, 2 */ - priv->magic_not_rop_nr = 0x03; - break; - case 0xc4: /* 460, 3/4/0/0, 4 */ - priv->magic_not_rop_nr = 0x01; - break; - case 0xc1: /* 2/0/0/0, 1 */ - priv->magic_not_rop_nr = 0x01; - break; - case 0xc8: /* 4/4/3/4, 5 */ - priv->magic_not_rop_nr = 0x06; - break; - case 0xce: /* 4/4/0/0, 4 */ - priv->magic_not_rop_nr = 0x03; - break; - case 0xcf: /* 4/0/0/0, 3 */ - priv->magic_not_rop_nr = 0x03; - break; - case 0xd7: - case 0xd9: /* 1/0/0/0, 1 */ - priv->magic_not_rop_nr = 0x01; - break; + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } - nv_engine(priv)->cclass = *oclass->cclass; - nv_engine(priv)->sclass = oclass->sclass; - return 0; + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400118, 0xffffffff); + nvkm_wr32(device, 0x400130, 0xffffffff); + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); + + nvkm_wr32(device, 0x400054, 0x34ce3464); + + gf100_gr_zbc_init(gr); + + return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgf100.fuc3.h" @@ -1675,18 +1820,24 @@ gf100_gr_gpccs_ucode = { .data.size = sizeof(gf100_grgpc_data), }; -struct nvkm_oclass * -gf100_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xc0), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf100_grctx_oclass, - .sclass = gf100_gr_sclass, +static const struct gf100_gr_func +gf100_gr = { + .init = gf100_gr_init, .mmio = gf100_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, -}.base; + .grctx = &gf100_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf100_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 8af1a89eda84..4611961b1187 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -21,11 +21,14 @@ * * Authors: Ben Skeggs */ -#ifndef __NVC0_GR_H__ -#define __NVC0_GR_H__ -#include <engine/gr.h> +#ifndef __GF100_GR_H__ +#define __GF100_GR_H__ +#define gf100_gr(p) container_of((p), struct gf100_gr, base) +#include "priv.h" +#include <core/gpuobj.h> #include <subdev/ltc.h> +#include <subdev/mmu.h> #define GPC_MAX 32 #define TPC_MAX (GPC_MAX * 8) @@ -67,7 +70,8 @@ struct gf100_gr_zbc_depth { u32 l2; }; -struct gf100_gr_priv { +struct gf100_gr { + const struct gf100_gr_func *func; struct nvkm_gr base; struct gf100_gr_fuc fuc409c; @@ -76,6 +80,15 @@ struct gf100_gr_priv { struct gf100_gr_fuc fuc41ad; bool firmware; + /* + * Used if the register packs are loaded from NVIDIA 
fw instead of + * using hardcoded arrays. + */ + struct gf100_gr_pack *fuc_sw_nonctx; + struct gf100_gr_pack *fuc_sw_ctx; + struct gf100_gr_pack *fuc_bundle; + struct gf100_gr_pack *fuc_method; + struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT]; struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT]; @@ -86,8 +99,8 @@ struct gf100_gr_priv { u8 ppc_nr[GPC_MAX]; u8 ppc_tpc_nr[GPC_MAX][4]; - struct nvkm_gpuobj *unk4188b4; - struct nvkm_gpuobj *unk4188b8; + struct nvkm_memory *unk4188b4; + struct nvkm_memory *unk4188b8; struct gf100_gr_data mmio_data[4]; struct gf100_gr_mmio mmio_list[4096/8]; @@ -97,48 +110,65 @@ struct gf100_gr_priv { u8 magic_not_rop_nr; }; +int gf100_gr_ctor(const struct gf100_gr_func *, struct nvkm_device *, + int, struct gf100_gr *); +int gf100_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, + int, struct nvkm_gr **); +void *gf100_gr_dtor(struct nvkm_gr *); + +struct gf100_gr_func { + void (*dtor)(struct gf100_gr *); + int (*init)(struct gf100_gr *); + void (*init_gpc_mmu)(struct gf100_gr *); + void (*set_hww_esr_report_mask)(struct gf100_gr *); + const struct gf100_gr_pack *mmio; + struct { + struct gf100_gr_ucode *ucode; + } fecs; + struct { + struct gf100_gr_ucode *ucode; + } gpccs; + int ppc_nr; + const struct gf100_grctx_func *grctx; + struct nvkm_sclass sclass[]; +}; + +int gf100_gr_init(struct gf100_gr *); + +int gk104_gr_init(struct gf100_gr *); + +int gk20a_gr_new_(const struct gf100_gr_func *, struct nvkm_device *, + int, struct nvkm_gr **); +void gk20a_gr_dtor(struct gf100_gr *); +int gk20a_gr_init(struct gf100_gr *); + +int gm204_gr_init(struct gf100_gr *); + +#define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object) + struct gf100_gr_chan { - struct nvkm_gr_chan base; + struct nvkm_object object; + struct gf100_gr *gr; - struct nvkm_gpuobj *mmio; + struct nvkm_memory *mmio; struct nvkm_vma mmio_vma; int mmio_nr; + struct { - struct nvkm_gpuobj *mem; + struct nvkm_memory *mem; struct nvkm_vma vma; } data[4]; }; -int gf100_gr_context_ctor(struct nvkm_object *, struct nvkm_object *, - struct nvkm_oclass *, void *, u32, - struct nvkm_object **); -void gf100_gr_context_dtor(struct nvkm_object *); - -void gf100_gr_ctxctl_debug(struct gf100_gr_priv *); +void gf100_gr_ctxctl_debug(struct gf100_gr *); +void gf100_gr_dtor_fw(struct gf100_gr_fuc *); +int gf100_gr_ctor_fw(struct gf100_gr *, const char *, + struct gf100_gr_fuc *); u64 gf100_gr_units(struct nvkm_gr *); -int gf100_gr_ctor(struct nvkm_object *, struct nvkm_object *, - struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); -void gf100_gr_dtor(struct nvkm_object *); -int gf100_gr_init(struct nvkm_object *); -void gf100_gr_zbc_init(struct gf100_gr_priv *); - -int gk104_gr_ctor(struct nvkm_object *, struct nvkm_object *, - struct nvkm_oclass *, void *data, u32 size, - struct nvkm_object **); -int gk104_gr_init(struct nvkm_object *); - -int gm204_gr_init(struct nvkm_object *); +void gf100_gr_zbc_init(struct gf100_gr *); -extern struct nvkm_ofuncs gf100_fermi_ofuncs; - -extern struct nvkm_oclass gf100_gr_sclass[]; -extern struct nvkm_omthds gf100_gr_9097_omthds[]; -extern struct nvkm_omthds gf100_gr_90c0_omthds[]; -extern struct nvkm_oclass gf110_gr_sclass[]; -extern struct nvkm_oclass gk110_gr_sclass[]; -extern struct nvkm_oclass gm204_gr_sclass[]; +extern const struct nvkm_object_func gf100_fermi; struct gf100_gr_init { u32 addr; @@ -167,24 +197,11 @@ extern struct gf100_gr_ucode gf100_gr_gpccs_ucode; extern struct gf100_gr_ucode gk110_gr_fecs_ucode; extern 
struct gf100_gr_ucode gk110_gr_gpccs_ucode; -struct gf100_gr_oclass { - struct nvkm_oclass base; - struct nvkm_oclass **cclass; - struct nvkm_oclass *sclass; - const struct gf100_gr_pack *mmio; - struct { - struct gf100_gr_ucode *ucode; - } fecs; - struct { - struct gf100_gr_ucode *ucode; - } gpccs; - int ppc_nr; -}; - -void gf100_gr_mmio(struct gf100_gr_priv *, const struct gf100_gr_pack *); -void gf100_gr_icmd(struct gf100_gr_priv *, const struct gf100_gr_pack *); -void gf100_gr_mthd(struct gf100_gr_priv *, const struct gf100_gr_pack *); -int gf100_gr_init_ctxctl(struct gf100_gr_priv *); +int gf100_gr_wait_idle(struct gf100_gr *); +void gf100_gr_mmio(struct gf100_gr *, const struct gf100_gr_pack *); +void gf100_gr_icmd(struct gf100_gr *, const struct gf100_gr_pack *); +void gf100_gr_mthd(struct gf100_gr *, const struct gf100_gr_pack *); +int gf100_gr_init_ctxctl(struct gf100_gr *); /* register init value lists */ @@ -260,7 +277,7 @@ extern const struct gf100_gr_init gm107_gr_init_tex_0[]; extern const struct gf100_gr_init gm107_gr_init_l1c_0[]; extern const struct gf100_gr_init gm107_gr_init_wwdx_0[]; extern const struct gf100_gr_init gm107_gr_init_cbm_0[]; -void gm107_gr_init_bios(struct gf100_gr_priv *); +void gm107_gr_init_bios(struct gf100_gr *); extern const struct gf100_gr_pack gm204_gr_pack_mmio[]; #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c index 20d3b85db3b5..8f253e0a22f4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c @@ -24,6 +24,8 @@ #include "gf100.h" #include "ctxgf100.h" +#include <nvif/class.h> + /******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -110,18 +112,24 @@ gf104_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -struct nvkm_oclass * -gf104_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xc3), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf104_grctx_oclass, - .sclass = gf100_gr_sclass, +static const struct gf100_gr_func +gf104_gr = { + .init = gf100_gr_init, .mmio = gf104_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, -}.base; + .grctx = &gf104_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf104_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c index 8df73421c78c..815a5aafa245 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c @@ -27,20 +27,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -static struct nvkm_oclass -gf108_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { FERMI_MEMORY_TO_MEMORY_FORMAT_A, &nvkm_object_ofuncs }, - { FERMI_A, &gf100_fermi_ofuncs, 
gf100_gr_9097_omthds }, - { FERMI_B, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { FERMI_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -117,18 +103,25 @@ gf108_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -struct nvkm_oclass * -gf108_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xc1), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf108_grctx_oclass, - .sclass = gf108_gr_sclass, +static const struct gf100_gr_func +gf108_gr = { + .init = gf100_gr_init, .mmio = gf108_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, -}.base; + .grctx = &gf108_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_B, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf108_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf108_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c index ef76e2dd1d31..d13187409d68 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c @@ -27,21 +27,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -struct nvkm_oclass -gf110_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { FERMI_MEMORY_TO_MEMORY_FORMAT_A, &nvkm_object_ofuncs }, - { FERMI_A, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { FERMI_B, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { FERMI_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { FERMI_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -99,18 +84,26 @@ gf110_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -struct nvkm_oclass * -gf110_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xc8), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf110_grctx_oclass, - .sclass = gf110_gr_sclass, +static const struct gf100_gr_func +gf110_gr = { + .init = gf100_gr_init, .mmio = gf110_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, -}.base; + .grctx = &gf110_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_B, &gf100_fermi }, + { -1, -1, FERMI_C, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf110_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf110_gr, device, index, pgr); +} diff --git 
a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c index 871ac5f806f6..28483d8bf3d2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c @@ -24,6 +24,8 @@ #include "gf100.h" #include "ctxgf100.h" +#include <nvif/class.h> + /******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -118,19 +120,27 @@ gf117_gr_gpccs_ucode = { .data.size = sizeof(gf117_grgpc_data), }; -struct nvkm_oclass * -gf117_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xd7), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf117_grctx_oclass, - .sclass = gf110_gr_sclass, +static const struct gf100_gr_func +gf117_gr = { + .init = gf100_gr_init, .mmio = gf117_gr_pack_mmio, .fecs.ucode = &gf117_gr_fecs_ucode, .gpccs.ucode = &gf117_gr_gpccs_ucode, .ppc_nr = 1, -}.base; + .grctx = &gf117_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_B, &gf100_fermi }, + { -1, -1, FERMI_C, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf117_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf117_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c index e6dd651e2636..9811a72e0313 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c @@ -24,6 +24,8 @@ #include "gf100.h" #include "ctxgf100.h" +#include <nvif/class.h> + /******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -173,18 +175,26 @@ gf119_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -struct nvkm_oclass * -gf119_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xd9), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gf100_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gf119_grctx_oclass, - .sclass = gf110_gr_sclass, +static const struct gf100_gr_func +gf119_gr = { + .init = gf100_gr_init, .mmio = gf119_gr_pack_mmio, .fecs.ucode = &gf100_gr_fecs_ucode, .gpccs.ucode = &gf100_gr_gpccs_ucode, -}.base; + .grctx = &gf119_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A }, + { -1, -1, FERMI_A, &gf100_fermi }, + { -1, -1, FERMI_B, &gf100_fermi }, + { -1, -1, FERMI_C, &gf100_fermi }, + { -1, -1, FERMI_COMPUTE_A }, + {} + } +}; + +int +gf119_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gf119_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c index 46f7844eca70..abf54928a1a4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c @@ -24,24 +24,9 @@ #include "gf100.h" #include "ctxgf100.h" -#include <subdev/pmu.h> - #include <nvif/class.h> 
/******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -static struct nvkm_oclass -gk104_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs }, - { KEPLER_A, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -193,132 +178,112 @@ gk104_gr_pack_mmio[] = { ******************************************************************************/ int -gk104_gr_init(struct nvkm_object *object) +gk104_gr_init(struct gf100_gr *gr) { - struct gf100_gr_oclass *oclass = (void *)object->oclass; - struct gf100_gr_priv *priv = (void *)object; - struct nvkm_pmu *pmu = nvkm_pmu(priv); - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total); + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, rop; - int ret, i; - - if (pmu) - pmu->pgob(pmu, false); + int i; - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; + nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8); - nv_wr32(priv, GPC_BCAST(0x0880), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x08a4), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0888), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x088c), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0890), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0894), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x08b4), priv->unk4188b4->addr >> 8); - nv_wr32(priv, GPC_BCAST(0x08b8), priv->unk4188b8->addr >> 8); + gf100_gr_mmio(gr, gr->func->mmio); - gf100_gr_mmio(priv, oclass->mmio); - - nv_wr32(priv, GPC_UNIT(0, 0x3018), 0x00000001); + nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); - for (i = 0, gpc = -1; i < priv->tpc_total; i++) { + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; data[i / 8] |= tpc << ((i % 8) * 4); } - nv_wr32(priv, GPC_BCAST(0x0980), data[0]); - nv_wr32(priv, GPC_BCAST(0x0984), data[1]); - nv_wr32(priv, GPC_BCAST(0x0988), data[2]); - nv_wr32(priv, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x0914), - priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 | - priv->tpc_total); - nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); + nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); + nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); + nvkm_wr32(device, GPC_BCAST(0x098c), 
data[3]); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); - nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); - - nv_wr32(priv, 0x400500, 0x00010001); - - nv_wr32(priv, 0x400100, 0xffffffff); - nv_wr32(priv, 0x40013c, 0xffffffff); - - nv_wr32(priv, 0x409ffc, 0x00000000); - nv_wr32(priv, 0x409c14, 0x00003e3e); - nv_wr32(priv, 0x409c24, 0x000f0001); - nv_wr32(priv, 0x404000, 0xc0000000); - nv_wr32(priv, 0x404600, 0xc0000000); - nv_wr32(priv, 0x408030, 0xc0000000); - nv_wr32(priv, 0x404490, 0xc0000000); - nv_wr32(priv, 0x406018, 0xc0000000); - nv_wr32(priv, 0x407020, 0x40000000); - nv_wr32(priv, 0x405840, 0xc0000000); - nv_wr32(priv, 0x405844, 0x00ffffff); - nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); - nv_mask(priv, 0x419eb4, 0x00001000, 0x00001000); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x3038), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + + nvkm_wr32(device, 0x400500, 0x00010001); + + nvkm_wr32(device, 0x400100, 0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + + nvkm_wr32(device, 0x409ffc, 0x00000000); + nvkm_wr32(device, 0x409c14, 0x00003e3e); + nvkm_wr32(device, 0x409c24, 0x000f0001); + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x404600, 0xc0000000); + nvkm_wr32(device, 0x408030, 0xc0000000); + nvkm_wr32(device, 0x404490, 0xc0000000); + nvkm_wr32(device, 0x406018, 0xc0000000); + nvkm_wr32(device, 0x407020, 0x40000000); + nvkm_wr32(device, 0x405840, 0xc0000000); + nvkm_wr32(device, 0x405844, 0x00ffffff); + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x3038), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f); } - nv_wr32(priv, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nv_wr32(priv, GPC_UNIT(gpc, 0x2c94), 0xffffffff); + nvkm_wr32(device, 
GPC_UNIT(gpc, 0x2c90), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - for (rop = 0; rop < priv->rop_nr; rop++) { - nv_wr32(priv, ROP_UNIT(rop, 0x144), 0xc0000000); - nv_wr32(priv, ROP_UNIT(rop, 0x070), 0xc0000000); - nv_wr32(priv, ROP_UNIT(rop, 0x204), 0xffffffff); - nv_wr32(priv, ROP_UNIT(rop, 0x208), 0xffffffff); + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } - nv_wr32(priv, 0x400108, 0xffffffff); - nv_wr32(priv, 0x400138, 0xffffffff); - nv_wr32(priv, 0x400118, 0xffffffff); - nv_wr32(priv, 0x400130, 0xffffffff); - nv_wr32(priv, 0x40011c, 0xffffffff); - nv_wr32(priv, 0x400134, 0xffffffff); + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400118, 0xffffffff); + nvkm_wr32(device, 0x400130, 0xffffffff); + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); - nv_wr32(priv, 0x400054, 0x34ce3464); + nvkm_wr32(device, 0x400054, 0x34ce3464); - gf100_gr_zbc_init(priv); + gf100_gr_zbc_init(gr); - return gf100_gr_init_ctxctl(priv); -} - -int -gk104_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nvkm_pmu *pmu = nvkm_pmu(parent); - if (pmu) - pmu->pgob(pmu, false); - return gf100_gr_ctor(parent, engine, oclass, data, size, pobject); + return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgk104.fuc3.h" @@ -341,19 +306,25 @@ gk104_gr_gpccs_ucode = { .data.size = sizeof(gk104_grgpc_data), }; -struct nvkm_oclass * -gk104_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xe4), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gk104_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gk104_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gk104_grctx_oclass, - .sclass = gk104_gr_sclass, +static const struct gf100_gr_func +gk104_gr = { + .init = gk104_gr_init, .mmio = gk104_gr_pack_mmio, .fecs.ucode = &gk104_gr_fecs_ucode, .gpccs.ucode = &gk104_gr_gpccs_ucode, .ppc_nr = 1, -}.base; + .grctx = &gk104_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, + { -1, -1, KEPLER_A, &gf100_fermi }, + { -1, -1, KEPLER_COMPUTE_A }, + {} + } +}; + +int +gk104_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gk104_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c index f4cd8e5546af..32aa2946e7b7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c @@ -29,19 +29,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -struct nvkm_oclass -gk110_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_B, &nvkm_object_ofuncs }, - { KEPLER_B, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { KEPLER_COMPUTE_B, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -193,19 
+180,25 @@ gk110_gr_gpccs_ucode = { .data.size = sizeof(gk110_grgpc_data), }; -struct nvkm_oclass * -gk110_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xf0), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gk104_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gk104_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gk110_grctx_oclass, - .sclass = gk110_gr_sclass, +static const struct gf100_gr_func +gk110_gr = { + .init = gk104_gr_init, .mmio = gk110_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, .ppc_nr = 2, -}.base; + .grctx = &gk110_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, KEPLER_B, &gf100_fermi }, + { -1, -1, KEPLER_COMPUTE_B }, + {} + } +}; + +int +gk110_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gk110_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c index 9ff9eab0ccaf..22f88afbf35f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c @@ -24,6 +24,8 @@ #include "gf100.h" #include "ctxgf100.h" +#include <nvif/class.h> + /******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -98,19 +100,25 @@ gk110b_gr_pack_mmio[] = { * PGRAPH engine/subdev functions ******************************************************************************/ -struct nvkm_oclass * -gk110b_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xf1), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gk104_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gk104_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gk110b_grctx_oclass, - .sclass = gk110_gr_sclass, +static const struct gf100_gr_func +gk110b_gr = { + .init = gk104_gr_init, .mmio = gk110b_gr_pack_mmio, .fecs.ucode = &gk110_gr_fecs_ucode, .gpccs.ucode = &gk110_gr_gpccs_ucode, .ppc_nr = 2, -}.base; + .grctx = &gk110b_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, KEPLER_B, &gf100_fermi }, + { -1, -1, KEPLER_COMPUTE_B }, + {} + } +}; + +int +gk110b_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gk110b_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c index 85f44a3d5d11..ee7554fc87dc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c @@ -29,19 +29,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -static struct nvkm_oclass -gk208_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_B, &nvkm_object_ofuncs }, - { KEPLER_B, &gf100_fermi_ofuncs }, - { KEPLER_COMPUTE_B, &nvkm_object_ofuncs }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -172,19 +159,25 @@ gk208_gr_gpccs_ucode = { .data.size = sizeof(gk208_grgpc_data), }; -struct nvkm_oclass * -gk208_gr_oclass = &(struct gf100_gr_oclass) { - 
.base.handle = NV_ENGINE(GR, 0x08), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gk104_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gk104_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gk208_grctx_oclass, - .sclass = gk208_gr_sclass, +static const struct gf100_gr_func +gk208_gr = { + .init = gk104_gr_init, .mmio = gk208_gr_pack_mmio, .fecs.ucode = &gk208_gr_fecs_ucode, .gpccs.ucode = &gk208_gr_gpccs_ucode, .ppc_nr = 1, -}.base; + .grctx = &gk208_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, KEPLER_B, &gf100_fermi }, + { -1, -1, KEPLER_COMPUTE_B }, + {} + } +}; + +int +gk208_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gk208_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c index 40ff5eb9180c..b8758d3b8b51 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -22,28 +22,335 @@ #include "gf100.h" #include "ctxgf100.h" +#include <subdev/timer.h> + #include <nvif/class.h> -static struct nvkm_oclass -gk20a_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs }, - { KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} +static void +gk20a_gr_init_dtor(struct gf100_gr_pack *pack) +{ + vfree(pack); +} + +struct gk20a_fw_av +{ + u32 addr; + u32 data; +}; + +static struct gf100_gr_pack * +gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc) +{ + struct gf100_gr_init *init; + struct gf100_gr_pack *pack; + const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); + int i; + + pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); + if (!pack) + return ERR_PTR(-ENOMEM); + + init = (void *)(pack + 2); + + pack[0].init = init; + + for (i = 0; i < nent; i++) { + struct gf100_gr_init *ent = &init[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; + + ent->addr = av->addr; + ent->data = av->data; + ent->count = 1; + ent->pitch = 1; + } + + return pack; +} + +struct gk20a_fw_aiv +{ + u32 addr; + u32 index; + u32 data; }; -struct nvkm_oclass * -gk20a_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0xea), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gk104_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gk20a_grctx_oclass, - .sclass = gk20a_gr_sclass, - .mmio = gk104_gr_pack_mmio, +static struct gf100_gr_pack * +gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc) +{ + struct gf100_gr_init *init; + struct gf100_gr_pack *pack; + const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv)); + int i; + + pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1))); + if (!pack) + return ERR_PTR(-ENOMEM); + + init = (void *)(pack + 2); + + pack[0].init = init; + + for (i = 0; i < nent; i++) { + struct gf100_gr_init *ent = &init[i]; + struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i]; + + ent->addr = av->addr; + ent->data = av->data; + ent->count = 1; + ent->pitch = 1; + } + + return pack; +} + +static struct 
gf100_gr_pack * +gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc) +{ + struct gf100_gr_init *init; + struct gf100_gr_pack *pack; + /* We don't suppose we will initialize more than 16 classes here... */ + static const unsigned int max_classes = 16; + const int nent = (fuc->size / sizeof(struct gk20a_fw_av)); + int i, classidx = 0; + u32 prevclass = 0; + + pack = vzalloc((sizeof(*pack) * max_classes) + + (sizeof(*init) * (nent + 1))); + if (!pack) + return ERR_PTR(-ENOMEM); + + init = (void *)(pack + max_classes); + + for (i = 0; i < nent; i++) { + struct gf100_gr_init *ent = &init[i]; + struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i]; + u32 class = av->addr & 0xffff; + u32 addr = (av->addr & 0xffff0000) >> 14; + + if (prevclass != class) { + pack[classidx].init = ent; + pack[classidx].type = class; + prevclass = class; + if (++classidx >= max_classes) { + vfree(pack); + return ERR_PTR(-ENOSPC); + } + } + + ent->addr = addr; + ent->data = av->data; + ent->count = 1; + ent->pitch = 1; + } + + return pack; +} + +static int +gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr) +{ + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x40910c) & 0x00000006)) + break; + ) < 0) { + nvkm_error(subdev, "FECS mem scrubbing timeout\n"); + return -ETIMEDOUT; + } + + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x41a10c) & 0x00000006)) + break; + ) < 0) { + nvkm_error(subdev, "GPCCS mem scrubbing timeout\n"); + return -ETIMEDOUT; + } + + return 0; +} + +static void +gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x419e44, 0x1ffffe); + nvkm_wr32(device, 0x419e4c, 0x7f); +} + +int +gk20a_gr_init(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + u32 data[TPC_MAX / 8] = {}; + u8 tpcnr[GPC_MAX]; + int gpc, tpc; + int ret, i; + + /* Clear SCC RAM */ + nvkm_wr32(device, 0x40802c, 0x1); + + gf100_gr_mmio(gr, gr->fuc_sw_nonctx); + + ret = gk20a_gr_wait_mem_scrubbing(gr); + if (ret) + return ret; + + ret = gf100_gr_wait_idle(gr); + if (ret) + return ret; + + /* MMU debug buffer */ + nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8); + + if (gr->func->init_gpc_mmu) + gr->func->init_gpc_mmu(gr); + + /* Set the PE as stream master */ + nvkm_mask(device, 0x503018, 0x1, 0x1); + + /* Zcull init */ + memset(data, 0x00, sizeof(data)); + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (i = 0, gpc = -1; i < gr->tpc_total; i++) { + do { + gpc = (gpc + 1) % gr->gpc_nr; + } while (!tpcnr[gpc]); + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; + + data[i / 8] |= tpc << ((i % 8) * 4); + } + + nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); + nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); + nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); + nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); + } + + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + + /* Enable FIFO access */ + nvkm_wr32(device, 0x400500, 0x00010001); + + /* Enable interrupts */ + nvkm_wr32(device, 0x400100, 
0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + + /* Enable FECS error interrupts */ + nvkm_wr32(device, 0x409c24, 0x000f0000); + + /* Enable hardware warning exceptions */ + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x404600, 0xc0000000); + + if (gr->func->set_hww_esr_report_mask) + gr->func->set_hww_esr_report_mask(gr); + + /* Enable TPC exceptions per GPC */ + nvkm_wr32(device, 0x419d0c, 0x2); + nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16); + + /* Reset and enable all exceptions */ + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400118, 0xffffffff); + nvkm_wr32(device, 0x400130, 0xffffffff); + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); + + gf100_gr_zbc_init(gr); + + return gf100_gr_init_ctxctl(gr); +} + +void +gk20a_gr_dtor(struct gf100_gr *gr) +{ + gk20a_gr_init_dtor(gr->fuc_method); + gk20a_gr_init_dtor(gr->fuc_bundle); + gk20a_gr_init_dtor(gr->fuc_sw_ctx); + gk20a_gr_init_dtor(gr->fuc_sw_nonctx); +} + +int +gk20a_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) +{ + struct gf100_gr_fuc fuc; + struct gf100_gr *gr; + int ret; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + + ret = gf100_gr_ctor(func, device, index, gr); + if (ret) + return ret; + + ret = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc); + if (ret) + return ret; + gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc); + gf100_gr_dtor_fw(&fuc); + if (IS_ERR(gr->fuc_sw_nonctx)) + return PTR_ERR(gr->fuc_sw_nonctx); + + ret = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc); + if (ret) + return ret; + gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc); + gf100_gr_dtor_fw(&fuc); + if (IS_ERR(gr->fuc_sw_ctx)) + return PTR_ERR(gr->fuc_sw_ctx); + + ret = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc); + if (ret) + return ret; + gr->fuc_bundle = gk20a_gr_av_to_init(&fuc); + gf100_gr_dtor_fw(&fuc); + if (IS_ERR(gr->fuc_bundle)) + return PTR_ERR(gr->fuc_bundle); + + ret = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc); + if (ret) + return ret; + gr->fuc_method = gk20a_gr_av_to_method(&fuc); + gf100_gr_dtor_fw(&fuc); + if (IS_ERR(gr->fuc_method)) + return PTR_ERR(gr->fuc_method); + + return 0; +} + +static const struct gf100_gr_func +gk20a_gr = { + .dtor = gk20a_gr_dtor, + .init = gk20a_gr_init, + .set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask, .ppc_nr = 1, -}.base; + .grctx = &gk20a_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_A }, + { -1, -1, KEPLER_C, &gf100_fermi }, + { -1, -1, KEPLER_COMPUTE_A }, + {} + } +}; + +int +gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gk20a_gr_new_(&gk20a_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c index a5ebd459bc24..56e960212e5d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c @@ -30,19 +30,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -static struct nvkm_oclass -gm107_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_B, &nvkm_object_ofuncs }, - { MAXWELL_A, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { MAXWELL_COMPUTE_A, 
&nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -292,7 +279,7 @@ gm107_gr_pack_mmio[] = { ******************************************************************************/ void -gm107_gr_init_bios(struct gf100_gr_priv *priv) +gm107_gr_init_bios(struct gf100_gr *gr) { static const struct { u32 ctrl; @@ -304,7 +291,8 @@ gm107_gr_init_bios(struct gf100_gr_priv *priv) { 0x419af0, 0x419af4 }, { 0x419af8, 0x419afc }, }; - struct nvkm_bios *bios = nvkm_bios(priv); + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_bios *bios = device->bios; struct nvbios_P0260E infoE; struct nvbios_P0260X infoX; int E = -1, X; @@ -312,124 +300,119 @@ gm107_gr_init_bios(struct gf100_gr_priv *priv) while (nvbios_P0260Ep(bios, ++E, &ver, &hdr, &infoE)) { if (X = -1, E < ARRAY_SIZE(regs)) { - nv_wr32(priv, regs[E].ctrl, infoE.data); + nvkm_wr32(device, regs[E].ctrl, infoE.data); while (nvbios_P0260Xp(bios, ++X, &ver, &hdr, &infoX)) - nv_wr32(priv, regs[E].data, infoX.data); + nvkm_wr32(device, regs[E].data, infoX.data); } } } int -gm107_gr_init(struct nvkm_object *object) +gm107_gr_init(struct gf100_gr *gr) { - struct gf100_gr_oclass *oclass = (void *)object->oclass; - struct gf100_gr_priv *priv = (void *)object; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total); + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); u32 data[TPC_MAX / 8] = {}; u8 tpcnr[GPC_MAX]; int gpc, tpc, ppc, rop; - int ret, i; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; + int i; - nv_wr32(priv, GPC_BCAST(0x0880), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0890), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x0894), 0x00000000); - nv_wr32(priv, GPC_BCAST(0x08b4), priv->unk4188b4->addr >> 8); - nv_wr32(priv, GPC_BCAST(0x08b8), priv->unk4188b8->addr >> 8); + nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000); + nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8); - gf100_gr_mmio(priv, oclass->mmio); + gf100_gr_mmio(gr, gr->func->mmio); - gm107_gr_init_bios(priv); + gm107_gr_init_bios(gr); - nv_wr32(priv, GPC_UNIT(0, 0x3018), 0x00000001); + nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); - for (i = 0, gpc = -1; i < priv->tpc_total; i++) { + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; data[i / 8] |= tpc << ((i % 8) * 4); } - nv_wr32(priv, GPC_BCAST(0x0980), data[0]); - nv_wr32(priv, GPC_BCAST(0x0984), data[1]); - nv_wr32(priv, GPC_BCAST(0x0988), data[2]); - nv_wr32(priv, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x0914), - priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 | - priv->tpc_total); - nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); + 
nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); + nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); + nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); - nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); - - nv_wr32(priv, 0x400500, 0x00010001); - - nv_wr32(priv, 0x400100, 0xffffffff); - nv_wr32(priv, 0x40013c, 0xffffffff); - nv_wr32(priv, 0x400124, 0x00000002); - nv_wr32(priv, 0x409c24, 0x000e0000); - - nv_wr32(priv, 0x404000, 0xc0000000); - nv_wr32(priv, 0x404600, 0xc0000000); - nv_wr32(priv, 0x408030, 0xc0000000); - nv_wr32(priv, 0x404490, 0xc0000000); - nv_wr32(priv, 0x406018, 0xc0000000); - nv_wr32(priv, 0x407020, 0x40000000); - nv_wr32(priv, 0x405840, 0xc0000000); - nv_wr32(priv, 0x405844, 0x00ffffff); - nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (ppc = 0; ppc < 2 /* priv->ppc_nr[gpc] */; ppc++) - nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + + nvkm_wr32(device, 0x400500, 0x00010001); + + nvkm_wr32(device, 0x400100, 0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + nvkm_wr32(device, 0x400124, 0x00000002); + nvkm_wr32(device, 0x409c24, 0x000e0000); + + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x404600, 0xc0000000); + nvkm_wr32(device, 0x408030, 0xc0000000); + nvkm_wr32(device, 0x404490, 0xc0000000); + nvkm_wr32(device, 0x406018, 0xc0000000); + nvkm_wr32(device, 0x407020, 0x40000000); + nvkm_wr32(device, 0x405840, 0xc0000000); + nvkm_wr32(device, 0x405844, 0x00ffffff); + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < 2 /* gr->ppc_nr[gpc] */; ppc++) + nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + 
nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); } - nv_wr32(priv, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nv_wr32(priv, GPC_UNIT(gpc, 0x2c94), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); } - for (rop = 0; rop < priv->rop_nr; rop++) { - nv_wr32(priv, ROP_UNIT(rop, 0x144), 0x40000000); - nv_wr32(priv, ROP_UNIT(rop, 0x070), 0x40000000); - nv_wr32(priv, ROP_UNIT(rop, 0x204), 0xffffffff); - nv_wr32(priv, ROP_UNIT(rop, 0x208), 0xffffffff); + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } - nv_wr32(priv, 0x400108, 0xffffffff); - nv_wr32(priv, 0x400138, 0xffffffff); - nv_wr32(priv, 0x400118, 0xffffffff); - nv_wr32(priv, 0x400130, 0xffffffff); - nv_wr32(priv, 0x40011c, 0xffffffff); - nv_wr32(priv, 0x400134, 0xffffffff); + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400118, 0xffffffff); + nvkm_wr32(device, 0x400130, 0xffffffff); + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); - nv_wr32(priv, 0x400054, 0x2c350f63); + nvkm_wr32(device, 0x400054, 0x2c350f63); - gf100_gr_zbc_init(priv); + gf100_gr_zbc_init(gr); - return gf100_gr_init_ctxctl(priv); + return gf100_gr_init_ctxctl(gr); } #include "fuc/hubgm107.fuc5.h" @@ -452,19 +435,25 @@ gm107_gr_gpccs_ucode = { .data.size = sizeof(gm107_grgpc_data), }; -struct nvkm_oclass * -gm107_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0x07), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gm107_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gm107_grctx_oclass, - .sclass = gm107_gr_sclass, +static const struct gf100_gr_func +gm107_gr = { + .init = gm107_gr_init, .mmio = gm107_gr_pack_mmio, .fecs.ucode = &gm107_gr_fecs_ucode, .gpccs.ucode = &gm107_gr_gpccs_ucode, .ppc_nr = 2, -}.base; + .grctx = &gm107_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, MAXWELL_A, &gf100_fermi }, + { -1, -1, MAXWELL_COMPUTE_A }, + {} + } +}; + +int +gm107_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gm107_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c index fdb1dcf16a59..90381dde451a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm204.c @@ -27,19 +27,6 @@ #include <nvif/class.h> /******************************************************************************* - * Graphics object classes - ******************************************************************************/ - -struct nvkm_oclass -gm204_gr_sclass[] = { - { FERMI_TWOD_A, &nvkm_object_ofuncs }, - { KEPLER_INLINE_TO_MEMORY_B, &nvkm_object_ofuncs }, - { MAXWELL_B, &gf100_fermi_ofuncs, gf100_gr_9097_omthds }, - { MAXWELL_COMPUTE_B, &nvkm_object_ofuncs, gf100_gr_90c0_omthds }, - {} -}; - -/******************************************************************************* * PGRAPH register lists ******************************************************************************/ @@ -243,144 +230,144 @@ gm204_gr_data[] = { ******************************************************************************/ static int 
-gm204_gr_init_ctxctl(struct gf100_gr_priv *priv) +gm204_gr_init_ctxctl(struct gf100_gr *gr) { return 0; } int -gm204_gr_init(struct nvkm_object *object) +gm204_gr_init(struct gf100_gr *gr) { - struct gf100_gr_oclass *oclass = (void *)object->oclass; - struct gf100_gr_priv *priv = (void *)object; - const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, priv->tpc_total); - u32 data[TPC_MAX / 8] = {}; + struct nvkm_device *device = gr->base.engine.subdev.device; + const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total); + u32 data[TPC_MAX / 8] = {}, tmp; u8 tpcnr[GPC_MAX]; int gpc, tpc, ppc, rop; - int ret, i; - u32 tmp; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; + int i; - tmp = nv_rd32(priv, 0x100c80); /*XXX: mask? */ - nv_wr32(priv, 0x418880, 0x00001000 | (tmp & 0x00000fff)); - nv_wr32(priv, 0x418890, 0x00000000); - nv_wr32(priv, 0x418894, 0x00000000); - nv_wr32(priv, 0x4188b4, priv->unk4188b4->addr >> 8); - nv_wr32(priv, 0x4188b8, priv->unk4188b8->addr >> 8); - nv_mask(priv, 0x4188b0, 0x00040000, 0x00040000); + tmp = nvkm_rd32(device, 0x100c80); /*XXX: mask? */ + nvkm_wr32(device, 0x418880, 0x00001000 | (tmp & 0x00000fff)); + nvkm_wr32(device, 0x418890, 0x00000000); + nvkm_wr32(device, 0x418894, 0x00000000); + nvkm_wr32(device, 0x4188b4, nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, 0x4188b8, nvkm_memory_addr(gr->unk4188b8) >> 8); + nvkm_mask(device, 0x4188b0, 0x00040000, 0x00040000); /*XXX: belongs in fb */ - nv_wr32(priv, 0x100cc8, priv->unk4188b4->addr >> 8); - nv_wr32(priv, 0x100ccc, priv->unk4188b8->addr >> 8); - nv_mask(priv, 0x100cc4, 0x00040000, 0x00040000); + nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(gr->unk4188b4) >> 8); + nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(gr->unk4188b8) >> 8); + nvkm_mask(device, 0x100cc4, 0x00040000, 0x00040000); - gf100_gr_mmio(priv, oclass->mmio); + gf100_gr_mmio(gr, gr->func->mmio); - gm107_gr_init_bios(priv); + gm107_gr_init_bios(gr); - nv_wr32(priv, GPC_UNIT(0, 0x3018), 0x00000001); + nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001); memset(data, 0x00, sizeof(data)); - memcpy(tpcnr, priv->tpc_nr, sizeof(priv->tpc_nr)); - for (i = 0, gpc = -1; i < priv->tpc_total; i++) { + memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr)); + for (i = 0, gpc = -1; i < gr->tpc_total; i++) { do { - gpc = (gpc + 1) % priv->gpc_nr; + gpc = (gpc + 1) % gr->gpc_nr; } while (!tpcnr[gpc]); - tpc = priv->tpc_nr[gpc] - tpcnr[gpc]--; + tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--; data[i / 8] |= tpc << ((i % 8) * 4); } - nv_wr32(priv, GPC_BCAST(0x0980), data[0]); - nv_wr32(priv, GPC_BCAST(0x0984), data[1]); - nv_wr32(priv, GPC_BCAST(0x0988), data[2]); - nv_wr32(priv, GPC_BCAST(0x098c), data[3]); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - nv_wr32(priv, GPC_UNIT(gpc, 0x0914), - priv->magic_not_rop_nr << 8 | priv->tpc_nr[gpc]); - nv_wr32(priv, GPC_UNIT(gpc, 0x0910), 0x00040000 | - priv->tpc_total); - nv_wr32(priv, GPC_UNIT(gpc, 0x0918), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x0980), data[0]); + nvkm_wr32(device, GPC_BCAST(0x0984), data[1]); + nvkm_wr32(device, GPC_BCAST(0x0988), data[2]); + nvkm_wr32(device, GPC_BCAST(0x098c), data[3]); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + nvkm_wr32(device, GPC_UNIT(gpc, 0x0914), + gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 | + gr->tpc_total); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918); } - nv_wr32(priv, GPC_BCAST(0x3fd4), magicgpc918); - nv_wr32(priv, GPC_BCAST(0x08ac), nv_rd32(priv, 0x100800)); - nv_wr32(priv, 
GPC_BCAST(0x033c), nv_rd32(priv, 0x100804)); - - nv_wr32(priv, 0x400500, 0x00010001); - nv_wr32(priv, 0x400100, 0xffffffff); - nv_wr32(priv, 0x40013c, 0xffffffff); - nv_wr32(priv, 0x400124, 0x00000002); - nv_wr32(priv, 0x409c24, 0x000e0000); - nv_wr32(priv, 0x405848, 0xc0000000); - nv_wr32(priv, 0x40584c, 0x00000001); - nv_wr32(priv, 0x404000, 0xc0000000); - nv_wr32(priv, 0x404600, 0xc0000000); - nv_wr32(priv, 0x408030, 0xc0000000); - nv_wr32(priv, 0x404490, 0xc0000000); - nv_wr32(priv, 0x406018, 0xc0000000); - nv_wr32(priv, 0x407020, 0x40000000); - nv_wr32(priv, 0x405840, 0xc0000000); - nv_wr32(priv, 0x405844, 0x00ffffff); - nv_mask(priv, 0x419cc0, 0x00000008, 0x00000008); - - for (gpc = 0; gpc < priv->gpc_nr; gpc++) { - for (ppc = 0; ppc < priv->ppc_nr[gpc]; ppc++) - nv_wr32(priv, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0420), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0900), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x1028), 0xc0000000); - nv_wr32(priv, GPC_UNIT(gpc, 0x0824), 0xc0000000); - for (tpc = 0; tpc < priv->tpc_nr[gpc]; tpc++) { - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); - nv_wr32(priv, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); + nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918); + nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800)); + nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804)); + + nvkm_wr32(device, 0x400500, 0x00010001); + nvkm_wr32(device, 0x400100, 0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + nvkm_wr32(device, 0x400124, 0x00000002); + nvkm_wr32(device, 0x409c24, 0x000e0000); + nvkm_wr32(device, 0x405848, 0xc0000000); + nvkm_wr32(device, 0x40584c, 0x00000001); + nvkm_wr32(device, 0x404000, 0xc0000000); + nvkm_wr32(device, 0x404600, 0xc0000000); + nvkm_wr32(device, 0x408030, 0xc0000000); + nvkm_wr32(device, 0x404490, 0xc0000000); + nvkm_wr32(device, 0x406018, 0xc0000000); + nvkm_wr32(device, 0x407020, 0x40000000); + nvkm_wr32(device, 0x405840, 0xc0000000); + nvkm_wr32(device, 0x405844, 0x00ffffff); + nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008); + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) + nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000); + nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000); + for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) { + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe); + nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000005); } - nv_wr32(priv, GPC_UNIT(gpc, 0x2c90), 0xffffffff); - nv_wr32(priv, GPC_UNIT(gpc, 0x2c94), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff); + nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff); 
} - for (rop = 0; rop < priv->rop_nr; rop++) { - nv_wr32(priv, ROP_UNIT(rop, 0x144), 0x40000000); - nv_wr32(priv, ROP_UNIT(rop, 0x070), 0x40000000); - nv_wr32(priv, ROP_UNIT(rop, 0x204), 0xffffffff); - nv_wr32(priv, ROP_UNIT(rop, 0x208), 0xffffffff); + for (rop = 0; rop < gr->rop_nr; rop++) { + nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000); + nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff); + nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff); } - nv_wr32(priv, 0x400108, 0xffffffff); - nv_wr32(priv, 0x400138, 0xffffffff); - nv_wr32(priv, 0x400118, 0xffffffff); - nv_wr32(priv, 0x400130, 0xffffffff); - nv_wr32(priv, 0x40011c, 0xffffffff); - nv_wr32(priv, 0x400134, 0xffffffff); + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400118, 0xffffffff); + nvkm_wr32(device, 0x400130, 0xffffffff); + nvkm_wr32(device, 0x40011c, 0xffffffff); + nvkm_wr32(device, 0x400134, 0xffffffff); - nv_wr32(priv, 0x400054, 0x2c350f63); + nvkm_wr32(device, 0x400054, 0x2c350f63); - gf100_gr_zbc_init(priv); + gf100_gr_zbc_init(gr); - return gm204_gr_init_ctxctl(priv); + return gm204_gr_init_ctxctl(gr); } -struct nvkm_oclass * -gm204_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0x24), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gm204_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gm204_grctx_oclass, - .sclass = gm204_gr_sclass, +static const struct gf100_gr_func +gm204_gr = { + .init = gm204_gr_init, .mmio = gm204_gr_pack_mmio, .ppc_nr = 2, -}.base; + .grctx = &gm204_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, MAXWELL_B, &gf100_fermi }, + { -1, -1, MAXWELL_COMPUTE_B }, + {} + } +}; + +int +gm204_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gm204_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c index 04b9733d146a..341dc560acbb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c @@ -24,17 +24,25 @@ #include "gf100.h" #include "ctxgf100.h" -struct nvkm_oclass * -gm206_gr_oclass = &(struct gf100_gr_oclass) { - .base.handle = NV_ENGINE(GR, 0x26), - .base.ofuncs = &(struct nvkm_ofuncs) { - .ctor = gf100_gr_ctor, - .dtor = gf100_gr_dtor, - .init = gm204_gr_init, - .fini = _nvkm_gr_fini, - }, - .cclass = &gm206_grctx_oclass, - .sclass = gm204_gr_sclass, +#include <nvif/class.h> + +static const struct gf100_gr_func +gm206_gr = { + .init = gm204_gr_init, .mmio = gm204_gr_pack_mmio, .ppc_nr = 2, -}.base; + .grctx = &gm206_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, MAXWELL_B, &gf100_fermi }, + { -1, -1, MAXWELL_COMPUTE_B }, + {} + } +}; + +int +gm206_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gf100_gr_new_(&gm206_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c new file mode 100644 index 000000000000..65b6e3d1e90d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include "gf100.h" +#include "ctxgf100.h" + +#include <subdev/timer.h> + +#include <nvif/class.h> + +static void +gm20b_gr_init_gpc_mmu(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 val; + + /* TODO this needs to be removed once secure boot works */ + if (1) { + nvkm_wr32(device, 0x100ce4, 0xffffffff); + } + + /* TODO update once secure boot works */ + val = nvkm_rd32(device, 0x100c80); + val &= 0xf000087f; + nvkm_wr32(device, 0x418880, val); + nvkm_wr32(device, 0x418890, 0); + nvkm_wr32(device, 0x418894, 0); + + nvkm_wr32(device, 0x4188b0, nvkm_rd32(device, 0x100cc4)); + nvkm_wr32(device, 0x4188b4, nvkm_rd32(device, 0x100cc8)); + nvkm_wr32(device, 0x4188b8, nvkm_rd32(device, 0x100ccc)); + + nvkm_wr32(device, 0x4188ac, nvkm_rd32(device, 0x100800)); +} + +static void +gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + nvkm_wr32(device, 0x419e44, 0xdffffe); + nvkm_wr32(device, 0x419e4c, 0x5); +} + +static const struct gf100_gr_func +gm20b_gr = { + .dtor = gk20a_gr_dtor, + .init = gk20a_gr_init, + .init_gpc_mmu = gm20b_gr_init_gpc_mmu, + .set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask, + .ppc_nr = 1, + .grctx = &gm20b_grctx, + .sclass = { + { -1, -1, FERMI_TWOD_A }, + { -1, -1, KEPLER_INLINE_TO_MEMORY_B }, + { -1, -1, MAXWELL_B, &gf100_fermi }, + { -1, -1, MAXWELL_COMPUTE_B }, + {} + } +}; + +int +gm20b_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return gk20a_gr_new_(&gm20b_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c new file mode 100644 index 000000000000..2e68919f00b2 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c @@ -0,0 +1,47 @@ +/* + * Copyright 2012 Red Hat Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv50.h" + +static const struct nvkm_gr_func +gt200_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .tlb_flush = g84_gr_tlb_flush, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, 0x8397, &nv50_gr_object }, + {} + } +}; + +int +gt200_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(>200_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c new file mode 100644 index 000000000000..2bf7aac360cc --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c @@ -0,0 +1,48 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: Ben Skeggs + */ +#include "nv50.h" + +static const struct nvkm_gr_func +gt215_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .tlb_flush = g84_gr_tlb_flush, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, 0x8597, &nv50_gr_object }, + { -1, -1, 0x85c0, &nv50_gr_object }, + {} + } +}; + +int +gt215_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(>215_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c new file mode 100644 index 000000000000..95d5219faf93 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c @@ -0,0 +1,46 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv50.h" + +static const struct nvkm_gr_func +mcp79_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, 0x8397, &nv50_gr_object }, + {} + } +}; + +int +mcp79_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(&mcp79_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c new file mode 100644 index 000000000000..027b58e5976b --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c @@ -0,0 +1,48 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv50.h" + +static const struct nvkm_gr_func +mcp89_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .tlb_flush = g84_gr_tlb_flush, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + { -1, -1, 0x85c0, &nv50_gr_object }, + { -1, -1, 0x8697, &nv50_gr_object }, + {} + } +}; + +int +mcp89_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(&mcp89_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c index 2614510c28d0..85c5b7fea5f5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c @@ -21,13 +21,13 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ -#include <engine/gr.h> +#include "priv.h" #include "regs.h" #include <core/client.h> -#include <core/device.h> -#include <core/handle.h> +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> #include <subdev/instmem.h> #include <subdev/timer.h> @@ -346,25 +346,23 @@ nv04_gr_ctx_regs[] = { NV04_PGRAPH_DEBUG_3 }; -struct nv04_gr_priv { +#define nv04_gr(p) container_of((p), struct nv04_gr, base) + +struct nv04_gr { struct nvkm_gr base; struct nv04_gr_chan *chan[16]; spinlock_t lock; }; +#define nv04_gr_chan(p) container_of((p), struct nv04_gr_chan, object) + struct nv04_gr_chan { - struct nvkm_object base; + struct nvkm_object object; + struct nv04_gr *gr; int chid; u32 nv04[ARRAY_SIZE(nv04_gr_ctx_regs)]; }; - -static inline struct nv04_gr_priv * -nv04_gr_priv(struct nv04_gr_chan *chan) -{ - return (void *)nv_object(chan)->engine; -} - /******************************************************************************* * Graphics object classes ******************************************************************************/ @@ -444,35 +442,34 @@ nv04_gr_priv(struct nv04_gr_chan *chan) */ static void -nv04_gr_set_ctx1(struct nvkm_object *object, u32 mask, u32 value) +nv04_gr_set_ctx1(struct nvkm_device *device, u32 inst, u32 mask, u32 value) { - struct nv04_gr_priv *priv = (void *)object->engine; - int subc = (nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR) >> 13) & 0x7; + int subc = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 13) & 0x7; u32 tmp; - tmp = nv_ro32(object, 0x00); + tmp = nvkm_rd32(device, 0x700000 + inst); tmp &= ~mask; tmp |= value; - nv_wo32(object, 0x00, tmp); + nvkm_wr32(device, 0x700000 + inst, tmp); - nv_wr32(priv, NV04_PGRAPH_CTX_SWITCH1, tmp); - nv_wr32(priv, NV04_PGRAPH_CTX_CACHE1 + (subc<<2), tmp); + nvkm_wr32(device, NV04_PGRAPH_CTX_SWITCH1, tmp); + nvkm_wr32(device, NV04_PGRAPH_CTX_CACHE1 + (subc << 2), tmp); } static void -nv04_gr_set_ctx_val(struct nvkm_object *object, u32 mask, u32 value) +nv04_gr_set_ctx_val(struct nvkm_device *device, u32 inst, u32 
mask, u32 value) { int class, op, valid = 1; u32 tmp, ctx1; - ctx1 = nv_ro32(object, 0x00); + ctx1 = nvkm_rd32(device, 0x700000 + inst); class = ctx1 & 0xff; op = (ctx1 >> 15) & 7; - tmp = nv_ro32(object, 0x0c); + tmp = nvkm_rd32(device, 0x70000c + inst); tmp &= ~mask; tmp |= value; - nv_wo32(object, 0x0c, tmp); + nvkm_wr32(device, 0x70000c + inst, tmp); /* check for valid surf2d/surf_dst/surf_color */ if (!(tmp & 0x02000000)) @@ -504,527 +501,567 @@ nv04_gr_set_ctx_val(struct nvkm_object *object, u32 mask, u32 value) break; } - nv04_gr_set_ctx1(object, 0x01000000, valid << 24); + nv04_gr_set_ctx1(device, inst, 0x01000000, valid << 24); } -static int -nv04_gr_mthd_set_operation(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_set_operation(struct nvkm_device *device, u32 inst, u32 data) { - u32 class = nv_ro32(object, 0) & 0xff; - u32 data = *(u32 *)args; + u8 class = nvkm_rd32(device, 0x700000) & 0x000000ff; if (data > 5) - return 1; + return false; /* Old versions of the objects only accept first three operations. */ if (data > 2 && class < 0x40) - return 1; - nv04_gr_set_ctx1(object, 0x00038000, data << 15); + return false; + nv04_gr_set_ctx1(device, inst, 0x00038000, data << 15); /* changing operation changes set of objects needed for validation */ - nv04_gr_set_ctx_val(object, 0, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0, 0); + return true; } -static int -nv04_gr_mthd_surf3d_clip_h(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_surf3d_clip_h(struct nvkm_device *device, u32 inst, u32 data) { - struct nv04_gr_priv *priv = (void *)object->engine; - u32 data = *(u32 *)args; u32 min = data & 0xffff, max; u32 w = data >> 16; if (min & 0x8000) /* too large */ - return 1; + return false; if (w & 0x8000) /* yes, it accepts negative for some reason. */ w |= 0xffff0000; max = min + w; max &= 0x3ffff; - nv_wr32(priv, 0x40053c, min); - nv_wr32(priv, 0x400544, max); - return 0; + nvkm_wr32(device, 0x40053c, min); + nvkm_wr32(device, 0x400544, max); + return true; } -static int -nv04_gr_mthd_surf3d_clip_v(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_surf3d_clip_v(struct nvkm_device *device, u32 inst, u32 data) { - struct nv04_gr_priv *priv = (void *)object->engine; - u32 data = *(u32 *)args; u32 min = data & 0xffff, max; u32 w = data >> 16; if (min & 0x8000) /* too large */ - return 1; + return false; if (w & 0x8000) /* yes, it accepts negative for some reason. 
*/ w |= 0xffff0000; max = min + w; max &= 0x3ffff; - nv_wr32(priv, 0x400540, min); - nv_wr32(priv, 0x400548, max); - return 0; + nvkm_wr32(device, 0x400540, min); + nvkm_wr32(device, 0x400548, max); + return true; } -static u16 -nv04_gr_mthd_bind_class(struct nvkm_object *object, u32 *args, u32 size) +static u8 +nv04_gr_mthd_bind_class(struct nvkm_device *device, u32 inst) { - struct nvkm_instmem *imem = nvkm_instmem(object); - u32 inst = *(u32 *)args << 4; - return nv_ro32(imem, inst); + return nvkm_rd32(device, 0x700000 + (inst << 4)); } -static int -nv04_gr_mthd_bind_surf2d(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf2d(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx1(object, 0x00004000, 0); - nv04_gr_set_ctx_val(object, 0x02000000, 0); - return 0; + nv04_gr_set_ctx1(device, inst, 0x00004000, 0); + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0); + return true; case 0x42: - nv04_gr_set_ctx1(object, 0x00004000, 0); - nv04_gr_set_ctx_val(object, 0x02000000, 0x02000000); - return 0; + nv04_gr_set_ctx1(device, inst, 0x00004000, 0); + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0x02000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_surf2d_swzsurf(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf2d_swzsurf(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx1(object, 0x00004000, 0); - nv04_gr_set_ctx_val(object, 0x02000000, 0); - return 0; + nv04_gr_set_ctx1(device, inst, 0x00004000, 0); + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0); + return true; case 0x42: - nv04_gr_set_ctx1(object, 0x00004000, 0); - nv04_gr_set_ctx_val(object, 0x02000000, 0x02000000); - return 0; + nv04_gr_set_ctx1(device, inst, 0x00004000, 0); + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0x02000000); + return true; case 0x52: - nv04_gr_set_ctx1(object, 0x00004000, 0x00004000); - nv04_gr_set_ctx_val(object, 0x02000000, 0x02000000); - return 0; + nv04_gr_set_ctx1(device, inst, 0x00004000, 0x00004000); + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0x02000000); + return true; } - return 1; + return false; } -static int -nv01_gr_mthd_bind_patt(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv01_gr_mthd_bind_patt(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x08000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x08000000, 0); + return true; case 0x18: - nv04_gr_set_ctx_val(object, 0x08000000, 0x08000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x08000000, 0x08000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_patt(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_patt(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x08000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x08000000, 0); + return true; case 0x44: - nv04_gr_set_ctx_val(object, 0x08000000, 0x08000000); - return 0; 
+ nv04_gr_set_ctx_val(device, inst, 0x08000000, 0x08000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_rop(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_rop(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x10000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x10000000, 0); + return true; case 0x43: - nv04_gr_set_ctx_val(object, 0x10000000, 0x10000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x10000000, 0x10000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_beta1(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_beta1(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x20000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x20000000, 0); + return true; case 0x12: - nv04_gr_set_ctx_val(object, 0x20000000, 0x20000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x20000000, 0x20000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_beta4(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_beta4(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x40000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x40000000, 0); + return true; case 0x72: - nv04_gr_set_ctx_val(object, 0x40000000, 0x40000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x40000000, 0x40000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_surf_dst(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf_dst(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x02000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0); + return true; case 0x58: - nv04_gr_set_ctx_val(object, 0x02000000, 0x02000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0x02000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_surf_src(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf_src(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x04000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x04000000, 0); + return true; case 0x59: - nv04_gr_set_ctx_val(object, 0x04000000, 0x04000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x04000000, 0x04000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_surf_color(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf_color(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 
0x02000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0); + return true; case 0x5a: - nv04_gr_set_ctx_val(object, 0x02000000, 0x02000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x02000000, 0x02000000); + return true; } - return 1; + return false; } -static int -nv04_gr_mthd_bind_surf_zeta(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv04_gr_mthd_bind_surf_zeta(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx_val(object, 0x04000000, 0); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x04000000, 0); + return true; case 0x5b: - nv04_gr_set_ctx_val(object, 0x04000000, 0x04000000); - return 0; + nv04_gr_set_ctx_val(device, inst, 0x04000000, 0x04000000); + return true; } - return 1; + return false; } -static int -nv01_gr_mthd_bind_clip(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv01_gr_mthd_bind_clip(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx1(object, 0x2000, 0); - return 0; + nv04_gr_set_ctx1(device, inst, 0x2000, 0); + return true; case 0x19: - nv04_gr_set_ctx1(object, 0x2000, 0x2000); - return 0; + nv04_gr_set_ctx1(device, inst, 0x2000, 0x2000); + return true; } - return 1; + return false; } -static int -nv01_gr_mthd_bind_chroma(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static bool +nv01_gr_mthd_bind_chroma(struct nvkm_device *device, u32 inst, u32 data) { - switch (nv04_gr_mthd_bind_class(object, args, size)) { + switch (nv04_gr_mthd_bind_class(device, data)) { case 0x30: - nv04_gr_set_ctx1(object, 0x1000, 0); - return 0; + nv04_gr_set_ctx1(device, inst, 0x1000, 0); + return true; /* Yes, for some reason even the old versions of objects * accept 0x57 and not 0x17. Consistency be damned. 
*/ case 0x57: - nv04_gr_set_ctx1(object, 0x1000, 0x1000); - return 0; + nv04_gr_set_ctx1(device, inst, 0x1000, 0x1000); + return true; } - return 1; + return false; } -static struct nvkm_omthds -nv03_gr_gdi_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_patt }, - { 0x0188, 0x0188, nv04_gr_mthd_bind_rop }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_beta1 }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_surf_dst }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; - -static struct nvkm_omthds -nv04_gr_gdi_omthds[] = { - { 0x0188, 0x0188, nv04_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta4 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf2d }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv03_gr_mthd_gdi(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_patt; break; + case 0x0188: func = nv04_gr_mthd_bind_rop; break; + case 0x018c: func = nv04_gr_mthd_bind_beta1; break; + case 0x0190: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv01_gr_blit_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_clip }, - { 0x018c, 0x018c, nv01_gr_mthd_bind_patt }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_rop }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta1 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf_dst }, - { 0x019c, 0x019c, nv04_gr_mthd_bind_surf_src }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv04_gr_mthd_gdi(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0188: func = nv04_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_beta4; break; + case 0x0198: func = nv04_gr_mthd_bind_surf2d; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_blit_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_clip }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_patt }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_rop }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta1 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_beta4 }, - { 0x019c, 0x019c, nv04_gr_mthd_bind_surf2d }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv01_gr_mthd_blit(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv01_gr_mthd_bind_clip; break; + case 0x018c: func = nv01_gr_mthd_bind_patt; break; + case 0x0190: func = nv04_gr_mthd_bind_rop; break; + case 0x0194: func = nv04_gr_mthd_bind_beta1; break; + case 0x0198: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x019c: func = nv04_gr_mthd_bind_surf_src; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_iifc_omthds[] = { - { 0x0188, 0x0188, nv01_gr_mthd_bind_chroma }, - { 0x018c, 0x018c, 
nv01_gr_mthd_bind_clip }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_patt }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_rop }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_beta1 }, - { 0x019c, 0x019c, nv04_gr_mthd_bind_beta4 }, - { 0x01a0, 0x01a0, nv04_gr_mthd_bind_surf2d_swzsurf }, - { 0x03e4, 0x03e4, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv04_gr_mthd_blit(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv01_gr_mthd_bind_clip; break; + case 0x018c: func = nv04_gr_mthd_bind_patt; break; + case 0x0190: func = nv04_gr_mthd_bind_rop; break; + case 0x0194: func = nv04_gr_mthd_bind_beta1; break; + case 0x0198: func = nv04_gr_mthd_bind_beta4; break; + case 0x019c: func = nv04_gr_mthd_bind_surf2d; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv01_gr_ifc_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_clip }, - { 0x018c, 0x018c, nv01_gr_mthd_bind_patt }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_rop }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta1 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf_dst }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv04_gr_mthd_iifc(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0188: func = nv01_gr_mthd_bind_chroma; break; + case 0x018c: func = nv01_gr_mthd_bind_clip; break; + case 0x0190: func = nv04_gr_mthd_bind_patt; break; + case 0x0194: func = nv04_gr_mthd_bind_rop; break; + case 0x0198: func = nv04_gr_mthd_bind_beta1; break; + case 0x019c: func = nv04_gr_mthd_bind_beta4; break; + case 0x01a0: func = nv04_gr_mthd_bind_surf2d_swzsurf; break; + case 0x03e4: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_ifc_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_clip }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_patt }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_rop }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta1 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_beta4 }, - { 0x019c, 0x019c, nv04_gr_mthd_bind_surf2d }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv01_gr_mthd_ifc(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv01_gr_mthd_bind_clip; break; + case 0x018c: func = nv01_gr_mthd_bind_patt; break; + case 0x0190: func = nv04_gr_mthd_bind_rop; break; + case 0x0194: func = nv04_gr_mthd_bind_beta1; break; + case 0x0198: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv03_gr_sifc_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_surf_dst }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv04_gr_mthd_ifc(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool 
(*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv01_gr_mthd_bind_clip; break; + case 0x018c: func = nv04_gr_mthd_bind_patt; break; + case 0x0190: func = nv04_gr_mthd_bind_rop; break; + case 0x0194: func = nv04_gr_mthd_bind_beta1; break; + case 0x0198: func = nv04_gr_mthd_bind_beta4; break; + case 0x019c: func = nv04_gr_mthd_bind_surf2d; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_sifc_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_chroma }, - { 0x0188, 0x0188, nv04_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta4 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf2d }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv03_gr_mthd_sifc(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv01_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv03_gr_sifm_omthds[] = { - { 0x0188, 0x0188, nv01_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_surf_dst }, - { 0x0304, 0x0304, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv04_gr_mthd_sifc(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_chroma; break; + case 0x0188: func = nv04_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_beta4; break; + case 0x0198: func = nv04_gr_mthd_bind_surf2d; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_sifm_omthds[] = { - { 0x0188, 0x0188, nv04_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta4 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf2d }, - { 0x0304, 0x0304, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv03_gr_mthd_sifm(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0188: func = nv01_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x0304: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_surf3d_omthds[] = { - { 0x02f8, 0x02f8, nv04_gr_mthd_surf3d_clip_h }, - { 0x02fc, 0x02fc, nv04_gr_mthd_surf3d_clip_v }, - {} -}; +static bool +nv04_gr_mthd_sifm(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct 
nvkm_device *, u32, u32); + switch (mthd) { + case 0x0188: func = nv04_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_beta4; break; + case 0x0198: func = nv04_gr_mthd_bind_surf2d; break; + case 0x0304: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv03_gr_ttri_omthds[] = { - { 0x0188, 0x0188, nv01_gr_mthd_bind_clip }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_surf_color }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_surf_zeta }, - {} -}; +static bool +nv04_gr_mthd_surf3d(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x02f8: func = nv04_gr_mthd_surf3d_clip_h; break; + case 0x02fc: func = nv04_gr_mthd_surf3d_clip_v; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv01_gr_prim_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_clip }, - { 0x0188, 0x0188, nv01_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_surf_dst }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv03_gr_mthd_ttri(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0188: func = nv01_gr_mthd_bind_clip; break; + case 0x018c: func = nv04_gr_mthd_bind_surf_color; break; + case 0x0190: func = nv04_gr_mthd_bind_surf_zeta; break; + default: + return false; + } + return func(device, inst, data); +} -static struct nvkm_omthds -nv04_gr_prim_omthds[] = { - { 0x0184, 0x0184, nv01_gr_mthd_bind_clip }, - { 0x0188, 0x0188, nv04_gr_mthd_bind_patt }, - { 0x018c, 0x018c, nv04_gr_mthd_bind_rop }, - { 0x0190, 0x0190, nv04_gr_mthd_bind_beta1 }, - { 0x0194, 0x0194, nv04_gr_mthd_bind_beta4 }, - { 0x0198, 0x0198, nv04_gr_mthd_bind_surf2d }, - { 0x02fc, 0x02fc, nv04_gr_mthd_set_operation }, - {} -}; +static bool +nv01_gr_mthd_prim(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_clip; break; + case 0x0188: func = nv01_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_surf_dst; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} -static int -nv04_gr_object_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +static bool +nv04_gr_mthd_prim(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) { - struct nvkm_gpuobj *obj; - int ret; + bool (*func)(struct nvkm_device *, u32, u32); + switch (mthd) { + case 0x0184: func = nv01_gr_mthd_bind_clip; break; + case 0x0188: func = nv04_gr_mthd_bind_patt; break; + case 0x018c: func = nv04_gr_mthd_bind_rop; break; + case 0x0190: func = nv04_gr_mthd_bind_beta1; break; + case 0x0194: func = nv04_gr_mthd_bind_beta4; break; + case 0x0198: func = nv04_gr_mthd_bind_surf2d; break; + case 0x02fc: func = nv04_gr_mthd_set_operation; break; + default: + return false; + } + return func(device, inst, data); +} - ret = nvkm_gpuobj_create(parent, engine, oclass, 0, 
parent, - 16, 16, 0, &obj); - *pobject = nv_object(obj); - if (ret) - return ret; +static bool +nv04_gr_mthd(struct nvkm_device *device, u32 inst, u32 mthd, u32 data) +{ + bool (*func)(struct nvkm_device *, u32, u32, u32); + switch (nvkm_rd32(device, 0x700000 + inst) & 0x000000ff) { + case 0x1c ... 0x1e: + func = nv01_gr_mthd_prim; break; + case 0x1f: func = nv01_gr_mthd_blit; break; + case 0x21: func = nv01_gr_mthd_ifc; break; + case 0x36: func = nv03_gr_mthd_sifc; break; + case 0x37: func = nv03_gr_mthd_sifm; break; + case 0x48: func = nv03_gr_mthd_ttri; break; + case 0x4a: func = nv04_gr_mthd_gdi; break; + case 0x4b: func = nv03_gr_mthd_gdi; break; + case 0x53: func = nv04_gr_mthd_surf3d; break; + case 0x5c ... 0x5e: + func = nv04_gr_mthd_prim; break; + case 0x5f: func = nv04_gr_mthd_blit; break; + case 0x60: func = nv04_gr_mthd_iifc; break; + case 0x61: func = nv04_gr_mthd_ifc; break; + case 0x76: func = nv04_gr_mthd_sifc; break; + case 0x77: func = nv04_gr_mthd_sifm; break; + default: + return false; + } + return func(device, inst, mthd, data); +} - nv_wo32(obj, 0x00, nv_mclass(obj)); +static int +nv04_gr_object_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) +{ + int ret = nvkm_gpuobj_new(object->engine->subdev.device, 16, align, + false, parent, pgpuobj); + if (ret == 0) { + nvkm_kmap(*pgpuobj); + nvkm_wo32(*pgpuobj, 0x00, object->oclass); #ifdef __BIG_ENDIAN - nv_mo32(obj, 0x00, 0x00080000, 0x00080000); + nvkm_mo32(*pgpuobj, 0x00, 0x00080000, 0x00080000); #endif - nv_wo32(obj, 0x04, 0x00000000); - nv_wo32(obj, 0x08, 0x00000000); - nv_wo32(obj, 0x0c, 0x00000000); - return 0; + nvkm_wo32(*pgpuobj, 0x04, 0x00000000); + nvkm_wo32(*pgpuobj, 0x08, 0x00000000); + nvkm_wo32(*pgpuobj, 0x0c, 0x00000000); + nvkm_done(*pgpuobj); + } + return ret; } -struct nvkm_ofuncs -nv04_gr_ofuncs = { - .ctor = nv04_gr_object_ctor, - .dtor = _nvkm_gpuobj_dtor, - .init = _nvkm_gpuobj_init, - .fini = _nvkm_gpuobj_fini, - .rd32 = _nvkm_gpuobj_rd32, - .wr32 = _nvkm_gpuobj_wr32, -}; - -static struct nvkm_oclass -nv04_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs }, /* beta1 */ - { 0x0017, &nv04_gr_ofuncs }, /* chroma */ - { 0x0018, &nv04_gr_ofuncs }, /* pattern (nv01) */ - { 0x0019, &nv04_gr_ofuncs }, /* clip */ - { 0x001c, &nv04_gr_ofuncs, nv01_gr_prim_omthds }, /* line */ - { 0x001d, &nv04_gr_ofuncs, nv01_gr_prim_omthds }, /* tri */ - { 0x001e, &nv04_gr_ofuncs, nv01_gr_prim_omthds }, /* rect */ - { 0x001f, &nv04_gr_ofuncs, nv01_gr_blit_omthds }, - { 0x0021, &nv04_gr_ofuncs, nv01_gr_ifc_omthds }, - { 0x0030, &nv04_gr_ofuncs }, /* null */ - { 0x0036, &nv04_gr_ofuncs, nv03_gr_sifc_omthds }, - { 0x0037, &nv04_gr_ofuncs, nv03_gr_sifm_omthds }, - { 0x0038, &nv04_gr_ofuncs }, /* dvd subpicture */ - { 0x0039, &nv04_gr_ofuncs }, /* m2mf */ - { 0x0042, &nv04_gr_ofuncs }, /* surf2d */ - { 0x0043, &nv04_gr_ofuncs }, /* rop */ - { 0x0044, &nv04_gr_ofuncs }, /* pattern */ - { 0x0048, &nv04_gr_ofuncs, nv03_gr_ttri_omthds }, - { 0x004a, &nv04_gr_ofuncs, nv04_gr_gdi_omthds }, - { 0x004b, &nv04_gr_ofuncs, nv03_gr_gdi_omthds }, - { 0x0052, &nv04_gr_ofuncs }, /* swzsurf */ - { 0x0053, &nv04_gr_ofuncs, nv04_gr_surf3d_omthds }, - { 0x0054, &nv04_gr_ofuncs }, /* ttri */ - { 0x0055, &nv04_gr_ofuncs }, /* mtri */ - { 0x0057, &nv04_gr_ofuncs }, /* chroma */ - { 0x0058, &nv04_gr_ofuncs }, /* surf_dst */ - { 0x0059, &nv04_gr_ofuncs }, /* surf_src */ - { 0x005a, &nv04_gr_ofuncs }, /* surf_color */ - { 0x005b, &nv04_gr_ofuncs }, /* surf_zeta */ - { 0x005c, &nv04_gr_ofuncs, 
nv04_gr_prim_omthds }, /* line */ - { 0x005d, &nv04_gr_ofuncs, nv04_gr_prim_omthds }, /* tri */ - { 0x005e, &nv04_gr_ofuncs, nv04_gr_prim_omthds }, /* rect */ - { 0x005f, &nv04_gr_ofuncs, nv04_gr_blit_omthds }, - { 0x0060, &nv04_gr_ofuncs, nv04_gr_iifc_omthds }, - { 0x0061, &nv04_gr_ofuncs, nv04_gr_ifc_omthds }, - { 0x0064, &nv04_gr_ofuncs }, /* iifc (nv05) */ - { 0x0065, &nv04_gr_ofuncs }, /* ifc (nv05) */ - { 0x0066, &nv04_gr_ofuncs }, /* sifc (nv05) */ - { 0x0072, &nv04_gr_ofuncs }, /* beta4 */ - { 0x0076, &nv04_gr_ofuncs, nv04_gr_sifc_omthds }, - { 0x0077, &nv04_gr_ofuncs, nv04_gr_sifm_omthds }, - {}, +const struct nvkm_object_func +nv04_gr_object = { + .bind = nv04_gr_object_bind, }; /******************************************************************************* @@ -1032,13 +1069,14 @@ nv04_gr_sclass[] = { ******************************************************************************/ static struct nv04_gr_chan * -nv04_gr_channel(struct nv04_gr_priv *priv) +nv04_gr_channel(struct nv04_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; struct nv04_gr_chan *chan = NULL; - if (nv_rd32(priv, NV04_PGRAPH_CTX_CONTROL) & 0x00010000) { - int chid = nv_rd32(priv, NV04_PGRAPH_CTX_USER) >> 24; - if (chid < ARRAY_SIZE(priv->chan)) - chan = priv->chan[chid]; + if (nvkm_rd32(device, NV04_PGRAPH_CTX_CONTROL) & 0x00010000) { + int chid = nvkm_rd32(device, NV04_PGRAPH_CTX_USER) >> 24; + if (chid < ARRAY_SIZE(gr->chan)) + chan = gr->chan[chid]; } return chan; } @@ -1046,55 +1084,52 @@ nv04_gr_channel(struct nv04_gr_priv *priv) static int nv04_gr_load_context(struct nv04_gr_chan *chan, int chid) { - struct nv04_gr_priv *priv = nv04_gr_priv(chan); + struct nvkm_device *device = chan->gr->base.engine.subdev.device; int i; for (i = 0; i < ARRAY_SIZE(nv04_gr_ctx_regs); i++) - nv_wr32(priv, nv04_gr_ctx_regs[i], chan->nv04[i]); + nvkm_wr32(device, nv04_gr_ctx_regs[i], chan->nv04[i]); - nv_wr32(priv, NV04_PGRAPH_CTX_CONTROL, 0x10010100); - nv_mask(priv, NV04_PGRAPH_CTX_USER, 0xff000000, chid << 24); - nv_mask(priv, NV04_PGRAPH_FFINTFC_ST2, 0xfff00000, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL, 0x10010100); + nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, chid << 24); + nvkm_mask(device, NV04_PGRAPH_FFINTFC_ST2, 0xfff00000, 0x00000000); return 0; } static int nv04_gr_unload_context(struct nv04_gr_chan *chan) { - struct nv04_gr_priv *priv = nv04_gr_priv(chan); + struct nvkm_device *device = chan->gr->base.engine.subdev.device; int i; for (i = 0; i < ARRAY_SIZE(nv04_gr_ctx_regs); i++) - chan->nv04[i] = nv_rd32(priv, nv04_gr_ctx_regs[i]); + chan->nv04[i] = nvkm_rd32(device, nv04_gr_ctx_regs[i]); - nv_wr32(priv, NV04_PGRAPH_CTX_CONTROL, 0x10000000); - nv_mask(priv, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000); + nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL, 0x10000000); + nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000); return 0; } static void -nv04_gr_context_switch(struct nv04_gr_priv *priv) +nv04_gr_context_switch(struct nv04_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; struct nv04_gr_chan *prev = NULL; struct nv04_gr_chan *next = NULL; - unsigned long flags; int chid; - spin_lock_irqsave(&priv->lock, flags); - nv04_gr_idle(priv); + nv04_gr_idle(&gr->base); /* If previous context is valid, we need to save it */ - prev = nv04_gr_channel(priv); + prev = nv04_gr_channel(gr); if (prev) nv04_gr_unload_context(prev); /* load context for next channel */ - chid = (nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR) >> 24) & 0x0f; - next = 
priv->chan[chid]; + chid = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 24) & 0x0f; + next = gr->chan[chid]; if (next) nv04_gr_load_context(next, chid); - - spin_unlock_irqrestore(&priv->lock, flags); } static u32 *ctx_reg(struct nv04_gr_chan *chan, u32 reg) @@ -1109,98 +1144,85 @@ static u32 *ctx_reg(struct nv04_gr_chan *chan, u32 reg) return NULL; } -static int -nv04_gr_context_ctor(struct nvkm_object *parent, - struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +static void * +nv04_gr_chan_dtor(struct nvkm_object *object) { - struct nvkm_fifo_chan *fifo = (void *)parent; - struct nv04_gr_priv *priv = (void *)engine; - struct nv04_gr_chan *chan; + struct nv04_gr_chan *chan = nv04_gr_chan(object); + struct nv04_gr *gr = chan->gr; unsigned long flags; - int ret; - - ret = nvkm_object_create(parent, engine, oclass, 0, &chan); - *pobject = nv_object(chan); - if (ret) - return ret; - - spin_lock_irqsave(&priv->lock, flags); - if (priv->chan[fifo->chid]) { - *pobject = nv_object(priv->chan[fifo->chid]); - atomic_inc(&(*pobject)->refcount); - spin_unlock_irqrestore(&priv->lock, flags); - nvkm_object_destroy(&chan->base); - return 1; - } - *ctx_reg(chan, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31; - - priv->chan[fifo->chid] = chan; - chan->chid = fifo->chid; - spin_unlock_irqrestore(&priv->lock, flags); - return 0; + spin_lock_irqsave(&gr->lock, flags); + gr->chan[chan->chid] = NULL; + spin_unlock_irqrestore(&gr->lock, flags); + return chan; } -static void -nv04_gr_context_dtor(struct nvkm_object *object) +static int +nv04_gr_chan_fini(struct nvkm_object *object, bool suspend) { - struct nv04_gr_priv *priv = (void *)object->engine; - struct nv04_gr_chan *chan = (void *)object; + struct nv04_gr_chan *chan = nv04_gr_chan(object); + struct nv04_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); - priv->chan[chan->chid] = NULL; - spin_unlock_irqrestore(&priv->lock, flags); - - nvkm_object_destroy(&chan->base); + spin_lock_irqsave(&gr->lock, flags); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + if (nv04_gr_channel(gr) == chan) + nv04_gr_unload_context(chan); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + spin_unlock_irqrestore(&gr->lock, flags); + return 0; } +static const struct nvkm_object_func +nv04_gr_chan = { + .dtor = nv04_gr_chan_dtor, + .fini = nv04_gr_chan_fini, +}; + static int -nv04_gr_context_fini(struct nvkm_object *object, bool suspend) +nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { - struct nv04_gr_priv *priv = (void *)object->engine; - struct nv04_gr_chan *chan = (void *)object; + struct nv04_gr *gr = nv04_gr(base); + struct nv04_gr_chan *chan; unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); - if (nv04_gr_channel(priv) == chan) - nv04_gr_unload_context(chan); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); - spin_unlock_irqrestore(&priv->lock, flags); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv04_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; - return nvkm_object_fini(&chan->base, suspend); -} + *ctx_reg(chan, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31; -static struct nvkm_oclass -nv04_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x04), - 
.ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv04_gr_context_ctor, - .dtor = nv04_gr_context_dtor, - .init = nvkm_object_init, - .fini = nv04_gr_context_fini, - }, -}; + spin_lock_irqsave(&gr->lock, flags); + gr->chan[chan->chid] = chan; + spin_unlock_irqrestore(&gr->lock, flags); + return 0; +} /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ bool -nv04_gr_idle(void *obj) +nv04_gr_idle(struct nvkm_gr *gr) { - struct nvkm_gr *gr = nvkm_gr(obj); + struct nvkm_subdev *subdev = &gr->engine.subdev; + struct nvkm_device *device = subdev->device; u32 mask = 0xffffffff; - if (nv_device(obj)->card_type == NV_40) + if (device->card_type == NV_40) mask &= ~NV40_PGRAPH_STATUS_SYNC_STALL; - if (!nv_wait(gr, NV04_PGRAPH_STATUS, mask, 0)) { - nv_error(gr, "idle timed out with status 0x%08x\n", - nv_rd32(gr, NV04_PGRAPH_STATUS)); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, NV04_PGRAPH_STATUS) & mask)) + break; + ) < 0) { + nvkm_error(subdev, "idle timed out with status %08x\n", + nvkm_rd32(device, NV04_PGRAPH_STATUS)); return false; } @@ -1247,136 +1269,159 @@ nv04_gr_nsource[] = { }; static void -nv04_gr_intr(struct nvkm_subdev *subdev) +nv04_gr_intr(struct nvkm_gr *base) { - struct nv04_gr_priv *priv = (void *)subdev; - struct nv04_gr_chan *chan = NULL; - struct nvkm_namedb *namedb = NULL; - struct nvkm_handle *handle = NULL; - u32 stat = nv_rd32(priv, NV03_PGRAPH_INTR); - u32 nsource = nv_rd32(priv, NV03_PGRAPH_NSOURCE); - u32 nstatus = nv_rd32(priv, NV03_PGRAPH_NSTATUS); - u32 addr = nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR); + struct nv04_gr *gr = nv04_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); + u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE); + u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); + u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR); u32 chid = (addr & 0x0f000000) >> 24; u32 subc = (addr & 0x0000e000) >> 13; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, NV04_PGRAPH_TRAPPED_DATA); - u32 class = nv_rd32(priv, 0x400180 + subc * 4) & 0xff; - u32 inst = (nv_rd32(priv, 0x40016c) & 0xffff) << 4; + u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA); + u32 class = nvkm_rd32(device, 0x400180 + subc * 4) & 0xff; + u32 inst = (nvkm_rd32(device, 0x40016c) & 0xffff) << 4; u32 show = stat; + char msg[128], src[128], sta[128]; + struct nv04_gr_chan *chan; unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); - chan = priv->chan[chid]; - if (chan) - namedb = (void *)nv_pclass(nv_object(chan), NV_NAMEDB_CLASS); - spin_unlock_irqrestore(&priv->lock, flags); + spin_lock_irqsave(&gr->lock, flags); + chan = gr->chan[chid]; if (stat & NV_PGRAPH_INTR_NOTIFY) { if (chan && (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD)) { - handle = nvkm_namedb_get_vinst(namedb, inst); - if (handle && !nv_call(handle->object, mthd, data)) + if (!nv04_gr_mthd(device, inst, mthd, data)) show &= ~NV_PGRAPH_INTR_NOTIFY; } } if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) { - nv_wr32(priv, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH); + nvkm_wr32(device, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH); stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; - nv04_gr_context_switch(priv); + nv04_gr_context_switch(gr); } - nv_wr32(priv, NV03_PGRAPH_INTR, stat); - nv_wr32(priv, NV04_PGRAPH_FIFO, 
0x00000001); + nvkm_wr32(device, NV03_PGRAPH_INTR, stat); + nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); if (show) { - nv_error(priv, "%s", ""); - nvkm_bitfield_print(nv04_gr_intr_name, show); - pr_cont(" nsource:"); - nvkm_bitfield_print(nv04_gr_nsource, nsource); - pr_cont(" nstatus:"); - nvkm_bitfield_print(nv04_gr_nstatus, nstatus); - pr_cont("\n"); - nv_error(priv, - "ch %d [%s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, nvkm_client_name(chan), subc, class, mthd, - data); + nvkm_snprintbf(msg, sizeof(msg), nv04_gr_intr_name, show); + nvkm_snprintbf(src, sizeof(src), nv04_gr_nsource, nsource); + nvkm_snprintbf(sta, sizeof(sta), nv04_gr_nstatus, nstatus); + nvkm_error(subdev, "intr %08x [%s] nsource %08x [%s] " + "nstatus %08x [%s] ch %d [%s] subc %d " + "class %04x mthd %04x data %08x\n", + show, msg, nsource, src, nstatus, sta, chid, + chan ? chan->object.client->name : "unknown", + subc, class, mthd, data); } - nvkm_namedb_put(handle); + spin_unlock_irqrestore(&gr->lock, flags); } static int -nv04_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv04_gr_init(struct nvkm_gr *base) { - struct nv04_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv04_gr_intr; - nv_engine(priv)->cclass = &nv04_gr_cclass; - nv_engine(priv)->sclass = nv04_gr_sclass; - spin_lock_init(&priv->lock); - return 0; -} - -static int -nv04_gr_init(struct nvkm_object *object) -{ - struct nvkm_engine *engine = nv_engine(object); - struct nv04_gr_priv *priv = (void *)engine; - int ret; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; + struct nv04_gr *gr = nv04_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; /* Enable PGRAPH interrupts */ - nv_wr32(priv, NV03_PGRAPH_INTR, 0xFFFFFFFF); - nv_wr32(priv, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); - - nv_wr32(priv, NV04_PGRAPH_VALID1, 0); - nv_wr32(priv, NV04_PGRAPH_VALID2, 0); - /*nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x000001FF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/ - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x1231c000); + nvkm_wr32(device, NV03_PGRAPH_INTR, 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); + + nvkm_wr32(device, NV04_PGRAPH_VALID1, 0); + nvkm_wr32(device, NV04_PGRAPH_VALID2, 0); + /*nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x000001FF); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/ + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x1231c000); /*1231C000 blob, 001 haiku*/ /*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/ - nv_wr32(priv, NV04_PGRAPH_DEBUG_1, 0x72111100); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x72111100); /*0x72111100 blob , 01 haiku*/ - /*nv_wr32(priv, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/ - nv_wr32(priv, NV04_PGRAPH_DEBUG_2, 0x11d5f071); + /*nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/ + nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x11d5f071); /*haiku same*/ - /*nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0xfad4ff31);*/ - nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0xf0d4ff31); + /*nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xfad4ff31);*/ + nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xf0d4ff31); /*haiku and blob 10d4*/ - nv_wr32(priv, NV04_PGRAPH_STATE , 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_CTX_CONTROL , 0x10000100); - nv_mask(priv, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000); + nvkm_wr32(device, NV04_PGRAPH_STATE , 0xFFFFFFFF); + 
nvkm_wr32(device, NV04_PGRAPH_CTX_CONTROL , 0x10000100); + nvkm_mask(device, NV04_PGRAPH_CTX_USER, 0xff000000, 0x0f000000); /* These don't belong here, they're part of a per-channel context */ - nv_wr32(priv, NV04_PGRAPH_PATTERN_SHAPE, 0x00000000); - nv_wr32(priv, NV04_PGRAPH_BETA_AND , 0xFFFFFFFF); + nvkm_wr32(device, NV04_PGRAPH_PATTERN_SHAPE, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_BETA_AND , 0xFFFFFFFF); return 0; } -struct nvkm_oclass -nv04_gr_oclass = { - .handle = NV_ENGINE(GR, 0x04), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv04_gr_ctor, - .dtor = _nvkm_gr_dtor, - .init = nv04_gr_init, - .fini = _nvkm_gr_fini, - }, +static const struct nvkm_gr_func +nv04_gr = { + .init = nv04_gr_init, + .intr = nv04_gr_intr, + .chan_new = nv04_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0017, &nv04_gr_object }, /* chroma */ + { -1, -1, 0x0018, &nv04_gr_object }, /* pattern (nv01) */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x001c, &nv04_gr_object }, /* line */ + { -1, -1, 0x001d, &nv04_gr_object }, /* tri */ + { -1, -1, 0x001e, &nv04_gr_object }, /* rect */ + { -1, -1, 0x001f, &nv04_gr_object }, + { -1, -1, 0x0021, &nv04_gr_object }, + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0036, &nv04_gr_object }, + { -1, -1, 0x0037, &nv04_gr_object }, + { -1, -1, 0x0038, &nv04_gr_object }, /* dvd subpicture */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0042, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* pattern */ + { -1, -1, 0x0048, &nv04_gr_object }, + { -1, -1, 0x004a, &nv04_gr_object }, + { -1, -1, 0x004b, &nv04_gr_object }, + { -1, -1, 0x0052, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x0053, &nv04_gr_object }, + { -1, -1, 0x0054, &nv04_gr_object }, /* ttri */ + { -1, -1, 0x0055, &nv04_gr_object }, /* mtri */ + { -1, -1, 0x0057, &nv04_gr_object }, /* chroma */ + { -1, -1, 0x0058, &nv04_gr_object }, /* surf_dst */ + { -1, -1, 0x0059, &nv04_gr_object }, /* surf_src */ + { -1, -1, 0x005a, &nv04_gr_object }, /* surf_color */ + { -1, -1, 0x005b, &nv04_gr_object }, /* surf_zeta */ + { -1, -1, 0x005c, &nv04_gr_object }, /* line */ + { -1, -1, 0x005d, &nv04_gr_object }, /* tri */ + { -1, -1, 0x005e, &nv04_gr_object }, /* rect */ + { -1, -1, 0x005f, &nv04_gr_object }, + { -1, -1, 0x0060, &nv04_gr_object }, + { -1, -1, 0x0061, &nv04_gr_object }, + { -1, -1, 0x0064, &nv04_gr_object }, /* iifc (nv05) */ + { -1, -1, 0x0065, &nv04_gr_object }, /* ifc (nv05) */ + { -1, -1, 0x0066, &nv04_gr_object }, /* sifc (nv05) */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0076, &nv04_gr_object }, + { -1, -1, 0x0077, &nv04_gr_object }, + {} + } }; + +int +nv04_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + struct nv04_gr *gr; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + spin_lock_init(&gr->lock); + *pgr = &gr->base; + + return nvkm_gr_ctor(&nv04_gr, device, index, 0x00001000, + true, &gr->base); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c index 389904eb603f..4542867fa9e6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c @@ -21,13 +21,13 @@ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ -#include <engine/gr.h> +#include "nv10.h" #include "regs.h" #include <core/client.h> -#include <core/device.h> -#include <core/handle.h> +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> #include <subdev/fb.h> struct pipe_state { @@ -386,14 +386,19 @@ static int nv17_gr_ctx_regs[] = { 0x00400a04, }; -struct nv10_gr_priv { +#define nv10_gr(p) container_of((p), struct nv10_gr, base) + +struct nv10_gr { struct nvkm_gr base; struct nv10_gr_chan *chan[32]; spinlock_t lock; }; +#define nv10_gr_chan(p) container_of((p), struct nv10_gr_chan, object) + struct nv10_gr_chan { - struct nvkm_object base; + struct nvkm_object object; + struct nv10_gr *gr; int chid; int nv10[ARRAY_SIZE(nv10_gr_ctx_regs)]; int nv17[ARRAY_SIZE(nv17_gr_ctx_regs)]; @@ -402,214 +407,151 @@ struct nv10_gr_chan { }; -static inline struct nv10_gr_priv * -nv10_gr_priv(struct nv10_gr_chan *chan) -{ - return (void *)nv_object(chan)->engine; -} - /******************************************************************************* * Graphics object classes ******************************************************************************/ -#define PIPE_SAVE(priv, state, addr) \ +#define PIPE_SAVE(gr, state, addr) \ do { \ int __i; \ - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, addr); \ + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, addr); \ for (__i = 0; __i < ARRAY_SIZE(state); __i++) \ - state[__i] = nv_rd32(priv, NV10_PGRAPH_PIPE_DATA); \ + state[__i] = nvkm_rd32(device, NV10_PGRAPH_PIPE_DATA); \ } while (0) -#define PIPE_RESTORE(priv, state, addr) \ +#define PIPE_RESTORE(gr, state, addr) \ do { \ int __i; \ - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, addr); \ + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, addr); \ for (__i = 0; __i < ARRAY_SIZE(state); __i++) \ - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, state[__i]); \ + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, state[__i]); \ } while (0) -static struct nvkm_oclass -nv10_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs }, /* clip */ - { 0x0030, &nv04_gr_ofuncs }, /* null */ - { 0x0039, &nv04_gr_ofuncs }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs }, /* rop */ - { 0x0044, &nv04_gr_ofuncs }, /* pattern */ - { 0x004a, &nv04_gr_ofuncs }, /* gdi */ - { 0x0052, &nv04_gr_ofuncs }, /* swzsurf */ - { 0x005f, &nv04_gr_ofuncs }, /* blit */ - { 0x0062, &nv04_gr_ofuncs }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs }, /* blit */ - { 0x0093, &nv04_gr_ofuncs }, /* surf3d */ - { 0x0094, &nv04_gr_ofuncs }, /* ttri */ - { 0x0095, &nv04_gr_ofuncs }, /* mtri */ - { 0x0056, &nv04_gr_ofuncs }, /* celcius */ - {}, -}; - -static struct nvkm_oclass -nv15_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs }, /* clip */ - { 0x0030, &nv04_gr_ofuncs }, /* null */ - { 0x0039, &nv04_gr_ofuncs }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs }, /* rop */ - { 0x0044, &nv04_gr_ofuncs }, /* pattern */ - { 0x004a, &nv04_gr_ofuncs }, /* gdi */ - { 0x0052, &nv04_gr_ofuncs }, /* swzsurf */ - { 0x005f, &nv04_gr_ofuncs }, /* blit */ - { 0x0062, &nv04_gr_ofuncs }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs }, /* blit */ - { 0x0093, &nv04_gr_ofuncs }, /* surf3d */ - { 0x0094, &nv04_gr_ofuncs }, /* ttri */ - { 0x0095, &nv04_gr_ofuncs }, /* mtri */ - { 0x0096, &nv04_gr_ofuncs }, /* celcius */ - {}, -}; - 
-static int -nv17_gr_mthd_lma_window(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static void +nv17_gr_mthd_lma_window(struct nv10_gr_chan *chan, u32 mthd, u32 data) { - struct nv10_gr_chan *chan = (void *)object->parent; - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nvkm_device *device = chan->object.engine->subdev.device; + struct nvkm_gr *gr = &chan->gr->base; struct pipe_state *pipe = &chan->pipe_state; u32 pipe_0x0040[1], pipe_0x64c0[8], pipe_0x6a80[3], pipe_0x6ab0[3]; u32 xfmode0, xfmode1; - u32 data = *(u32 *)args; int i; chan->lma_window[(mthd - 0x1638) / 4] = data; if (mthd != 0x1644) - return 0; + return; - nv04_gr_idle(priv); + nv04_gr_idle(gr); - PIPE_SAVE(priv, pipe_0x0040, 0x0040); - PIPE_SAVE(priv, pipe->pipe_0x0200, 0x0200); + PIPE_SAVE(device, pipe_0x0040, 0x0040); + PIPE_SAVE(device, pipe->pipe_0x0200, 0x0200); - PIPE_RESTORE(priv, chan->lma_window, 0x6790); + PIPE_RESTORE(device, chan->lma_window, 0x6790); - nv04_gr_idle(priv); + nv04_gr_idle(gr); - xfmode0 = nv_rd32(priv, NV10_PGRAPH_XFMODE0); - xfmode1 = nv_rd32(priv, NV10_PGRAPH_XFMODE1); + xfmode0 = nvkm_rd32(device, NV10_PGRAPH_XFMODE0); + xfmode1 = nvkm_rd32(device, NV10_PGRAPH_XFMODE1); - PIPE_SAVE(priv, pipe->pipe_0x4400, 0x4400); - PIPE_SAVE(priv, pipe_0x64c0, 0x64c0); - PIPE_SAVE(priv, pipe_0x6ab0, 0x6ab0); - PIPE_SAVE(priv, pipe_0x6a80, 0x6a80); + PIPE_SAVE(device, pipe->pipe_0x4400, 0x4400); + PIPE_SAVE(device, pipe_0x64c0, 0x64c0); + PIPE_SAVE(device, pipe_0x6ab0, 0x6ab0); + PIPE_SAVE(device, pipe_0x6a80, 0x6a80); - nv04_gr_idle(priv); + nv04_gr_idle(gr); - nv_wr32(priv, NV10_PGRAPH_XFMODE0, 0x10000000); - nv_wr32(priv, NV10_PGRAPH_XFMODE1, 0x00000000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0); + nvkm_wr32(device, NV10_PGRAPH_XFMODE0, 0x10000000); + nvkm_wr32(device, NV10_PGRAPH_XFMODE1, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0); for (i = 0; i < 4; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x3f800000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000); for (i = 0; i < 4; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0); for (i = 0; i < 3; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x3f800000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80); for (i = 0; i < 3; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000000); - - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040); - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000008); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000); - PIPE_RESTORE(priv, pipe->pipe_0x0200, 0x0200); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000008); - nv04_gr_idle(priv); + PIPE_RESTORE(device, pipe->pipe_0x0200, 0x0200); - PIPE_RESTORE(priv, pipe_0x0040, 0x0040); + nv04_gr_idle(gr); - nv_wr32(priv, NV10_PGRAPH_XFMODE0, xfmode0); - nv_wr32(priv, NV10_PGRAPH_XFMODE1, xfmode1); + PIPE_RESTORE(device, pipe_0x0040, 0x0040); - PIPE_RESTORE(priv, pipe_0x64c0, 0x64c0); - PIPE_RESTORE(priv, pipe_0x6ab0, 0x6ab0); - PIPE_RESTORE(priv, pipe_0x6a80, 0x6a80); - PIPE_RESTORE(priv, pipe->pipe_0x4400, 0x4400); + nvkm_wr32(device, NV10_PGRAPH_XFMODE0, xfmode0); + nvkm_wr32(device, NV10_PGRAPH_XFMODE1, xfmode1); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x000000c0); 
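(Aside, not part of the patch: the LMA-window rework in this hunk leans on the PIPE_SAVE()/PIPE_RESTORE() macros, which stream a block of pipe state through one address/data register pair — write the window address once, then read or write the data register repeatedly while it auto-increments. The sketch below reproduces that idiom with invented register names and a plain array standing in for the hardware; nothing here is the real NV10 register layout.)

/* Hypothetical sketch of the address/data "pipe" save/restore idiom
 * behind PIPE_SAVE()/PIPE_RESTORE(): one register selects a window,
 * a second one auto-increments through its contents. */
#include <stdint.h>
#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

#define FAKE_PIPE_ADDRESS 0
#define FAKE_PIPE_DATA    1

static uint32_t mmio[2];      /* stand-in for the two MMIO registers */
static uint32_t backing[64];  /* stand-in for the pipe state behind them */
static uint32_t cursor;       /* current auto-increment position */

static void wr32(uint32_t reg, uint32_t val)
{
	if (reg == FAKE_PIPE_ADDRESS)
		cursor = val % ARRAY_SIZE(backing);            /* select window */
	else if (reg == FAKE_PIPE_DATA)
		backing[cursor++ % ARRAY_SIZE(backing)] = val; /* write, advance */
	mmio[reg % 2] = val;
}

static uint32_t rd32(uint32_t reg)
{
	if (reg == FAKE_PIPE_DATA)
		return backing[cursor++ % ARRAY_SIZE(backing)]; /* read, advance */
	return mmio[reg % 2];
}

#define PIPE_SAVE(state, addr) do {                                  \
	unsigned __i;                                                \
	wr32(FAKE_PIPE_ADDRESS, (addr));                             \
	for (__i = 0; __i < ARRAY_SIZE(state); __i++)                \
		(state)[__i] = rd32(FAKE_PIPE_DATA);                 \
} while (0)

#define PIPE_RESTORE(state, addr) do {                               \
	unsigned __i;                                                \
	wr32(FAKE_PIPE_ADDRESS, (addr));                             \
	for (__i = 0; __i < ARRAY_SIZE(state); __i++)                \
		wr32(FAKE_PIPE_DATA, (state)[__i]);                  \
} while (0)

int main(void)
{
	uint32_t saved[4];

	backing[8] = 0x3f800000;   /* pretend some pipe state exists */
	PIPE_SAVE(saved, 8);       /* snapshot four dwords from 0x8  */
	backing[8] = 0;            /* clobber the live state         */
	PIPE_RESTORE(saved, 8);    /* stream the snapshot back       */
	printf("restored: %08x\n", backing[8]);
	return 0;
}

(End of aside; the patch continues below.)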
- nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000000); + PIPE_RESTORE(device, pipe_0x64c0, 0x64c0); + PIPE_RESTORE(device, pipe_0x6ab0, 0x6ab0); + PIPE_RESTORE(device, pipe_0x6a80, 0x6a80); + PIPE_RESTORE(device, pipe->pipe_0x4400, 0x4400); - nv04_gr_idle(priv); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000000c0); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000); - return 0; + nv04_gr_idle(gr); } -static int -nv17_gr_mthd_lma_enable(struct nvkm_object *object, u32 mthd, - void *args, u32 size) +static void +nv17_gr_mthd_lma_enable(struct nv10_gr_chan *chan, u32 mthd, u32 data) { - struct nv10_gr_chan *chan = (void *)object->parent; - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nvkm_device *device = chan->object.engine->subdev.device; + struct nvkm_gr *gr = &chan->gr->base; - nv04_gr_idle(priv); + nv04_gr_idle(gr); - nv_mask(priv, NV10_PGRAPH_DEBUG_4, 0x00000100, 0x00000100); - nv_mask(priv, 0x4006b0, 0x08000000, 0x08000000); - return 0; + nvkm_mask(device, NV10_PGRAPH_DEBUG_4, 0x00000100, 0x00000100); + nvkm_mask(device, 0x4006b0, 0x08000000, 0x08000000); } -static struct nvkm_omthds -nv17_celcius_omthds[] = { - { 0x1638, 0x1638, nv17_gr_mthd_lma_window }, - { 0x163c, 0x163c, nv17_gr_mthd_lma_window }, - { 0x1640, 0x1640, nv17_gr_mthd_lma_window }, - { 0x1644, 0x1644, nv17_gr_mthd_lma_window }, - { 0x1658, 0x1658, nv17_gr_mthd_lma_enable }, - {} -}; +static bool +nv17_gr_mthd_celcius(struct nv10_gr_chan *chan, u32 mthd, u32 data) +{ + void (*func)(struct nv10_gr_chan *, u32, u32); + switch (mthd) { + case 0x1638 ... 0x1644: + func = nv17_gr_mthd_lma_window; break; + case 0x1658: func = nv17_gr_mthd_lma_enable; break; + default: + return false; + } + func(chan, mthd, data); + return true; +} -static struct nvkm_oclass -nv17_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs }, /* clip */ - { 0x0030, &nv04_gr_ofuncs }, /* null */ - { 0x0039, &nv04_gr_ofuncs }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs }, /* rop */ - { 0x0044, &nv04_gr_ofuncs }, /* pattern */ - { 0x004a, &nv04_gr_ofuncs }, /* gdi */ - { 0x0052, &nv04_gr_ofuncs }, /* swzsurf */ - { 0x005f, &nv04_gr_ofuncs }, /* blit */ - { 0x0062, &nv04_gr_ofuncs }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs }, /* blit */ - { 0x0093, &nv04_gr_ofuncs }, /* surf3d */ - { 0x0094, &nv04_gr_ofuncs }, /* ttri */ - { 0x0095, &nv04_gr_ofuncs }, /* mtri */ - { 0x0099, &nv04_gr_ofuncs, nv17_celcius_omthds }, - {}, -}; +static bool +nv10_gr_mthd(struct nv10_gr_chan *chan, u8 class, u32 mthd, u32 data) +{ + bool (*func)(struct nv10_gr_chan *, u32, u32); + switch (class) { + case 0x99: func = nv17_gr_mthd_celcius; break; + default: + return false; + } + return func(chan, mthd, data); +} /******************************************************************************* * PGRAPH context ******************************************************************************/ static struct nv10_gr_chan * -nv10_gr_channel(struct nv10_gr_priv *priv) +nv10_gr_channel(struct nv10_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; struct nv10_gr_chan *chan = NULL; - if (nv_rd32(priv, 0x400144) & 0x00010000) { - int chid = nv_rd32(priv, 0x400148) >> 24; - if (chid < ARRAY_SIZE(priv->chan)) - chan = priv->chan[chid]; + if (nvkm_rd32(device, 0x400144) & 0x00010000) { + int chid = nvkm_rd32(device, 0x400148) >> 24; + if (chid < ARRAY_SIZE(gr->chan)) + chan = gr->chan[chid]; } return 
chan; } @@ -617,75 +559,78 @@ nv10_gr_channel(struct nv10_gr_priv *priv) static void nv10_gr_save_pipe(struct nv10_gr_chan *chan) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; struct pipe_state *pipe = &chan->pipe_state; - - PIPE_SAVE(priv, pipe->pipe_0x4400, 0x4400); - PIPE_SAVE(priv, pipe->pipe_0x0200, 0x0200); - PIPE_SAVE(priv, pipe->pipe_0x6400, 0x6400); - PIPE_SAVE(priv, pipe->pipe_0x6800, 0x6800); - PIPE_SAVE(priv, pipe->pipe_0x6c00, 0x6c00); - PIPE_SAVE(priv, pipe->pipe_0x7000, 0x7000); - PIPE_SAVE(priv, pipe->pipe_0x7400, 0x7400); - PIPE_SAVE(priv, pipe->pipe_0x7800, 0x7800); - PIPE_SAVE(priv, pipe->pipe_0x0040, 0x0040); - PIPE_SAVE(priv, pipe->pipe_0x0000, 0x0000); + struct nvkm_device *device = gr->base.engine.subdev.device; + + PIPE_SAVE(gr, pipe->pipe_0x4400, 0x4400); + PIPE_SAVE(gr, pipe->pipe_0x0200, 0x0200); + PIPE_SAVE(gr, pipe->pipe_0x6400, 0x6400); + PIPE_SAVE(gr, pipe->pipe_0x6800, 0x6800); + PIPE_SAVE(gr, pipe->pipe_0x6c00, 0x6c00); + PIPE_SAVE(gr, pipe->pipe_0x7000, 0x7000); + PIPE_SAVE(gr, pipe->pipe_0x7400, 0x7400); + PIPE_SAVE(gr, pipe->pipe_0x7800, 0x7800); + PIPE_SAVE(gr, pipe->pipe_0x0040, 0x0040); + PIPE_SAVE(gr, pipe->pipe_0x0000, 0x0000); } static void nv10_gr_load_pipe(struct nv10_gr_chan *chan) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; struct pipe_state *pipe = &chan->pipe_state; + struct nvkm_device *device = gr->base.engine.subdev.device; u32 xfmode0, xfmode1; int i; - nv04_gr_idle(priv); + nv04_gr_idle(&gr->base); /* XXX check haiku comments */ - xfmode0 = nv_rd32(priv, NV10_PGRAPH_XFMODE0); - xfmode1 = nv_rd32(priv, NV10_PGRAPH_XFMODE1); - nv_wr32(priv, NV10_PGRAPH_XFMODE0, 0x10000000); - nv_wr32(priv, NV10_PGRAPH_XFMODE1, 0x00000000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0); + xfmode0 = nvkm_rd32(device, NV10_PGRAPH_XFMODE0); + xfmode1 = nvkm_rd32(device, NV10_PGRAPH_XFMODE1); + nvkm_wr32(device, NV10_PGRAPH_XFMODE0, 0x10000000); + nvkm_wr32(device, NV10_PGRAPH_XFMODE1, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x000064c0); for (i = 0; i < 4; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x3f800000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000); for (i = 0; i < 4; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006ab0); for (i = 0; i < 3; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x3f800000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x3f800000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00006a80); for (i = 0; i < 3; i++) - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000000); - nv_wr32(priv, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040); - nv_wr32(priv, NV10_PGRAPH_PIPE_DATA, 0x00000008); + nvkm_wr32(device, NV10_PGRAPH_PIPE_ADDRESS, 0x00000040); + nvkm_wr32(device, NV10_PGRAPH_PIPE_DATA, 0x00000008); - PIPE_RESTORE(priv, pipe->pipe_0x0200, 0x0200); - nv04_gr_idle(priv); + PIPE_RESTORE(gr, pipe->pipe_0x0200, 0x0200); + nv04_gr_idle(&gr->base); /* restore XFMODE */ - nv_wr32(priv, NV10_PGRAPH_XFMODE0, xfmode0); - nv_wr32(priv, NV10_PGRAPH_XFMODE1, xfmode1); - PIPE_RESTORE(priv, pipe->pipe_0x6400, 0x6400); - PIPE_RESTORE(priv, pipe->pipe_0x6800, 0x6800); - PIPE_RESTORE(priv, pipe->pipe_0x6c00, 0x6c00); - PIPE_RESTORE(priv, pipe->pipe_0x7000, 0x7000); - 
PIPE_RESTORE(priv, pipe->pipe_0x7400, 0x7400); - PIPE_RESTORE(priv, pipe->pipe_0x7800, 0x7800); - PIPE_RESTORE(priv, pipe->pipe_0x4400, 0x4400); - PIPE_RESTORE(priv, pipe->pipe_0x0000, 0x0000); - PIPE_RESTORE(priv, pipe->pipe_0x0040, 0x0040); - nv04_gr_idle(priv); + nvkm_wr32(device, NV10_PGRAPH_XFMODE0, xfmode0); + nvkm_wr32(device, NV10_PGRAPH_XFMODE1, xfmode1); + PIPE_RESTORE(gr, pipe->pipe_0x6400, 0x6400); + PIPE_RESTORE(gr, pipe->pipe_0x6800, 0x6800); + PIPE_RESTORE(gr, pipe->pipe_0x6c00, 0x6c00); + PIPE_RESTORE(gr, pipe->pipe_0x7000, 0x7000); + PIPE_RESTORE(gr, pipe->pipe_0x7400, 0x7400); + PIPE_RESTORE(gr, pipe->pipe_0x7800, 0x7800); + PIPE_RESTORE(gr, pipe->pipe_0x4400, 0x4400); + PIPE_RESTORE(gr, pipe->pipe_0x0000, 0x0000); + PIPE_RESTORE(gr, pipe->pipe_0x0040, 0x0040); + nv04_gr_idle(&gr->base); } static void nv10_gr_create_pipe(struct nv10_gr_chan *chan) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; struct pipe_state *pipe_state = &chan->pipe_state; u32 *pipe_state_addr; int i; @@ -698,7 +643,7 @@ nv10_gr_create_pipe(struct nv10_gr_chan *chan) u32 *__end_addr = pipe_state->pipe_##addr + \ ARRAY_SIZE(pipe_state->pipe_##addr); \ if (pipe_state_addr != __end_addr) \ - nv_error(priv, "incomplete pipe init for 0x%x : %p/%p\n", \ + nvkm_error(subdev, "incomplete pipe init for 0x%x : %p/%p\n", \ addr, pipe_state_addr, __end_addr); \ } while (0) #define NV_WRITE_PIPE_INIT(value) *(pipe_state_addr++) = value @@ -838,33 +783,36 @@ nv10_gr_create_pipe(struct nv10_gr_chan *chan) } static int -nv10_gr_ctx_regs_find_offset(struct nv10_gr_priv *priv, int reg) +nv10_gr_ctx_regs_find_offset(struct nv10_gr *gr, int reg) { + struct nvkm_subdev *subdev = &gr->base.engine.subdev; int i; for (i = 0; i < ARRAY_SIZE(nv10_gr_ctx_regs); i++) { if (nv10_gr_ctx_regs[i] == reg) return i; } - nv_error(priv, "unknow offset nv10_ctx_regs %d\n", reg); + nvkm_error(subdev, "unknown offset nv10_ctx_regs %d\n", reg); return -1; } static int -nv17_gr_ctx_regs_find_offset(struct nv10_gr_priv *priv, int reg) +nv17_gr_ctx_regs_find_offset(struct nv10_gr *gr, int reg) { + struct nvkm_subdev *subdev = &gr->base.engine.subdev; int i; for (i = 0; i < ARRAY_SIZE(nv17_gr_ctx_regs); i++) { if (nv17_gr_ctx_regs[i] == reg) return i; } - nv_error(priv, "unknow offset nv17_ctx_regs %d\n", reg); + nvkm_error(subdev, "unknown offset nv17_ctx_regs %d\n", reg); return -1; } static void nv10_gr_load_dma_vtxbuf(struct nv10_gr_chan *chan, int chid, u32 inst) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; u32 st2, st2_dl, st2_dh, fifo_ptr, fifo[0x60/4]; u32 ctx_user, ctx_switch[5]; int i, subchan = -1; @@ -876,7 +824,7 @@ nv10_gr_load_dma_vtxbuf(struct nv10_gr_chan *chan, int chid, u32 inst) /* Look for a celsius object */ for (i = 0; i < 8; i++) { - int class = nv_rd32(priv, NV10_PGRAPH_CTX_CACHE(i, 0)) & 0xfff; + int class = nvkm_rd32(device, NV10_PGRAPH_CTX_CACHE(i, 0)) & 0xfff; if (class == 0x56 || class == 0x96 || class == 0x99) { subchan = i; @@ -888,159 +836,183 @@ nv10_gr_load_dma_vtxbuf(struct nv10_gr_chan *chan, int chid, u32 inst) return; /* Save the current ctx object */ - ctx_user = nv_rd32(priv, NV10_PGRAPH_CTX_USER); + ctx_user = nvkm_rd32(device, NV10_PGRAPH_CTX_USER); for (i = 0; i < 5; i++) - ctx_switch[i] = nv_rd32(priv, NV10_PGRAPH_CTX_SWITCH(i)); + ctx_switch[i] = nvkm_rd32(device, NV10_PGRAPH_CTX_SWITCH(i)); /* Save the 
FIFO state */ - st2 = nv_rd32(priv, NV10_PGRAPH_FFINTFC_ST2); - st2_dl = nv_rd32(priv, NV10_PGRAPH_FFINTFC_ST2_DL); - st2_dh = nv_rd32(priv, NV10_PGRAPH_FFINTFC_ST2_DH); - fifo_ptr = nv_rd32(priv, NV10_PGRAPH_FFINTFC_FIFO_PTR); + st2 = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2); + st2_dl = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2_DL); + st2_dh = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_ST2_DH); + fifo_ptr = nvkm_rd32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR); for (i = 0; i < ARRAY_SIZE(fifo); i++) - fifo[i] = nv_rd32(priv, 0x4007a0 + 4 * i); + fifo[i] = nvkm_rd32(device, 0x4007a0 + 4 * i); /* Switch to the celsius subchannel */ for (i = 0; i < 5; i++) - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(i), - nv_rd32(priv, NV10_PGRAPH_CTX_CACHE(subchan, i))); - nv_mask(priv, NV10_PGRAPH_CTX_USER, 0xe000, subchan << 13); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(i), + nvkm_rd32(device, NV10_PGRAPH_CTX_CACHE(subchan, i))); + nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xe000, subchan << 13); /* Inject NV10TCL_DMA_VTXBUF */ - nv_wr32(priv, NV10_PGRAPH_FFINTFC_FIFO_PTR, 0); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2, + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR, 0); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2, 0x2c000000 | chid << 20 | subchan << 16 | 0x18c); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2_DL, inst); - nv_mask(priv, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DL, inst); + nvkm_mask(device, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); /* Restore the FIFO state */ for (i = 0; i < ARRAY_SIZE(fifo); i++) - nv_wr32(priv, 0x4007a0 + 4 * i, fifo[i]); + nvkm_wr32(device, 0x4007a0 + 4 * i, fifo[i]); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_FIFO_PTR, fifo_ptr); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2, st2); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2_DL, st2_dl); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2_DH, st2_dh); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_FIFO_PTR, fifo_ptr); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2, st2); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DL, st2_dl); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2_DH, st2_dh); /* Restore the current ctx object */ for (i = 0; i < 5; i++) - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(i), ctx_switch[i]); - nv_wr32(priv, NV10_PGRAPH_CTX_USER, ctx_user); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(i), ctx_switch[i]); + nvkm_wr32(device, NV10_PGRAPH_CTX_USER, ctx_user); } static int nv10_gr_load_context(struct nv10_gr_chan *chan, int chid) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; u32 inst; int i; for (i = 0; i < ARRAY_SIZE(nv10_gr_ctx_regs); i++) - nv_wr32(priv, nv10_gr_ctx_regs[i], chan->nv10[i]); + nvkm_wr32(device, nv10_gr_ctx_regs[i], chan->nv10[i]); - if (nv_device(priv)->card_type >= NV_11 && - nv_device(priv)->chipset >= 0x17) { + if (device->card_type >= NV_11 && device->chipset >= 0x17) { for (i = 0; i < ARRAY_SIZE(nv17_gr_ctx_regs); i++) - nv_wr32(priv, nv17_gr_ctx_regs[i], chan->nv17[i]); + nvkm_wr32(device, nv17_gr_ctx_regs[i], chan->nv17[i]); } nv10_gr_load_pipe(chan); - inst = nv_rd32(priv, NV10_PGRAPH_GLOBALSTATE1) & 0xffff; + inst = nvkm_rd32(device, NV10_PGRAPH_GLOBALSTATE1) & 0xffff; nv10_gr_load_dma_vtxbuf(chan, chid, inst); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10010100); - 
nv_mask(priv, NV10_PGRAPH_CTX_USER, 0xff000000, chid << 24); - nv_mask(priv, NV10_PGRAPH_FFINTFC_ST2, 0x30000000, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10010100); + nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, chid << 24); + nvkm_mask(device, NV10_PGRAPH_FFINTFC_ST2, 0x30000000, 0x00000000); return 0; } static int nv10_gr_unload_context(struct nv10_gr_chan *chan) { - struct nv10_gr_priv *priv = nv10_gr_priv(chan); + struct nv10_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; int i; for (i = 0; i < ARRAY_SIZE(nv10_gr_ctx_regs); i++) - chan->nv10[i] = nv_rd32(priv, nv10_gr_ctx_regs[i]); + chan->nv10[i] = nvkm_rd32(device, nv10_gr_ctx_regs[i]); - if (nv_device(priv)->card_type >= NV_11 && - nv_device(priv)->chipset >= 0x17) { + if (device->card_type >= NV_11 && device->chipset >= 0x17) { for (i = 0; i < ARRAY_SIZE(nv17_gr_ctx_regs); i++) - chan->nv17[i] = nv_rd32(priv, nv17_gr_ctx_regs[i]); + chan->nv17[i] = nvkm_rd32(device, nv17_gr_ctx_regs[i]); } nv10_gr_save_pipe(chan); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10000000); - nv_mask(priv, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000000); + nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000); return 0; } static void -nv10_gr_context_switch(struct nv10_gr_priv *priv) +nv10_gr_context_switch(struct nv10_gr *gr) { + struct nvkm_device *device = gr->base.engine.subdev.device; struct nv10_gr_chan *prev = NULL; struct nv10_gr_chan *next = NULL; - unsigned long flags; int chid; - spin_lock_irqsave(&priv->lock, flags); - nv04_gr_idle(priv); + nv04_gr_idle(&gr->base); /* If previous context is valid, we need to save it */ - prev = nv10_gr_channel(priv); + prev = nv10_gr_channel(gr); if (prev) nv10_gr_unload_context(prev); /* load context for next channel */ - chid = (nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f; - next = priv->chan[chid]; + chid = (nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f; + next = gr->chan[chid]; if (next) nv10_gr_load_context(next, chid); +} + +static int +nv10_gr_chan_fini(struct nvkm_object *object, bool suspend) +{ + struct nv10_gr_chan *chan = nv10_gr_chan(object); + struct nv10_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; + unsigned long flags; - spin_unlock_irqrestore(&priv->lock, flags); + spin_lock_irqsave(&gr->lock, flags); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); + if (nv10_gr_channel(gr) == chan) + nv10_gr_unload_context(chan); + nvkm_mask(device, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); + spin_unlock_irqrestore(&gr->lock, flags); + return 0; +} + +static void * +nv10_gr_chan_dtor(struct nvkm_object *object) +{ + struct nv10_gr_chan *chan = nv10_gr_chan(object); + struct nv10_gr *gr = chan->gr; + unsigned long flags; + + spin_lock_irqsave(&gr->lock, flags); + gr->chan[chan->chid] = NULL; + spin_unlock_irqrestore(&gr->lock, flags); + return chan; } +static const struct nvkm_object_func +nv10_gr_chan = { + .dtor = nv10_gr_chan_dtor, + .fini = nv10_gr_chan_fini, +}; + #define NV_WRITE_CTX(reg, val) do { \ - int offset = nv10_gr_ctx_regs_find_offset(priv, reg); \ + int offset = nv10_gr_ctx_regs_find_offset(gr, reg); \ if (offset > 0) \ chan->nv10[offset] = val; \ } while (0) #define NV17_WRITE_CTX(reg, val) do { \ - int offset = nv17_gr_ctx_regs_find_offset(priv, reg); \ + int offset = nv17_gr_ctx_regs_find_offset(gr, reg); \ if (offset > 0) \ chan->nv17[offset] = val; \ } while (0) -static int 
-nv10_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +int +nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { - struct nvkm_fifo_chan *fifo = (void *)parent; - struct nv10_gr_priv *priv = (void *)engine; + struct nv10_gr *gr = nv10_gr(base); struct nv10_gr_chan *chan; + struct nvkm_device *device = gr->base.engine.subdev.device; unsigned long flags; - int ret; - - ret = nvkm_object_create(parent, engine, oclass, 0, &chan); - *pobject = nv_object(chan); - if (ret) - return ret; - - spin_lock_irqsave(&priv->lock, flags); - if (priv->chan[fifo->chid]) { - *pobject = nv_object(priv->chan[fifo->chid]); - atomic_inc(&(*pobject)->refcount); - spin_unlock_irqrestore(&priv->lock, flags); - nvkm_object_destroy(&chan->base); - return 1; - } + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv10_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; NV_WRITE_CTX(0x00400e88, 0x08000000); NV_WRITE_CTX(0x00400e9c, 0x4b7fffff); @@ -1049,12 +1021,11 @@ nv10_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, NV_WRITE_CTX(0x00400e14, 0x00001000); NV_WRITE_CTX(0x00400e30, 0x00080008); NV_WRITE_CTX(0x00400e34, 0x00080008); - if (nv_device(priv)->card_type >= NV_11 && - nv_device(priv)->chipset >= 0x17) { + if (device->card_type >= NV_11 && device->chipset >= 0x17) { /* is it really needed ??? */ NV17_WRITE_CTX(NV10_PGRAPH_DEBUG_4, - nv_rd32(priv, NV10_PGRAPH_DEBUG_4)); - NV17_WRITE_CTX(0x004006b0, nv_rd32(priv, 0x004006b0)); + nvkm_rd32(device, NV10_PGRAPH_DEBUG_4)); + NV17_WRITE_CTX(0x004006b0, nvkm_rd32(device, 0x004006b0)); NV17_WRITE_CTX(0x00400eac, 0x0fff0000); NV17_WRITE_CTX(0x00400eb0, 0x0fff0000); NV17_WRITE_CTX(0x00400ec0, 0x00000080); @@ -1064,74 +1035,32 @@ nv10_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, nv10_gr_create_pipe(chan); - priv->chan[fifo->chid] = chan; - chan->chid = fifo->chid; - spin_unlock_irqrestore(&priv->lock, flags); + spin_lock_irqsave(&gr->lock, flags); + gr->chan[chan->chid] = chan; + spin_unlock_irqrestore(&gr->lock, flags); return 0; } -static void -nv10_gr_context_dtor(struct nvkm_object *object) -{ - struct nv10_gr_priv *priv = (void *)object->engine; - struct nv10_gr_chan *chan = (void *)object; - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); - priv->chan[chan->chid] = NULL; - spin_unlock_irqrestore(&priv->lock, flags); - - nvkm_object_destroy(&chan->base); -} - -static int -nv10_gr_context_fini(struct nvkm_object *object, bool suspend) -{ - struct nv10_gr_priv *priv = (void *)object->engine; - struct nv10_gr_chan *chan = (void *)object; - unsigned long flags; - - spin_lock_irqsave(&priv->lock, flags); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000000); - if (nv10_gr_channel(priv) == chan) - nv10_gr_unload_context(chan); - nv_mask(priv, NV04_PGRAPH_FIFO, 0x00000001, 0x00000001); - spin_unlock_irqrestore(&priv->lock, flags); - - return nvkm_object_fini(&chan->base, suspend); -} - -static struct nvkm_oclass -nv10_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x10), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv10_gr_context_ctor, - .dtor = nv10_gr_context_dtor, - .init = nvkm_object_init, - .fini = nv10_gr_context_fini, - }, -}; - /******************************************************************************* * PGRAPH 
engine/subdev functions ******************************************************************************/ -static void -nv10_gr_tile_prog(struct nvkm_engine *engine, int i) +void +nv10_gr_tile(struct nvkm_gr *base, int i, struct nvkm_fb_tile *tile) { - struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i]; - struct nvkm_fifo *pfifo = nvkm_fifo(engine); - struct nv10_gr_priv *priv = (void *)engine; + struct nv10_gr *gr = nv10_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fifo *fifo = device->fifo; unsigned long flags; - pfifo->pause(pfifo, &flags); - nv04_gr_idle(priv); + nvkm_fifo_pause(fifo, &flags); + nv04_gr_idle(&gr->base); - nv_wr32(priv, NV10_PGRAPH_TLIMIT(i), tile->limit); - nv_wr32(priv, NV10_PGRAPH_TSIZE(i), tile->pitch); - nv_wr32(priv, NV10_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV10_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV10_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV10_PGRAPH_TILE(i), tile->addr); - pfifo->start(pfifo, &flags); + nvkm_fifo_start(fifo, &flags); } const struct nvkm_bitfield nv10_gr_intr_name[] = { @@ -1148,168 +1077,145 @@ const struct nvkm_bitfield nv10_gr_nstatus[] = { {} }; -static void -nv10_gr_intr(struct nvkm_subdev *subdev) +void +nv10_gr_intr(struct nvkm_gr *base) { - struct nv10_gr_priv *priv = (void *)subdev; - struct nv10_gr_chan *chan = NULL; - struct nvkm_namedb *namedb = NULL; - struct nvkm_handle *handle = NULL; - u32 stat = nv_rd32(priv, NV03_PGRAPH_INTR); - u32 nsource = nv_rd32(priv, NV03_PGRAPH_NSOURCE); - u32 nstatus = nv_rd32(priv, NV03_PGRAPH_NSTATUS); - u32 addr = nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR); + struct nv10_gr *gr = nv10_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); + u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE); + u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); + u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR); u32 chid = (addr & 0x01f00000) >> 20; u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, NV04_PGRAPH_TRAPPED_DATA); - u32 class = nv_rd32(priv, 0x400160 + subc * 4) & 0xfff; + u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA); + u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xfff; u32 show = stat; + char msg[128], src[128], sta[128]; + struct nv10_gr_chan *chan; unsigned long flags; - spin_lock_irqsave(&priv->lock, flags); - chan = priv->chan[chid]; - if (chan) - namedb = (void *)nv_pclass(nv_object(chan), NV_NAMEDB_CLASS); - spin_unlock_irqrestore(&priv->lock, flags); + spin_lock_irqsave(&gr->lock, flags); + chan = gr->chan[chid]; if (stat & NV_PGRAPH_INTR_ERROR) { if (chan && (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD)) { - handle = nvkm_namedb_get_class(namedb, class); - if (handle && !nv_call(handle->object, mthd, data)) + if (!nv10_gr_mthd(chan, class, mthd, data)) show &= ~NV_PGRAPH_INTR_ERROR; } } if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) { - nv_wr32(priv, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH); + nvkm_wr32(device, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH); stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH; - nv10_gr_context_switch(priv); + nv10_gr_context_switch(gr); } - nv_wr32(priv, NV03_PGRAPH_INTR, stat); - nv_wr32(priv, NV04_PGRAPH_FIFO, 0x00000001); + nvkm_wr32(device, NV03_PGRAPH_INTR, stat); + nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); if (show) { - nv_error(priv, "%s", ""); - 
nvkm_bitfield_print(nv10_gr_intr_name, show); - pr_cont(" nsource:"); - nvkm_bitfield_print(nv04_gr_nsource, nsource); - pr_cont(" nstatus:"); - nvkm_bitfield_print(nv10_gr_nstatus, nstatus); - pr_cont("\n"); - nv_error(priv, - "ch %d [%s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, nvkm_client_name(chan), subc, class, mthd, - data); + nvkm_snprintbf(msg, sizeof(msg), nv10_gr_intr_name, show); + nvkm_snprintbf(src, sizeof(src), nv04_gr_nsource, nsource); + nvkm_snprintbf(sta, sizeof(sta), nv10_gr_nstatus, nstatus); + nvkm_error(subdev, "intr %08x [%s] nsource %08x [%s] " + "nstatus %08x [%s] ch %d [%s] subc %d " + "class %04x mthd %04x data %08x\n", + show, msg, nsource, src, nstatus, sta, chid, + chan ? chan->object.client->name : "unknown", + subc, class, mthd, data); } - nvkm_namedb_put(handle); + spin_unlock_irqrestore(&gr->lock, flags); } -static int -nv10_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +int +nv10_gr_init(struct nvkm_gr *base) { - struct nv10_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv10_gr_intr; - nv_engine(priv)->cclass = &nv10_gr_cclass; - - if (nv_device(priv)->chipset <= 0x10) - nv_engine(priv)->sclass = nv10_gr_sclass; - else - if (nv_device(priv)->chipset < 0x17 || - nv_device(priv)->card_type < NV_11) - nv_engine(priv)->sclass = nv15_gr_sclass; - else - nv_engine(priv)->sclass = nv17_gr_sclass; - - nv_engine(priv)->tile_prog = nv10_gr_tile_prog; - spin_lock_init(&priv->lock); - return 0; -} - -static void -nv10_gr_dtor(struct nvkm_object *object) -{ - struct nv10_gr_priv *priv = (void *)object; - nvkm_gr_destroy(&priv->base); -} - -static int -nv10_gr_init(struct nvkm_object *object) -{ - struct nvkm_engine *engine = nv_engine(object); - struct nvkm_fb *pfb = nvkm_fb(object); - struct nv10_gr_priv *priv = (void *)engine; - int ret, i; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; - - nv_wr32(priv, NV03_PGRAPH_INTR , 0xFFFFFFFF); - nv_wr32(priv, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); - - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x00000000); - nv_wr32(priv, NV04_PGRAPH_DEBUG_1, 0x00118700); - /* nv_wr32(priv, NV04_PGRAPH_DEBUG_2, 0x24E00810); */ /* 0x25f92ad9 */ - nv_wr32(priv, NV04_PGRAPH_DEBUG_2, 0x25f92ad9); - nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0x55DE0830 | (1 << 29) | (1 << 31)); - - if (nv_device(priv)->card_type >= NV_11 && - nv_device(priv)->chipset >= 0x17) { - nv_wr32(priv, NV10_PGRAPH_DEBUG_4, 0x1f000000); - nv_wr32(priv, 0x400a10, 0x03ff3fb6); - nv_wr32(priv, 0x400838, 0x002f8684); - nv_wr32(priv, 0x40083c, 0x00115f3f); - nv_wr32(priv, 0x4006b0, 0x40000020); + struct nv10_gr *gr = nv10_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, NV03_PGRAPH_INTR , 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); + + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x00118700); + /* nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x24E00810); */ /* 0x25f92ad9 */ + nvkm_wr32(device, NV04_PGRAPH_DEBUG_2, 0x25f92ad9); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0x55DE0830 | (1 << 29) | (1 << 31)); + + if (device->card_type >= NV_11 && device->chipset >= 0x17) { + nvkm_wr32(device, 
NV10_PGRAPH_DEBUG_4, 0x1f000000); + nvkm_wr32(device, 0x400a10, 0x03ff3fb6); + nvkm_wr32(device, 0x400838, 0x002f8684); + nvkm_wr32(device, 0x40083c, 0x00115f3f); + nvkm_wr32(device, 0x4006b0, 0x40000020); } else { - nv_wr32(priv, NV10_PGRAPH_DEBUG_4, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00000000); } - /* Turn all the tiling regions off. */ - for (i = 0; i < pfb->tile.regions; i++) - engine->tile_prog(engine, i); - - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(0), 0x00000000); - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(1), 0x00000000); - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(2), 0x00000000); - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(3), 0x00000000); - nv_wr32(priv, NV10_PGRAPH_CTX_SWITCH(4), 0x00000000); - nv_wr32(priv, NV10_PGRAPH_STATE, 0xFFFFFFFF); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(0), 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(1), 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(2), 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(3), 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_SWITCH(4), 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_STATE, 0xFFFFFFFF); - nv_mask(priv, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10000100); - nv_wr32(priv, NV10_PGRAPH_FFINTFC_ST2, 0x08000000); + nvkm_mask(device, NV10_PGRAPH_CTX_USER, 0xff000000, 0x1f000000); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100); + nvkm_wr32(device, NV10_PGRAPH_FFINTFC_ST2, 0x08000000); return 0; } -static int -nv10_gr_fini(struct nvkm_object *object, bool suspend) +int +nv10_gr_new_(const struct nvkm_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) { - struct nv10_gr_priv *priv = (void *)object; - return nvkm_gr_fini(&priv->base, suspend); + struct nv10_gr *gr; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + spin_lock_init(&gr->lock); + *pgr = &gr->base; + + return nvkm_gr_ctor(func, device, index, 0x00001000, true, &gr->base); } -struct nvkm_oclass -nv10_gr_oclass = { - .handle = NV_ENGINE(GR, 0x10), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv10_gr_ctor, - .dtor = nv10_gr_dtor, - .init = nv10_gr_init, - .fini = nv10_gr_fini, - }, +static const struct nvkm_gr_func +nv10_gr = { + .init = nv10_gr_init, + .intr = nv10_gr_intr, + .tile = nv10_gr_tile, + .chan_new = nv10_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* pattern */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0052, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x005f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0093, &nv04_gr_object }, /* surf3d */ + { -1, -1, 0x0094, &nv04_gr_object }, /* ttri */ + { -1, -1, 0x0095, &nv04_gr_object }, /* mtri */ + { -1, -1, 0x0056, &nv04_gr_object }, /* celcius */ + {} + } }; + +int +nv10_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv10_gr_new_(&nv10_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h new file mode 100644 index 
000000000000..d7c3d86cc99d --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h @@ -0,0 +1,13 @@ +#ifndef __NV10_GR_H__ +#define __NV10_GR_H__ +#include "priv.h" + +int nv10_gr_new_(const struct nvkm_gr_func *, struct nvkm_device *, int index, + struct nvkm_gr **); +int nv10_gr_init(struct nvkm_gr *); +void nv10_gr_intr(struct nvkm_gr *); +void nv10_gr_tile(struct nvkm_gr *, int, struct nvkm_fb_tile *); + +int nv10_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *, + const struct nvkm_oclass *, struct nvkm_object **); +#endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv15.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv15.c new file mode 100644 index 000000000000..3e2c6856b4c4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv15.c @@ -0,0 +1,59 @@ +/* + * Copyright 2007 Matthieu CASTET <castet.matthieu@free.fr> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragr) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "nv10.h" + +static const struct nvkm_gr_func +nv15_gr = { + .init = nv10_gr_init, + .intr = nv10_gr_intr, + .tile = nv10_gr_tile, + .chan_new = nv10_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* pattern */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0052, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x005f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0093, &nv04_gr_object }, /* surf3d */ + { -1, -1, 0x0094, &nv04_gr_object }, /* ttri */ + { -1, -1, 0x0095, &nv04_gr_object }, /* mtri */ + { -1, -1, 0x0096, &nv04_gr_object }, /* celcius */ + {} + } +}; + +int +nv15_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv10_gr_new_(&nv15_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv17.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv17.c new file mode 100644 index 000000000000..12437d085a73 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv17.c @@ -0,0 +1,59 @@ +/* + * Copyright 2007 Matthieu CASTET <castet.matthieu@free.fr> + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragr) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include "nv10.h" + +static const struct nvkm_gr_func +nv17_gr = { + .init = nv10_gr_init, + .intr = nv10_gr_intr, + .tile = nv10_gr_tile, + .chan_new = nv10_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* pattern */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0052, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x005f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* blit */ + { -1, -1, 0x0093, &nv04_gr_object }, /* surf3d */ + { -1, -1, 0x0094, &nv04_gr_object }, /* ttri */ + { -1, -1, 0x0095, &nv04_gr_object }, /* mtri */ + { -1, -1, 0x0099, &nv04_gr_object }, + {} + } +}; + +int +nv17_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv10_gr_new_(&nv17_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c index 1713ffb669e8..5caef65d3c6e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c @@ -2,375 +2,374 @@ #include "regs.h" #include <core/client.h> -#include <core/device.h> -#include <core/handle.h> +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> #include <subdev/fb.h> #include <subdev/timer.h> /******************************************************************************* - * Graphics object classes + * PGRAPH context ******************************************************************************/ -static struct nvkm_oclass -nv20_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv04_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv04_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv04_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv04_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv04_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs, NULL }, /* ifc */ - { 0x0096, &nv04_gr_ofuncs, NULL }, /* celcius */ - { 0x0097, &nv04_gr_ofuncs, NULL }, /* kelvin */ - { 0x009e, &nv04_gr_ofuncs, NULL }, /* swzsurf */ - { 0x009f, &nv04_gr_ofuncs, NULL }, /* imageblit */ - {}, -}; +int +nv20_gr_chan_init(struct nvkm_object *object) +{ + struct nv20_gr_chan *chan = nv20_gr_chan(object); + struct nv20_gr *gr = chan->gr; + u32 inst = nvkm_memory_addr(chan->inst); -/******************************************************************************* - * PGRAPH context - ******************************************************************************/ + nvkm_kmap(gr->ctxtab); + nvkm_wo32(gr->ctxtab, chan->chid * 4, inst >> 4); + nvkm_done(gr->ctxtab); + return 0; +} + +int +nv20_gr_chan_fini(struct nvkm_object *object, bool suspend) +{ + struct nv20_gr_chan *chan = nv20_gr_chan(object); + struct nv20_gr *gr = chan->gr; + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 inst = nvkm_memory_addr(chan->inst); + int chid = -1; + + nvkm_mask(device, 0x400720, 0x00000001, 
0x00000000); + if (nvkm_rd32(device, 0x400144) & 0x00010000) + chid = (nvkm_rd32(device, 0x400148) & 0x1f000000) >> 24; + if (chan->chid == chid) { + nvkm_wr32(device, 0x400784, inst >> 4); + nvkm_wr32(device, 0x400788, 0x00000002); + nvkm_msec(device, 2000, + if (!nvkm_rd32(device, 0x400700)) + break; + ); + nvkm_wr32(device, 0x400144, 0x10000000); + nvkm_mask(device, 0x400148, 0xff000000, 0x1f000000); + } + nvkm_mask(device, 0x400720, 0x00000001, 0x00000001); + + nvkm_kmap(gr->ctxtab); + nvkm_wo32(gr->ctxtab, chan->chid * 4, 0x00000000); + nvkm_done(gr->ctxtab); + return 0; +} + +void * +nv20_gr_chan_dtor(struct nvkm_object *object) +{ + struct nv20_gr_chan *chan = nv20_gr_chan(object); + nvkm_memory_del(&chan->inst); + return chan; +} + +static const struct nvkm_object_func +nv20_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, +}; static int -nv20_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x37f0, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv20_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x37f0, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0000, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x033c, 0xffff0000); - nv_wo32(chan, 0x03a0, 0x0fff0000); - nv_wo32(chan, 0x03a4, 0x0fff0000); - nv_wo32(chan, 0x047c, 0x00000101); - nv_wo32(chan, 0x0490, 0x00000111); - nv_wo32(chan, 0x04a8, 0x44400000); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0000, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x033c, 0xffff0000); + nvkm_wo32(chan->inst, 0x03a0, 0x0fff0000); + nvkm_wo32(chan->inst, 0x03a4, 0x0fff0000); + nvkm_wo32(chan->inst, 0x047c, 0x00000101); + nvkm_wo32(chan->inst, 0x0490, 0x00000111); + nvkm_wo32(chan->inst, 0x04a8, 0x44400000); for (i = 0x04d4; i <= 0x04e0; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x04f4; i <= 0x0500; i += 4) - nv_wo32(chan, i, 0x00080000); + nvkm_wo32(chan->inst, i, 0x00080000); for (i = 0x050c; i <= 0x0518; i += 4) - nv_wo32(chan, i, 0x01012000); + nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x051c; i <= 0x0528; i += 4) - nv_wo32(chan, i, 0x000105b8); + nvkm_wo32(chan->inst, i, 0x000105b8); for (i = 0x052c; i <= 0x0538; i += 4) - nv_wo32(chan, i, 0x00080008); + nvkm_wo32(chan->inst, i, 0x00080008); for (i = 0x055c; i <= 0x0598; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x05a4, 0x4b7fffff); - nv_wo32(chan, 0x05fc, 0x00000001); - nv_wo32(chan, 0x0604, 0x00004000); - nv_wo32(chan, 0x0610, 0x00000001); - nv_wo32(chan, 0x0618, 0x00040000); - nv_wo32(chan, 0x061c, 0x00010000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x05a4, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x05fc, 0x00000001); + nvkm_wo32(chan->inst, 0x0604, 0x00004000); + nvkm_wo32(chan->inst, 0x0610, 0x00000001); + nvkm_wo32(chan->inst, 0x0618, 0x00040000); 
+ nvkm_wo32(chan->inst, 0x061c, 0x00010000); for (i = 0x1c1c; i <= 0x248c; i += 16) { - nv_wo32(chan, (i + 0), 0x10700ff9); - nv_wo32(chan, (i + 4), 0x0436086c); - nv_wo32(chan, (i + 8), 0x000c001b); + nvkm_wo32(chan->inst, (i + 0), 0x10700ff9); + nvkm_wo32(chan->inst, (i + 4), 0x0436086c); + nvkm_wo32(chan->inst, (i + 8), 0x000c001b); } - nv_wo32(chan, 0x281c, 0x3f800000); - nv_wo32(chan, 0x2830, 0x3f800000); - nv_wo32(chan, 0x285c, 0x40000000); - nv_wo32(chan, 0x2860, 0x3f800000); - nv_wo32(chan, 0x2864, 0x3f000000); - nv_wo32(chan, 0x286c, 0x40000000); - nv_wo32(chan, 0x2870, 0x3f800000); - nv_wo32(chan, 0x2878, 0xbf800000); - nv_wo32(chan, 0x2880, 0xbf800000); - nv_wo32(chan, 0x34a4, 0x000fe000); - nv_wo32(chan, 0x3530, 0x000003f8); - nv_wo32(chan, 0x3540, 0x002fe000); + nvkm_wo32(chan->inst, 0x281c, 0x3f800000); + nvkm_wo32(chan->inst, 0x2830, 0x3f800000); + nvkm_wo32(chan->inst, 0x285c, 0x40000000); + nvkm_wo32(chan->inst, 0x2860, 0x3f800000); + nvkm_wo32(chan->inst, 0x2864, 0x3f000000); + nvkm_wo32(chan->inst, 0x286c, 0x40000000); + nvkm_wo32(chan->inst, 0x2870, 0x3f800000); + nvkm_wo32(chan->inst, 0x2878, 0xbf800000); + nvkm_wo32(chan->inst, 0x2880, 0xbf800000); + nvkm_wo32(chan->inst, 0x34a4, 0x000fe000); + nvkm_wo32(chan->inst, 0x3530, 0x000003f8); + nvkm_wo32(chan->inst, 0x3540, 0x002fe000); for (i = 0x355c; i <= 0x3578; i += 4) - nv_wo32(chan, i, 0x001c527c); - return 0; -} - -int -nv20_gr_context_init(struct nvkm_object *object) -{ - struct nv20_gr_priv *priv = (void *)object->engine; - struct nv20_gr_chan *chan = (void *)object; - int ret; - - ret = nvkm_gr_context_init(&chan->base); - if (ret) - return ret; - - nv_wo32(priv->ctxtab, chan->chid * 4, nv_gpuobj(chan)->addr >> 4); + nvkm_wo32(chan->inst, i, 0x001c527c); + nvkm_done(chan->inst); return 0; } -int -nv20_gr_context_fini(struct nvkm_object *object, bool suspend) -{ - struct nv20_gr_priv *priv = (void *)object->engine; - struct nv20_gr_chan *chan = (void *)object; - int chid = -1; - - nv_mask(priv, 0x400720, 0x00000001, 0x00000000); - if (nv_rd32(priv, 0x400144) & 0x00010000) - chid = (nv_rd32(priv, 0x400148) & 0x1f000000) >> 24; - if (chan->chid == chid) { - nv_wr32(priv, 0x400784, nv_gpuobj(chan)->addr >> 4); - nv_wr32(priv, 0x400788, 0x00000002); - nv_wait(priv, 0x400700, 0xffffffff, 0x00000000); - nv_wr32(priv, 0x400144, 0x10000000); - nv_mask(priv, 0x400148, 0xff000000, 0x1f000000); - } - nv_mask(priv, 0x400720, 0x00000001, 0x00000001); - - nv_wo32(priv->ctxtab, chan->chid * 4, 0x00000000); - return nvkm_gr_context_fini(&chan->base, suspend); -} - -static struct nvkm_oclass -nv20_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x20), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv20_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ void -nv20_gr_tile_prog(struct nvkm_engine *engine, int i) +nv20_gr_tile(struct nvkm_gr *base, int i, struct nvkm_fb_tile *tile) { - struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i]; - struct nvkm_fifo *pfifo = nvkm_fifo(engine); - struct nv20_gr_priv *priv = (void *)engine; + struct nv20_gr *gr = nv20_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fifo *fifo = device->fifo; unsigned long flags; - 
pfifo->pause(pfifo, &flags); - nv04_gr_idle(priv); + nvkm_fifo_pause(fifo, &flags); + nv04_gr_idle(&gr->base); - nv_wr32(priv, NV20_PGRAPH_TLIMIT(i), tile->limit); - nv_wr32(priv, NV20_PGRAPH_TSIZE(i), tile->pitch); - nv_wr32(priv, NV20_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->limit); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->pitch); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->addr); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->limit); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->pitch); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->addr); - if (nv_device(engine)->chipset != 0x34) { - nv_wr32(priv, NV20_PGRAPH_ZCOMP(i), tile->zcomp); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, tile->zcomp); + if (device->chipset != 0x34) { + nvkm_wr32(device, NV20_PGRAPH_ZCOMP(i), tile->zcomp); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00ea0090 + 4 * i); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, tile->zcomp); } - pfifo->start(pfifo, &flags); + nvkm_fifo_start(fifo, &flags); } void -nv20_gr_intr(struct nvkm_subdev *subdev) +nv20_gr_intr(struct nvkm_gr *base) { - struct nvkm_engine *engine = nv_engine(subdev); - struct nvkm_object *engctx; - struct nvkm_handle *handle; - struct nv20_gr_priv *priv = (void *)subdev; - u32 stat = nv_rd32(priv, NV03_PGRAPH_INTR); - u32 nsource = nv_rd32(priv, NV03_PGRAPH_NSOURCE); - u32 nstatus = nv_rd32(priv, NV03_PGRAPH_NSTATUS); - u32 addr = nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR); + struct nv20_gr *gr = nv20_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_fifo_chan *chan; + u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); + u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE); + u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); + u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR); u32 chid = (addr & 0x01f00000) >> 20; u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, NV04_PGRAPH_TRAPPED_DATA); - u32 class = nv_rd32(priv, 0x400160 + subc * 4) & 0xfff; + u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA); + u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xfff; u32 show = stat; + char msg[128], src[128], sta[128]; + unsigned long flags; - engctx = nvkm_engctx_get(engine, chid); - if (stat & NV_PGRAPH_INTR_ERROR) { - if (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD) { - handle = nvkm_handle_get_class(engctx, class); - if (handle && !nv_call(handle->object, mthd, data)) - show &= ~NV_PGRAPH_INTR_ERROR; - nvkm_handle_put(handle); - } - } + chan = nvkm_fifo_chan_chid(device->fifo, chid, &flags); - nv_wr32(priv, NV03_PGRAPH_INTR, stat); - nv_wr32(priv, NV04_PGRAPH_FIFO, 0x00000001); + nvkm_wr32(device, NV03_PGRAPH_INTR, stat); + nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); if (show) { - nv_error(priv, "%s", ""); - nvkm_bitfield_print(nv10_gr_intr_name, show); - pr_cont(" nsource:"); - 
nvkm_bitfield_print(nv04_gr_nsource, nsource); - pr_cont(" nstatus:"); - nvkm_bitfield_print(nv10_gr_nstatus, nstatus); - pr_cont("\n"); - nv_error(priv, - "ch %d [%s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, nvkm_client_name(engctx), subc, class, mthd, - data); + nvkm_snprintbf(msg, sizeof(msg), nv10_gr_intr_name, show); + nvkm_snprintbf(src, sizeof(src), nv04_gr_nsource, nsource); + nvkm_snprintbf(sta, sizeof(sta), nv10_gr_nstatus, nstatus); + nvkm_error(subdev, "intr %08x [%s] nsource %08x [%s] " + "nstatus %08x [%s] ch %d [%s] subc %d " + "class %04x mthd %04x data %08x\n", + show, msg, nsource, src, nstatus, sta, chid, + chan ? chan->object.client->name : "unknown", + subc, class, mthd, data); } - nvkm_engctx_put(engctx); -} - -static int -nv20_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv20_gr_cclass; - nv_engine(priv)->sclass = nv20_gr_sclass; - nv_engine(priv)->tile_prog = nv20_gr_tile_prog; - return 0; + nvkm_fifo_chan_put(device->fifo, flags, &chan); } -void -nv20_gr_dtor(struct nvkm_object *object) +int +nv20_gr_oneinit(struct nvkm_gr *base) { - struct nv20_gr_priv *priv = (void *)object; - nvkm_gpuobj_ref(NULL, &priv->ctxtab); - nvkm_gr_destroy(&priv->base); + struct nv20_gr *gr = nv20_gr(base); + return nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 32 * 4, 16, + true, &gr->ctxtab); } int -nv20_gr_init(struct nvkm_object *object) +nv20_gr_init(struct nvkm_gr *base) { - struct nvkm_engine *engine = nv_engine(object); - struct nv20_gr_priv *priv = (void *)engine; - struct nvkm_fb *pfb = nvkm_fb(object); + struct nv20_gr *gr = nv20_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; u32 tmp, vramsz; - int ret, i; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; + int i; - nv_wr32(priv, NV20_PGRAPH_CHANNEL_CTX_TABLE, priv->ctxtab->addr >> 4); + nvkm_wr32(device, NV20_PGRAPH_CHANNEL_CTX_TABLE, + nvkm_memory_addr(gr->ctxtab) >> 4); - if (nv_device(priv)->chipset == 0x20) { - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x003d0000); + if (device->chipset == 0x20) { + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x003d0000); for (i = 0; i < 15; i++) - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, 0x00000000); - nv_wait(priv, 0x400700, 0xffffffff, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, 0x00000000); + nvkm_msec(device, 2000, + if (!nvkm_rd32(device, 0x400700)) + break; + ); } else { - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x02c80000); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x02c80000); for (i = 0; i < 32; i++) - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, 0x00000000); - nv_wait(priv, 0x400700, 0xffffffff, 0x00000000); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, 0x00000000); + nvkm_msec(device, 2000, + if (!nvkm_rd32(device, 0x400700)) + break; + ); } - nv_wr32(priv, NV03_PGRAPH_INTR , 0xFFFFFFFF); - nv_wr32(priv, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR , 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 
0x00000000); - nv_wr32(priv, NV04_PGRAPH_DEBUG_1, 0x00118700); - nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0xF3CE0475); /* 0x4 = auto ctx switch */ - nv_wr32(priv, NV10_PGRAPH_DEBUG_4, 0x00000000); - nv_wr32(priv, 0x40009C , 0x00000040); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x00118700); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xF3CE0475); /* 0x4 = auto ctx switch */ + nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00000000); + nvkm_wr32(device, 0x40009C , 0x00000040); - if (nv_device(priv)->chipset >= 0x25) { - nv_wr32(priv, 0x400890, 0x00a8cfff); - nv_wr32(priv, 0x400610, 0x304B1FB6); - nv_wr32(priv, 0x400B80, 0x1cbd3883); - nv_wr32(priv, 0x400B84, 0x44000000); - nv_wr32(priv, 0x400098, 0x40000080); - nv_wr32(priv, 0x400B88, 0x000000ff); + if (device->chipset >= 0x25) { + nvkm_wr32(device, 0x400890, 0x00a8cfff); + nvkm_wr32(device, 0x400610, 0x304B1FB6); + nvkm_wr32(device, 0x400B80, 0x1cbd3883); + nvkm_wr32(device, 0x400B84, 0x44000000); + nvkm_wr32(device, 0x400098, 0x40000080); + nvkm_wr32(device, 0x400B88, 0x000000ff); } else { - nv_wr32(priv, 0x400880, 0x0008c7df); - nv_wr32(priv, 0x400094, 0x00000005); - nv_wr32(priv, 0x400B80, 0x45eae20e); - nv_wr32(priv, 0x400B84, 0x24000000); - nv_wr32(priv, 0x400098, 0x00000040); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00E00038); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00000030); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00E10038); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00000030); + nvkm_wr32(device, 0x400880, 0x0008c7df); + nvkm_wr32(device, 0x400094, 0x00000005); + nvkm_wr32(device, 0x400B80, 0x45eae20e); + nvkm_wr32(device, 0x400B84, 0x24000000); + nvkm_wr32(device, 0x400098, 0x00000040); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E00038); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000030); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E10038); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000030); } - /* Turn all the tiling regions off. 
*/ - for (i = 0; i < pfb->tile.regions; i++) - engine->tile_prog(engine, i); + nvkm_wr32(device, 0x4009a0, nvkm_rd32(device, 0x100324)); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA000C); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA, nvkm_rd32(device, 0x100324)); - nv_wr32(priv, 0x4009a0, nv_rd32(priv, 0x100324)); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA000C); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA, nv_rd32(priv, 0x100324)); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100); + nvkm_wr32(device, NV10_PGRAPH_STATE , 0xFFFFFFFF); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10000100); - nv_wr32(priv, NV10_PGRAPH_STATE , 0xFFFFFFFF); - - tmp = nv_rd32(priv, NV10_PGRAPH_SURFACE) & 0x0007ff00; - nv_wr32(priv, NV10_PGRAPH_SURFACE, tmp); - tmp = nv_rd32(priv, NV10_PGRAPH_SURFACE) | 0x00020100; - nv_wr32(priv, NV10_PGRAPH_SURFACE, tmp); + tmp = nvkm_rd32(device, NV10_PGRAPH_SURFACE) & 0x0007ff00; + nvkm_wr32(device, NV10_PGRAPH_SURFACE, tmp); + tmp = nvkm_rd32(device, NV10_PGRAPH_SURFACE) | 0x00020100; + nvkm_wr32(device, NV10_PGRAPH_SURFACE, tmp); /* begin RAM config */ - vramsz = nv_device_resource_len(nv_device(priv), 0) - 1; - nv_wr32(priv, 0x4009A4, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4009A8, nv_rd32(priv, 0x100204)); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0000); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , nv_rd32(priv, 0x100200)); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0004); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , nv_rd32(priv, 0x100204)); - nv_wr32(priv, 0x400820, 0); - nv_wr32(priv, 0x400824, 0); - nv_wr32(priv, 0x400864, vramsz - 1); - nv_wr32(priv, 0x400868, vramsz - 1); + vramsz = device->func->resource_size(device, 1) - 1; + nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204)); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0000); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0004); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , nvkm_rd32(device, 0x100204)); + nvkm_wr32(device, 0x400820, 0); + nvkm_wr32(device, 0x400824, 0); + nvkm_wr32(device, 0x400864, vramsz - 1); + nvkm_wr32(device, 0x400868, vramsz - 1); /* interesting.. the below overwrites some of the tile setup above.. 
*/ - nv_wr32(priv, 0x400B20, 0x00000000); - nv_wr32(priv, 0x400B04, 0xFFFFFFFF); + nvkm_wr32(device, 0x400B20, 0x00000000); + nvkm_wr32(device, 0x400B04, 0xFFFFFFFF); - nv_wr32(priv, NV03_PGRAPH_ABS_UCLIP_XMIN, 0); - nv_wr32(priv, NV03_PGRAPH_ABS_UCLIP_YMIN, 0); - nv_wr32(priv, NV03_PGRAPH_ABS_UCLIP_XMAX, 0x7fff); - nv_wr32(priv, NV03_PGRAPH_ABS_UCLIP_YMAX, 0x7fff); + nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_XMIN, 0); + nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_YMIN, 0); + nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_XMAX, 0x7fff); + nvkm_wr32(device, NV03_PGRAPH_ABS_UCLIP_YMAX, 0x7fff); return 0; } -struct nvkm_oclass -nv20_gr_oclass = { - .handle = NV_ENGINE(GR, 0x20), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv20_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv20_gr_init, - .fini = _nvkm_gr_fini, - }, +void * +nv20_gr_dtor(struct nvkm_gr *base) +{ + struct nv20_gr *gr = nv20_gr(base); + nvkm_memory_del(&gr->ctxtab); + return gr; +} + +int +nv20_gr_new_(const struct nvkm_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) +{ + struct nv20_gr *gr; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + + return nvkm_gr_ctor(func, device, index, 0x00001000, true, &gr->base); +} + +static const struct nvkm_gr_func +nv20_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv20_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv20_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x0096, &nv04_gr_object }, /* celcius */ + { -1, -1, 0x0097, &nv04_gr_object }, /* kelvin */ + { -1, -1, 0x009e, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + {} + } }; + +int +nv20_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv20_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h index ac4dc048fed1..cdf4501e3798 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.h @@ -1,26 +1,33 @@ #ifndef __NV20_GR_H__ #define __NV20_GR_H__ -#include <engine/gr.h> +#define nv20_gr(p) container_of((p), struct nv20_gr, base) +#include "priv.h" -struct nv20_gr_priv { +struct nv20_gr { struct nvkm_gr base; - struct nvkm_gpuobj *ctxtab; + struct nvkm_memory *ctxtab; }; -struct nv20_gr_chan { - struct nvkm_gr_chan base; - int chid; -}; +int nv20_gr_new_(const struct nvkm_gr_func *, struct nvkm_device *, + int, struct nvkm_gr **); +void *nv20_gr_dtor(struct nvkm_gr *); +int nv20_gr_oneinit(struct nvkm_gr *); +int nv20_gr_init(struct nvkm_gr *); +void nv20_gr_intr(struct nvkm_gr *); +void nv20_gr_tile(struct nvkm_gr *, int, struct nvkm_fb_tile *); -extern struct nvkm_oclass nv25_gr_sclass[]; -int nv20_gr_context_init(struct nvkm_object *); -int nv20_gr_context_fini(struct nvkm_object *, bool); +int nv30_gr_init(struct nvkm_gr *); -void nv20_gr_tile_prog(struct nvkm_engine *, int); 
-void nv20_gr_intr(struct nvkm_subdev *); +#define nv20_gr_chan(p) container_of((p), struct nv20_gr_chan, object) -void nv20_gr_dtor(struct nvkm_object *); -int nv20_gr_init(struct nvkm_object *); +struct nv20_gr_chan { + struct nvkm_object object; + struct nv20_gr *gr; + int chid; + struct nvkm_memory *inst; +}; -int nv30_gr_init(struct nvkm_object *); +void *nv20_gr_chan_dtor(struct nvkm_object *); +int nv20_gr_chan_init(struct nvkm_object *); +int nv20_gr_chan_fini(struct nvkm_object *, bool); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c index bc362519cebb..6c4a00819b4b 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c @@ -1,158 +1,134 @@ #include "nv20.h" #include "regs.h" +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> /******************************************************************************* - * Graphics object classes + * PGRAPH context ******************************************************************************/ -struct nvkm_oclass -nv25_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv04_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv04_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv04_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv04_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv04_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs, NULL }, /* ifc */ - { 0x0096, &nv04_gr_ofuncs, NULL }, /* celcius */ - { 0x009e, &nv04_gr_ofuncs, NULL }, /* swzsurf */ - { 0x009f, &nv04_gr_ofuncs, NULL }, /* imageblit */ - { 0x0597, &nv04_gr_ofuncs, NULL }, /* kelvin */ - {}, +static const struct nvkm_object_func +nv25_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, }; -/******************************************************************************* - * PGRAPH context - ******************************************************************************/ - static int -nv25_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x3724, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv25_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x3724, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0028, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x035c, 0xffff0000); - nv_wo32(chan, 0x03c0, 0x0fff0000); - nv_wo32(chan, 0x03c4, 0x0fff0000); - nv_wo32(chan, 0x049c, 0x00000101); - nv_wo32(chan, 0x04b0, 0x00000111); - nv_wo32(chan, 0x04c8, 0x00000080); - nv_wo32(chan, 0x04cc, 0xffff0000); - nv_wo32(chan, 0x04d0, 0x00000001); - nv_wo32(chan, 0x04e4, 0x44400000); - nv_wo32(chan, 0x04fc, 
0x4b800000); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0028, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x035c, 0xffff0000); + nvkm_wo32(chan->inst, 0x03c0, 0x0fff0000); + nvkm_wo32(chan->inst, 0x03c4, 0x0fff0000); + nvkm_wo32(chan->inst, 0x049c, 0x00000101); + nvkm_wo32(chan->inst, 0x04b0, 0x00000111); + nvkm_wo32(chan->inst, 0x04c8, 0x00000080); + nvkm_wo32(chan->inst, 0x04cc, 0xffff0000); + nvkm_wo32(chan->inst, 0x04d0, 0x00000001); + nvkm_wo32(chan->inst, 0x04e4, 0x44400000); + nvkm_wo32(chan->inst, 0x04fc, 0x4b800000); for (i = 0x0510; i <= 0x051c; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x0530; i <= 0x053c; i += 4) - nv_wo32(chan, i, 0x00080000); + nvkm_wo32(chan->inst, i, 0x00080000); for (i = 0x0548; i <= 0x0554; i += 4) - nv_wo32(chan, i, 0x01012000); + nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x0558; i <= 0x0564; i += 4) - nv_wo32(chan, i, 0x000105b8); + nvkm_wo32(chan->inst, i, 0x000105b8); for (i = 0x0568; i <= 0x0574; i += 4) - nv_wo32(chan, i, 0x00080008); + nvkm_wo32(chan->inst, i, 0x00080008); for (i = 0x0598; i <= 0x05d4; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x05e0, 0x4b7fffff); - nv_wo32(chan, 0x0620, 0x00000080); - nv_wo32(chan, 0x0624, 0x30201000); - nv_wo32(chan, 0x0628, 0x70605040); - nv_wo32(chan, 0x062c, 0xb0a09080); - nv_wo32(chan, 0x0630, 0xf0e0d0c0); - nv_wo32(chan, 0x0664, 0x00000001); - nv_wo32(chan, 0x066c, 0x00004000); - nv_wo32(chan, 0x0678, 0x00000001); - nv_wo32(chan, 0x0680, 0x00040000); - nv_wo32(chan, 0x0684, 0x00010000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x05e0, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x0620, 0x00000080); + nvkm_wo32(chan->inst, 0x0624, 0x30201000); + nvkm_wo32(chan->inst, 0x0628, 0x70605040); + nvkm_wo32(chan->inst, 0x062c, 0xb0a09080); + nvkm_wo32(chan->inst, 0x0630, 0xf0e0d0c0); + nvkm_wo32(chan->inst, 0x0664, 0x00000001); + nvkm_wo32(chan->inst, 0x066c, 0x00004000); + nvkm_wo32(chan->inst, 0x0678, 0x00000001); + nvkm_wo32(chan->inst, 0x0680, 0x00040000); + nvkm_wo32(chan->inst, 0x0684, 0x00010000); for (i = 0x1b04; i <= 0x2374; i += 16) { - nv_wo32(chan, (i + 0), 0x10700ff9); - nv_wo32(chan, (i + 4), 0x0436086c); - nv_wo32(chan, (i + 8), 0x000c001b); + nvkm_wo32(chan->inst, (i + 0), 0x10700ff9); + nvkm_wo32(chan->inst, (i + 4), 0x0436086c); + nvkm_wo32(chan->inst, (i + 8), 0x000c001b); } - nv_wo32(chan, 0x2704, 0x3f800000); - nv_wo32(chan, 0x2718, 0x3f800000); - nv_wo32(chan, 0x2744, 0x40000000); - nv_wo32(chan, 0x2748, 0x3f800000); - nv_wo32(chan, 0x274c, 0x3f000000); - nv_wo32(chan, 0x2754, 0x40000000); - nv_wo32(chan, 0x2758, 0x3f800000); - nv_wo32(chan, 0x2760, 0xbf800000); - nv_wo32(chan, 0x2768, 0xbf800000); - nv_wo32(chan, 0x308c, 0x000fe000); - nv_wo32(chan, 0x3108, 0x000003f8); - nv_wo32(chan, 0x3468, 0x002fe000); + nvkm_wo32(chan->inst, 0x2704, 0x3f800000); + nvkm_wo32(chan->inst, 0x2718, 0x3f800000); + nvkm_wo32(chan->inst, 0x2744, 0x40000000); + nvkm_wo32(chan->inst, 0x2748, 0x3f800000); + nvkm_wo32(chan->inst, 0x274c, 0x3f000000); + nvkm_wo32(chan->inst, 0x2754, 0x40000000); + nvkm_wo32(chan->inst, 0x2758, 0x3f800000); + nvkm_wo32(chan->inst, 0x2760, 0xbf800000); + nvkm_wo32(chan->inst, 0x2768, 0xbf800000); + nvkm_wo32(chan->inst, 0x308c, 0x000fe000); + nvkm_wo32(chan->inst, 0x3108, 0x000003f8); + nvkm_wo32(chan->inst, 0x3468, 0x002fe000); for (i = 0x3484; i <= 0x34a0; i += 4) - nv_wo32(chan, i, 0x001c527c); + nvkm_wo32(chan->inst, i, 0x001c527c); + nvkm_done(chan->inst); return 0; } -static 
struct nvkm_oclass -nv25_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x25), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv25_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ -static int -nv25_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; +static const struct nvkm_gr_func +nv25_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv20_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv25_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x0096, &nv04_gr_object }, /* celcius */ + { -1, -1, 0x009e, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + { -1, -1, 0x0597, &nv04_gr_object }, /* kelvin */ + {} + } +}; - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv25_gr_cclass; - nv_engine(priv)->sclass = nv25_gr_sclass; - nv_engine(priv)->tile_prog = nv20_gr_tile_prog; - return 0; +int +nv25_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv25_gr, device, index, pgr); } - -struct nvkm_oclass -nv25_gr_oclass = { - .handle = NV_ENGINE(GR, 0x25), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv25_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv20_gr_init, - .fini = _nvkm_gr_fini, - }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c index 22a5096e283d..3cad26dbc2b1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c @@ -1,125 +1,125 @@ #include "nv20.h" #include "regs.h" +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> /******************************************************************************* * PGRAPH context ******************************************************************************/ +static const struct nvkm_object_func +nv2a_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, +}; + static int -nv2a_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct 
nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x36b0, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv2a_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x36b0, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0000, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x033c, 0xffff0000); - nv_wo32(chan, 0x03a0, 0x0fff0000); - nv_wo32(chan, 0x03a4, 0x0fff0000); - nv_wo32(chan, 0x047c, 0x00000101); - nv_wo32(chan, 0x0490, 0x00000111); - nv_wo32(chan, 0x04a8, 0x44400000); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0000, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x033c, 0xffff0000); + nvkm_wo32(chan->inst, 0x03a0, 0x0fff0000); + nvkm_wo32(chan->inst, 0x03a4, 0x0fff0000); + nvkm_wo32(chan->inst, 0x047c, 0x00000101); + nvkm_wo32(chan->inst, 0x0490, 0x00000111); + nvkm_wo32(chan->inst, 0x04a8, 0x44400000); for (i = 0x04d4; i <= 0x04e0; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x04f4; i <= 0x0500; i += 4) - nv_wo32(chan, i, 0x00080000); + nvkm_wo32(chan->inst, i, 0x00080000); for (i = 0x050c; i <= 0x0518; i += 4) - nv_wo32(chan, i, 0x01012000); + nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x051c; i <= 0x0528; i += 4) - nv_wo32(chan, i, 0x000105b8); + nvkm_wo32(chan->inst, i, 0x000105b8); for (i = 0x052c; i <= 0x0538; i += 4) - nv_wo32(chan, i, 0x00080008); + nvkm_wo32(chan->inst, i, 0x00080008); for (i = 0x055c; i <= 0x0598; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x05a4, 0x4b7fffff); - nv_wo32(chan, 0x05fc, 0x00000001); - nv_wo32(chan, 0x0604, 0x00004000); - nv_wo32(chan, 0x0610, 0x00000001); - nv_wo32(chan, 0x0618, 0x00040000); - nv_wo32(chan, 0x061c, 0x00010000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x05a4, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x05fc, 0x00000001); + nvkm_wo32(chan->inst, 0x0604, 0x00004000); + nvkm_wo32(chan->inst, 0x0610, 0x00000001); + nvkm_wo32(chan->inst, 0x0618, 0x00040000); + nvkm_wo32(chan->inst, 0x061c, 0x00010000); for (i = 0x1a9c; i <= 0x22fc; i += 16) { /*XXX: check!! 
*/ - nv_wo32(chan, (i + 0), 0x10700ff9); - nv_wo32(chan, (i + 4), 0x0436086c); - nv_wo32(chan, (i + 8), 0x000c001b); + nvkm_wo32(chan->inst, (i + 0), 0x10700ff9); + nvkm_wo32(chan->inst, (i + 4), 0x0436086c); + nvkm_wo32(chan->inst, (i + 8), 0x000c001b); } - nv_wo32(chan, 0x269c, 0x3f800000); - nv_wo32(chan, 0x26b0, 0x3f800000); - nv_wo32(chan, 0x26dc, 0x40000000); - nv_wo32(chan, 0x26e0, 0x3f800000); - nv_wo32(chan, 0x26e4, 0x3f000000); - nv_wo32(chan, 0x26ec, 0x40000000); - nv_wo32(chan, 0x26f0, 0x3f800000); - nv_wo32(chan, 0x26f8, 0xbf800000); - nv_wo32(chan, 0x2700, 0xbf800000); - nv_wo32(chan, 0x3024, 0x000fe000); - nv_wo32(chan, 0x30a0, 0x000003f8); - nv_wo32(chan, 0x33fc, 0x002fe000); + nvkm_wo32(chan->inst, 0x269c, 0x3f800000); + nvkm_wo32(chan->inst, 0x26b0, 0x3f800000); + nvkm_wo32(chan->inst, 0x26dc, 0x40000000); + nvkm_wo32(chan->inst, 0x26e0, 0x3f800000); + nvkm_wo32(chan->inst, 0x26e4, 0x3f000000); + nvkm_wo32(chan->inst, 0x26ec, 0x40000000); + nvkm_wo32(chan->inst, 0x26f0, 0x3f800000); + nvkm_wo32(chan->inst, 0x26f8, 0xbf800000); + nvkm_wo32(chan->inst, 0x2700, 0xbf800000); + nvkm_wo32(chan->inst, 0x3024, 0x000fe000); + nvkm_wo32(chan->inst, 0x30a0, 0x000003f8); + nvkm_wo32(chan->inst, 0x33fc, 0x002fe000); for (i = 0x341c; i <= 0x3438; i += 4) - nv_wo32(chan, i, 0x001c527c); + nvkm_wo32(chan->inst, i, 0x001c527c); + nvkm_done(chan->inst); return 0; } -static struct nvkm_oclass -nv2a_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x2a), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv2a_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ -static int -nv2a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; +static const struct nvkm_gr_func +nv2a_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv20_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv2a_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x0096, &nv04_gr_object }, /* celcius */ + { -1, -1, 0x009e, &nv04_gr_object }, /* swzsurf */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + { -1, -1, 0x0597, &nv04_gr_object }, /* kelvin */ + {} + } +}; - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv2a_gr_cclass; - nv_engine(priv)->sclass = nv25_gr_sclass; - nv_engine(priv)->tile_prog = 
nv20_gr_tile_prog; - return 0; +int +nv2a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv2a_gr, device, index, pgr); } - -struct nvkm_oclass -nv2a_gr_oclass = { - .handle = NV_ENGINE(GR, 0x2a), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv2a_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv20_gr_init, - .fini = _nvkm_gr_fini, - }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c index dcc84eb54fb6..69de8c6259fe 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c @@ -1,231 +1,198 @@ #include "nv20.h" #include "regs.h" -#include <core/device.h> +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> #include <subdev/fb.h> /******************************************************************************* - * Graphics object classes + * PGRAPH context ******************************************************************************/ -static struct nvkm_oclass -nv30_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv04_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv04_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv04_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv04_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv04_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs, NULL }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs, NULL }, /* imageblit */ - { 0x0362, &nv04_gr_ofuncs, NULL }, /* surf2d (nv30) */ - { 0x0389, &nv04_gr_ofuncs, NULL }, /* sifm (nv30) */ - { 0x038a, &nv04_gr_ofuncs, NULL }, /* ifc (nv30) */ - { 0x039e, &nv04_gr_ofuncs, NULL }, /* swzsurf (nv30) */ - { 0x0397, &nv04_gr_ofuncs, NULL }, /* rankine */ - {}, +static const struct nvkm_object_func +nv30_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, }; -/******************************************************************************* - * PGRAPH context - ******************************************************************************/ - static int -nv30_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x5f48, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv30_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x5f48, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0028, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x0410, 0x00000101); - nv_wo32(chan, 0x0424, 0x00000111); - nv_wo32(chan, 0x0428, 0x00000060); - nv_wo32(chan, 0x0444, 0x00000080); - nv_wo32(chan, 0x0448, 0xffff0000); - nv_wo32(chan, 0x044c, 0x00000001); - nv_wo32(chan, 0x0460, 0x44400000); - nv_wo32(chan, 0x048c, 0xffff0000); + 
nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0028, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x0410, 0x00000101); + nvkm_wo32(chan->inst, 0x0424, 0x00000111); + nvkm_wo32(chan->inst, 0x0428, 0x00000060); + nvkm_wo32(chan->inst, 0x0444, 0x00000080); + nvkm_wo32(chan->inst, 0x0448, 0xffff0000); + nvkm_wo32(chan->inst, 0x044c, 0x00000001); + nvkm_wo32(chan->inst, 0x0460, 0x44400000); + nvkm_wo32(chan->inst, 0x048c, 0xffff0000); for (i = 0x04e0; i < 0x04e8; i += 4) - nv_wo32(chan, i, 0x0fff0000); - nv_wo32(chan, 0x04ec, 0x00011100); + nvkm_wo32(chan->inst, i, 0x0fff0000); + nvkm_wo32(chan->inst, 0x04ec, 0x00011100); for (i = 0x0508; i < 0x0548; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x0550, 0x4b7fffff); - nv_wo32(chan, 0x058c, 0x00000080); - nv_wo32(chan, 0x0590, 0x30201000); - nv_wo32(chan, 0x0594, 0x70605040); - nv_wo32(chan, 0x0598, 0xb8a89888); - nv_wo32(chan, 0x059c, 0xf8e8d8c8); - nv_wo32(chan, 0x05b0, 0xb0000000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x0550, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x058c, 0x00000080); + nvkm_wo32(chan->inst, 0x0590, 0x30201000); + nvkm_wo32(chan->inst, 0x0594, 0x70605040); + nvkm_wo32(chan->inst, 0x0598, 0xb8a89888); + nvkm_wo32(chan->inst, 0x059c, 0xf8e8d8c8); + nvkm_wo32(chan->inst, 0x05b0, 0xb0000000); for (i = 0x0600; i < 0x0640; i += 4) - nv_wo32(chan, i, 0x00010588); + nvkm_wo32(chan->inst, i, 0x00010588); for (i = 0x0640; i < 0x0680; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x06c0; i < 0x0700; i += 4) - nv_wo32(chan, i, 0x0008aae4); + nvkm_wo32(chan->inst, i, 0x0008aae4); for (i = 0x0700; i < 0x0740; i += 4) - nv_wo32(chan, i, 0x01012000); + nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x0740; i < 0x0780; i += 4) - nv_wo32(chan, i, 0x00080008); - nv_wo32(chan, 0x085c, 0x00040000); - nv_wo32(chan, 0x0860, 0x00010000); + nvkm_wo32(chan->inst, i, 0x00080008); + nvkm_wo32(chan->inst, 0x085c, 0x00040000); + nvkm_wo32(chan->inst, 0x0860, 0x00010000); for (i = 0x0864; i < 0x0874; i += 4) - nv_wo32(chan, i, 0x00040004); + nvkm_wo32(chan->inst, i, 0x00040004); for (i = 0x1f18; i <= 0x3088 ; i += 16) { - nv_wo32(chan, i + 0, 0x10700ff9); - nv_wo32(chan, i + 1, 0x0436086c); - nv_wo32(chan, i + 2, 0x000c001b); + nvkm_wo32(chan->inst, i + 0, 0x10700ff9); + nvkm_wo32(chan->inst, i + 1, 0x0436086c); + nvkm_wo32(chan->inst, i + 2, 0x000c001b); } for (i = 0x30b8; i < 0x30c8; i += 4) - nv_wo32(chan, i, 0x0000ffff); - nv_wo32(chan, 0x344c, 0x3f800000); - nv_wo32(chan, 0x3808, 0x3f800000); - nv_wo32(chan, 0x381c, 0x3f800000); - nv_wo32(chan, 0x3848, 0x40000000); - nv_wo32(chan, 0x384c, 0x3f800000); - nv_wo32(chan, 0x3850, 0x3f000000); - nv_wo32(chan, 0x3858, 0x40000000); - nv_wo32(chan, 0x385c, 0x3f800000); - nv_wo32(chan, 0x3864, 0xbf800000); - nv_wo32(chan, 0x386c, 0xbf800000); + nvkm_wo32(chan->inst, i, 0x0000ffff); + nvkm_wo32(chan->inst, 0x344c, 0x3f800000); + nvkm_wo32(chan->inst, 0x3808, 0x3f800000); + nvkm_wo32(chan->inst, 0x381c, 0x3f800000); + nvkm_wo32(chan->inst, 0x3848, 0x40000000); + nvkm_wo32(chan->inst, 0x384c, 0x3f800000); + nvkm_wo32(chan->inst, 0x3850, 0x3f000000); + nvkm_wo32(chan->inst, 0x3858, 0x40000000); + nvkm_wo32(chan->inst, 0x385c, 0x3f800000); + nvkm_wo32(chan->inst, 0x3864, 0xbf800000); + nvkm_wo32(chan->inst, 0x386c, 0xbf800000); + nvkm_done(chan->inst); return 0; } -static struct nvkm_oclass -nv30_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x30), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv30_gr_context_ctor, - .dtor = 
_nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ -static int -nv30_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv30_gr_cclass; - nv_engine(priv)->sclass = nv30_gr_sclass; - nv_engine(priv)->tile_prog = nv20_gr_tile_prog; - return 0; -} - int -nv30_gr_init(struct nvkm_object *object) +nv30_gr_init(struct nvkm_gr *base) { - struct nvkm_engine *engine = nv_engine(object); - struct nv20_gr_priv *priv = (void *)engine; - struct nvkm_fb *pfb = nvkm_fb(object); - int ret, i; - - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; - - nv_wr32(priv, NV20_PGRAPH_CHANNEL_CTX_TABLE, priv->ctxtab->addr >> 4); - - nv_wr32(priv, NV03_PGRAPH_INTR , 0xFFFFFFFF); - nv_wr32(priv, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); - - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x00000000); - nv_wr32(priv, NV04_PGRAPH_DEBUG_1, 0x401287c0); - nv_wr32(priv, 0x400890, 0x01b463ff); - nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0xf2de0475); - nv_wr32(priv, NV10_PGRAPH_DEBUG_4, 0x00008000); - nv_wr32(priv, NV04_PGRAPH_LIMIT_VIOL_PIX, 0xf04bdff6); - nv_wr32(priv, 0x400B80, 0x1003d888); - nv_wr32(priv, 0x400B84, 0x0c000000); - nv_wr32(priv, 0x400098, 0x00000000); - nv_wr32(priv, 0x40009C, 0x0005ad00); - nv_wr32(priv, 0x400B88, 0x62ff00ff); /* suspiciously like PGRAPH_DEBUG_2 */ - nv_wr32(priv, 0x4000a0, 0x00000000); - nv_wr32(priv, 0x4000a4, 0x00000008); - nv_wr32(priv, 0x4008a8, 0xb784a400); - nv_wr32(priv, 0x400ba0, 0x002f8685); - nv_wr32(priv, 0x400ba4, 0x00231f3f); - nv_wr32(priv, 0x4008a4, 0x40000020); - - if (nv_device(priv)->chipset == 0x34) { - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0004); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00200201); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0008); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00000008); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00EA0000); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00000032); - nv_wr32(priv, NV10_PGRAPH_RDI_INDEX, 0x00E00004); - nv_wr32(priv, NV10_PGRAPH_RDI_DATA , 0x00000002); + struct nv20_gr *gr = nv20_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + + nvkm_wr32(device, NV20_PGRAPH_CHANNEL_CTX_TABLE, + nvkm_memory_addr(gr->ctxtab) >> 4); + + nvkm_wr32(device, NV03_PGRAPH_INTR , 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF); + + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x401287c0); + nvkm_wr32(device, 0x400890, 0x01b463ff); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xf2de0475); + nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00008000); + nvkm_wr32(device, NV04_PGRAPH_LIMIT_VIOL_PIX, 0xf04bdff6); + nvkm_wr32(device, 0x400B80, 0x1003d888); + nvkm_wr32(device, 0x400B84, 
0x0c000000); + nvkm_wr32(device, 0x400098, 0x00000000); + nvkm_wr32(device, 0x40009C, 0x0005ad00); + nvkm_wr32(device, 0x400B88, 0x62ff00ff); /* suspiciously like PGRAPH_DEBUG_2 */ + nvkm_wr32(device, 0x4000a0, 0x00000000); + nvkm_wr32(device, 0x4000a4, 0x00000008); + nvkm_wr32(device, 0x4008a8, 0xb784a400); + nvkm_wr32(device, 0x400ba0, 0x002f8685); + nvkm_wr32(device, 0x400ba4, 0x00231f3f); + nvkm_wr32(device, 0x4008a4, 0x40000020); + + if (device->chipset == 0x34) { + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0004); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00200201); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0008); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000008); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00EA0000); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000032); + nvkm_wr32(device, NV10_PGRAPH_RDI_INDEX, 0x00E00004); + nvkm_wr32(device, NV10_PGRAPH_RDI_DATA , 0x00000002); } - nv_wr32(priv, 0x4000c0, 0x00000016); - - /* Turn all the tiling regions off. */ - for (i = 0; i < pfb->tile.regions; i++) - engine->tile_prog(engine, i); + nvkm_wr32(device, 0x4000c0, 0x00000016); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10000100); - nv_wr32(priv, NV10_PGRAPH_STATE , 0xFFFFFFFF); - nv_wr32(priv, 0x0040075c , 0x00000001); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10000100); + nvkm_wr32(device, NV10_PGRAPH_STATE , 0xFFFFFFFF); + nvkm_wr32(device, 0x0040075c , 0x00000001); /* begin RAM config */ - /* vramsz = pci_resource_len(priv->dev->pdev, 0) - 1; */ - nv_wr32(priv, 0x4009A4, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4009A8, nv_rd32(priv, 0x100204)); - if (nv_device(priv)->chipset != 0x34) { - nv_wr32(priv, 0x400750, 0x00EA0000); - nv_wr32(priv, 0x400754, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x400750, 0x00EA0004); - nv_wr32(priv, 0x400754, nv_rd32(priv, 0x100204)); + /* vramsz = pci_resource_len(gr->dev->pdev, 1) - 1; */ + nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204)); + if (device->chipset != 0x34) { + nvkm_wr32(device, 0x400750, 0x00EA0000); + nvkm_wr32(device, 0x400754, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x400750, 0x00EA0004); + nvkm_wr32(device, 0x400754, nvkm_rd32(device, 0x100204)); } + return 0; } -struct nvkm_oclass -nv30_gr_oclass = { - .handle = NV_ENGINE(GR, 0x30), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv30_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv30_gr_init, - .fini = _nvkm_gr_fini, - }, +static const struct nvkm_gr_func +nv30_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv30_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv30_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + { -1, -1, 0x0362, &nv04_gr_object }, /* surf2d (nv30) */ + { -1, -1, 0x0389, &nv04_gr_object }, /* sifm (nv30) */ + { -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */ + { -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */ + { -1, -1, 0x0397, 
&nv04_gr_object }, /* rankine */ + {} + } }; + +int +nv30_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv30_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c index 985b7f3306ae..2207dac23981 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c @@ -1,159 +1,135 @@ #include "nv20.h" #include "regs.h" +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> /******************************************************************************* - * Graphics object classes + * PGRAPH context ******************************************************************************/ -static struct nvkm_oclass -nv34_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv04_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv04_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv04_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv04_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv04_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs, NULL }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs, NULL }, /* imageblit */ - { 0x0362, &nv04_gr_ofuncs, NULL }, /* surf2d (nv30) */ - { 0x0389, &nv04_gr_ofuncs, NULL }, /* sifm (nv30) */ - { 0x038a, &nv04_gr_ofuncs, NULL }, /* ifc (nv30) */ - { 0x039e, &nv04_gr_ofuncs, NULL }, /* swzsurf (nv30) */ - { 0x0697, &nv04_gr_ofuncs, NULL }, /* rankine */ - {}, +static const struct nvkm_object_func +nv34_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, }; -/******************************************************************************* - * PGRAPH context - ******************************************************************************/ - static int -nv34_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x46dc, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv34_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x46dc, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0028, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x040c, 0x01000101); - nv_wo32(chan, 0x0420, 0x00000111); - nv_wo32(chan, 0x0424, 0x00000060); - nv_wo32(chan, 0x0440, 0x00000080); - nv_wo32(chan, 0x0444, 0xffff0000); - nv_wo32(chan, 0x0448, 0x00000001); - nv_wo32(chan, 0x045c, 0x44400000); - nv_wo32(chan, 0x0480, 0xffff0000); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0028, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x040c, 0x01000101); + nvkm_wo32(chan->inst, 0x0420, 0x00000111); + nvkm_wo32(chan->inst, 0x0424, 0x00000060); + nvkm_wo32(chan->inst, 0x0440, 
0x00000080); + nvkm_wo32(chan->inst, 0x0444, 0xffff0000); + nvkm_wo32(chan->inst, 0x0448, 0x00000001); + nvkm_wo32(chan->inst, 0x045c, 0x44400000); + nvkm_wo32(chan->inst, 0x0480, 0xffff0000); for (i = 0x04d4; i < 0x04dc; i += 4) - nv_wo32(chan, i, 0x0fff0000); - nv_wo32(chan, 0x04e0, 0x00011100); + nvkm_wo32(chan->inst, i, 0x0fff0000); + nvkm_wo32(chan->inst, 0x04e0, 0x00011100); for (i = 0x04fc; i < 0x053c; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x0544, 0x4b7fffff); - nv_wo32(chan, 0x057c, 0x00000080); - nv_wo32(chan, 0x0580, 0x30201000); - nv_wo32(chan, 0x0584, 0x70605040); - nv_wo32(chan, 0x0588, 0xb8a89888); - nv_wo32(chan, 0x058c, 0xf8e8d8c8); - nv_wo32(chan, 0x05a0, 0xb0000000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x0544, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x057c, 0x00000080); + nvkm_wo32(chan->inst, 0x0580, 0x30201000); + nvkm_wo32(chan->inst, 0x0584, 0x70605040); + nvkm_wo32(chan->inst, 0x0588, 0xb8a89888); + nvkm_wo32(chan->inst, 0x058c, 0xf8e8d8c8); + nvkm_wo32(chan->inst, 0x05a0, 0xb0000000); for (i = 0x05f0; i < 0x0630; i += 4) - nv_wo32(chan, i, 0x00010588); + nvkm_wo32(chan->inst, i, 0x00010588); for (i = 0x0630; i < 0x0670; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x06b0; i < 0x06f0; i += 4) - nv_wo32(chan, i, 0x0008aae4); + nvkm_wo32(chan->inst, i, 0x0008aae4); for (i = 0x06f0; i < 0x0730; i += 4) - nv_wo32(chan, i, 0x01012000); + nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x0730; i < 0x0770; i += 4) - nv_wo32(chan, i, 0x00080008); - nv_wo32(chan, 0x0850, 0x00040000); - nv_wo32(chan, 0x0854, 0x00010000); + nvkm_wo32(chan->inst, i, 0x00080008); + nvkm_wo32(chan->inst, 0x0850, 0x00040000); + nvkm_wo32(chan->inst, 0x0854, 0x00010000); for (i = 0x0858; i < 0x0868; i += 4) - nv_wo32(chan, i, 0x00040004); + nvkm_wo32(chan->inst, i, 0x00040004); for (i = 0x15ac; i <= 0x271c ; i += 16) { - nv_wo32(chan, i + 0, 0x10700ff9); - nv_wo32(chan, i + 1, 0x0436086c); - nv_wo32(chan, i + 2, 0x000c001b); + nvkm_wo32(chan->inst, i + 0, 0x10700ff9); + nvkm_wo32(chan->inst, i + 1, 0x0436086c); + nvkm_wo32(chan->inst, i + 2, 0x000c001b); } for (i = 0x274c; i < 0x275c; i += 4) - nv_wo32(chan, i, 0x0000ffff); - nv_wo32(chan, 0x2ae0, 0x3f800000); - nv_wo32(chan, 0x2e9c, 0x3f800000); - nv_wo32(chan, 0x2eb0, 0x3f800000); - nv_wo32(chan, 0x2edc, 0x40000000); - nv_wo32(chan, 0x2ee0, 0x3f800000); - nv_wo32(chan, 0x2ee4, 0x3f000000); - nv_wo32(chan, 0x2eec, 0x40000000); - nv_wo32(chan, 0x2ef0, 0x3f800000); - nv_wo32(chan, 0x2ef8, 0xbf800000); - nv_wo32(chan, 0x2f00, 0xbf800000); + nvkm_wo32(chan->inst, i, 0x0000ffff); + nvkm_wo32(chan->inst, 0x2ae0, 0x3f800000); + nvkm_wo32(chan->inst, 0x2e9c, 0x3f800000); + nvkm_wo32(chan->inst, 0x2eb0, 0x3f800000); + nvkm_wo32(chan->inst, 0x2edc, 0x40000000); + nvkm_wo32(chan->inst, 0x2ee0, 0x3f800000); + nvkm_wo32(chan->inst, 0x2ee4, 0x3f000000); + nvkm_wo32(chan->inst, 0x2eec, 0x40000000); + nvkm_wo32(chan->inst, 0x2ef0, 0x3f800000); + nvkm_wo32(chan->inst, 0x2ef8, 0xbf800000); + nvkm_wo32(chan->inst, 0x2f00, 0xbf800000); + nvkm_done(chan->inst); return 0; } -static struct nvkm_oclass -nv34_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x34), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv34_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH 
engine/subdev functions ******************************************************************************/ -static int -nv34_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; +static const struct nvkm_gr_func +nv34_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv30_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv34_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + { -1, -1, 0x0362, &nv04_gr_object }, /* surf2d (nv30) */ + { -1, -1, 0x0389, &nv04_gr_object }, /* sifm (nv30) */ + { -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */ + { -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */ + { -1, -1, 0x0697, &nv04_gr_object }, /* rankine */ + {} + } +}; - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv34_gr_cclass; - nv_engine(priv)->sclass = nv34_gr_sclass; - nv_engine(priv)->tile_prog = nv20_gr_tile_prog; - return 0; +int +nv34_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv34_gr, device, index, pgr); } - -struct nvkm_oclass -nv34_gr_oclass = { - .handle = NV_ENGINE(GR, 0x34), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv34_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv30_gr_init, - .fini = _nvkm_gr_fini, - }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c index 707625f19ff5..740df0f52c38 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c @@ -1,159 +1,135 @@ #include "nv20.h" #include "regs.h" +#include <core/gpuobj.h> #include <engine/fifo.h> +#include <engine/fifo/chan.h> /******************************************************************************* - * Graphics object classes + * PGRAPH context ******************************************************************************/ -static struct nvkm_oclass -nv35_gr_sclass[] = { - { 0x0012, &nv04_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv04_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv04_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv04_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv04_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv04_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv04_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv04_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv04_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv04_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv04_gr_ofuncs, NULL }, /* ifc */ - { 0x009f, &nv04_gr_ofuncs, NULL }, /* imageblit */ - { 0x0362, &nv04_gr_ofuncs, NULL }, /* surf2d (nv30) */ - { 
0x0389, &nv04_gr_ofuncs, NULL }, /* sifm (nv30) */ - { 0x038a, &nv04_gr_ofuncs, NULL }, /* ifc (nv30) */ - { 0x039e, &nv04_gr_ofuncs, NULL }, /* swzsurf (nv30) */ - { 0x0497, &nv04_gr_ofuncs, NULL }, /* rankine */ - {}, +static const struct nvkm_object_func +nv35_gr_chan = { + .dtor = nv20_gr_chan_dtor, + .init = nv20_gr_chan_init, + .fini = nv20_gr_chan_fini, }; -/******************************************************************************* - * PGRAPH context - ******************************************************************************/ - static int -nv35_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { + struct nv20_gr *gr = nv20_gr(base); struct nv20_gr_chan *chan; int ret, i; - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, 0x577c, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv35_gr_chan, oclass, &chan->object); + chan->gr = gr; + chan->chid = fifoch->chid; + *pobject = &chan->object; + + ret = nvkm_memory_new(gr->base.engine.subdev.device, + NVKM_MEM_TARGET_INST, 0x577c, 16, true, + &chan->inst); if (ret) return ret; - chan->chid = nvkm_fifo_chan(parent)->chid; - - nv_wo32(chan, 0x0028, 0x00000001 | (chan->chid << 24)); - nv_wo32(chan, 0x040c, 0x00000101); - nv_wo32(chan, 0x0420, 0x00000111); - nv_wo32(chan, 0x0424, 0x00000060); - nv_wo32(chan, 0x0440, 0x00000080); - nv_wo32(chan, 0x0444, 0xffff0000); - nv_wo32(chan, 0x0448, 0x00000001); - nv_wo32(chan, 0x045c, 0x44400000); - nv_wo32(chan, 0x0488, 0xffff0000); + nvkm_kmap(chan->inst); + nvkm_wo32(chan->inst, 0x0028, 0x00000001 | (chan->chid << 24)); + nvkm_wo32(chan->inst, 0x040c, 0x00000101); + nvkm_wo32(chan->inst, 0x0420, 0x00000111); + nvkm_wo32(chan->inst, 0x0424, 0x00000060); + nvkm_wo32(chan->inst, 0x0440, 0x00000080); + nvkm_wo32(chan->inst, 0x0444, 0xffff0000); + nvkm_wo32(chan->inst, 0x0448, 0x00000001); + nvkm_wo32(chan->inst, 0x045c, 0x44400000); + nvkm_wo32(chan->inst, 0x0488, 0xffff0000); for (i = 0x04dc; i < 0x04e4; i += 4) - nv_wo32(chan, i, 0x0fff0000); - nv_wo32(chan, 0x04e8, 0x00011100); + nvkm_wo32(chan->inst, i, 0x0fff0000); + nvkm_wo32(chan->inst, 0x04e8, 0x00011100); for (i = 0x0504; i < 0x0544; i += 4) - nv_wo32(chan, i, 0x07ff0000); - nv_wo32(chan, 0x054c, 0x4b7fffff); - nv_wo32(chan, 0x0588, 0x00000080); - nv_wo32(chan, 0x058c, 0x30201000); - nv_wo32(chan, 0x0590, 0x70605040); - nv_wo32(chan, 0x0594, 0xb8a89888); - nv_wo32(chan, 0x0598, 0xf8e8d8c8); - nv_wo32(chan, 0x05ac, 0xb0000000); + nvkm_wo32(chan->inst, i, 0x07ff0000); + nvkm_wo32(chan->inst, 0x054c, 0x4b7fffff); + nvkm_wo32(chan->inst, 0x0588, 0x00000080); + nvkm_wo32(chan->inst, 0x058c, 0x30201000); + nvkm_wo32(chan->inst, 0x0590, 0x70605040); + nvkm_wo32(chan->inst, 0x0594, 0xb8a89888); + nvkm_wo32(chan->inst, 0x0598, 0xf8e8d8c8); + nvkm_wo32(chan->inst, 0x05ac, 0xb0000000); for (i = 0x0604; i < 0x0644; i += 4) - nv_wo32(chan, i, 0x00010588); + nvkm_wo32(chan->inst, i, 0x00010588); for (i = 0x0644; i < 0x0684; i += 4) - nv_wo32(chan, i, 0x00030303); + nvkm_wo32(chan->inst, i, 0x00030303); for (i = 0x06c4; i < 0x0704; i += 4) - nv_wo32(chan, i, 0x0008aae4); + nvkm_wo32(chan->inst, i, 0x0008aae4); for (i = 0x0704; i < 0x0744; i += 4) - nv_wo32(chan, i, 0x01012000); + 
nvkm_wo32(chan->inst, i, 0x01012000); for (i = 0x0744; i < 0x0784; i += 4) - nv_wo32(chan, i, 0x00080008); - nv_wo32(chan, 0x0860, 0x00040000); - nv_wo32(chan, 0x0864, 0x00010000); + nvkm_wo32(chan->inst, i, 0x00080008); + nvkm_wo32(chan->inst, 0x0860, 0x00040000); + nvkm_wo32(chan->inst, 0x0864, 0x00010000); for (i = 0x0868; i < 0x0878; i += 4) - nv_wo32(chan, i, 0x00040004); + nvkm_wo32(chan->inst, i, 0x00040004); for (i = 0x1f1c; i <= 0x308c ; i += 16) { - nv_wo32(chan, i + 0, 0x10700ff9); - nv_wo32(chan, i + 4, 0x0436086c); - nv_wo32(chan, i + 8, 0x000c001b); + nvkm_wo32(chan->inst, i + 0, 0x10700ff9); + nvkm_wo32(chan->inst, i + 4, 0x0436086c); + nvkm_wo32(chan->inst, i + 8, 0x000c001b); } for (i = 0x30bc; i < 0x30cc; i += 4) - nv_wo32(chan, i, 0x0000ffff); - nv_wo32(chan, 0x3450, 0x3f800000); - nv_wo32(chan, 0x380c, 0x3f800000); - nv_wo32(chan, 0x3820, 0x3f800000); - nv_wo32(chan, 0x384c, 0x40000000); - nv_wo32(chan, 0x3850, 0x3f800000); - nv_wo32(chan, 0x3854, 0x3f000000); - nv_wo32(chan, 0x385c, 0x40000000); - nv_wo32(chan, 0x3860, 0x3f800000); - nv_wo32(chan, 0x3868, 0xbf800000); - nv_wo32(chan, 0x3870, 0xbf800000); + nvkm_wo32(chan->inst, i, 0x0000ffff); + nvkm_wo32(chan->inst, 0x3450, 0x3f800000); + nvkm_wo32(chan->inst, 0x380c, 0x3f800000); + nvkm_wo32(chan->inst, 0x3820, 0x3f800000); + nvkm_wo32(chan->inst, 0x384c, 0x40000000); + nvkm_wo32(chan->inst, 0x3850, 0x3f800000); + nvkm_wo32(chan->inst, 0x3854, 0x3f000000); + nvkm_wo32(chan->inst, 0x385c, 0x40000000); + nvkm_wo32(chan->inst, 0x3860, 0x3f800000); + nvkm_wo32(chan->inst, 0x3868, 0xbf800000); + nvkm_wo32(chan->inst, 0x3870, 0xbf800000); + nvkm_done(chan->inst); return 0; } -static struct nvkm_oclass -nv35_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x35), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv35_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = nv20_gr_context_init, - .fini = nv20_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ -static int -nv35_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) -{ - struct nv20_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - ret = nvkm_gpuobj_new(nv_object(priv), NULL, 32 * 4, 16, - NVOBJ_FLAG_ZERO_ALLOC, &priv->ctxtab); - if (ret) - return ret; +static const struct nvkm_gr_func +nv35_gr = { + .dtor = nv20_gr_dtor, + .oneinit = nv20_gr_oneinit, + .init = nv30_gr_init, + .intr = nv20_gr_intr, + .tile = nv20_gr_tile, + .chan_new = nv35_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv04_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv04_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv04_gr_object }, /* null */ + { -1, -1, 0x0039, &nv04_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv04_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv04_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv04_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv04_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv04_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv04_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv04_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv04_gr_object }, /* imageblit */ + { -1, -1, 0x0362, &nv04_gr_object }, /* surf2d (nv30) */ + { -1, -1, 0x0389, 
&nv04_gr_object }, /* sifm (nv30) */ + { -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */ + { -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */ + { -1, -1, 0x0497, &nv04_gr_object }, /* rankine */ + {} + } +}; - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv20_gr_intr; - nv_engine(priv)->cclass = &nv35_gr_cclass; - nv_engine(priv)->sclass = nv35_gr_sclass; - nv_engine(priv)->tile_prog = nv20_gr_tile_prog; - return 0; +int +nv35_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv20_gr_new_(&nv35_gr, device, index, pgr); } - -struct nvkm_oclass -nv35_gr_oclass = { - .handle = NV_ENGINE(GR, 0x35), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv35_gr_ctor, - .dtor = nv20_gr_dtor, - .init = nv30_gr_init, - .fini = _nvkm_gr_fini, - }, -}; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c index 7e1937980e3f..ffa902ece872 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c @@ -25,26 +25,15 @@ #include "regs.h" #include <core/client.h> -#include <core/handle.h> +#include <core/gpuobj.h> #include <subdev/fb.h> #include <subdev/timer.h> #include <engine/fifo.h> -struct nv40_gr_priv { - struct nvkm_gr base; - u32 size; -}; - -struct nv40_gr_chan { - struct nvkm_gr_chan base; -}; - -static u64 +u64 nv40_gr_units(struct nvkm_gr *gr) { - struct nv40_gr_priv *priv = (void *)gr; - - return nv_rd32(priv, 0x1540); + return nvkm_rd32(gr->engine.subdev.device, 0x1540); } /******************************************************************************* @@ -52,80 +41,29 @@ nv40_gr_units(struct nvkm_gr *gr) ******************************************************************************/ static int -nv40_gr_object_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv40_gr_object_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) { - struct nvkm_gpuobj *obj; - int ret; - - ret = nvkm_gpuobj_create(parent, engine, oclass, 0, parent, - 20, 16, 0, &obj); - *pobject = nv_object(obj); - if (ret) - return ret; - - nv_wo32(obj, 0x00, nv_mclass(obj)); - nv_wo32(obj, 0x04, 0x00000000); - nv_wo32(obj, 0x08, 0x00000000); + int ret = nvkm_gpuobj_new(object->engine->subdev.device, 20, align, + false, parent, pgpuobj); + if (ret == 0) { + nvkm_kmap(*pgpuobj); + nvkm_wo32(*pgpuobj, 0x00, object->oclass); + nvkm_wo32(*pgpuobj, 0x04, 0x00000000); + nvkm_wo32(*pgpuobj, 0x08, 0x00000000); #ifdef __BIG_ENDIAN - nv_mo32(obj, 0x08, 0x01000000, 0x01000000); + nvkm_mo32(*pgpuobj, 0x08, 0x01000000, 0x01000000); #endif - nv_wo32(obj, 0x0c, 0x00000000); - nv_wo32(obj, 0x10, 0x00000000); - return 0; + nvkm_wo32(*pgpuobj, 0x0c, 0x00000000); + nvkm_wo32(*pgpuobj, 0x10, 0x00000000); + nvkm_done(*pgpuobj); + } + return ret; } -static struct nvkm_ofuncs -nv40_gr_ofuncs = { - .ctor = nv40_gr_object_ctor, - .dtor = _nvkm_gpuobj_dtor, - .init = _nvkm_gpuobj_init, - .fini = _nvkm_gpuobj_fini, - .rd32 = _nvkm_gpuobj_rd32, - .wr32 = _nvkm_gpuobj_wr32, -}; - -static struct nvkm_oclass -nv40_gr_sclass[] = { - { 0x0012, &nv40_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv40_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv40_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv40_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv40_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv40_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv40_gr_ofuncs, NULL }, /* gdi */ - { 
0x0062, &nv40_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv40_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv40_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv40_gr_ofuncs, NULL }, /* ifc */ - { 0x009f, &nv40_gr_ofuncs, NULL }, /* imageblit */ - { 0x3062, &nv40_gr_ofuncs, NULL }, /* surf2d (nv40) */ - { 0x3089, &nv40_gr_ofuncs, NULL }, /* sifm (nv40) */ - { 0x309e, &nv40_gr_ofuncs, NULL }, /* swzsurf (nv40) */ - { 0x4097, &nv40_gr_ofuncs, NULL }, /* curie */ - {}, -}; - -static struct nvkm_oclass -nv44_gr_sclass[] = { - { 0x0012, &nv40_gr_ofuncs, NULL }, /* beta1 */ - { 0x0019, &nv40_gr_ofuncs, NULL }, /* clip */ - { 0x0030, &nv40_gr_ofuncs, NULL }, /* null */ - { 0x0039, &nv40_gr_ofuncs, NULL }, /* m2mf */ - { 0x0043, &nv40_gr_ofuncs, NULL }, /* rop */ - { 0x0044, &nv40_gr_ofuncs, NULL }, /* patt */ - { 0x004a, &nv40_gr_ofuncs, NULL }, /* gdi */ - { 0x0062, &nv40_gr_ofuncs, NULL }, /* surf2d */ - { 0x0072, &nv40_gr_ofuncs, NULL }, /* beta4 */ - { 0x0089, &nv40_gr_ofuncs, NULL }, /* sifm */ - { 0x008a, &nv40_gr_ofuncs, NULL }, /* ifc */ - { 0x009f, &nv40_gr_ofuncs, NULL }, /* imageblit */ - { 0x3062, &nv40_gr_ofuncs, NULL }, /* surf2d (nv40) */ - { 0x3089, &nv40_gr_ofuncs, NULL }, /* sifm (nv40) */ - { 0x309e, &nv40_gr_ofuncs, NULL }, /* swzsurf (nv40) */ - { 0x4497, &nv40_gr_ofuncs, NULL }, /* curie */ - {}, +const struct nvkm_object_func +nv40_gr_object = { + .bind = nv40_gr_object_bind, }; /******************************************************************************* @@ -133,361 +71,334 @@ nv44_gr_sclass[] = { ******************************************************************************/ static int -nv40_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv40_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) { - struct nv40_gr_priv *priv = (void *)engine; - struct nv40_gr_chan *chan; - int ret; - - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, priv->size, - 16, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); - if (ret) - return ret; - - nv40_grctx_fill(nv_device(priv), nv_gpuobj(chan)); - nv_wo32(chan, 0x00000, nv_gpuobj(chan)->addr >> 4); - return 0; + struct nv40_gr_chan *chan = nv40_gr_chan(object); + struct nv40_gr *gr = chan->gr; + int ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size, + align, true, parent, pgpuobj); + if (ret == 0) { + chan->inst = (*pgpuobj)->addr; + nvkm_kmap(*pgpuobj); + nv40_grctx_fill(gr->base.engine.subdev.device, *pgpuobj); + nvkm_wo32(*pgpuobj, 0x00000, chan->inst >> 4); + nvkm_done(*pgpuobj); + } + return ret; } static int -nv40_gr_context_fini(struct nvkm_object *object, bool suspend) +nv40_gr_chan_fini(struct nvkm_object *object, bool suspend) { - struct nv40_gr_priv *priv = (void *)object->engine; - struct nv40_gr_chan *chan = (void *)object; - u32 inst = 0x01000000 | nv_gpuobj(chan)->addr >> 4; + struct nv40_gr_chan *chan = nv40_gr_chan(object); + struct nv40_gr *gr = chan->gr; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 inst = 0x01000000 | chan->inst >> 4; int ret = 0; - nv_mask(priv, 0x400720, 0x00000001, 0x00000000); + nvkm_mask(device, 0x400720, 0x00000001, 0x00000000); - if (nv_rd32(priv, 0x40032c) == inst) { + if (nvkm_rd32(device, 0x40032c) == inst) { if (suspend) { - nv_wr32(priv, 0x400720, 0x00000000); - nv_wr32(priv, 0x400784, inst); - nv_mask(priv, 0x400310, 0x00000020, 
0x00000020); - nv_mask(priv, 0x400304, 0x00000001, 0x00000001); - if (!nv_wait(priv, 0x400300, 0x00000001, 0x00000000)) { - u32 insn = nv_rd32(priv, 0x400308); - nv_warn(priv, "ctxprog timeout 0x%08x\n", insn); + nvkm_wr32(device, 0x400720, 0x00000000); + nvkm_wr32(device, 0x400784, inst); + nvkm_mask(device, 0x400310, 0x00000020, 0x00000020); + nvkm_mask(device, 0x400304, 0x00000001, 0x00000001); + if (nvkm_msec(device, 2000, + if (!(nvkm_rd32(device, 0x400300) & 0x00000001)) + break; + ) < 0) { + u32 insn = nvkm_rd32(device, 0x400308); + nvkm_warn(subdev, "ctxprog timeout %08x\n", insn); ret = -EBUSY; } } - nv_mask(priv, 0x40032c, 0x01000000, 0x00000000); + nvkm_mask(device, 0x40032c, 0x01000000, 0x00000000); } - if (nv_rd32(priv, 0x400330) == inst) - nv_mask(priv, 0x400330, 0x01000000, 0x00000000); + if (nvkm_rd32(device, 0x400330) == inst) + nvkm_mask(device, 0x400330, 0x01000000, 0x00000000); - nv_mask(priv, 0x400720, 0x00000001, 0x00000001); + nvkm_mask(device, 0x400720, 0x00000001, 0x00000001); return ret; } -static struct nvkm_oclass -nv40_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x40), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv40_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = nv40_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, +static void * +nv40_gr_chan_dtor(struct nvkm_object *object) +{ + struct nv40_gr_chan *chan = nv40_gr_chan(object); + unsigned long flags; + spin_lock_irqsave(&chan->gr->base.engine.lock, flags); + list_del(&chan->head); + spin_unlock_irqrestore(&chan->gr->base.engine.lock, flags); + return chan; +} + +static const struct nvkm_object_func +nv40_gr_chan = { + .dtor = nv40_gr_chan_dtor, + .fini = nv40_gr_chan_fini, + .bind = nv40_gr_chan_bind, }; +int +nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) +{ + struct nv40_gr *gr = nv40_gr(base); + struct nv40_gr_chan *chan; + unsigned long flags; + + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv40_gr_chan, oclass, &chan->object); + chan->gr = gr; + *pobject = &chan->object; + + spin_lock_irqsave(&chan->gr->base.engine.lock, flags); + list_add(&chan->head, &gr->chan); + spin_unlock_irqrestore(&chan->gr->base.engine.lock, flags); + return 0; +} + /******************************************************************************* * PGRAPH engine/subdev functions ******************************************************************************/ static void -nv40_gr_tile_prog(struct nvkm_engine *engine, int i) +nv40_gr_tile(struct nvkm_gr *base, int i, struct nvkm_fb_tile *tile) { - struct nvkm_fb_tile *tile = &nvkm_fb(engine)->tile.region[i]; - struct nvkm_fifo *pfifo = nvkm_fifo(engine); - struct nv40_gr_priv *priv = (void *)engine; + struct nv40_gr *gr = nv40_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fifo *fifo = device->fifo; unsigned long flags; - pfifo->pause(pfifo, &flags); - nv04_gr_idle(priv); + nvkm_fifo_pause(fifo, &flags); + nv04_gr_idle(&gr->base); - switch (nv_device(priv)->chipset) { + switch (device->chipset) { case 0x40: case 0x41: case 0x42: case 0x43: case 0x45: - case 0x4e: - nv_wr32(priv, NV20_PGRAPH_TSIZE(i), tile->pitch); - nv_wr32(priv, NV20_PGRAPH_TLIMIT(i), tile->limit); - nv_wr32(priv, NV20_PGRAPH_TILE(i), tile->addr); - nv_wr32(priv, NV40_PGRAPH_TSIZE1(i), tile->pitch); - nv_wr32(priv, NV40_PGRAPH_TLIMIT1(i), tile->limit); - 
nv_wr32(priv, NV40_PGRAPH_TILE1(i), tile->addr); - switch (nv_device(priv)->chipset) { + nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch); + nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit); + nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr); + switch (device->chipset) { case 0x40: case 0x45: - nv_wr32(priv, NV20_PGRAPH_ZCOMP(i), tile->zcomp); - nv_wr32(priv, NV40_PGRAPH_ZCOMP1(i), tile->zcomp); + nvkm_wr32(device, NV20_PGRAPH_ZCOMP(i), tile->zcomp); + nvkm_wr32(device, NV40_PGRAPH_ZCOMP1(i), tile->zcomp); break; case 0x41: case 0x42: case 0x43: - nv_wr32(priv, NV41_PGRAPH_ZCOMP0(i), tile->zcomp); - nv_wr32(priv, NV41_PGRAPH_ZCOMP1(i), tile->zcomp); + nvkm_wr32(device, NV41_PGRAPH_ZCOMP0(i), tile->zcomp); + nvkm_wr32(device, NV41_PGRAPH_ZCOMP1(i), tile->zcomp); break; default: break; } break; - case 0x44: - case 0x4a: - nv_wr32(priv, NV20_PGRAPH_TSIZE(i), tile->pitch); - nv_wr32(priv, NV20_PGRAPH_TLIMIT(i), tile->limit); - nv_wr32(priv, NV20_PGRAPH_TILE(i), tile->addr); - break; - case 0x46: - case 0x4c: case 0x47: case 0x49: case 0x4b: - case 0x63: - case 0x67: - case 0x68: - nv_wr32(priv, NV47_PGRAPH_TSIZE(i), tile->pitch); - nv_wr32(priv, NV47_PGRAPH_TLIMIT(i), tile->limit); - nv_wr32(priv, NV47_PGRAPH_TILE(i), tile->addr); - nv_wr32(priv, NV40_PGRAPH_TSIZE1(i), tile->pitch); - nv_wr32(priv, NV40_PGRAPH_TLIMIT1(i), tile->limit); - nv_wr32(priv, NV40_PGRAPH_TILE1(i), tile->addr); - switch (nv_device(priv)->chipset) { - case 0x47: - case 0x49: - case 0x4b: - nv_wr32(priv, NV47_PGRAPH_ZCOMP0(i), tile->zcomp); - nv_wr32(priv, NV47_PGRAPH_ZCOMP1(i), tile->zcomp); - break; - default: - break; - } + nvkm_wr32(device, NV47_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV47_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV47_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch); + nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit); + nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr); + nvkm_wr32(device, NV47_PGRAPH_ZCOMP0(i), tile->zcomp); + nvkm_wr32(device, NV47_PGRAPH_ZCOMP1(i), tile->zcomp); break; default: + WARN_ON(1); break; } - pfifo->start(pfifo, &flags); + nvkm_fifo_start(fifo, &flags); } -static void -nv40_gr_intr(struct nvkm_subdev *subdev) +void +nv40_gr_intr(struct nvkm_gr *base) { - struct nvkm_fifo *pfifo = nvkm_fifo(subdev); - struct nvkm_engine *engine = nv_engine(subdev); - struct nvkm_object *engctx; - struct nvkm_handle *handle = NULL; - struct nv40_gr_priv *priv = (void *)subdev; - u32 stat = nv_rd32(priv, NV03_PGRAPH_INTR); - u32 nsource = nv_rd32(priv, NV03_PGRAPH_NSOURCE); - u32 nstatus = nv_rd32(priv, NV03_PGRAPH_NSTATUS); - u32 inst = nv_rd32(priv, 0x40032c) & 0x000fffff; - u32 addr = nv_rd32(priv, NV04_PGRAPH_TRAPPED_ADDR); + struct nv40_gr *gr = nv40_gr(base); + struct nv40_gr_chan *temp, *chan = NULL; + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR); + u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE); + u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS); + u32 inst = nvkm_rd32(device, 0x40032c) & 0x000fffff; + u32 addr = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_ADDR); u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, NV04_PGRAPH_TRAPPED_DATA); - u32 class = nv_rd32(priv, 0x400160 + subc * 4) 
& 0xffff; + u32 data = nvkm_rd32(device, NV04_PGRAPH_TRAPPED_DATA); + u32 class = nvkm_rd32(device, 0x400160 + subc * 4) & 0xffff; u32 show = stat; - int chid; - - engctx = nvkm_engctx_get(engine, inst); - chid = pfifo->chid(pfifo, engctx); + char msg[128], src[128], sta[128]; + unsigned long flags; - if (stat & NV_PGRAPH_INTR_ERROR) { - if (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD) { - handle = nvkm_handle_get_class(engctx, class); - if (handle && !nv_call(handle->object, mthd, data)) - show &= ~NV_PGRAPH_INTR_ERROR; - nvkm_handle_put(handle); + spin_lock_irqsave(&gr->base.engine.lock, flags); + list_for_each_entry(temp, &gr->chan, head) { + if (temp->inst >> 4 == inst) { + chan = temp; + list_del(&chan->head); + list_add(&chan->head, &gr->chan); + break; } + } + if (stat & NV_PGRAPH_INTR_ERROR) { if (nsource & NV03_PGRAPH_NSOURCE_DMA_VTX_PROTECTION) { - nv_mask(priv, 0x402000, 0, 0); + nvkm_mask(device, 0x402000, 0, 0); } } - nv_wr32(priv, NV03_PGRAPH_INTR, stat); - nv_wr32(priv, NV04_PGRAPH_FIFO, 0x00000001); + nvkm_wr32(device, NV03_PGRAPH_INTR, stat); + nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001); if (show) { - nv_error(priv, "%s", ""); - nvkm_bitfield_print(nv10_gr_intr_name, show); - pr_cont(" nsource:"); - nvkm_bitfield_print(nv04_gr_nsource, nsource); - pr_cont(" nstatus:"); - nvkm_bitfield_print(nv10_gr_nstatus, nstatus); - pr_cont("\n"); - nv_error(priv, - "ch %d [0x%08x %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, inst << 4, nvkm_client_name(engctx), subc, - class, mthd, data); + nvkm_snprintbf(msg, sizeof(msg), nv10_gr_intr_name, show); + nvkm_snprintbf(src, sizeof(src), nv04_gr_nsource, nsource); + nvkm_snprintbf(sta, sizeof(sta), nv10_gr_nstatus, nstatus); + nvkm_error(subdev, "intr %08x [%s] nsource %08x [%s] " + "nstatus %08x [%s] ch %d [%08x %s] subc %d " + "class %04x mthd %04x data %08x\n", + show, msg, nsource, src, nstatus, sta, + chan ? chan->fifo->chid : -1, inst << 4, + chan ? 
chan->fifo->object.client->name : "unknown", + subc, class, mthd, data); } - nvkm_engctx_put(engctx); + spin_unlock_irqrestore(&gr->base.engine.lock, flags); } -static int -nv40_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +int +nv40_gr_init(struct nvkm_gr *base) { - struct nv40_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00001000; - nv_subdev(priv)->intr = nv40_gr_intr; - nv_engine(priv)->cclass = &nv40_gr_cclass; - if (nv44_gr_class(priv)) - nv_engine(priv)->sclass = nv44_gr_sclass; - else - nv_engine(priv)->sclass = nv40_gr_sclass; - nv_engine(priv)->tile_prog = nv40_gr_tile_prog; - - priv->base.units = nv40_gr_units; - return 0; -} - -static int -nv40_gr_init(struct nvkm_object *object) -{ - struct nvkm_engine *engine = nv_engine(object); - struct nvkm_fb *pfb = nvkm_fb(object); - struct nv40_gr_priv *priv = (void *)engine; + struct nv40_gr *gr = nv40_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; int ret, i, j; u32 vramsz; - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; - /* generate and upload context program */ - ret = nv40_grctx_init(nv_device(priv), &priv->size); + ret = nv40_grctx_init(device, &gr->size); if (ret) return ret; /* No context present currently */ - nv_wr32(priv, NV40_PGRAPH_CTXCTL_CUR, 0x00000000); + nvkm_wr32(device, NV40_PGRAPH_CTXCTL_CUR, 0x00000000); - nv_wr32(priv, NV03_PGRAPH_INTR , 0xFFFFFFFF); - nv_wr32(priv, NV40_PGRAPH_INTR_EN, 0xFFFFFFFF); + nvkm_wr32(device, NV03_PGRAPH_INTR , 0xFFFFFFFF); + nvkm_wr32(device, NV40_PGRAPH_INTR_EN, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); - nv_wr32(priv, NV04_PGRAPH_DEBUG_0, 0x00000000); - nv_wr32(priv, NV04_PGRAPH_DEBUG_1, 0x401287c0); - nv_wr32(priv, NV04_PGRAPH_DEBUG_3, 0xe0de8055); - nv_wr32(priv, NV10_PGRAPH_DEBUG_4, 0x00008000); - nv_wr32(priv, NV04_PGRAPH_LIMIT_VIOL_PIX, 0x00be3c5f); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0xFFFFFFFF); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_0, 0x00000000); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_1, 0x401287c0); + nvkm_wr32(device, NV04_PGRAPH_DEBUG_3, 0xe0de8055); + nvkm_wr32(device, NV10_PGRAPH_DEBUG_4, 0x00008000); + nvkm_wr32(device, NV04_PGRAPH_LIMIT_VIOL_PIX, 0x00be3c5f); - nv_wr32(priv, NV10_PGRAPH_CTX_CONTROL, 0x10010100); - nv_wr32(priv, NV10_PGRAPH_STATE , 0xFFFFFFFF); + nvkm_wr32(device, NV10_PGRAPH_CTX_CONTROL, 0x10010100); + nvkm_wr32(device, NV10_PGRAPH_STATE , 0xFFFFFFFF); - j = nv_rd32(priv, 0x1540) & 0xff; + j = nvkm_rd32(device, 0x1540) & 0xff; if (j) { for (i = 0; !(j & 1); j >>= 1, i++) ; - nv_wr32(priv, 0x405000, i); + nvkm_wr32(device, 0x405000, i); } - if (nv_device(priv)->chipset == 0x40) { - nv_wr32(priv, 0x4009b0, 0x83280fff); - nv_wr32(priv, 0x4009b4, 0x000000a0); + if (device->chipset == 0x40) { + nvkm_wr32(device, 0x4009b0, 0x83280fff); + nvkm_wr32(device, 0x4009b4, 0x000000a0); } else { - nv_wr32(priv, 0x400820, 0x83280eff); - nv_wr32(priv, 0x400824, 0x000000a0); + nvkm_wr32(device, 0x400820, 0x83280eff); + nvkm_wr32(device, 0x400824, 0x000000a0); } - switch (nv_device(priv)->chipset) { + switch (device->chipset) { case 0x40: case 0x45: - nv_wr32(priv, 0x4009b8, 0x0078e366); - nv_wr32(priv, 0x4009bc, 0x0000014c); + nvkm_wr32(device, 0x4009b8, 0x0078e366); + nvkm_wr32(device, 0x4009bc, 0x0000014c); break; case 0x41: case 0x42: /* pciid also 0x00Cx */ /* case 0x0120: XXX 
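[Illustrative aside, not part of the patch.] nv40_gr_init() above reads the unit mask at 0x1540 and uses a manual shift loop to find the index of the lowest set bit before writing it to 0x405000. That loop is an open-coded ffs(); an equivalent form, assuming the kernel's ffs() from linux/bitops.h, would be:

    u32 j = nvkm_rd32(device, 0x1540) & 0xff;
    if (j) {
            /* ffs() is 1-based, so subtract one to get the bit index */
            nvkm_wr32(device, 0x405000, ffs(j) - 1);
    }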
(pciid) */ - nv_wr32(priv, 0x400828, 0x007596ff); - nv_wr32(priv, 0x40082c, 0x00000108); + nvkm_wr32(device, 0x400828, 0x007596ff); + nvkm_wr32(device, 0x40082c, 0x00000108); break; case 0x43: - nv_wr32(priv, 0x400828, 0x0072cb77); - nv_wr32(priv, 0x40082c, 0x00000108); + nvkm_wr32(device, 0x400828, 0x0072cb77); + nvkm_wr32(device, 0x40082c, 0x00000108); break; case 0x44: case 0x46: /* G72 */ case 0x4a: case 0x4c: /* G7x-based C51 */ case 0x4e: - nv_wr32(priv, 0x400860, 0); - nv_wr32(priv, 0x400864, 0); + nvkm_wr32(device, 0x400860, 0); + nvkm_wr32(device, 0x400864, 0); break; case 0x47: /* G70 */ case 0x49: /* G71 */ case 0x4b: /* G73 */ - nv_wr32(priv, 0x400828, 0x07830610); - nv_wr32(priv, 0x40082c, 0x0000016A); + nvkm_wr32(device, 0x400828, 0x07830610); + nvkm_wr32(device, 0x40082c, 0x0000016A); break; default: break; } - nv_wr32(priv, 0x400b38, 0x2ffff800); - nv_wr32(priv, 0x400b3c, 0x00006000); + nvkm_wr32(device, 0x400b38, 0x2ffff800); + nvkm_wr32(device, 0x400b3c, 0x00006000); /* Tiling related stuff. */ - switch (nv_device(priv)->chipset) { + switch (device->chipset) { case 0x44: case 0x4a: - nv_wr32(priv, 0x400bc4, 0x1003d888); - nv_wr32(priv, 0x400bbc, 0xb7a7b500); + nvkm_wr32(device, 0x400bc4, 0x1003d888); + nvkm_wr32(device, 0x400bbc, 0xb7a7b500); break; case 0x46: - nv_wr32(priv, 0x400bc4, 0x0000e024); - nv_wr32(priv, 0x400bbc, 0xb7a7b520); + nvkm_wr32(device, 0x400bc4, 0x0000e024); + nvkm_wr32(device, 0x400bbc, 0xb7a7b520); break; case 0x4c: case 0x4e: case 0x67: - nv_wr32(priv, 0x400bc4, 0x1003d888); - nv_wr32(priv, 0x400bbc, 0xb7a7b540); + nvkm_wr32(device, 0x400bc4, 0x1003d888); + nvkm_wr32(device, 0x400bbc, 0xb7a7b540); break; default: break; } - /* Turn all the tiling regions off. */ - for (i = 0; i < pfb->tile.regions; i++) - engine->tile_prog(engine, i); - /* begin RAM config */ - vramsz = nv_device_resource_len(nv_device(priv), 0) - 1; - switch (nv_device(priv)->chipset) { + vramsz = device->func->resource_size(device, 1) - 1; + switch (device->chipset) { case 0x40: - nv_wr32(priv, 0x4009A4, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4009A8, nv_rd32(priv, 0x100204)); - nv_wr32(priv, 0x4069A4, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4069A8, nv_rd32(priv, 0x100204)); - nv_wr32(priv, 0x400820, 0); - nv_wr32(priv, 0x400824, 0); - nv_wr32(priv, 0x400864, vramsz); - nv_wr32(priv, 0x400868, vramsz); + nvkm_wr32(device, 0x4009A4, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4009A8, nvkm_rd32(device, 0x100204)); + nvkm_wr32(device, 0x4069A4, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4069A8, nvkm_rd32(device, 0x100204)); + nvkm_wr32(device, 0x400820, 0); + nvkm_wr32(device, 0x400824, 0); + nvkm_wr32(device, 0x400864, vramsz); + nvkm_wr32(device, 0x400868, vramsz); break; default: - switch (nv_device(priv)->chipset) { + switch (device->chipset) { case 0x41: case 0x42: case 0x43: @@ -495,33 +406,70 @@ nv40_gr_init(struct nvkm_object *object) case 0x4e: case 0x44: case 0x4a: - nv_wr32(priv, 0x4009F0, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4009F4, nv_rd32(priv, 0x100204)); + nvkm_wr32(device, 0x4009F0, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4009F4, nvkm_rd32(device, 0x100204)); break; default: - nv_wr32(priv, 0x400DF0, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x400DF4, nv_rd32(priv, 0x100204)); + nvkm_wr32(device, 0x400DF0, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x400DF4, nvkm_rd32(device, 0x100204)); break; } - nv_wr32(priv, 0x4069F0, nv_rd32(priv, 0x100200)); - nv_wr32(priv, 0x4069F4, nv_rd32(priv, 0x100204)); - nv_wr32(priv, 
0x400840, 0); - nv_wr32(priv, 0x400844, 0); - nv_wr32(priv, 0x4008A0, vramsz); - nv_wr32(priv, 0x4008A4, vramsz); + nvkm_wr32(device, 0x4069F0, nvkm_rd32(device, 0x100200)); + nvkm_wr32(device, 0x4069F4, nvkm_rd32(device, 0x100204)); + nvkm_wr32(device, 0x400840, 0); + nvkm_wr32(device, 0x400844, 0); + nvkm_wr32(device, 0x4008A0, vramsz); + nvkm_wr32(device, 0x4008A4, vramsz); break; } return 0; } -struct nvkm_oclass -nv40_gr_oclass = { - .handle = NV_ENGINE(GR, 0x40), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv40_gr_ctor, - .dtor = _nvkm_gr_dtor, - .init = nv40_gr_init, - .fini = _nvkm_gr_fini, - }, +int +nv40_gr_new_(const struct nvkm_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) +{ + struct nv40_gr *gr; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + *pgr = &gr->base; + INIT_LIST_HEAD(&gr->chan); + + return nvkm_gr_ctor(func, device, index, 0x00001000, true, &gr->base); +} + +static const struct nvkm_gr_func +nv40_gr = { + .init = nv40_gr_init, + .intr = nv40_gr_intr, + .tile = nv40_gr_tile, + .units = nv40_gr_units, + .chan_new = nv40_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv40_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv40_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv40_gr_object }, /* null */ + { -1, -1, 0x0039, &nv40_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv40_gr_object }, /* rop */ + { -1, -1, 0x0044, &nv40_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv40_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv40_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv40_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv40_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv40_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv40_gr_object }, /* imageblit */ + { -1, -1, 0x3062, &nv40_gr_object }, /* surf2d (nv40) */ + { -1, -1, 0x3089, &nv40_gr_object }, /* sifm (nv40) */ + { -1, -1, 0x309e, &nv40_gr_object }, /* swzsurf (nv40) */ + { -1, -1, 0x4097, &nv40_gr_object }, /* curie */ + {} + } }; + +int +nv40_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv40_gr_new_(&nv40_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h index d852bd6de571..2812ed11f877 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h @@ -1,22 +1,45 @@ #ifndef __NV40_GR_H__ #define __NV40_GR_H__ -#include <engine/gr.h> +#define nv40_gr(p) container_of((p), struct nv40_gr, base) +#include "priv.h" -#include <core/device.h> -struct nvkm_gpuobj; +struct nv40_gr { + struct nvkm_gr base; + u32 size; + struct list_head chan; +}; + +int nv40_gr_new_(const struct nvkm_gr_func *, struct nvkm_device *, int index, + struct nvkm_gr **); +int nv40_gr_init(struct nvkm_gr *); +void nv40_gr_intr(struct nvkm_gr *); +u64 nv40_gr_units(struct nvkm_gr *); + +#define nv40_gr_chan(p) container_of((p), struct nv40_gr_chan, object) + +struct nv40_gr_chan { + struct nvkm_object object; + struct nv40_gr *gr; + struct nvkm_fifo_chan *fifo; + u32 inst; + struct list_head head; +}; + +int nv40_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *, + const struct nvkm_oclass *, struct nvkm_object **); + +extern const struct nvkm_object_func nv40_gr_object; /* returns 1 if device is one of the nv4x using the 0x4497 object class, * helpful to determine a number of other hardware features */ static inline int -nv44_gr_class(void *priv) +nv44_gr_class(struct nvkm_device *device) { - struct nvkm_device *device = 
nv_device(priv); - if ((device->chipset & 0xf0) == 0x60) return 1; - return !(0x0baf & (1 << (device->chipset & 0x0f))); + return !(0x0aaf & (1 << (device->chipset & 0x0f))); } int nv40_grctx_init(struct nvkm_device *, u32 *size); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv44.c new file mode 100644 index 000000000000..45ff80254eb4 --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv44.c @@ -0,0 +1,108 @@ +/* + * Copyright 2012 Red Hat Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ben Skeggs + */ +#include "nv40.h" +#include "regs.h" + +#include <subdev/fb.h> +#include <engine/fifo.h> + +static void +nv44_gr_tile(struct nvkm_gr *base, int i, struct nvkm_fb_tile *tile) +{ + struct nv40_gr *gr = nv40_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; + struct nvkm_fifo *fifo = device->fifo; + unsigned long flags; + + nvkm_fifo_pause(fifo, &flags); + nv04_gr_idle(&gr->base); + + switch (device->chipset) { + case 0x44: + case 0x4a: + nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr); + break; + case 0x46: + case 0x4c: + case 0x63: + case 0x67: + case 0x68: + nvkm_wr32(device, NV47_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV47_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV47_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch); + nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit); + nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr); + break; + case 0x4e: + nvkm_wr32(device, NV20_PGRAPH_TSIZE(i), tile->pitch); + nvkm_wr32(device, NV20_PGRAPH_TLIMIT(i), tile->limit); + nvkm_wr32(device, NV20_PGRAPH_TILE(i), tile->addr); + nvkm_wr32(device, NV40_PGRAPH_TSIZE1(i), tile->pitch); + nvkm_wr32(device, NV40_PGRAPH_TLIMIT1(i), tile->limit); + nvkm_wr32(device, NV40_PGRAPH_TILE1(i), tile->addr); + break; + default: + WARN_ON(1); + break; + } + + nvkm_fifo_start(fifo, &flags); +} + +static const struct nvkm_gr_func +nv44_gr = { + .init = nv40_gr_init, + .intr = nv40_gr_intr, + .tile = nv44_gr_tile, + .units = nv40_gr_units, + .chan_new = nv40_gr_chan_new, + .sclass = { + { -1, -1, 0x0012, &nv40_gr_object }, /* beta1 */ + { -1, -1, 0x0019, &nv40_gr_object }, /* clip */ + { -1, -1, 0x0030, &nv40_gr_object }, /* null */ + { -1, -1, 0x0039, &nv40_gr_object }, /* m2mf */ + { -1, -1, 0x0043, &nv40_gr_object 
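[Illustrative aside, not part of the patch.] The nv44_gr_class() change above drops the explicit 0x6x check and tightens the lookup mask to 0x0aaf. The mask acts as a per-low-nibble table: a set bit means that chipset is not one of the 0x4497-class parts. Working the expression through two chipsets that appear elsewhere in this diff (pure constant arithmetic, nothing hypothetical beyond the local variable names):

    /* 0x0aaf = 0b0000101010101111: bits 0-3, 5, 7, 9 and 11 are set      */

    /* chipset 0x40: bit 0 is set   -> !(nonzero) == 0, uses 0x4097 curie */
    int nv40_is_44_class = !(0x0aaf & (1 << (0x40 & 0x0f)));   /* = 0 */

    /* chipset 0x44: bit 4 is clear -> !(0) == 1, uses 0x4497 curie       */
    int nv44_is_44_class = !(0x0aaf & (1 << (0x44 & 0x0f)));   /* = 1 */

This matches the class lists later in the diff: nv40.c exposes 0x4097 while nv44.c exposes 0x4497.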
}, /* rop */ + { -1, -1, 0x0044, &nv40_gr_object }, /* patt */ + { -1, -1, 0x004a, &nv40_gr_object }, /* gdi */ + { -1, -1, 0x0062, &nv40_gr_object }, /* surf2d */ + { -1, -1, 0x0072, &nv40_gr_object }, /* beta4 */ + { -1, -1, 0x0089, &nv40_gr_object }, /* sifm */ + { -1, -1, 0x008a, &nv40_gr_object }, /* ifc */ + { -1, -1, 0x009f, &nv40_gr_object }, /* imageblit */ + { -1, -1, 0x3062, &nv40_gr_object }, /* surf2d (nv40) */ + { -1, -1, 0x3089, &nv40_gr_object }, /* sifm (nv40) */ + { -1, -1, 0x309e, &nv40_gr_object }, /* swzsurf (nv40) */ + { -1, -1, 0x4497, &nv40_gr_object }, /* curie */ + {} + } +}; + +int +nv44_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv40_gr_new_(&nv44_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c index 270d7cd63fc7..b19b912d5787 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c @@ -24,27 +24,13 @@ #include "nv50.h" #include <core/client.h> -#include <core/device.h> -#include <core/handle.h> +#include <core/gpuobj.h> #include <engine/fifo.h> -#include <subdev/timer.h> -struct nv50_gr_priv { - struct nvkm_gr base; - spinlock_t lock; - u32 size; -}; - -struct nv50_gr_chan { - struct nvkm_gr_chan base; -}; - -static u64 +u64 nv50_gr_units(struct nvkm_gr *gr) { - struct nv50_gr_priv *priv = (void *)gr; - - return nv_rd32(priv, 0x1540); + return nvkm_rd32(gr->engine.subdev.device, 0x1540); } /******************************************************************************* @@ -52,86 +38,25 @@ nv50_gr_units(struct nvkm_gr *gr) ******************************************************************************/ static int -nv50_gr_object_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_gr_object_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) { - struct nvkm_gpuobj *obj; - int ret; - - ret = nvkm_gpuobj_create(parent, engine, oclass, 0, parent, - 16, 16, 0, &obj); - *pobject = nv_object(obj); - if (ret) - return ret; - - nv_wo32(obj, 0x00, nv_mclass(obj)); - nv_wo32(obj, 0x04, 0x00000000); - nv_wo32(obj, 0x08, 0x00000000); - nv_wo32(obj, 0x0c, 0x00000000); - return 0; + int ret = nvkm_gpuobj_new(object->engine->subdev.device, 16, + align, false, parent, pgpuobj); + if (ret == 0) { + nvkm_kmap(*pgpuobj); + nvkm_wo32(*pgpuobj, 0x00, object->oclass); + nvkm_wo32(*pgpuobj, 0x04, 0x00000000); + nvkm_wo32(*pgpuobj, 0x08, 0x00000000); + nvkm_wo32(*pgpuobj, 0x0c, 0x00000000); + nvkm_done(*pgpuobj); + } + return ret; } -static struct nvkm_ofuncs -nv50_gr_ofuncs = { - .ctor = nv50_gr_object_ctor, - .dtor = _nvkm_gpuobj_dtor, - .init = _nvkm_gpuobj_init, - .fini = _nvkm_gpuobj_fini, - .rd32 = _nvkm_gpuobj_rd32, - .wr32 = _nvkm_gpuobj_wr32, -}; - -static struct nvkm_oclass -nv50_gr_sclass[] = { - { 0x0030, &nv50_gr_ofuncs }, - { 0x502d, &nv50_gr_ofuncs }, - { 0x5039, &nv50_gr_ofuncs }, - { 0x5097, &nv50_gr_ofuncs }, - { 0x50c0, &nv50_gr_ofuncs }, - {} -}; - -static struct nvkm_oclass -g84_gr_sclass[] = { - { 0x0030, &nv50_gr_ofuncs }, - { 0x502d, &nv50_gr_ofuncs }, - { 0x5039, &nv50_gr_ofuncs }, - { 0x50c0, &nv50_gr_ofuncs }, - { 0x8297, &nv50_gr_ofuncs }, - {} -}; - -static struct nvkm_oclass -gt200_gr_sclass[] = { - { 0x0030, &nv50_gr_ofuncs }, - { 0x502d, &nv50_gr_ofuncs }, - { 0x5039, &nv50_gr_ofuncs }, - { 0x50c0, &nv50_gr_ofuncs }, - { 0x8397, 
&nv50_gr_ofuncs }, - {} -}; - -static struct nvkm_oclass -gt215_gr_sclass[] = { - { 0x0030, &nv50_gr_ofuncs }, - { 0x502d, &nv50_gr_ofuncs }, - { 0x5039, &nv50_gr_ofuncs }, - { 0x50c0, &nv50_gr_ofuncs }, - { 0x8597, &nv50_gr_ofuncs }, - { 0x85c0, &nv50_gr_ofuncs }, - {} -}; - -static struct nvkm_oclass -mcp89_gr_sclass[] = { - { 0x0030, &nv50_gr_ofuncs }, - { 0x502d, &nv50_gr_ofuncs }, - { 0x5039, &nv50_gr_ofuncs }, - { 0x50c0, &nv50_gr_ofuncs }, - { 0x85c0, &nv50_gr_ofuncs }, - { 0x8697, &nv50_gr_ofuncs }, - {} +const struct nvkm_object_func +nv50_gr_object = { + .bind = nv50_gr_object_bind, }; /******************************************************************************* @@ -139,160 +64,43 @@ mcp89_gr_sclass[] = { ******************************************************************************/ static int -nv50_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +nv50_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent, + int align, struct nvkm_gpuobj **pgpuobj) { - struct nv50_gr_priv *priv = (void *)engine; - struct nv50_gr_chan *chan; - int ret; - - ret = nvkm_gr_context_create(parent, engine, oclass, NULL, priv->size, - 0, NVOBJ_FLAG_ZERO_ALLOC, &chan); - *pobject = nv_object(chan); - if (ret) - return ret; - - nv50_grctx_fill(nv_device(priv), nv_gpuobj(chan)); - return 0; + struct nv50_gr *gr = nv50_gr_chan(object)->gr; + int ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size, + align, true, parent, pgpuobj); + if (ret == 0) { + nvkm_kmap(*pgpuobj); + nv50_grctx_fill(gr->base.engine.subdev.device, *pgpuobj); + nvkm_done(*pgpuobj); + } + return ret; } -static struct nvkm_oclass -nv50_gr_cclass = { - .handle = NV_ENGCTX(GR, 0x50), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_gr_context_ctor, - .dtor = _nvkm_gr_context_dtor, - .init = _nvkm_gr_context_init, - .fini = _nvkm_gr_context_fini, - .rd32 = _nvkm_gr_context_rd32, - .wr32 = _nvkm_gr_context_wr32, - }, -}; - -/******************************************************************************* - * PGRAPH engine/subdev functions - ******************************************************************************/ - -static const struct nvkm_bitfield nv50_pgr_status[] = { - { 0x00000001, "BUSY" }, /* set when any bit is set */ - { 0x00000002, "DISPATCH" }, - { 0x00000004, "UNK2" }, - { 0x00000008, "UNK3" }, - { 0x00000010, "UNK4" }, - { 0x00000020, "UNK5" }, - { 0x00000040, "M2MF" }, - { 0x00000080, "UNK7" }, - { 0x00000100, "CTXPROG" }, - { 0x00000200, "VFETCH" }, - { 0x00000400, "CCACHE_PREGEOM" }, - { 0x00000800, "STRMOUT_VATTR_POSTGEOM" }, - { 0x00001000, "VCLIP" }, - { 0x00002000, "RATTR_APLANE" }, - { 0x00004000, "TRAST" }, - { 0x00008000, "CLIPID" }, - { 0x00010000, "ZCULL" }, - { 0x00020000, "ENG2D" }, - { 0x00040000, "RMASK" }, - { 0x00080000, "TPC_RAST" }, - { 0x00100000, "TPC_PROP" }, - { 0x00200000, "TPC_TEX" }, - { 0x00400000, "TPC_GEOM" }, - { 0x00800000, "TPC_MP" }, - { 0x01000000, "ROP" }, - {} -}; - -static const char *const nv50_pgr_vstatus_0[] = { - "VFETCH", "CCACHE", "PREGEOM", "POSTGEOM", "VATTR", "STRMOUT", "VCLIP", - NULL -}; - -static const char *const nv50_pgr_vstatus_1[] = { - "TPC_RAST", "TPC_PROP", "TPC_TEX", "TPC_GEOM", "TPC_MP", NULL -}; - -static const char *const nv50_pgr_vstatus_2[] = { - "RATTR", "APLANE", "TRAST", "CLIPID", "ZCULL", "ENG2D", "RMASK", - "ROP", NULL +static const struct nvkm_object_func +nv50_gr_chan = { + .bind = nv50_gr_chan_bind, }; -static void 
-nvkm_pgr_vstatus_print(struct nv50_gr_priv *priv, int r, - const char *const units[], u32 status) +int +nv50_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch, + const struct nvkm_oclass *oclass, struct nvkm_object **pobject) { - int i; - - nv_error(priv, "PGRAPH_VSTATUS%d: 0x%08x", r, status); + struct nv50_gr *gr = nv50_gr(base); + struct nv50_gr_chan *chan; - for (i = 0; units[i] && status; i++) { - if ((status & 7) == 1) - pr_cont(" %s", units[i]); - status >>= 3; - } - if (status) - pr_cont(" (invalid: 0x%x)", status); - pr_cont("\n"); + if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL))) + return -ENOMEM; + nvkm_object_ctor(&nv50_gr_chan, oclass, &chan->object); + chan->gr = gr; + *pobject = &chan->object; + return 0; } -static int -g84_gr_tlb_flush(struct nvkm_engine *engine) -{ - struct nvkm_timer *ptimer = nvkm_timer(engine); - struct nv50_gr_priv *priv = (void *)engine; - bool idle, timeout = false; - unsigned long flags; - u64 start; - u32 tmp; - - spin_lock_irqsave(&priv->lock, flags); - nv_mask(priv, 0x400500, 0x00000001, 0x00000000); - - start = ptimer->read(ptimer); - do { - idle = true; - - for (tmp = nv_rd32(priv, 0x400380); tmp && idle; tmp >>= 3) { - if ((tmp & 7) == 1) - idle = false; - } - - for (tmp = nv_rd32(priv, 0x400384); tmp && idle; tmp >>= 3) { - if ((tmp & 7) == 1) - idle = false; - } - - for (tmp = nv_rd32(priv, 0x400388); tmp && idle; tmp >>= 3) { - if ((tmp & 7) == 1) - idle = false; - } - } while (!idle && - !(timeout = ptimer->read(ptimer) - start > 2000000000)); - - if (timeout) { - nv_error(priv, "PGRAPH TLB flush idle timeout fail\n"); - - tmp = nv_rd32(priv, 0x400700); - nv_error(priv, "PGRAPH_STATUS : 0x%08x", tmp); - nvkm_bitfield_print(nv50_pgr_status, tmp); - pr_cont("\n"); - - nvkm_pgr_vstatus_print(priv, 0, nv50_pgr_vstatus_0, - nv_rd32(priv, 0x400380)); - nvkm_pgr_vstatus_print(priv, 1, nv50_pgr_vstatus_1, - nv_rd32(priv, 0x400384)); - nvkm_pgr_vstatus_print(priv, 2, nv50_pgr_vstatus_2, - nv_rd32(priv, 0x400388)); - } - - - nv_wr32(priv, 0x100c80, 0x00000001); - if (!nv_wait(priv, 0x100c80, 0x00000001, 0x00000000)) - nv_error(priv, "vm flush timeout\n"); - nv_mask(priv, 0x400500, 0x00000001, 0x00000001); - spin_unlock_irqrestore(&priv->lock, flags); - return timeout ? 
-EBUSY : 0; -} +/******************************************************************************* + * PGRAPH engine/subdev functions + ******************************************************************************/ static const struct nvkm_bitfield nv50_mp_exec_errors[] = { { 0x01, "STACK_UNDERFLOW" }, @@ -427,157 +235,172 @@ static const struct nvkm_bitfield nv50_gr_trap_prop[] = { }; static void -nv50_priv_prop_trap(struct nv50_gr_priv *priv, - u32 ustatus_addr, u32 ustatus, u32 tp) +nv50_gr_prop_trap(struct nv50_gr *gr, u32 ustatus_addr, u32 ustatus, u32 tp) { - u32 e0c = nv_rd32(priv, ustatus_addr + 0x04); - u32 e10 = nv_rd32(priv, ustatus_addr + 0x08); - u32 e14 = nv_rd32(priv, ustatus_addr + 0x0c); - u32 e18 = nv_rd32(priv, ustatus_addr + 0x10); - u32 e1c = nv_rd32(priv, ustatus_addr + 0x14); - u32 e20 = nv_rd32(priv, ustatus_addr + 0x18); - u32 e24 = nv_rd32(priv, ustatus_addr + 0x1c); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 e0c = nvkm_rd32(device, ustatus_addr + 0x04); + u32 e10 = nvkm_rd32(device, ustatus_addr + 0x08); + u32 e14 = nvkm_rd32(device, ustatus_addr + 0x0c); + u32 e18 = nvkm_rd32(device, ustatus_addr + 0x10); + u32 e1c = nvkm_rd32(device, ustatus_addr + 0x14); + u32 e20 = nvkm_rd32(device, ustatus_addr + 0x18); + u32 e24 = nvkm_rd32(device, ustatus_addr + 0x1c); + char msg[128]; /* CUDA memory: l[], g[] or stack. */ if (ustatus & 0x00000080) { if (e18 & 0x80000000) { /* g[] read fault? */ - nv_error(priv, "TRAP_PROP - TP %d - CUDA_FAULT - Global read fault at address %02x%08x\n", + nvkm_error(subdev, "TRAP_PROP - TP %d - CUDA_FAULT - Global read fault at address %02x%08x\n", tp, e14, e10 | ((e18 >> 24) & 0x1f)); e18 &= ~0x1f000000; } else if (e18 & 0xc) { /* g[] write fault? 
*/ - nv_error(priv, "TRAP_PROP - TP %d - CUDA_FAULT - Global write fault at address %02x%08x\n", + nvkm_error(subdev, "TRAP_PROP - TP %d - CUDA_FAULT - Global write fault at address %02x%08x\n", tp, e14, e10 | ((e18 >> 7) & 0x1f)); e18 &= ~0x00000f80; } else { - nv_error(priv, "TRAP_PROP - TP %d - Unknown CUDA fault at address %02x%08x\n", + nvkm_error(subdev, "TRAP_PROP - TP %d - Unknown CUDA fault at address %02x%08x\n", tp, e14, e10); } ustatus &= ~0x00000080; } if (ustatus) { - nv_error(priv, "TRAP_PROP - TP %d -", tp); - nvkm_bitfield_print(nv50_gr_trap_prop, ustatus); - pr_cont(" - Address %02x%08x\n", e14, e10); + nvkm_snprintbf(msg, sizeof(msg), nv50_gr_trap_prop, ustatus); + nvkm_error(subdev, "TRAP_PROP - TP %d - %08x [%s] - " + "Address %02x%08x\n", + tp, ustatus, msg, e14, e10); } - nv_error(priv, "TRAP_PROP - TP %d - e0c: %08x, e18: %08x, e1c: %08x, e20: %08x, e24: %08x\n", + nvkm_error(subdev, "TRAP_PROP - TP %d - e0c: %08x, e18: %08x, e1c: %08x, e20: %08x, e24: %08x\n", tp, e0c, e18, e1c, e20, e24); } static void -nv50_priv_mp_trap(struct nv50_gr_priv *priv, int tpid, int display) +nv50_gr_mp_trap(struct nv50_gr *gr, int tpid, int display) { - u32 units = nv_rd32(priv, 0x1540); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 units = nvkm_rd32(device, 0x1540); u32 addr, mp10, status, pc, oplow, ophigh; + char msg[128]; int i; int mps = 0; for (i = 0; i < 4; i++) { if (!(units & 1 << (i+24))) continue; - if (nv_device(priv)->chipset < 0xa0) + if (device->chipset < 0xa0) addr = 0x408200 + (tpid << 12) + (i << 7); else addr = 0x408100 + (tpid << 11) + (i << 7); - mp10 = nv_rd32(priv, addr + 0x10); - status = nv_rd32(priv, addr + 0x14); + mp10 = nvkm_rd32(device, addr + 0x10); + status = nvkm_rd32(device, addr + 0x14); if (!status) continue; if (display) { - nv_rd32(priv, addr + 0x20); - pc = nv_rd32(priv, addr + 0x24); - oplow = nv_rd32(priv, addr + 0x70); - ophigh = nv_rd32(priv, addr + 0x74); - nv_error(priv, "TRAP_MP_EXEC - " - "TP %d MP %d:", tpid, i); - nvkm_bitfield_print(nv50_mp_exec_errors, status); - pr_cont(" at %06x warp %d, opcode %08x %08x\n", - pc&0xffffff, pc >> 24, - oplow, ophigh); + nvkm_rd32(device, addr + 0x20); + pc = nvkm_rd32(device, addr + 0x24); + oplow = nvkm_rd32(device, addr + 0x70); + ophigh = nvkm_rd32(device, addr + 0x74); + nvkm_snprintbf(msg, sizeof(msg), + nv50_mp_exec_errors, status); + nvkm_error(subdev, "TRAP_MP_EXEC - TP %d MP %d: " + "%08x [%s] at %06x warp %d, " + "opcode %08x %08x\n", + tpid, i, status, msg, pc & 0xffffff, + pc >> 24, oplow, ophigh); } - nv_wr32(priv, addr + 0x10, mp10); - nv_wr32(priv, addr + 0x14, 0); + nvkm_wr32(device, addr + 0x10, mp10); + nvkm_wr32(device, addr + 0x14, 0); mps++; } if (!mps && display) - nv_error(priv, "TRAP_MP_EXEC - TP %d: " + nvkm_error(subdev, "TRAP_MP_EXEC - TP %d: " "No MPs claiming errors?\n", tpid); } static void -nv50_priv_tp_trap(struct nv50_gr_priv *priv, int type, u32 ustatus_old, +nv50_gr_tp_trap(struct nv50_gr *gr, int type, u32 ustatus_old, u32 ustatus_new, int display, const char *name) { + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 units = nvkm_rd32(device, 0x1540); int tps = 0; - u32 units = nv_rd32(priv, 0x1540); int i, r; + char msg[128]; u32 ustatus_addr, ustatus; for (i = 0; i < 16; i++) { if (!(units & (1 << i))) continue; - if (nv_device(priv)->chipset < 0xa0) + if (device->chipset < 0xa0) ustatus_addr = ustatus_old + (i << 12); else ustatus_addr = 
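[Illustrative aside, not part of the patch.] nv50_gr_mp_trap() above derives each MP's register window from the unit indices: pre-NVA0 parts use a 4 KiB per-TP stride starting at 0x408200, NVA0 and later use a 2 KiB stride starting at 0x408100, with a 128-byte per-MP stride in both cases. Plugging in sample indices (tpid and i values are hypothetical, chosen only to show the arithmetic):

    u32 tpid = 2, i = 1;    /* hypothetical TP and MP indices */
    u32 addr;

    if (device->chipset < 0xa0)
            addr = 0x408200 + (tpid << 12) + (i << 7);   /* = 0x40a280 */
    else
            addr = 0x408100 + (tpid << 11) + (i << 7);   /* = 0x409180 */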
ustatus_new + (i << 11); - ustatus = nv_rd32(priv, ustatus_addr) & 0x7fffffff; + ustatus = nvkm_rd32(device, ustatus_addr) & 0x7fffffff; if (!ustatus) continue; tps++; switch (type) { case 6: /* texture error... unknown for now */ if (display) { - nv_error(priv, "magic set %d:\n", i); + nvkm_error(subdev, "magic set %d:\n", i); for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4) - nv_error(priv, "\t0x%08x: 0x%08x\n", r, - nv_rd32(priv, r)); + nvkm_error(subdev, "\t%08x: %08x\n", r, + nvkm_rd32(device, r)); if (ustatus) { - nv_error(priv, "%s - TP%d:", name, i); - nvkm_bitfield_print(nv50_tex_traps, - ustatus); - pr_cont("\n"); + nvkm_snprintbf(msg, sizeof(msg), + nv50_tex_traps, ustatus); + nvkm_error(subdev, + "%s - TP%d: %08x [%s]\n", + name, i, ustatus, msg); ustatus = 0; } } break; case 7: /* MP error */ if (ustatus & 0x04030000) { - nv50_priv_mp_trap(priv, i, display); + nv50_gr_mp_trap(gr, i, display); ustatus &= ~0x04030000; } if (ustatus && display) { - nv_error(priv, "%s - TP%d:", name, i); - nvkm_bitfield_print(nv50_mpc_traps, ustatus); - pr_cont("\n"); + nvkm_snprintbf(msg, sizeof(msg), + nv50_mpc_traps, ustatus); + nvkm_error(subdev, "%s - TP%d: %08x [%s]\n", + name, i, ustatus, msg); ustatus = 0; } break; case 8: /* PROP error */ if (display) - nv50_priv_prop_trap( - priv, ustatus_addr, ustatus, i); + nv50_gr_prop_trap( + gr, ustatus_addr, ustatus, i); ustatus = 0; break; } if (ustatus) { if (display) - nv_error(priv, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus); + nvkm_error(subdev, "%s - TP%d: Unhandled ustatus %08x\n", name, i, ustatus); } - nv_wr32(priv, ustatus_addr, 0xc0000000); + nvkm_wr32(device, ustatus_addr, 0xc0000000); } if (!tps && display) - nv_warn(priv, "%s - No TPs claiming errors?\n", name); + nvkm_warn(subdev, "%s - No TPs claiming errors?\n", name); } static int -nv50_gr_trap_handler(struct nv50_gr_priv *priv, u32 display, - int chid, u64 inst, struct nvkm_object *engctx) +nv50_gr_trap_handler(struct nv50_gr *gr, u32 display, + int chid, u64 inst, const char *name) { - u32 status = nv_rd32(priv, 0x400108); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + u32 status = nvkm_rd32(device, 0x400108); u32 ustatus; + char msg[128]; if (!status && display) { - nv_error(priv, "TRAP: no units reporting traps?\n"); + nvkm_error(subdev, "TRAP: no units reporting traps?\n"); return 1; } @@ -585,71 +408,72 @@ nv50_gr_trap_handler(struct nv50_gr_priv *priv, u32 display, * COND, QUERY. If you get a trap from it, the command is still stuck * in DISPATCH and you need to do something about it. */ if (status & 0x001) { - ustatus = nv_rd32(priv, 0x400804) & 0x7fffffff; + ustatus = nvkm_rd32(device, 0x400804) & 0x7fffffff; if (!ustatus && display) { - nv_error(priv, "TRAP_DISPATCH - no ustatus?\n"); + nvkm_error(subdev, "TRAP_DISPATCH - no ustatus?\n"); } - nv_wr32(priv, 0x400500, 0x00000000); + nvkm_wr32(device, 0x400500, 0x00000000); /* Known to be triggered by screwed up NOTIFY and COND... 
*/ if (ustatus & 0x00000001) { - u32 addr = nv_rd32(priv, 0x400808); + u32 addr = nvkm_rd32(device, 0x400808); u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 datal = nv_rd32(priv, 0x40080c); - u32 datah = nv_rd32(priv, 0x400810); - u32 class = nv_rd32(priv, 0x400814); - u32 r848 = nv_rd32(priv, 0x400848); + u32 datal = nvkm_rd32(device, 0x40080c); + u32 datah = nvkm_rd32(device, 0x400810); + u32 class = nvkm_rd32(device, 0x400814); + u32 r848 = nvkm_rd32(device, 0x400848); - nv_error(priv, "TRAP DISPATCH_FAULT\n"); + nvkm_error(subdev, "TRAP DISPATCH_FAULT\n"); if (display && (addr & 0x80000000)) { - nv_error(priv, - "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x%08x 400808 0x%08x 400848 0x%08x\n", - chid, inst, - nvkm_client_name(engctx), subc, - class, mthd, datah, datal, addr, r848); + nvkm_error(subdev, + "ch %d [%010llx %s] subc %d " + "class %04x mthd %04x data %08x%08x " + "400808 %08x 400848 %08x\n", + chid, inst, name, subc, class, mthd, + datah, datal, addr, r848); } else if (display) { - nv_error(priv, "no stuck command?\n"); + nvkm_error(subdev, "no stuck command?\n"); } - nv_wr32(priv, 0x400808, 0); - nv_wr32(priv, 0x4008e8, nv_rd32(priv, 0x4008e8) & 3); - nv_wr32(priv, 0x400848, 0); + nvkm_wr32(device, 0x400808, 0); + nvkm_wr32(device, 0x4008e8, nvkm_rd32(device, 0x4008e8) & 3); + nvkm_wr32(device, 0x400848, 0); ustatus &= ~0x00000001; } if (ustatus & 0x00000002) { - u32 addr = nv_rd32(priv, 0x40084c); + u32 addr = nvkm_rd32(device, 0x40084c); u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, 0x40085c); - u32 class = nv_rd32(priv, 0x400814); + u32 data = nvkm_rd32(device, 0x40085c); + u32 class = nvkm_rd32(device, 0x400814); - nv_error(priv, "TRAP DISPATCH_QUERY\n"); + nvkm_error(subdev, "TRAP DISPATCH_QUERY\n"); if (display && (addr & 0x80000000)) { - nv_error(priv, - "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x 40084c 0x%08x\n", - chid, inst, - nvkm_client_name(engctx), subc, - class, mthd, data, addr); + nvkm_error(subdev, + "ch %d [%010llx %s] subc %d " + "class %04x mthd %04x data %08x " + "40084c %08x\n", chid, inst, name, + subc, class, mthd, data, addr); } else if (display) { - nv_error(priv, "no stuck command?\n"); + nvkm_error(subdev, "no stuck command?\n"); } - nv_wr32(priv, 0x40084c, 0); + nvkm_wr32(device, 0x40084c, 0); ustatus &= ~0x00000002; } if (ustatus && display) { - nv_error(priv, "TRAP_DISPATCH (unknown " - "0x%08x)\n", ustatus); + nvkm_error(subdev, "TRAP_DISPATCH " + "(unknown %08x)\n", ustatus); } - nv_wr32(priv, 0x400804, 0xc0000000); - nv_wr32(priv, 0x400108, 0x001); + nvkm_wr32(device, 0x400804, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x001); status &= ~0x001; if (!status) return 0; @@ -657,81 +481,91 @@ nv50_gr_trap_handler(struct nv50_gr_priv *priv, u32 display, /* M2MF: Memory to memory copy engine. 
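[Illustrative aside, not part of the patch.] Both dispatch trap paths above, like the PGRAPH interrupt handlers earlier in the diff, decode the trapped address the same way: bits 16..18 give the subchannel, bits 2..12 the method offset, and bit 31 marks the record as valid. For a hypothetical trapped value of 0x80021504 the decode works out as:

    u32 addr  = 0x80021504;                 /* hypothetical trapped value */
    u32 subc  = (addr & 0x00070000) >> 16;  /* = 2                        */
    u32 mthd  = (addr & 0x00001ffc);        /* = 0x1504                   */
    bool ok   = !!(addr & 0x80000000);      /* = true, record is valid    */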
*/ if (status & 0x002) { - u32 ustatus = nv_rd32(priv, 0x406800) & 0x7fffffff; + u32 ustatus = nvkm_rd32(device, 0x406800) & 0x7fffffff; if (display) { - nv_error(priv, "TRAP_M2MF"); - nvkm_bitfield_print(nv50_gr_trap_m2mf, ustatus); - pr_cont("\n"); - nv_error(priv, "TRAP_M2MF %08x %08x %08x %08x\n", - nv_rd32(priv, 0x406804), nv_rd32(priv, 0x406808), - nv_rd32(priv, 0x40680c), nv_rd32(priv, 0x406810)); - + nvkm_snprintbf(msg, sizeof(msg), + nv50_gr_trap_m2mf, ustatus); + nvkm_error(subdev, "TRAP_M2MF %08x [%s]\n", + ustatus, msg); + nvkm_error(subdev, "TRAP_M2MF %08x %08x %08x %08x\n", + nvkm_rd32(device, 0x406804), + nvkm_rd32(device, 0x406808), + nvkm_rd32(device, 0x40680c), + nvkm_rd32(device, 0x406810)); } /* No sane way found yet -- just reset the bugger. */ - nv_wr32(priv, 0x400040, 2); - nv_wr32(priv, 0x400040, 0); - nv_wr32(priv, 0x406800, 0xc0000000); - nv_wr32(priv, 0x400108, 0x002); + nvkm_wr32(device, 0x400040, 2); + nvkm_wr32(device, 0x400040, 0); + nvkm_wr32(device, 0x406800, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x002); status &= ~0x002; } /* VFETCH: Fetches data from vertex buffers. */ if (status & 0x004) { - u32 ustatus = nv_rd32(priv, 0x400c04) & 0x7fffffff; + u32 ustatus = nvkm_rd32(device, 0x400c04) & 0x7fffffff; if (display) { - nv_error(priv, "TRAP_VFETCH"); - nvkm_bitfield_print(nv50_gr_trap_vfetch, ustatus); - pr_cont("\n"); - nv_error(priv, "TRAP_VFETCH %08x %08x %08x %08x\n", - nv_rd32(priv, 0x400c00), nv_rd32(priv, 0x400c08), - nv_rd32(priv, 0x400c0c), nv_rd32(priv, 0x400c10)); + nvkm_snprintbf(msg, sizeof(msg), + nv50_gr_trap_vfetch, ustatus); + nvkm_error(subdev, "TRAP_VFETCH %08x [%s]\n", + ustatus, msg); + nvkm_error(subdev, "TRAP_VFETCH %08x %08x %08x %08x\n", + nvkm_rd32(device, 0x400c00), + nvkm_rd32(device, 0x400c08), + nvkm_rd32(device, 0x400c0c), + nvkm_rd32(device, 0x400c10)); } - nv_wr32(priv, 0x400c04, 0xc0000000); - nv_wr32(priv, 0x400108, 0x004); + nvkm_wr32(device, 0x400c04, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x004); status &= ~0x004; } /* STRMOUT: DirectX streamout / OpenGL transform feedback. */ if (status & 0x008) { - ustatus = nv_rd32(priv, 0x401800) & 0x7fffffff; + ustatus = nvkm_rd32(device, 0x401800) & 0x7fffffff; if (display) { - nv_error(priv, "TRAP_STRMOUT"); - nvkm_bitfield_print(nv50_gr_trap_strmout, ustatus); - pr_cont("\n"); - nv_error(priv, "TRAP_STRMOUT %08x %08x %08x %08x\n", - nv_rd32(priv, 0x401804), nv_rd32(priv, 0x401808), - nv_rd32(priv, 0x40180c), nv_rd32(priv, 0x401810)); - + nvkm_snprintbf(msg, sizeof(msg), + nv50_gr_trap_strmout, ustatus); + nvkm_error(subdev, "TRAP_STRMOUT %08x [%s]\n", + ustatus, msg); + nvkm_error(subdev, "TRAP_STRMOUT %08x %08x %08x %08x\n", + nvkm_rd32(device, 0x401804), + nvkm_rd32(device, 0x401808), + nvkm_rd32(device, 0x40180c), + nvkm_rd32(device, 0x401810)); } /* No sane way found yet -- just reset the bugger. */ - nv_wr32(priv, 0x400040, 0x80); - nv_wr32(priv, 0x400040, 0); - nv_wr32(priv, 0x401800, 0xc0000000); - nv_wr32(priv, 0x400108, 0x008); + nvkm_wr32(device, 0x400040, 0x80); + nvkm_wr32(device, 0x400040, 0); + nvkm_wr32(device, 0x401800, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x008); status &= ~0x008; } /* CCACHE: Handles code and c[] caches and fills them. 
*/ if (status & 0x010) { - ustatus = nv_rd32(priv, 0x405018) & 0x7fffffff; + ustatus = nvkm_rd32(device, 0x405018) & 0x7fffffff; if (display) { - nv_error(priv, "TRAP_CCACHE"); - nvkm_bitfield_print(nv50_gr_trap_ccache, ustatus); - pr_cont("\n"); - nv_error(priv, "TRAP_CCACHE %08x %08x %08x %08x" - " %08x %08x %08x\n", - nv_rd32(priv, 0x405000), nv_rd32(priv, 0x405004), - nv_rd32(priv, 0x405008), nv_rd32(priv, 0x40500c), - nv_rd32(priv, 0x405010), nv_rd32(priv, 0x405014), - nv_rd32(priv, 0x40501c)); - + nvkm_snprintbf(msg, sizeof(msg), + nv50_gr_trap_ccache, ustatus); + nvkm_error(subdev, "TRAP_CCACHE %08x [%s]\n", + ustatus, msg); + nvkm_error(subdev, "TRAP_CCACHE %08x %08x %08x %08x " + "%08x %08x %08x\n", + nvkm_rd32(device, 0x405000), + nvkm_rd32(device, 0x405004), + nvkm_rd32(device, 0x405008), + nvkm_rd32(device, 0x40500c), + nvkm_rd32(device, 0x405010), + nvkm_rd32(device, 0x405014), + nvkm_rd32(device, 0x40501c)); } - nv_wr32(priv, 0x405018, 0xc0000000); - nv_wr32(priv, 0x400108, 0x010); + nvkm_wr32(device, 0x405018, 0xc0000000); + nvkm_wr32(device, 0x400108, 0x010); status &= ~0x010; } @@ -739,239 +573,174 @@ nv50_gr_trap_handler(struct nv50_gr_priv *priv, u32 display, * remaining, so try to handle it anyway. Perhaps related to that * unknown DMA slot on tesla? */ if (status & 0x20) { - ustatus = nv_rd32(priv, 0x402000) & 0x7fffffff; + ustatus = nvkm_rd32(device, 0x402000) & 0x7fffffff; if (display) - nv_error(priv, "TRAP_UNKC04 0x%08x\n", ustatus); - nv_wr32(priv, 0x402000, 0xc0000000); + nvkm_error(subdev, "TRAP_UNKC04 %08x\n", ustatus); + nvkm_wr32(device, 0x402000, 0xc0000000); /* no status modifiction on purpose */ } /* TEXTURE: CUDA texturing units */ if (status & 0x040) { - nv50_priv_tp_trap(priv, 6, 0x408900, 0x408600, display, + nv50_gr_tp_trap(gr, 6, 0x408900, 0x408600, display, "TRAP_TEXTURE"); - nv_wr32(priv, 0x400108, 0x040); + nvkm_wr32(device, 0x400108, 0x040); status &= ~0x040; } /* MP: CUDA execution engines. */ if (status & 0x080) { - nv50_priv_tp_trap(priv, 7, 0x408314, 0x40831c, display, + nv50_gr_tp_trap(gr, 7, 0x408314, 0x40831c, display, "TRAP_MP"); - nv_wr32(priv, 0x400108, 0x080); + nvkm_wr32(device, 0x400108, 0x080); status &= ~0x080; } /* PROP: Handles TP-initiated uncached memory accesses: * l[], g[], stack, 2d surfaces, render targets. 
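[Illustrative aside, not part of the patch.] The trap paths above all follow the same report-and-ack shape: read the unit's ustatus (masking off bit 31), render the set bits through nvkm_snprintbf() into a stack buffer, log a single line, then write 0xc0000000 to the unit and the unit's bit to 0x400108 to acknowledge. A standalone sketch of what such bitfield rendering amounts to; this is an illustrative helper with made-up names, not nvkm_snprintbf() itself, and it assumes the kernel's scnprintf():

    struct example_bitfield { u32 mask; const char *name; };

    static void
    example_snprintbf(char *buf, size_t len,
                      const struct example_bitfield *bf, u32 value)
    {
            size_t off = 0;
            buf[0] = '\0';
            /* append the name of every set bit, space separated */
            for (; bf->name; bf++) {
                    if (value & bf->mask)
                            off += scnprintf(buf + off, len - off, "%s%s",
                                             off ? " " : "", bf->name);
            }
    }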
*/ if (status & 0x100) { - nv50_priv_tp_trap(priv, 8, 0x408e08, 0x408708, display, + nv50_gr_tp_trap(gr, 8, 0x408e08, 0x408708, display, "TRAP_PROP"); - nv_wr32(priv, 0x400108, 0x100); + nvkm_wr32(device, 0x400108, 0x100); status &= ~0x100; } if (status) { if (display) - nv_error(priv, "TRAP: unknown 0x%08x\n", status); - nv_wr32(priv, 0x400108, status); + nvkm_error(subdev, "TRAP: unknown %08x\n", status); + nvkm_wr32(device, 0x400108, status); } return 1; } -static void -nv50_gr_intr(struct nvkm_subdev *subdev) +void +nv50_gr_intr(struct nvkm_gr *base) { - struct nvkm_fifo *pfifo = nvkm_fifo(subdev); - struct nvkm_engine *engine = nv_engine(subdev); - struct nvkm_object *engctx; - struct nvkm_handle *handle = NULL; - struct nv50_gr_priv *priv = (void *)subdev; - u32 stat = nv_rd32(priv, 0x400100); - u32 inst = nv_rd32(priv, 0x40032c) & 0x0fffffff; - u32 addr = nv_rd32(priv, 0x400704); + struct nv50_gr *gr = nv50_gr(base); + struct nvkm_subdev *subdev = &gr->base.engine.subdev; + struct nvkm_device *device = subdev->device; + struct nvkm_fifo_chan *chan; + u32 stat = nvkm_rd32(device, 0x400100); + u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff; + u32 addr = nvkm_rd32(device, 0x400704); u32 subc = (addr & 0x00070000) >> 16; u32 mthd = (addr & 0x00001ffc); - u32 data = nv_rd32(priv, 0x400708); - u32 class = nv_rd32(priv, 0x400814); + u32 data = nvkm_rd32(device, 0x400708); + u32 class = nvkm_rd32(device, 0x400814); u32 show = stat, show_bitfield = stat; - int chid; - - engctx = nvkm_engctx_get(engine, inst); - chid = pfifo->chid(pfifo, engctx); - - if (stat & 0x00000010) { - handle = nvkm_handle_get_class(engctx, class); - if (handle && !nv_call(handle->object, mthd, data)) - show &= ~0x00000010; - nvkm_handle_put(handle); + const struct nvkm_enum *en; + unsigned long flags; + const char *name = "unknown"; + char msg[128]; + int chid = -1; + + chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags); + if (chan) { + name = chan->object.client->name; + chid = chan->chid; } if (show & 0x00100000) { - u32 ecode = nv_rd32(priv, 0x400110); - nv_error(priv, "DATA_ERROR "); - nvkm_enum_print(nv50_data_error_names, ecode); - pr_cont("\n"); + u32 ecode = nvkm_rd32(device, 0x400110); + en = nvkm_enum_find(nv50_data_error_names, ecode); + nvkm_error(subdev, "DATA_ERROR %08x [%s]\n", + ecode, en ? 
en->name : ""); show_bitfield &= ~0x00100000; } if (stat & 0x00200000) { - if (!nv50_gr_trap_handler(priv, show, chid, (u64)inst << 12, - engctx)) + if (!nv50_gr_trap_handler(gr, show, chid, (u64)inst << 12, name)) show &= ~0x00200000; show_bitfield &= ~0x00200000; } - nv_wr32(priv, 0x400100, stat); - nv_wr32(priv, 0x400500, 0x00010001); + nvkm_wr32(device, 0x400100, stat); + nvkm_wr32(device, 0x400500, 0x00010001); if (show) { show &= show_bitfield; - if (show) { - nv_error(priv, "%s", ""); - nvkm_bitfield_print(nv50_gr_intr_name, show); - pr_cont("\n"); - } - nv_error(priv, - "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n", - chid, (u64)inst << 12, nvkm_client_name(engctx), - subc, class, mthd, data); + nvkm_snprintbf(msg, sizeof(msg), nv50_gr_intr_name, show); + nvkm_error(subdev, "%08x [%s] ch %d [%010llx %s] subc %d " + "class %04x mthd %04x data %08x\n", + stat, msg, chid, (u64)inst << 12, name, + subc, class, mthd, data); } - if (nv_rd32(priv, 0x400824) & (1 << 31)) - nv_wr32(priv, 0x400824, nv_rd32(priv, 0x400824) & ~(1 << 31)); + if (nvkm_rd32(device, 0x400824) & (1 << 31)) + nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31)); - nvkm_engctx_put(engctx); + nvkm_fifo_chan_put(device->fifo, flags, &chan); } -static int -nv50_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine, - struct nvkm_oclass *oclass, void *data, u32 size, - struct nvkm_object **pobject) +int +nv50_gr_init(struct nvkm_gr *base) { - struct nv50_gr_priv *priv; - int ret; - - ret = nvkm_gr_create(parent, engine, oclass, true, &priv); - *pobject = nv_object(priv); - if (ret) - return ret; - - nv_subdev(priv)->unit = 0x00201000; - nv_subdev(priv)->intr = nv50_gr_intr; - nv_engine(priv)->cclass = &nv50_gr_cclass; - - priv->base.units = nv50_gr_units; - - switch (nv_device(priv)->chipset) { - case 0x50: - nv_engine(priv)->sclass = nv50_gr_sclass; - break; - case 0x84: - case 0x86: - case 0x92: - case 0x94: - case 0x96: - case 0x98: - nv_engine(priv)->sclass = g84_gr_sclass; - break; - case 0xa0: - case 0xaa: - case 0xac: - nv_engine(priv)->sclass = gt200_gr_sclass; - break; - case 0xa3: - case 0xa5: - case 0xa8: - nv_engine(priv)->sclass = gt215_gr_sclass; - break; - case 0xaf: - nv_engine(priv)->sclass = mcp89_gr_sclass; - break; - - } - - /* unfortunate hw bug workaround... 
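[Illustrative aside, not part of the patch.] nv50_gr_intr() above resolves the offending channel by instance address rather than through engine contexts: the 28-bit value read from 0x40032c addresses the context in 4 KiB units, so it is shifted up by 12 before being handed to nvkm_fifo_chan_inst(), and the returned channel (if any) supplies the chid and client name for the log line. The lookup in isolation, with the surrounding logging trimmed:

    struct nvkm_fifo_chan *chan;
    unsigned long flags;
    const char *name = "unknown";
    int chid = -1;
    u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff;

    /* instance is stored in units of 0x1000 bytes */
    chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
    if (chan) {
            name = chan->object.client->name;
            chid = chan->chid;
    }

    /* ... report using chid/name ... */

    nvkm_fifo_chan_put(device->fifo, flags, &chan);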
*/ - if (nv_device(priv)->chipset != 0x50 && - nv_device(priv)->chipset != 0xac) - nv_engine(priv)->tlb_flush = g84_gr_tlb_flush; - - spin_lock_init(&priv->lock); - return 0; -} - -static int -nv50_gr_init(struct nvkm_object *object) -{ - struct nv50_gr_priv *priv = (void *)object; + struct nv50_gr *gr = nv50_gr(base); + struct nvkm_device *device = gr->base.engine.subdev.device; int ret, units, i; - ret = nvkm_gr_init(&priv->base); - if (ret) - return ret; - /* NV_PGRAPH_DEBUG_3_HW_CTX_SWITCH_ENABLED */ - nv_wr32(priv, 0x40008c, 0x00000004); + nvkm_wr32(device, 0x40008c, 0x00000004); /* reset/enable traps and interrupts */ - nv_wr32(priv, 0x400804, 0xc0000000); - nv_wr32(priv, 0x406800, 0xc0000000); - nv_wr32(priv, 0x400c04, 0xc0000000); - nv_wr32(priv, 0x401800, 0xc0000000); - nv_wr32(priv, 0x405018, 0xc0000000); - nv_wr32(priv, 0x402000, 0xc0000000); - - units = nv_rd32(priv, 0x001540); + nvkm_wr32(device, 0x400804, 0xc0000000); + nvkm_wr32(device, 0x406800, 0xc0000000); + nvkm_wr32(device, 0x400c04, 0xc0000000); + nvkm_wr32(device, 0x401800, 0xc0000000); + nvkm_wr32(device, 0x405018, 0xc0000000); + nvkm_wr32(device, 0x402000, 0xc0000000); + + units = nvkm_rd32(device, 0x001540); for (i = 0; i < 16; i++) { if (!(units & (1 << i))) continue; - if (nv_device(priv)->chipset < 0xa0) { - nv_wr32(priv, 0x408900 + (i << 12), 0xc0000000); - nv_wr32(priv, 0x408e08 + (i << 12), 0xc0000000); - nv_wr32(priv, 0x408314 + (i << 12), 0xc0000000); + if (device->chipset < 0xa0) { + nvkm_wr32(device, 0x408900 + (i << 12), 0xc0000000); + nvkm_wr32(device, 0x408e08 + (i << 12), 0xc0000000); + nvkm_wr32(device, 0x408314 + (i << 12), 0xc0000000); } else { - nv_wr32(priv, 0x408600 + (i << 11), 0xc0000000); - nv_wr32(priv, 0x408708 + (i << 11), 0xc0000000); - nv_wr32(priv, 0x40831c + (i << 11), 0xc0000000); + nvkm_wr32(device, 0x408600 + (i << 11), 0xc0000000); + nvkm_wr32(device, 0x408708 + (i << 11), 0xc0000000); + nvkm_wr32(device, 0x40831c + (i << 11), 0xc0000000); } } - nv_wr32(priv, 0x400108, 0xffffffff); - nv_wr32(priv, 0x400138, 0xffffffff); - nv_wr32(priv, 0x400100, 0xffffffff); - nv_wr32(priv, 0x40013c, 0xffffffff); - nv_wr32(priv, 0x400500, 0x00010001); + nvkm_wr32(device, 0x400108, 0xffffffff); + nvkm_wr32(device, 0x400138, 0xffffffff); + nvkm_wr32(device, 0x400100, 0xffffffff); + nvkm_wr32(device, 0x40013c, 0xffffffff); + nvkm_wr32(device, 0x400500, 0x00010001); /* upload context program, initialise ctxctl defaults */ - ret = nv50_grctx_init(nv_device(priv), &priv->size); + ret = nv50_grctx_init(device, &gr->size); if (ret) return ret; - nv_wr32(priv, 0x400824, 0x00000000); - nv_wr32(priv, 0x400828, 0x00000000); - nv_wr32(priv, 0x40082c, 0x00000000); - nv_wr32(priv, 0x400830, 0x00000000); - nv_wr32(priv, 0x40032c, 0x00000000); - nv_wr32(priv, 0x400330, 0x00000000); + nvkm_wr32(device, 0x400824, 0x00000000); + nvkm_wr32(device, 0x400828, 0x00000000); + nvkm_wr32(device, 0x40082c, 0x00000000); + nvkm_wr32(device, 0x400830, 0x00000000); + nvkm_wr32(device, 0x40032c, 0x00000000); + nvkm_wr32(device, 0x400330, 0x00000000); /* some unknown zcull magic */ - switch (nv_device(priv)->chipset & 0xf0) { + switch (device->chipset & 0xf0) { case 0x50: case 0x80: case 0x90: - nv_wr32(priv, 0x402ca8, 0x00000800); + nvkm_wr32(device, 0x402ca8, 0x00000800); break; case 0xa0: default: - if (nv_device(priv)->chipset == 0xa0 || - nv_device(priv)->chipset == 0xaa || - nv_device(priv)->chipset == 0xac) { - nv_wr32(priv, 0x402ca8, 0x00000802); + if (device->chipset == 0xa0 || + device->chipset == 0xaa || + 
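[Illustrative aside, not part of the patch.] nv50_gr_init() above walks the 16 possible TPs and only touches units whose bit is set in 0x001540, choosing a 4 KiB or 2 KiB per-TP stride by chipset. The same iteration could be written with the kernel's bit iterator from linux/bitops.h; an equivalent of just the loop, not part of the patch:

    unsigned long units = nvkm_rd32(device, 0x001540);
    unsigned int i;

    for_each_set_bit(i, &units, 16) {
            if (device->chipset < 0xa0) {
                    nvkm_wr32(device, 0x408900 + (i << 12), 0xc0000000);
                    nvkm_wr32(device, 0x408e08 + (i << 12), 0xc0000000);
                    nvkm_wr32(device, 0x408314 + (i << 12), 0xc0000000);
            } else {
                    nvkm_wr32(device, 0x408600 + (i << 11), 0xc0000000);
                    nvkm_wr32(device, 0x408708 + (i << 11), 0xc0000000);
                    nvkm_wr32(device, 0x40831c + (i << 11), 0xc0000000);
            }
    }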
device->chipset == 0xac) { + nvkm_wr32(device, 0x402ca8, 0x00000802); } else { - nv_wr32(priv, 0x402cc0, 0x00000000); - nv_wr32(priv, 0x402ca8, 0x00000002); + nvkm_wr32(device, 0x402cc0, 0x00000000); + nvkm_wr32(device, 0x402ca8, 0x00000002); } break; @@ -979,21 +748,47 @@ nv50_gr_init(struct nvkm_object *object) /* zero out zcull regions */ for (i = 0; i < 8; i++) { - nv_wr32(priv, 0x402c20 + (i * 0x10), 0x00000000); - nv_wr32(priv, 0x402c24 + (i * 0x10), 0x00000000); - nv_wr32(priv, 0x402c28 + (i * 0x10), 0x00000000); - nv_wr32(priv, 0x402c2c + (i * 0x10), 0x00000000); + nvkm_wr32(device, 0x402c20 + (i * 0x10), 0x00000000); + nvkm_wr32(device, 0x402c24 + (i * 0x10), 0x00000000); + nvkm_wr32(device, 0x402c28 + (i * 0x10), 0x00000000); + nvkm_wr32(device, 0x402c2c + (i * 0x10), 0x00000000); } + return 0; } -struct nvkm_oclass -nv50_gr_oclass = { - .handle = NV_ENGINE(GR, 0x50), - .ofuncs = &(struct nvkm_ofuncs) { - .ctor = nv50_gr_ctor, - .dtor = _nvkm_gr_dtor, - .init = nv50_gr_init, - .fini = _nvkm_gr_fini, - }, +int +nv50_gr_new_(const struct nvkm_gr_func *func, struct nvkm_device *device, + int index, struct nvkm_gr **pgr) +{ + struct nv50_gr *gr; + + if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL))) + return -ENOMEM; + spin_lock_init(&gr->lock); + *pgr = &gr->base; + + return nvkm_gr_ctor(func, device, index, 0x00201000, true, &gr->base); +} + +static const struct nvkm_gr_func +nv50_gr = { + .init = nv50_gr_init, + .intr = nv50_gr_intr, + .chan_new = nv50_gr_chan_new, + .units = nv50_gr_units, + .sclass = { + { -1, -1, 0x0030, &nv50_gr_object }, + { -1, -1, 0x502d, &nv50_gr_object }, + { -1, -1, 0x5039, &nv50_gr_object }, + { -1, -1, 0x5097, &nv50_gr_object }, + { -1, -1, 0x50c0, &nv50_gr_object }, + {} + } }; + +int +nv50_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr) +{ + return nv50_gr_new_(&nv50_gr, device, index, pgr); +} diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h index bcf786f6b731..45eec83a5969 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h @@ -1,8 +1,34 @@ #ifndef __NV50_GR_H__ #define __NV50_GR_H__ -#include <engine/gr.h> -struct nvkm_device; -struct nvkm_gpuobj; +#define nv50_gr(p) container_of((p), struct nv50_gr, base) +#include "priv.h" + +struct nv50_gr { + struct nvkm_gr base; + const struct nv50_gr_func *func; + spinlock_t lock; + u32 size; +}; + +int nv50_gr_new_(const struct nvkm_gr_func *, struct nvkm_device *, int index, + struct nvkm_gr **); +int nv50_gr_init(struct nvkm_gr *); +void nv50_gr_intr(struct nvkm_gr *); +u64 nv50_gr_units(struct nvkm_gr *); + +int g84_gr_tlb_flush(struct nvkm_gr *); + +#define nv50_gr_chan(p) container_of((p), struct nv50_gr_chan, object) + +struct nv50_gr_chan { + struct nvkm_object object; + struct nv50_gr *gr; +}; + +int nv50_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *, + const struct nvkm_oclass *, struct nvkm_object **); + +extern const struct nvkm_object_func nv50_gr_object; int nv50_grctx_init(struct nvkm_device *, u32 *size); void nv50_grctx_fill(struct nvkm_device *, struct nvkm_gpuobj *); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h new file mode 100644 index 000000000000..a234590be88e --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h @@ -0,0 +1,38 @@ +#ifndef __NVKM_GR_PRIV_H__ +#define __NVKM_GR_PRIV_H__ +#define nvkm_gr(p) container_of((p), struct nvkm_gr, engine) +#include <engine/gr.h> 
+#include <core/enum.h> +struct nvkm_fb_tile; +struct nvkm_fifo_chan; + +int nvkm_gr_ctor(const struct nvkm_gr_func *, struct nvkm_device *, + int index, u32 pmc_enable, bool enable, + struct nvkm_gr *); + +bool nv04_gr_idle(struct nvkm_gr *); + +struct nvkm_gr_func { + void *(*dtor)(struct nvkm_gr *); + int (*oneinit)(struct nvkm_gr *); + int (*init)(struct nvkm_gr *); + void (*intr)(struct nvkm_gr *); + void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *); + int (*tlb_flush)(struct nvkm_gr *); + int (*chan_new)(struct nvkm_gr *, struct nvkm_fifo_chan *, + const struct nvkm_oclass *, struct nvkm_object **); + int (*object_get)(struct nvkm_gr *, int, struct nvkm_sclass *); + /* Returns chipset-specific counts of units packed into an u64. + */ + u64 (*units)(struct nvkm_gr *); + struct nvkm_sclass sclass[]; +}; + +extern const struct nvkm_bitfield nv04_gr_nsource[]; +extern const struct nvkm_object_func nv04_gr_object; + +extern const struct nvkm_bitfield nv10_gr_intr_name[]; +extern const struct nvkm_bitfield nv10_gr_nstatus[]; + +extern const struct nvkm_enum nv50_data_error_names[]; +#endif |
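[Illustrative aside, not part of the patch.] The new priv.h above spells out the contract a GR implementation satisfies: fill an nvkm_gr_func, terminate .sclass with an empty entry, and hand everything to nvkm_gr_ctor() together with the PMC enable mask, as nv40_gr_new_() and nv50_gr_new_() do earlier in this diff. A minimal hypothetical implementation, with made-up "foo" names and the nv40 PMC bit reused purely as an example, assuming the usual kernel headers are pulled in via priv.h:

    #include "priv.h"

    struct foo_gr {
            struct nvkm_gr base;
    };

    static int
    foo_gr_init(struct nvkm_gr *gr)
    {
            /* program PGRAPH here */
            return 0;
    }

    static void
    foo_gr_intr(struct nvkm_gr *gr)
    {
            /* read and acknowledge PGRAPH interrupts here */
    }

    static const struct nvkm_gr_func
    foo_gr_func = {
            .init = foo_gr_init,
            .intr = foo_gr_intr,
            .sclass = {
                    {}      /* empty-terminated class list */
            }
    };

    int
    foo_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
    {
            struct foo_gr *gr;

            if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
                    return -ENOMEM;
            *pgr = &gr->base;

            /* 0x00001000 is the PMC enable bit nv40 passes; a real part
             * would use its own bit here */
            return nvkm_gr_ctor(&foo_gr_func, device, index, 0x00001000,
                                true, &gr->base);
    }

Real implementations in this series additionally provide .chan_new, .units, .tile and, where the hardware has them, .tlb_flush and populated .sclass entries.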