115 files changed, 4549 insertions, 485 deletions
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 5e5617006da5..cf6b3a80c0c8 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -47,6 +47,9 @@ nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
 nouveau-y += nouveau_ttm.o
 nouveau-y += nouveau_vmm.o
+nouveau-y += nouveau_exec.o
+nouveau-y += nouveau_sched.o
+nouveau-y += nouveau_uvmm.o
 
 # DRM - modesetting
 nouveau-$(CONFIG_DRM_NOUVEAU_BACKLIGHT) += nouveau_backlight.o
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index a70bd65e1400..c52e8096cca4 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -10,6 +10,8 @@ config DRM_NOUVEAU
 	select DRM_KMS_HELPER
 	select DRM_TTM
 	select DRM_TTM_HELPER
+	select DRM_EXEC
+	select DRM_SCHED
 	select I2C
 	select I2C_ALGOBIT
 	select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a6f2e681bde9..a34924523133 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -1122,11 +1122,18 @@ nv04_page_flip_emit(struct nouveau_channel *chan,
 	PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
 	PUSH_KICK(push);
 
-	ret = nouveau_fence_new(chan, false, pfence);
+	ret = nouveau_fence_new(pfence);
 	if (ret)
 		goto fail;
 
+	ret = nouveau_fence_emit(*pfence, chan);
+	if (ret)
+		goto fail_fence_unref;
+
 	return 0;
+
+fail_fence_unref:
+	nouveau_fence_unref(pfence);
 fail:
 	spin_lock_irqsave(&dev->event_lock, flags);
 	list_del(&s->head);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
index 78ee32da01c8..a95ee5dcc2e3 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs507a.c
@@ -29,6 +29,7 @@
 #include <nvhw/class/cl507a.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_fourcc.h>
 
 bool
 curs507a_space(struct nv50_wndw *wndw)
@@ -99,6 +100,7 @@ curs507a_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
 {
 	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
 	struct nv50_head *head = nv50_head(asyw->state.crtc);
+	struct drm_framebuffer *fb = asyw->state.fb;
 	int ret;
 
 	ret = drm_atomic_helper_check_plane_state(&asyw->state, &asyh->state,
@@ -124,11 +126,30 @@ curs507a_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
 		return -EINVAL;
 	}
 
+	if (asyw->image.pitch[0] != asyw->image.w * fb->format->cpp[0]) {
+		NV_ATOMIC(drm,
+			  "%s: invalid cursor image pitch: image must be packed (pitch = %d, width = %d)\n",
+			  wndw->plane.name, asyw->image.pitch[0], asyw->image.w);
+		return -EINVAL;
+	}
+
 	ret = head->func->curs_layout(head, asyw, asyh);
-	if (ret)
+	if (ret) {
+		NV_ATOMIC(drm,
+			  "%s: invalid cursor image size: unsupported size %dx%d\n",
+			  wndw->plane.name, asyw->image.w, asyw->image.h);
+		return ret;
+	}
+
+	ret = head->func->curs_format(head, asyw, asyh);
+	if (ret) {
+		NV_ATOMIC(drm,
+			  "%s: invalid cursor image format 0x%X\n",
+			  wndw->plane.name, fb->format->format);
 		return ret;
+	}
 
-	return head->func->curs_format(head, asyw, asyh);
+	return 0;
 }
 
 static const u32
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 42e1665ba11a..4e7c9c353c51 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -910,15 +910,19 @@ nv50_msto_prepare(struct drm_atomic_state *state,
 	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
 	struct nv50_mstc *mstc = msto->mstc;
 	struct nv50_mstm *mstm = mstc->mstm;
-	struct drm_dp_mst_atomic_payload *payload;
+	struct drm_dp_mst_topology_state *old_mst_state;
+	struct drm_dp_mst_atomic_payload *payload, *old_payload;
 
 	NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
 
+	old_mst_state = drm_atomic_get_old_mst_topology_state(state, mgr);
+
 	payload = drm_atomic_get_mst_payload_state(mst_state, mstc->port);
+	old_payload = drm_atomic_get_mst_payload_state(old_mst_state, mstc->port);
 
 	// TODO: Figure out if we want to do a better job of handling VCPI allocation failures here?
 	if (msto->disabled) {
-		drm_dp_remove_payload(mgr, mst_state, payload, payload);
+		drm_dp_remove_payload(mgr, mst_state, old_payload, payload);
 
 		nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
 	} else {
@@ -1124,7 +1128,7 @@ nv50_mstc_mode_valid(struct drm_connector *connector,
 	 * MSTB's max possible PBN
 	 */
 
-	return nv50_dp_mode_valid(connector, outp, mode, NULL);
+	return nv50_dp_mode_valid(outp, mode, NULL);
 }
 
 static int
@@ -1873,6 +1877,8 @@ nv50_pior_destroy(struct drm_encoder *encoder)
 	nvif_outp_dtor(&nv_encoder->outp);
 
 	drm_encoder_cleanup(encoder);
+
+	mutex_destroy(&nv_encoder->dp.hpd_irq_lock);
 	kfree(encoder);
 }
 
@@ -1917,6 +1923,8 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	nv_encoder->i2c = ddc;
 	nv_encoder->aux = aux;
 
+	mutex_init(&nv_encoder->dp.hpd_irq_lock);
+
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
index 9c7ff56831c5..a5a182b3c28d 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
@@ -3,7 +3,10 @@
 struct nvif_vmm_v0 {
 	__u8  version;
 	__u8  page_nr;
-	__u8  managed;
+#define NVIF_VMM_V0_TYPE_UNMANAGED                                         0x00
+#define NVIF_VMM_V0_TYPE_MANAGED                                           0x01
+#define NVIF_VMM_V0_TYPE_RAW                                               0x02
+	__u8  type;
 	__u8  pad03[5];
 	__u64 addr;
 	__u64 size;
@@ -17,6 +20,7 @@ struct nvif_vmm_v0 {
 #define NVIF_VMM_V0_UNMAP                                                  0x04
 #define NVIF_VMM_V0_PFNMAP                                                 0x05
 #define NVIF_VMM_V0_PFNCLR                                                 0x06
+#define NVIF_VMM_V0_RAW                                                    0x07
 #define NVIF_VMM_V0_MTHD(i)                                         ((i) + 0x80)
 
 struct nvif_vmm_page_v0 {
@@ -66,6 +70,26 @@ struct nvif_vmm_unmap_v0 {
 	__u64 addr;
 };
 
+struct nvif_vmm_raw_v0 {
+	__u8 version;
+#define NVIF_VMM_RAW_V0_GET	0x0
+#define NVIF_VMM_RAW_V0_PUT	0x1
+#define NVIF_VMM_RAW_V0_MAP	0x2
+#define NVIF_VMM_RAW_V0_UNMAP	0x3
+#define NVIF_VMM_RAW_V0_SPARSE	0x4
+	__u8  op;
+	__u8  sparse;
+	__u8  ref;
+	__u8  shift;
+	__u32 argc;
+	__u8  pad01[7];
+	__u64 addr;
+	__u64 size;
+	__u64 offset;
+	__u64 memory;
+	__u64 argv;
+};
+
 struct nvif_vmm_pfnmap_v0 {
 	__u8  version;
 	__u8  page;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/vmm.h b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
index a2ee92201ace..0ecedd0ee0a5 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/vmm.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/vmm.h
@@ -4,6 +4,12 @@
 struct nvif_mem;
 struct nvif_mmu;
 
+enum nvif_vmm_type {
+	UNMANAGED,
+	MANAGED,
+	RAW,
+};
+
 enum nvif_vmm_get {
 	ADDR,
 	PTES,
@@ -30,8 +36,9 @@ struct nvif_vmm {
 	int page_nr;
 };
 
-int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass, bool managed,
-		  u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *);
+int nvif_vmm_ctor(struct nvif_mmu *, const char *name, s32 oclass,
+		  enum nvif_vmm_type, u64 addr, u64 size, void *argv, u32 argc,
+		  struct nvif_vmm *);
 void nvif_vmm_dtor(struct nvif_vmm *);
 int nvif_vmm_get(struct nvif_vmm *, enum nvif_vmm_get, bool sparse,
 		 u8 page, u8 align, u64 size, struct nvif_vma *);
@@ -39,4 +46,12 @@ void nvif_vmm_put(struct nvif_vmm *, struct nvif_vma *);
 int nvif_vmm_map(struct nvif_vmm *, u64 addr, u64 size, void *argv, u32 argc,
 		 struct nvif_mem *, u64 offset);
 int nvif_vmm_unmap(struct nvif_vmm *, u64);
+
+int nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift);
+int nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift,
+		     void *argv, u32 argc, struct nvif_mem *mem, u64 offset);
+int nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size,
+		       u8 shift, bool sparse);
+int nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index b67b9c1a6b4e..738899fcf30b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -3,7 +3,7 @@
 #define __NVKM_ENGINE_H__
 #define nvkm_engine(p) container_of((p), struct nvkm_engine, subdev)
 #include <core/subdev.h>
-struct nvkm_fifo_chan;
+struct nvkm_chan;
 struct nvkm_fb_tile;
 
 extern const struct nvkm_subdev_func nvkm_engine;
@@ -22,6 +22,7 @@ struct nvkm_engine_func {
 	int (*init)(struct nvkm_engine *);
 	int (*fini)(struct nvkm_engine *, bool suspend);
 	int (*reset)(struct nvkm_engine *);
+	int (*nonstall)(struct nvkm_engine *);
 	void (*intr)(struct nvkm_engine *);
 	void (*tile)(struct nvkm_engine *, int region, struct nvkm_fb_tile *);
 	bool (*chsw_load)(struct nvkm_engine *);
@@ -32,8 +33,7 @@ struct nvkm_engine_func {
 	} base;
 
 	struct {
-		int (*cclass)(struct nvkm_fifo_chan *,
-			      const struct nvkm_oclass *,
+		int (*cclass)(struct nvkm_chan *, const struct nvkm_oclass *,
 			      struct nvkm_object **);
 		int (*sclass)(struct nvkm_oclass *, int index);
 	} fifo;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
index 4486d9862849..3fd5c007a663 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
@@ -49,9 +49,4 @@ nvkm_blob_dtor(struct nvkm_blob *blob)
 	(p = container_of((h), typeof(*p), m), nvkm_list_find_next(p, (h), m, (c)))
 #define nvkm_list_foreach(p,h,m,c)                                                           \
 	for (p = nvkm_list_find(p, (h), m, (c)); p; p = nvkm_list_find_next(p, (h), m, (c)))
-
-/*FIXME: remove after */
-#define nvkm_fifo_chan nvkm_chan
-#define nvkm_fifo_chan_func nvkm_chan_func
-#define nvkm_fifo_cgrp nvkm_cgrp
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
index cd86d9198e4a..b7bb8a29a729 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
@@ -3,7 +3,7 @@
 #define __NVKM_FLCNEN_H__
 #define nvkm_falcon(p) container_of((p), struct nvkm_falcon, engine)
 #include <core/engine.h>
-struct nvkm_fifo_chan;
+struct nvkm_chan;
 
 enum nvkm_falcon_dmaidx {
 	FALCON_DMAIDX_UCODE		= 0,
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 01a22a13b452..1755b0df3cc1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -59,6 +59,7 @@ struct nvkm_fb {
 	struct nvkm_memory *mmu_wr;
 };
 
+u64 nvkm_fb_vidmem_size(struct nvkm_device *);
 int nvkm_fb_mem_unlock(struct nvkm_fb *);
 
 void nvkm_fb_tile_init(struct nvkm_fb *, int region, u32 addr, u32 size,
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
index 40a1065ae626..ef441dfdea09 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
@@ -16,7 +16,7 @@ struct nvkm_i2c_bus {
 	const struct nvkm_i2c_bus_func *func;
 	struct nvkm_i2c_pad *pad;
 #define NVKM_I2C_BUS_CCB(n) /* 'n' is ccb index */                           (n)
-#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */  ((n) + 0x10)
 #define NVKM_I2C_BUS_PRI /* ccb primary comm. port */                        -1
 #define NVKM_I2C_BUS_SEC /* ccb secondary comm. port */                      -2
 	int id;
@@ -38,7 +38,7 @@ struct nvkm_i2c_aux {
 	const struct nvkm_i2c_aux_func *func;
 	struct nvkm_i2c_pad *pad;
 #define NVKM_I2C_AUX_CCB(n) /* 'n' is ccb index */                           (n)
-#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */  ((n) + 0x10)
 	int id;
 
 	struct mutex mutex;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 70e7887ef4b4..2fd2f2433fc7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -17,6 +17,7 @@ struct nvkm_vma {
 	bool part:1; /* Region was split from an allocated region by map(). */
 	bool busy:1; /* Region busy (for temporarily preventing user access). */
 	bool mapped:1; /* Region contains valid pages. */
+	bool no_comp:1; /* Force no memory compression. */
 	struct nvkm_memory *memory; /* Memory currently mapped into VMA. */
 	struct nvkm_tags *tags; /* Compression tag reference. */
 };
@@ -27,10 +28,26 @@ struct nvkm_vmm {
 	const char *name;
 	u32 debug;
 	struct kref kref;
-	struct mutex mutex;
+
+	struct {
+		struct mutex vmm;
+		struct mutex ref;
+		struct mutex map;
+	} mutex;
 
 	u64 start;
 	u64 limit;
+	struct {
+		struct {
+			u64 addr;
+			u64 size;
+		} p;
+		struct {
+			u64 addr;
+			u64 size;
+		} n;
+		bool raw;
+	} managed;
 
 	struct nvkm_vmm_pt *pd;
 	struct list_head join;
@@ -70,6 +87,7 @@ struct nvkm_vmm_map {
 
 	const struct nvkm_vmm_page *page;
 
+	bool no_comp;
 	struct nvkm_tags *tags;
 	u64 next;
 	u64 type;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 82dab51d8aeb..30afbec9e3b1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -35,6 +35,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_sched.h"
 
 static struct nouveau_abi16 *
 nouveau_abi16(struct drm_file *file_priv)
@@ -125,6 +126,17 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
 {
 	struct nouveau_abi16_ntfy *ntfy, *temp;
 
+	/* When a client exits without waiting for its queued up jobs to
+	 * finish it might happen that we fault the channel. This is due to
+	 * drm_file_free() calling drm_gem_release() before the postclose()
+	 * callback. Hence, we can't tear down this scheduler entity before
+	 * uvmm mappings are unmapped. Currently, we can't detect this case.
+	 *
+	 * However, this should be rare and harmless, since the channel isn't
+	 * needed anymore.
+	 */
+	nouveau_sched_entity_fini(&chan->sched_entity);
+
 	/* wait for all activity to stop before cleaning up */
 	if (chan->chan)
 		nouveau_channel_idle(chan->chan);
@@ -261,6 +273,13 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (!drm->channel)
 		return nouveau_abi16_put(abi16, -ENODEV);
 
+	/* If uvmm wasn't initialized until now disable it completely to prevent
+	 * userspace from mixing up UAPIs.
+	 *
+	 * The client lock is already acquired by nouveau_abi16_get().
+	 */
+	__nouveau_cli_disable_uvmm_noinit(cli);
+
 	device = &abi16->device;
 	engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;
 
@@ -304,6 +323,11 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	if (ret)
 		goto done;
 
+	ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
+					drm->sched_wq);
+	if (ret)
+		goto done;
+
 	init->channel = chan->chan->chid;
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_TESLA)
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 27eae85f33e6..9f538486c10e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,6 +26,7 @@ struct nouveau_abi16_chan {
 	struct nouveau_bo *ntfy;
 	struct nouveau_vma *ntfy_vma;
 	struct nvkm_mm  heap;
+	struct nouveau_sched_entity sched_entity;
 };
 
 struct nouveau_abi16 {
@@ -43,28 +44,6 @@ int  nouveau_abi16_usif(struct drm_file *, void *data, u32 size);
 #define NOUVEAU_GEM_DOMAIN_VRAM      (1 << 1)
 #define NOUVEAU_GEM_DOMAIN_GART      (1 << 2)
 
-struct drm_nouveau_channel_alloc {
-	uint32_t     fb_ctxdma_handle;
-	uint32_t     tt_ctxdma_handle;
-
-	int          channel;
-	uint32_t     pushbuf_domains;
-
-	/* Notifier memory */
-	uint32_t     notifier_handle;
-
-	/* DRM-enforced subchannel assignments */
-	struct {
-		uint32_t handle;
-		uint32_t grclass;
-	} subchan[8];
-	uint32_t nr_subchan;
-};
-
-struct drm_nouveau_channel_free {
-	int channel;
-};
-
 struct drm_nouveau_grobj_alloc {
 	int      channel;
 	uint32_t handle;
@@ -83,31 +62,12 @@ struct drm_nouveau_gpuobj_free {
 	uint32_t handle;
 };
 
-#define NOUVEAU_GETPARAM_PCI_VENDOR      3
-#define NOUVEAU_GETPARAM_PCI_DEVICE      4
-#define NOUVEAU_GETPARAM_BUS_TYPE        5
-#define NOUVEAU_GETPARAM_FB_SIZE         8
-#define NOUVEAU_GETPARAM_AGP_SIZE        9
-#define NOUVEAU_GETPARAM_CHIPSET_ID      11
-#define NOUVEAU_GETPARAM_VM_VRAM_BASE    12
-#define NOUVEAU_GETPARAM_GRAPH_UNITS     13
-#define NOUVEAU_GETPARAM_PTIMER_TIME     14
-#define NOUVEAU_GETPARAM_HAS_BO_USAGE    15
-#define NOUVEAU_GETPARAM_HAS_PAGEFLIP    16
-struct drm_nouveau_getparam {
-	uint64_t param;
-	uint64_t value;
-};
-
 struct drm_nouveau_setparam {
 	uint64_t param;
 	uint64_t value;
 };
 
-#define DRM_IOCTL_NOUVEAU_GETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GETPARAM, struct drm_nouveau_getparam)
 #define DRM_IOCTL_NOUVEAU_SETPARAM           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SETPARAM, struct drm_nouveau_setparam)
-#define DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC      DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_ALLOC, struct drm_nouveau_channel_alloc)
-#define DRM_IOCTL_NOUVEAU_CHANNEL_FREE       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_CHANNEL_FREE, struct drm_nouveau_channel_free)
 #define DRM_IOCTL_NOUVEAU_GROBJ_ALLOC        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GROBJ_ALLOC, struct drm_nouveau_grobj_alloc)
 #define DRM_IOCTL_NOUVEAU_NOTIFIEROBJ_ALLOC  DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_NOTIFIEROBJ_ALLOC, struct drm_nouveau_notifierobj_alloc)
 #define DRM_IOCTL_NOUVEAU_GPUOBJ_FREE        DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GPUOBJ_FREE, struct drm_nouveau_gpuobj_free)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index c2ec91cc845d..19cab37ac69c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -199,12 +199,12 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, int *align, u64 *size)
 
 struct nouveau_bo *
 nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
-		 u32 tile_mode, u32 tile_flags)
+		 u32 tile_mode, u32 tile_flags, bool internal)
 {
 	struct nouveau_drm *drm = cli->drm;
 	struct nouveau_bo *nvbo;
 	struct nvif_mmu *mmu = &cli->mmu;
-	struct nvif_vmm *vmm = cli->svm.cli ? &cli->svm.vmm : &cli->vmm.vmm;
+	struct nvif_vmm *vmm = &nouveau_cli_vmm(cli)->vmm;
 	int i, pi = -1;
 
 	if (!*size) {
@@ -215,6 +215,7 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
 	nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
 	if (!nvbo)
 		return ERR_PTR(-ENOMEM);
+
 	INIT_LIST_HEAD(&nvbo->head);
 	INIT_LIST_HEAD(&nvbo->entry);
 	INIT_LIST_HEAD(&nvbo->vma_list);
@@ -232,68 +233,103 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
 			nvbo->force_coherent = true;
 	}
 
-	if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
-		nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
-		if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
-			kfree(nvbo);
-			return ERR_PTR(-EINVAL);
+	nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
+	if (!nouveau_cli_uvmm(cli) || internal) {
+		/* non-uvmm or internal BOs: take kind/comp/zeta from tile_flags */
+		if (cli->device.info.family >= NV_DEVICE_INFO_V0_FERMI) {
+			nvbo->kind = (tile_flags & 0x0000ff00) >> 8;
+			if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+				kfree(nvbo);
+				return ERR_PTR(-EINVAL);
+			}
+
+			nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
+		} else if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+			nvbo->kind = (tile_flags & 0x00007f00) >> 8;
+			nvbo->comp = (tile_flags & 0x00030000) >> 16;
+			if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+				kfree(nvbo);
+				return ERR_PTR(-EINVAL);
+			}
+		} else {
+			nvbo->zeta = (tile_flags & 0x00000007);
 		}
+		nvbo->mode = tile_mode;
+
+		/* Determine the desirable target GPU page size for the buffer. */
+		for (i = 0; i < vmm->page_nr; i++) {
+			/* Because we cannot currently allow VMM maps to fail
+			 * during buffer migration, we need to determine page
+			 * size for the buffer up-front, and pre-allocate its
+			 * page tables.
+			 *
+			 * Skip page sizes that can't support needed domains.
+			 */
+			if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
+			    (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+				continue;
+			if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+			    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+				continue;
 
-		nvbo->comp = mmu->kind[nvbo->kind] != nvbo->kind;
-	} else
-	if (cli->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		nvbo->kind = (tile_flags & 0x00007f00) >> 8;
-		nvbo->comp = (tile_flags & 0x00030000) >> 16;
-		if (!nvif_mmu_kind_valid(mmu, nvbo->kind)) {
+			/* Select this page size if it's the first that supports
+			 * the potential memory domains, or when it's compatible
+			 * with the requested compression settings.
+			 */
+			if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
+				pi = i;
+
+			/* Stop once the buffer is larger than the current page size. */
+			if (*size >= 1ULL << vmm->page[i].shift)
+				break;
+		}
+
+		if (WARN_ON(pi < 0)) {
 			kfree(nvbo);
 			return ERR_PTR(-EINVAL);
 		}
-	} else {
-		nvbo->zeta = (tile_flags & 0x00000007);
-	}
-	nvbo->mode = tile_mode;
-	nvbo->contig = !(tile_flags & NOUVEAU_GEM_TILE_NONCONTIG);
-
-	/* Determine the desirable target GPU page size for the buffer. */
-	for (i = 0; i < vmm->page_nr; i++) {
-		/* Because we cannot currently allow VMM maps to fail
-		 * during buffer migration, we need to determine page
-		 * size for the buffer up-front, and pre-allocate its
-		 * page tables.
-		 *
-		 * Skip page sizes that can't support needed domains.
-		 */
-		if (cli->device.info.family > NV_DEVICE_INFO_V0_CURIE &&
-		    (domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
-			continue;
-		if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
-		    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
-			continue;
 
-		/* Select this page size if it's the first that supports
-		 * the potential memory domains, or when it's compatible
-		 * with the requested compression settings.
-		 */
-		if (pi < 0 || !nvbo->comp || vmm->page[i].comp)
-			pi = i;
-
-		/* Stop once the buffer is larger than the current page size. */
-		if (*size >= 1ULL << vmm->page[i].shift)
-			break;
-	}
+		/* Disable compression if suitable settings couldn't be found. */
+		if (nvbo->comp && !vmm->page[pi].comp) {
+			if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
+				nvbo->kind = mmu->kind[nvbo->kind];
+			nvbo->comp = 0;
+		}
+		nvbo->page = vmm->page[pi].shift;
+	} else {
+		/* reject other tile flags when in VM mode; free nvbo on rejection. */
+		if (tile_mode || (tile_flags & ~NOUVEAU_GEM_TILE_NONCONTIG)) {
+			kfree(nvbo);
+			return ERR_PTR(-EINVAL);
+		}
 
-	if (WARN_ON(pi < 0)) {
-		kfree(nvbo);
-		return ERR_PTR(-EINVAL);
-	}
+		/* Determine the desirable target GPU page size for the buffer. */
+		for (i = 0; i < vmm->page_nr; i++) {
+			/* Because we cannot currently allow VMM maps to fail
+			 * during buffer migration, we need to determine page
+			 * size for the buffer up-front, and pre-allocate its
+			 * page tables.
+			 *
+			 * Skip page sizes that can't support needed domains.
+			 */
+			if ((domain & NOUVEAU_GEM_DOMAIN_VRAM) && !vmm->page[i].vram)
+				continue;
+			if ((domain & NOUVEAU_GEM_DOMAIN_GART) &&
+			    (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
+				continue;
 
-	/* Disable compression if suitable settings couldn't be found. */
-	if (nvbo->comp && !vmm->page[pi].comp) {
-		if (mmu->object.oclass >= NVIF_CLASS_MMU_GF100)
-			nvbo->kind = mmu->kind[nvbo->kind];
-		nvbo->comp = 0;
+			if (pi < 0)
+				pi = i;
+			/* Stop once the buffer is larger than the current page size. */
+			if (*size >= 1ULL << vmm->page[i].shift)
+				break;
+		}
+		if (WARN_ON(pi < 0)) {
+			kfree(nvbo);
+			return ERR_PTR(-EINVAL);
+		}
+		nvbo->page = vmm->page[pi].shift;
 	}
-	nvbo->page = vmm->page[pi].shift;
 
 	nouveau_bo_fixup_align(nvbo, align, size);
 
@@ -306,18 +342,26 @@ nouveau_bo_init(struct nouveau_bo *nvbo, u64 size, int align, u32 domain,
 {
 	int type = sg ? ttm_bo_type_sg : ttm_bo_type_device;
 	int ret;
+	struct ttm_operation_ctx ctx = {
+		.interruptible = false,
+		.no_wait_gpu = false,
+		.resv = robj,
+	};
 
 	nouveau_bo_placement_set(nvbo, domain, 0);
 	INIT_LIST_HEAD(&nvbo->io_reserve_lru);
 
-	ret = ttm_bo_init_validate(nvbo->bo.bdev, &nvbo->bo, type,
-				   &nvbo->placement, align >> PAGE_SHIFT, false,
+	ret = ttm_bo_init_reserved(nvbo->bo.bdev, &nvbo->bo, type,
+				   &nvbo->placement, align >> PAGE_SHIFT, &ctx,
 				   sg, robj, nouveau_bo_del_ttm);
 	if (ret) {
 		/* ttm will call nouveau_bo_del_ttm if it fails.. */
 		return ret;
 	}
 
+	if (!robj)
+		ttm_bo_unreserve(&nvbo->bo);
+
 	return 0;
 }
 
@@ -331,7 +375,7 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 	int ret;
 
 	nvbo = nouveau_bo_alloc(cli, &size, &align, domain, tile_mode,
-				tile_flags);
+				tile_flags, true);
 	if (IS_ERR(nvbo))
 		return PTR_ERR(nvbo);
 
@@ -339,6 +383,11 @@ nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 	dma_resv_init(&nvbo->bo.base._resv);
 	drm_vma_node_reset(&nvbo->bo.base.vma_node);
 
+	/* This must be called before ttm_bo_init_reserved(). Subsequent
+	 * bo_move() callbacks might already iterate the GEM's GPUVA list.
+	 */
+	drm_gem_gpuva_init(&nvbo->bo.base);
+
 	ret = nouveau_bo_init(nvbo, size, align, domain, sg, robj);
 	if (ret)
 		return ret;
@@ -817,29 +866,39 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict,
 		mutex_lock(&cli->mutex);
 	else
 		mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
+
 	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, ctx->interruptible);
-	if (ret == 0) {
-		ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
-		if (ret == 0) {
-			ret = nouveau_fence_new(chan, false, &fence);
-			if (ret == 0) {
-				/* TODO: figure out a better solution here
-				 *
-				 * wait on the fence here explicitly as going through
-				 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
-				 *
-				 * Without this the operation can timeout and we'll fallback to a
-				 * software copy, which might take several minutes to finish.
-				 */
-				nouveau_fence_wait(fence, false, false);
-				ret = ttm_bo_move_accel_cleanup(bo,
-								&fence->base,
-								evict, false,
-								new_reg);
-				nouveau_fence_unref(&fence);
-			}
-		}
+	if (ret)
+		goto out_unlock;
+
+	ret = drm->ttm.move(chan, bo, bo->resource, new_reg);
+	if (ret)
+		goto out_unlock;
+
+	ret = nouveau_fence_new(&fence);
+	if (ret)
+		goto out_unlock;
+
+	ret = nouveau_fence_emit(fence, chan);
+	if (ret) {
+		nouveau_fence_unref(&fence);
+		goto out_unlock;
 	}
+
+	/* TODO: figure out a better solution here
+	 *
+	 * wait on the fence here explicitly as going through
+	 * ttm_bo_move_accel_cleanup somehow doesn't seem to do it.
+	 *
+	 * Without this the operation can timeout and we'll fallback to a
+	 * software copy, which might take several minutes to finish.
+	 */
+	nouveau_fence_wait(fence, false, false);
+	ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false,
+					new_reg);
+	nouveau_fence_unref(&fence);
+
+out_unlock:
 	mutex_unlock(&cli->mutex);
 	return ret;
 }
@@ -935,6 +994,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
 		list_for_each_entry(vma, &nvbo->vma_list, head) {
 			nouveau_vma_map(vma, mem);
 		}
+		nouveau_uvmm_bo_map_all(nvbo, mem);
 	} else {
 		list_for_each_entry(vma, &nvbo->vma_list, head) {
 			ret = dma_resv_wait_timeout(bo->base.resv,
@@ -943,6 +1003,7 @@ static void nouveau_bo_move_ntfy(struct ttm_buffer_object *bo,
 			WARN_ON(ret <= 0);
 			nouveau_vma_unmap(vma);
 		}
+		nouveau_uvmm_bo_unmap_all(nvbo);
 	}
 
 	if (new_reg)
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 774dd93ca76b..07f671cf895e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,7 @@ struct nouveau_bo {
 	struct list_head entry;
 	int pbbo_index;
 	bool validate_mapped;
+	bool no_share;
 
 	/* GPU address space is independent of CPU word size */
 	uint64_t offset;
@@ -73,7 +74,7 @@ extern struct ttm_device_funcs nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
 struct nouveau_bo *nouveau_bo_alloc(struct nouveau_cli *, u64 *size, int *align,
-				    u32 domain, u32 tile_mode, u32 tile_flags);
+				    u32 domain, u32 tile_mode, u32 tile_flags, bool internal);
 int  nouveau_bo_init(struct nouveau_bo *, u64 size, int align, u32 domain,
 		     struct sg_table *sg, struct dma_resv *robj);
 int  nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 domain,
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index e648ecd0c1a0..1fd5ccf41128 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -40,6 +40,14 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+void
+nouveau_channel_kill(struct nouveau_channel *chan)
+{
+	atomic_set(&chan->killed, 1);
+	if (chan->fence)
+		nouveau_fence_context_kill(chan->fence, -ENODEV);
+}
+
 static int
 nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 {
@@ -47,9 +55,9 @@ nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 	struct nouveau_cli *cli = (void *)chan->user.client;
 
 	NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
-	atomic_set(&chan->killed, 1);
-	if (chan->fence)
-		nouveau_fence_context_kill(chan->fence, -ENODEV);
+
+	if (unlikely(!atomic_read(&chan->killed)))
+		nouveau_channel_kill(chan);
 
 	return NVIF_EVENT_DROP;
 }
@@ -62,9 +70,11 @@ nouveau_channel_idle(struct nouveau_channel *chan)
 		struct nouveau_fence *fence = NULL;
 		int ret;
 
-		ret = nouveau_fence_new(chan, false, &fence);
+		ret = nouveau_fence_new(&fence);
 		if (!ret) {
-			ret = nouveau_fence_wait(fence, false, false);
+			ret = nouveau_fence_emit(fence, chan);
+			if (!ret)
+				ret = nouveau_fence_wait(fence, false, false);
 			nouveau_fence_unref(&fence);
 		}
 
@@ -90,6 +100,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 		if (cli)
 			nouveau_svmm_part(chan->vmm->svmm, chan->inst);
 
+		nvif_object_dtor(&chan->blit);
 		nvif_object_dtor(&chan->nvsw);
 		nvif_object_dtor(&chan->gart);
 		nvif_object_dtor(&chan->vram);
@@ -148,7 +159,7 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 	chan->device = device;
 	chan->drm = drm;
-	chan->vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+	chan->vmm = nouveau_cli_vmm(cli);
 	atomic_set(&chan->killed, 0);
 
 	/* allocate memory for dma push buffer */
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index e06a8ffed31a..5de2ef4e98c2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -53,6 +53,7 @@ struct nouveau_channel {
 	u32 user_put;
 
 	struct nvif_object user;
+	struct nvif_object blit;
 
 	struct nvif_event kill;
 	atomic_t killed;
@@ -65,6 +66,7 @@ int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *, bool priv,
 			 u32 vram, u32 gart, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
+void nouveau_channel_kill(struct nouveau_channel *);
 
 extern int nouveau_vram_pushbuf;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index f75c6f09dd2a..79ea30aac31f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -619,7 +619,10 @@ nouveau_connector_detect(struct drm_connector *connector, bool force)
 
 		nouveau_connector_set_encoder(connector, nv_encoder);
 		conn_status = connector_status_connected;
-		drm_dp_cec_set_edid(&nv_connector->aux, nv_connector->edid);
+
+		if (nv_encoder->dcb->type == DCB_OUTPUT_DP)
+			drm_dp_cec_set_edid(&nv_connector->aux, nv_connector->edid);
+
 		goto out;
 	} else {
 		nouveau_connector_set_edid(nv_connector, NULL);
@@ -967,7 +970,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 	/* Determine display colour depth for everything except LVDS now,
 	 * DP requires this before mode_valid() is called.
 	 */
-	if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+	if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
 		nouveau_connector_detect_depth(connector);
 
 	/* Find the native mode if this is a digital panel, if we didn't
@@ -1079,7 +1082,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 	case DCB_OUTPUT_TV:
 		return get_slave_funcs(encoder)->mode_valid(encoder, mode);
 	case DCB_OUTPUT_DP:
-		return nv50_dp_mode_valid(connector, nv_encoder, mode, NULL);
+		return nv50_dp_mode_valid(nv_encoder, mode, NULL);
 	default:
 		BUG();
 		return MODE_BAD;
@@ -1408,8 +1411,7 @@ nouveau_connector_create(struct drm_device *dev,
 		ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index,
 				     &nv_connector->conn);
 		if (ret) {
-			kfree(nv_connector);
-			return ERR_PTR(ret);
+			goto drm_conn_err;
 		}
 
 		ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug",
@@ -1426,8 +1428,7 @@ nouveau_connector_create(struct drm_device *dev,
 			if (ret) {
 				nvif_event_dtor(&nv_connector->hpd);
 				nvif_conn_dtor(&nv_connector->conn);
-				kfree(nv_connector);
-				return ERR_PTR(ret);
+				goto drm_conn_err;
 			}
 		}
 	}
@@ -1475,4 +1476,9 @@ nouveau_connector_create(struct drm_device *dev,
 
 	drm_connector_register(connector);
 	return connector;
+
+drm_conn_err:
+	drm_connector_cleanup(connector);
+	kfree(nv_connector);
+	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 99d022a91afc..053f703f2f68 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -203,6 +203,44 @@ nouveau_debugfs_pstate_open(struct inode *inode, struct file *file)
 	return single_open(file, nouveau_debugfs_pstate_get, inode->i_private);
 }
 
+static void
+nouveau_debugfs_gpuva_regions(struct seq_file *m, struct nouveau_uvmm *uvmm)
+{
+	MA_STATE(mas, &uvmm->region_mt, 0, 0);
+	struct nouveau_uvma_region *reg;
+	/* Print a table header to @m, then one row per entry iterated from uvmm->region_mt. */
+	seq_puts  (m, " VA regions  | start              | range              | end                \n");
+	seq_puts  (m, "----------------------------------------------------------------------------\n");
+	mas_for_each(&mas, reg, ULONG_MAX)
+		seq_printf(m, "             | 0x%016llx | 0x%016llx | 0x%016llx\n",
+			   reg->va.addr, reg->va.range, reg->va.addr + reg->va.range);
+}
+
+static int
+nouveau_debugfs_gpuva(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct nouveau_drm *drm = nouveau_drm(node->minor->dev);
+	struct nouveau_cli *cli;
+	/* Dump GPUVA info and VA regions for each client that has a uvmm; always returns 0. */
+	mutex_lock(&drm->clients_lock);
+	list_for_each_entry(cli, &drm->clients, head) {
+		struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+
+		if (!uvmm)
+			continue;
+
+		nouveau_uvmm_lock(uvmm);
+		drm_debugfs_gpuva_info(m, &uvmm->umgr);
+		seq_puts(m, "\n");
+		nouveau_debugfs_gpuva_regions(m, uvmm);
+		nouveau_uvmm_unlock(uvmm);
+	}
+	mutex_unlock(&drm->clients_lock);
+
+	return 0;
+}
+
 static const struct file_operations nouveau_pstate_fops = {
 	.owner = THIS_MODULE,
 	.open = nouveau_debugfs_pstate_open,
@@ -214,6 +252,7 @@ static const struct file_operations nouveau_pstate_fops = {
 static struct drm_info_list nouveau_debugfs_list[] = {
 	{ "vbios.rom",  nouveau_debugfs_vbios_image, 0, NULL },
 	{ "strap_peek", nouveau_debugfs_strap_peek, 0, NULL },
+	DRM_DEBUGFS_GPUVA_INFO(nouveau_debugfs_gpuva, NULL),
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index ec3ffff487fc..99977e5fe716 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -465,7 +465,8 @@ nouveau_display_hpd_work(struct work_struct *work)
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
 	u32 pending;
-	bool changed = false;
+	int changed = 0;
+	struct drm_connector *first_changed_connector = NULL;
 
 	pm_runtime_get_sync(dev->dev);
 
@@ -509,7 +510,12 @@ nouveau_display_hpd_work(struct work_struct *work)
 		if (old_epoch_counter == connector->epoch_counter)
 			continue;
 
-		changed = true;
+		changed++;
+		if (!first_changed_connector) {
+			drm_connector_get(connector);
+			first_changed_connector = connector;
+		}
+
 		drm_dbg_kms(dev, "[CONNECTOR:%d:%s] status updated from %s to %s (epoch counter %llu->%llu)\n",
 			    connector->base.id, connector->name,
 			    drm_get_connector_status_name(old_status),
@@ -520,9 +526,14 @@ nouveau_display_hpd_work(struct work_struct *work)
 	drm_connector_list_iter_end(&conn_iter);
 	mutex_unlock(&dev->mode_config.mutex);
 
-	if (changed)
+	if (changed == 1)
+		drm_kms_helper_connector_hotplug_event(first_changed_connector);
+	else if (changed > 0)
 		drm_kms_helper_hotplug_event(dev);
 
+	if (first_changed_connector)
+		drm_connector_put(first_changed_connector);
+
 	pm_runtime_mark_last_busy(drm->dev->dev);
 noop:
 	pm_runtime_put_autosuspend(dev->dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index b90cac6d5772..b01c029f3a90 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -69,16 +69,19 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 }
 
 void
-nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
+nv50_dma_push(struct nouveau_channel *chan, u64 offset, u32 length,
+	      bool no_prefetch)
 {
 	struct nvif_user *user = &chan->drm->client.device.user;
 	struct nouveau_bo *pb = chan->push.buffer;
 	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
 
 	BUG_ON(chan->dma.ib_free < 1);
+	WARN_ON(length > NV50_DMA_PUSH_MAX_LENGTH);
 
 	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
-	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
+	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8 |
+			(no_prefetch ? (1 << 31) : 0));
 
 	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 035a709c7be1..1744d95b233e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -31,7 +31,8 @@
 #include "nouveau_chan.h"
 
 int nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
-void nv50_dma_push(struct nouveau_channel *, u64 addr, int length);
+void nv50_dma_push(struct nouveau_channel *, u64 addr, u32 length,
+		   bool no_prefetch);
 
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
@@ -45,6 +46,9 @@ void nv50_dma_push(struct nouveau_channel *, u64 addr, int length);
  */
 #define NOUVEAU_DMA_SKIPS (128 / 4)
 
+/* Maximum push buffer size. */
+#define NV50_DMA_PUSH_MAX_LENGTH 0x7fffff
+
 /* Object handles - for stuff that's doesn't use handle == oclass. */
 enum {
 	NvDmaFB		= 0x80000002,
@@ -89,7 +93,7 @@ FIRE_RING(struct nouveau_channel *chan)
 
 	if (chan->dma.ib_max) {
 		nv50_dma_push(chan, chan->push.addr + (chan->dma.put << 2),
-			      (chan->dma.cur - chan->dma.put) << 2);
+			      (chan->dma.cur - chan->dma.put) << 2, false);
 	} else {
 		WRITE_PUT(chan->dma.cur);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 789857faa048..61e84562094a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -209,7 +209,8 @@ static vm_fault_t nouveau_dmem_migrate_to_ram(struct vm_fault *vmf)
 		goto done;
 	}
 
-	nouveau_fence_new(dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, dmem->migrate.chan);
 	migrate_vma_pages(&args);
 	nouveau_dmem_fence_done(&fence);
 	dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
@@ -402,7 +403,8 @@ nouveau_dmem_evict_chunk(struct nouveau_dmem_chunk *chunk)
 		}
 	}
 
-	nouveau_fence_new(chunk->drm->dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
 	migrate_device_pages(src_pfns, dst_pfns, npages);
 	nouveau_dmem_fence_done(&fence);
 	migrate_device_finalize(src_pfns, dst_pfns, npages);
@@ -675,7 +677,8 @@ static void nouveau_dmem_migrate_chunk(struct nouveau_drm *drm,
 		addr += PAGE_SIZE;
 	}
 
-	nouveau_fence_new(drm->dmem->migrate.chan, false, &fence);
+	if (!nouveau_fence_new(&fence))
+		nouveau_fence_emit(fence, drm->dmem->migrate.chan);
 	migrate_vma_pages(args);
 	nouveau_dmem_fence_done(&fence);
 	nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index d49b4875fc3c..6a4980b2d4d4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -267,8 +267,7 @@ nouveau_dp_irq(struct work_struct *work)
  *   yet)
  */
 enum drm_mode_status
-nv50_dp_mode_valid(struct drm_connector *connector,
-		   struct nouveau_encoder *outp,
+nv50_dp_mode_valid(struct nouveau_encoder *outp,
 		   const struct drm_display_mode *mode,
 		   unsigned *out_clock)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 7aac9384600e..4396f501b16a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -68,6 +68,9 @@
 #include "nouveau_platform.h"
 #include "nouveau_svm.h"
 #include "nouveau_dmem.h"
+#include "nouveau_exec.h"
+#include "nouveau_uvmm.h"
+#include "nouveau_sched.h"
 
 DECLARE_DYNDBG_CLASSMAP(drm_debug_classes, DD_CLASS_TYPE_DISJOINT_BITS, 0,
 			"DRM_UT_CORE",
@@ -196,6 +199,8 @@ nouveau_cli_fini(struct nouveau_cli *cli)
 	WARN_ON(!list_empty(&cli->worker));
 
 	usif_client_fini(cli);
+	nouveau_uvmm_fini(&cli->uvmm);
+	nouveau_sched_entity_fini(&cli->sched_entity);
 	nouveau_vmm_fini(&cli->svm);
 	nouveau_vmm_fini(&cli->vmm);
 	nvif_mmu_dtor(&cli->mmu);
@@ -301,6 +306,12 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
 	}
 
 	cli->mem = &mems[ret];
+
+	ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched,
+					drm->sched_wq);
+	if (ret)
+		goto done;
+
 	return 0;
 done:
 	if (ret)
@@ -375,15 +386,29 @@ nouveau_accel_gr_init(struct nouveau_drm *drm)
 		ret = nvif_object_ctor(&drm->channel->user, "drmNvsw",
 				       NVDRM_NVSW, nouveau_abi16_swclass(drm),
 				       NULL, 0, &drm->channel->nvsw);
+
+		if (ret == 0 && device->info.chipset >= 0x11) {
+			ret = nvif_object_ctor(&drm->channel->user, "drmBlit",
+					       0x005f, 0x009f,
+					       NULL, 0, &drm->channel->blit);
+		}
+
 		if (ret == 0) {
 			struct nvif_push *push = drm->channel->chan.push;
-			ret = PUSH_WAIT(push, 2);
-			if (ret == 0)
+			ret = PUSH_WAIT(push, 8);
+			if (ret == 0) {
+				if (device->info.chipset >= 0x11) {
+					PUSH_NVSQ(push, NV05F, 0x0000, drm->channel->blit.handle);
+					PUSH_NVSQ(push, NV09F, 0x0120, 0,
+							       0x0124, 1,
+							       0x0128, 2);
+				}
 				PUSH_NVSQ(push, NV_SW, 0x0000, drm->channel->nvsw.handle);
+			}
 		}
 
 		if (ret) {
-			NV_ERROR(drm, "failed to allocate sw class, %d\n", ret);
+			NV_ERROR(drm, "failed to allocate sw or blit class, %d\n", ret);
 			nouveau_accel_gr_fini(drm);
 			return;
 		}
@@ -554,10 +579,14 @@ nouveau_drm_device_init(struct drm_device *dev)
 	nvif_parent_ctor(&nouveau_parent, &drm->parent);
 	drm->master.base.object.parent = &drm->parent;
 
-	ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
+	ret = nouveau_sched_init(drm);
 	if (ret)
 		goto fail_alloc;
 
+	ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
+	if (ret)
+		goto fail_sched;
+
 	ret = nouveau_cli_init(drm, "DRM", &drm->client);
 	if (ret)
 		goto fail_master;
@@ -614,7 +643,6 @@ nouveau_drm_device_init(struct drm_device *dev)
 	}
 
 	return 0;
-
 fail_dispinit:
 	nouveau_display_destroy(dev);
 fail_dispctor:
@@ -627,6 +655,8 @@ fail_ttm:
 	nouveau_cli_fini(&drm->client);
 fail_master:
 	nouveau_cli_fini(&drm->master);
+fail_sched:
+	nouveau_sched_fini(drm);
 fail_alloc:
 	nvif_parent_dtor(&drm->parent);
 	kfree(drm);
@@ -678,6 +708,8 @@ nouveau_drm_device_fini(struct drm_device *dev)
 	}
 	mutex_unlock(&drm->clients_lock);
 
+	nouveau_sched_fini(drm);
+
 	nouveau_cli_fini(&drm->client);
 	nouveau_cli_fini(&drm->master);
 	nvif_parent_dtor(&drm->parent);
@@ -1179,6 +1211,9 @@ nouveau_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_VM_INIT, nouveau_uvmm_ioctl_vm_init, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_VM_BIND, nouveau_uvmm_ioctl_vm_bind, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_EXEC, nouveau_exec_ioctl_exec, DRM_RENDER_ALLOW),
 };
 
 long
@@ -1226,6 +1261,8 @@ nouveau_driver_fops = {
 static struct drm_driver
 driver_stub = {
 	.driver_features = DRIVER_GEM |
+			   DRIVER_SYNCOBJ | DRIVER_SYNCOBJ_TIMELINE |
+			   DRIVER_GEM_GPUVA |
 			   DRIVER_MODESET |
 			   DRIVER_RENDER,
 	.open = nouveau_drm_open,
@@ -1240,10 +1277,7 @@ driver_stub = {
 	.num_ioctls = ARRAY_SIZE(nouveau_ioctls),
 	.fops = &nouveau_driver_fops,
 
-	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_import_sg_table = nouveau_gem_prime_import_sg_table,
-	.gem_prime_mmap = drm_gem_prime_mmap,
 
 	.dumb_create = nouveau_display_dumb_create,
 	.dumb_map_offset = drm_gem_ttm_dumb_map_offset,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b5de312a523f..1fe17ff95f5e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -10,8 +10,8 @@
 #define DRIVER_DATE		"20120801"
 
 #define DRIVER_MAJOR		1
-#define DRIVER_MINOR		3
-#define DRIVER_PATCHLEVEL	1
+#define DRIVER_MINOR		4
+#define DRIVER_PATCHLEVEL	0
 
 /*
  * 1.1.1:
@@ -63,7 +63,9 @@ struct platform_device;
 
 #include "nouveau_fence.h"
 #include "nouveau_bios.h"
+#include "nouveau_sched.h"
 #include "nouveau_vmm.h"
+#include "nouveau_uvmm.h"
 
 struct nouveau_drm_tile {
 	struct nouveau_fence *fence;
@@ -91,6 +93,10 @@ struct nouveau_cli {
 	struct nvif_mmu mmu;
 	struct nouveau_vmm vmm;
 	struct nouveau_vmm svm;
+	struct nouveau_uvmm uvmm;
+
+	struct nouveau_sched_entity sched_entity;
+
 	const struct nvif_mclass *mem;
 
 	struct list_head head;
@@ -112,6 +118,59 @@ struct nouveau_cli_work {
 	struct dma_fence_cb cb;
 };
 
+static inline struct nouveau_uvmm *
+nouveau_cli_uvmm(struct nouveau_cli *cli)
+{
+	/* NULL unless a client is given and its uvmm's embedded vmm has a client set. */
+	if (cli && cli->uvmm.vmm.cli)
+		return &cli->uvmm;
+	return NULL;
+}
+
+static inline struct nouveau_uvmm *
+nouveau_cli_uvmm_locked(struct nouveau_cli *cli)
+{
+	struct nouveau_uvmm *uvmm;
+	/* Same lookup as nouveau_cli_uvmm(), performed while holding cli->mutex. */
+	mutex_lock(&cli->mutex);
+	uvmm = nouveau_cli_uvmm(cli);
+	mutex_unlock(&cli->mutex);
+
+	return uvmm;
+}
+
+static inline struct nouveau_vmm *
+nouveau_cli_vmm(struct nouveau_cli *cli)
+{
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+
+	/* A uvmm, when one is reported, wins over both the svm and the plain vmm. */
+	if (uvmm)
+		return &uvmm->vmm;
+
+	/* Next choice is the svm, provided it has a client set. */
+	if (cli->svm.cli)
+		return &cli->svm;
+	return &cli->vmm;
+}
+
+static inline void
+__nouveau_cli_disable_uvmm_noinit(struct nouveau_cli *cli)
+{
+	/* Leave the flag untouched once the client already reports a uvmm. */
+	if (nouveau_cli_uvmm(cli))
+		return;
+	cli->uvmm.disabled = true;
+}
+
+static inline void
+nouveau_cli_disable_uvmm_noinit(struct nouveau_cli *cli)
+{
+	mutex_lock(&cli->mutex); /* the __ variant takes no lock of its own */
+	__nouveau_cli_disable_uvmm_noinit(cli);
+	mutex_unlock(&cli->mutex);
+}
+
 void nouveau_cli_work_queue(struct nouveau_cli *, struct dma_fence *,
 			    struct nouveau_cli_work *);
 
@@ -121,6 +180,32 @@ nouveau_cli(struct drm_file *fpriv)
 	return fpriv ? fpriv->driver_priv : NULL;
 }
 
+static inline void
+u_free(void *addr)
+{
+	kvfree(addr); /* buffers handed out by u_memcpya() are kv-allocated */
+}
+
+static inline void *
+u_memcpya(uint64_t user, unsigned int nmemb, unsigned int size)
+{
+	void __user *userptr = (void __force __user *)(uintptr_t)user;
+	size_t bytes;
+	void *mem;
+
+	/* Copy @nmemb * @size bytes from @user; ERR_PTR(-ENOMEM/-EFAULT) on failure. */
+	/* kvmalloc_array() rejects a wrapping product; NOWARN as sizes are user-given. */
+	mem = kvmalloc_array(nmemb, size, GFP_KERNEL | __GFP_NOWARN);
+	if (!mem)
+		return ERR_PTR(-ENOMEM);
+	bytes = (size_t)nmemb * size; /* safe: accepted by kvmalloc_array() above */
+	if (copy_from_user(mem, userptr, bytes)) {
+		u_free(mem);
+		return ERR_PTR(-EFAULT);
+	}
+	return mem;
+}
+
 #include <nvif/object.h>
 #include <nvif/parent.h>
 
@@ -222,6 +307,10 @@ struct nouveau_drm {
 		struct mutex lock;
 		bool component_registered;
 	} audio;
+
+	struct drm_gpu_scheduler sched;
+	struct workqueue_struct *sched_wq;
+
 };
 
 static inline struct nouveau_drm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index 70c1ad6c4d9d..bcba1a14cfab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -143,8 +143,7 @@ enum nouveau_dp_status {
 int nouveau_dp_detect(struct nouveau_connector *, struct nouveau_encoder *);
 bool nouveau_dp_link_check(struct nouveau_connector *);
 void nouveau_dp_irq(struct work_struct *);
-enum drm_mode_status nv50_dp_mode_valid(struct drm_connector *,
-					struct nouveau_encoder *,
+enum drm_mode_status nv50_dp_mode_valid(struct nouveau_encoder *,
 					const struct drm_display_mode *,
 					unsigned *clock);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
new file mode 100644
index 000000000000..a90c4cd8cbb2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: MIT
+
+#include <drm/drm_exec.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_gem.h"
+#include "nouveau_mem.h"
+#include "nouveau_dma.h"
+#include "nouveau_exec.h"
+#include "nouveau_abi16.h"
+#include "nouveau_chan.h"
+#include "nouveau_sched.h"
+#include "nouveau_uvmm.h"
+
+/**
+ * DOC: Overview
+ *
+ * Nouveau's VM_BIND / EXEC UAPI consists of three ioctls: DRM_NOUVEAU_VM_INIT,
+ * DRM_NOUVEAU_VM_BIND and DRM_NOUVEAU_EXEC.
+ *
+ * In order to use the UAPI firstly a user client must initialize the VA space
+ * using the DRM_NOUVEAU_VM_INIT ioctl specifying which region of the VA space
+ * should be managed by the kernel and which by the UMD.
+ *
+ * The DRM_NOUVEAU_VM_BIND ioctl provides clients an interface to manage the
+ * userspace-manageable portion of the VA space. It provides operations to map
+ * and unmap memory. Mappings may be flagged as sparse. Sparse mappings are not
+ * backed by a GEM object and the kernel will ignore GEM handles provided
+ * alongside a sparse mapping.
+ *
+ * Userspace may request memory backed mappings either within or outside of the
+ * bounds (but not crossing those bounds) of a previously mapped sparse
+ * mapping. Subsequently requested memory backed mappings within a sparse
+ * mapping will take precedence over the corresponding range of the sparse
+ * mapping. If such memory backed mappings are unmapped the kernel will make
+ * sure that the corresponding sparse mapping will take their place again.
+ * Requests to unmap a sparse mapping that still contains memory backed mappings
+ * will result in those memory backed mappings being unmapped first.
+ *
+ * Unmap requests are not bound to the range of existing mappings and can even
+ * overlap the bounds of sparse mappings. For such a request the kernel will
+ * make sure to unmap all memory backed mappings within the given range,
+ * splitting up memory backed mappings which are only partially contained
+ * within the given range. Unmap requests with the sparse flag set must match
+ * the range of a previously mapped sparse mapping exactly though.
+ *
+ * While the kernel generally permits arbitrary sequences and ranges of memory
+ * backed mappings being mapped and unmapped, either within a single or multiple
+ * VM_BIND ioctl calls, there are some restrictions for sparse mappings.
+ *
+ * The kernel does not permit to:
+ *   - unmap non-existent sparse mappings
+ *   - unmap a sparse mapping and map a new sparse mapping overlapping the range
+ *     of the previously unmapped sparse mapping within the same VM_BIND ioctl
+ *   - unmap a sparse mapping and map new memory backed mappings overlapping the
+ *     range of the previously unmapped sparse mapping within the same VM_BIND
+ *     ioctl
+ *
+ * When using the VM_BIND ioctl to request the kernel to map memory to a given
+ * virtual address in the GPU's VA space there is no guarantee that the actual
+ * mappings are created in the GPU's MMU. If the given memory is swapped out
+ * at the time the bind operation is executed the kernel will stash the mapping
+ * details into its internal allocator and create the actual MMU mappings once
+ * the memory is swapped back in. While this is transparent for userspace, it is
+ * guaranteed that all the backing memory is swapped back in and all the memory
+ * mappings, as requested by userspace previously, are actually mapped once the
+ * DRM_NOUVEAU_EXEC ioctl is called to submit an exec job.
+ *
+ * A VM_BIND job can be executed either synchronously or asynchronously. If
+ * executed asynchronously, userspace may provide a list of syncobjs this job
+ * will wait for and/or a list of syncobj the kernel will signal once the
+ * VM_BIND job finished execution. If executed synchronously the ioctl will
+ * block until the bind job is finished. For synchronous jobs the kernel will
+ * not permit any syncobjs submitted to the kernel.
+ *
+ * To execute a push buffer the UAPI provides the DRM_NOUVEAU_EXEC ioctl. EXEC
+ * jobs are always executed asynchronously, and, equal to VM_BIND jobs, provide
+ * the option to synchronize them with syncobjs.
+ *
+ * Besides that, EXEC jobs can be scheduled for a specified channel to execute on.
+ *
+ * Since VM_BIND jobs update the GPU's VA space on job submit, EXEC jobs do have
+ * an up to date view of the VA space. However, the actual mappings might still
+ * be pending. Hence, EXEC jobs require to have the particular fences - of
+ * the corresponding VM_BIND jobs they depend on - attached to them.
+ */
+
+static int
+nouveau_exec_job_submit(struct nouveau_job *job)
+{
+	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+	struct nouveau_cli *cli = job->cli;
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+	struct drm_exec *exec = &job->exec;
+	struct drm_gem_object *obj;
+	unsigned long index;
+	int ret;
+	/* Allocate the job fence, then lock and validate the GEM object behind each uvmm VA. */
+	ret = nouveau_fence_new(&exec_job->fence);
+	if (ret)
+		return ret;
+
+	nouveau_uvmm_lock(uvmm);
+	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+			    DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_until_all_locked(exec) {
+		struct drm_gpuva *va;
+
+		drm_gpuva_for_each_va(va, &uvmm->umgr) {
+			if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
+				continue;
+
+			ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
+			drm_exec_retry_on_contention(exec);
+			if (ret)
+				goto err_uvmm_unlock;
+		}
+	}
+	nouveau_uvmm_unlock(uvmm);
+	/* Validate the nouveau_bo of every object locked above. */
+	drm_exec_for_each_locked_object(exec, index, obj) {
+		struct nouveau_bo *nvbo = nouveau_gem_object(obj);
+
+		ret = nouveau_bo_validate(nvbo, true, false);
+		if (ret)
+			goto err_exec_fini;
+	}
+	/* Success: exec is not finalized here; armed_submit calls drm_exec_fini() later. */
+	return 0;
+
+err_uvmm_unlock:
+	nouveau_uvmm_unlock(uvmm);
+err_exec_fini:
+	drm_exec_fini(exec);
+	return ret;
+
+}
+
+static void
+nouveau_exec_job_armed_submit(struct nouveau_job *job)
+{
+	struct drm_exec *exec = &job->exec;
+	struct drm_gem_object *obj;
+	unsigned long index;
+	/* Add job->done_fence to each locked object's resv, then finalize the drm_exec. */
+	drm_exec_for_each_locked_object(exec, index, obj)
+		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
+
+	drm_exec_fini(exec);
+}
+
+static struct dma_fence *
+nouveau_exec_job_run(struct nouveau_job *job)
+{
+	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+	struct nouveau_channel *chan = exec_job->chan;
+	struct nouveau_fence *fence = exec_job->fence;
+	int i, ret;
+	/* Queue every push entry on the job's channel, then emit the job fence; ERR_PTR on error. */
+	ret = nouveau_dma_wait(chan, exec_job->push.count + 1, 16);
+	if (ret) {
+		NV_PRINTK(err, job->cli, "nv50cal_space: %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	for (i = 0; i < exec_job->push.count; i++) {
+		struct drm_nouveau_exec_push *p = &exec_job->push.s[i];
+		bool no_prefetch = p->flags & DRM_NOUVEAU_EXEC_PUSH_NO_PREFETCH;
+
+		nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
+	}
+
+	ret = nouveau_fence_emit(fence, chan);
+	if (ret) {
+		NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
+		WIND_RING(chan);
+		return ERR_PTR(ret);
+	}
+	/* The fence goes to the caller; clear it so nouveau_exec_job_free() leaves it alone. */
+	exec_job->fence = NULL;
+
+	return &fence->base;
+}
+
+static void
+nouveau_exec_job_free(struct nouveau_job *job)
+{
+	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+	/* Free the base job state, then the fence (if still held), push array and job. */
+	nouveau_job_free(job);
+
+	nouveau_fence_unref(&exec_job->fence);
+	kfree(exec_job->push.s);
+	kfree(exec_job);
+}
+
+static enum drm_gpu_sched_stat
+nouveau_exec_job_timeout(struct nouveau_job *job)
+{
+	struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+	struct nouveau_channel *chan = exec_job->chan;
+	/* On timeout: kill the channel if not yet flagged killed, then finalize the job's entity. */
+	if (unlikely(!atomic_read(&chan->killed)))
+		nouveau_channel_kill(chan);
+
+	NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
+		  chan->chid);
+
+	nouveau_sched_entity_fini(job->entity);
+
+	return DRM_GPU_SCHED_STAT_ENODEV;
+}
+
+static struct nouveau_job_ops nouveau_exec_job_ops = { /* installed by nouveau_exec_job_init() */
+	.submit = nouveau_exec_job_submit,
+	.armed_submit = nouveau_exec_job_armed_submit,
+	.run = nouveau_exec_job_run,
+	.free = nouveau_exec_job_free,
+	.timeout = nouveau_exec_job_timeout,
+};
+
+int
+nouveau_exec_job_init(struct nouveau_exec_job **pjob,
+		      struct nouveau_exec_job_args *__args)
+{
+	struct nouveau_exec_job *job;
+	struct nouveau_job_args args = {};
+	int i, ret;
+	/* Create an EXEC job from @__args in *@pjob; returns 0 or a negative errno. */
+	for (i = 0; i < __args->push.count; i++) {
+		struct drm_nouveau_exec_push *p = &__args->push.s[i];
+
+		if (unlikely(p->va_len > NV50_DMA_PUSH_MAX_LENGTH)) {
+			NV_PRINTK(err, nouveau_cli(__args->file_priv),
+				  "pushbuf size exceeds limit: 0x%x max 0x%x\n",
+				  p->va_len, NV50_DMA_PUSH_MAX_LENGTH);
+			return -EINVAL;
+		}
+	}
+
+	job = *pjob = kzalloc(sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+	/* The job keeps its own copy of the push array rather than borrowing @__args'. */
+	job->push.count = __args->push.count;
+	if (__args->push.count) {
+		job->push.s = kmemdup(__args->push.s,
+				      sizeof(*__args->push.s) *
+				      __args->push.count,
+				      GFP_KERNEL);
+		if (!job->push.s) {
+			ret = -ENOMEM;
+			goto err_free_job;
+		}
+	}
+
+	job->chan = __args->chan;
+
+	args.sched_entity = __args->sched_entity;
+	args.file_priv = __args->file_priv;
+
+	args.in_sync.count = __args->in_sync.count;
+	args.in_sync.s = __args->in_sync.s;
+
+	args.out_sync.count = __args->out_sync.count;
+	args.out_sync.s = __args->out_sync.s;
+
+	args.ops = &nouveau_exec_job_ops;
+	args.resv_usage = DMA_RESV_USAGE_WRITE;
+
+	ret = nouveau_job_init(&job->base, &args);
+	if (ret)
+		goto err_free_pushs;
+
+	return 0;
+
+err_free_pushs:
+	kfree(job->push.s);
+err_free_job:
+	kfree(job);
+	*pjob = NULL;
+
+	return ret;
+}
+
+static int
+nouveau_exec(struct nouveau_exec_job_args *args)
+{
+	struct nouveau_exec_job *job;
+	int ret;
+	/* Build an EXEC job from @args and submit it; a failed submit goes to nouveau_job_fini(). */
+	ret = nouveau_exec_job_init(&job, args);
+	if (ret)
+		return ret;
+
+	ret = nouveau_job_submit(&job->base);
+	if (ret)
+		goto err_job_fini;
+
+	return 0;
+
+err_job_fini:
+	nouveau_job_fini(&job->base);
+	return ret;
+}
+
+static int
+nouveau_exec_ucopy(struct nouveau_exec_job_args *args,
+		   struct drm_nouveau_exec *req)
+{
+	struct drm_nouveau_sync **s;
+	u32 inc = req->wait_count;
+	u64 ins = req->wait_ptr;
+	u32 outc = req->sig_count;
+	u64 outs = req->sig_ptr;
+	u32 pushc = req->push_count;
+	u64 pushs = req->push_ptr;
+	int ret;
+	/* Copy the push, wait and signal arrays of @req into @args; returns 0 or -errno. */
+	if (pushc) {
+		args->push.count = pushc;
+		args->push.s = u_memcpya(pushs, pushc, sizeof(*args->push.s));
+		if (IS_ERR(args->push.s))
+			return PTR_ERR(args->push.s);
+	}
+
+	if (inc) {
+		s = &args->in_sync.s;
+
+		args->in_sync.count = inc;
+		*s = u_memcpya(ins, inc, sizeof(**s));
+		if (IS_ERR(*s)) {
+			ret = PTR_ERR(*s);
+			goto err_free_pushs;
+		}
+	}
+
+	if (outc) {
+		s = &args->out_sync.s;
+
+		args->out_sync.count = outc;
+		*s = u_memcpya(outs, outc, sizeof(**s));
+		if (IS_ERR(*s)) {
+			ret = PTR_ERR(*s);
+			goto err_free_ins;
+		}
+	}
+
+	return 0;
+	/* Unwind in reverse copy order; the ERR_PTR left by the failing copy is never freed. */
+err_free_ins:
+	u_free(args->in_sync.s);
+err_free_pushs:
+	u_free(args->push.s);
+	return ret;
+}
+
+static void
+nouveau_exec_ufree(struct nouveau_exec_job_args *args)
+{
+	u_free(args->push.s); /* frees the three arrays filled in by nouveau_exec_ucopy() */
+	u_free(args->in_sync.s);
+	u_free(args->out_sync.s);
+}
+
+int
+nouveau_exec_ioctl_exec(struct drm_device *dev,
+			void *data,
+			struct drm_file *file_priv)
+{
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct nouveau_abi16_chan *chan16;
+	struct nouveau_channel *chan = NULL;
+	struct nouveau_exec_job_args args = {};
+	struct drm_nouveau_exec *req = data;
+	int ret = 0;
+	/* EXEC ioctl handler: find the channel named in @data and submit an EXEC job to it. */
+	if (unlikely(!abi16))
+		return -ENOMEM;
+
+	/* abi16 locks already */
+	if (unlikely(!nouveau_cli_uvmm(cli)))
+		return nouveau_abi16_put(abi16, -ENOSYS);
+
+	list_for_each_entry(chan16, &abi16->channels, head) {
+		if (chan16->chan->chid == req->channel) {
+			chan = chan16->chan;
+			break;
+		}
+	}
+
+	if (!chan)
+		return nouveau_abi16_put(abi16, -ENOENT);
+
+	if (unlikely(atomic_read(&chan->killed)))
+		return nouveau_abi16_put(abi16, -ENODEV);
+
+	if (!chan->dma.ib_max)
+		return nouveau_abi16_put(abi16, -ENOSYS);
+
+	if (unlikely(req->push_count > NOUVEAU_GEM_MAX_PUSH)) {
+		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
+			 req->push_count, NOUVEAU_GEM_MAX_PUSH);
+		return nouveau_abi16_put(abi16, -EINVAL);
+	}
+
+	ret = nouveau_exec_ucopy(&args, req);
+	if (ret)
+		goto out;
+	/* chan16 still points at the list entry that matched in the loop above. */
+	args.sched_entity = &chan16->sched_entity;
+	args.file_priv = file_priv;
+	args.chan = chan;
+
+	ret = nouveau_exec(&args);
+	if (ret)
+		goto out_free_args;
+	/* Success lands here too: the user-array copies are freed in both cases. */
+out_free_args:
+	nouveau_exec_ufree(&args);
+out:
+	return nouveau_abi16_put(abi16, ret);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h
new file mode 100644
index 000000000000..778cacd90f65
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __NOUVEAU_EXEC_H__
+#define __NOUVEAU_EXEC_H__
+
+#include <drm/drm_exec.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_sched.h"
+
+struct nouveau_exec_job_args { /* parameters consumed by nouveau_exec_job_init() */
+	struct drm_file *file_priv;
+	struct nouveau_sched_entity *sched_entity;
+
+	struct drm_exec exec;
+	struct nouveau_channel *chan; /* becomes the job's channel */
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} in_sync; /* filled from the request's wait_ptr / wait_count */
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} out_sync; /* filled from the request's sig_ptr / sig_count */
+
+	struct {
+		struct drm_nouveau_exec_push *s;
+		u32 count;
+	} push; /* filled from the request's push_ptr / push_count */
+};
+
+struct nouveau_exec_job { /* an EXEC job; embeds the generic nouveau_job as @base */
+	struct nouveau_job base;
+	struct nouveau_fence *fence; /* allocated at submit; NULL once handed out by run */
+	struct nouveau_channel *chan;
+
+	struct {
+		struct drm_nouveau_exec_push *s;
+		u32 count;
+	} push; /* the job's own kmemdup()ed copy of the push array */
+};
+/* Get the enclosing nouveau_exec_job from a pointer to its embedded base job. */
+#define to_nouveau_exec_job(job)		\
+		container_of((job), struct nouveau_exec_job, base)
+
+int nouveau_exec_job_init(struct nouveau_exec_job **job,
+			  struct nouveau_exec_job_args *args);
+
+int nouveau_exec_ioctl_exec(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ee5e9d40c166..77c739a55b19 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -96,6 +96,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
 		if (nouveau_fence_signal(fence))
 			nvif_event_block(&fctx->event);
 	}
+	fctx->killed = 1;
 	spin_unlock_irqrestore(&fctx->lock, flags);
 }
 
@@ -210,6 +211,9 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
 	int ret;
 
+	if (unlikely(!chan->fence))
+		return -ENODEV;
+
 	fence->channel  = chan;
 	fence->timeout  = jiffies + (15 * HZ);
 
@@ -226,6 +230,12 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 		dma_fence_get(&fence->base);
 		spin_lock_irq(&fctx->lock);
 
+		if (unlikely(fctx->killed)) {
+			spin_unlock_irq(&fctx->lock);
+			dma_fence_put(&fence->base);
+			return -ENODEV;
+		}
+
 		if (nouveau_fence_update(chan, fctx))
 			nvif_event_block(&fctx->event);
 
@@ -396,25 +406,16 @@ nouveau_fence_unref(struct nouveau_fence **pfence)
 }
 
 int
-nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
-		  struct nouveau_fence **pfence)
+nouveau_fence_new(struct nouveau_fence **pfence)
 {
 	struct nouveau_fence *fence;
-	int ret = 0;
-
-	if (unlikely(!chan->fence))
-		return -ENODEV;
 
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 	if (!fence)
 		return -ENOMEM;
 
-	ret = nouveau_fence_emit(fence, chan);
-	if (ret)
-		nouveau_fence_unref(&fence);
-
 	*pfence = fence;
-	return ret;
+	return 0;
 }
 
 static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 0ca2bc85adf6..2c72d96ef17d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -17,8 +17,7 @@ struct nouveau_fence {
 	unsigned long timeout;
 };
 
-int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
-		       struct nouveau_fence **);
+int  nouveau_fence_new(struct nouveau_fence **);
 void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
@@ -45,7 +44,7 @@ struct nouveau_fence_chan {
 	char name[32];
 
 	struct nvif_event event;
-	int notify_ref, dead;
+	int notify_ref, dead, killed;
 };
 
 struct nouveau_fence_priv {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index ab9062e50977..c0b10d8d3d03 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -103,13 +103,17 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
 	struct nouveau_bo *nvbo = nouveau_gem_object(gem);
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct device *dev = drm->dev->dev;
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+	struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
 	struct nouveau_vma *vma;
 	int ret;
 
 	if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
 		return 0;
 
+	if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv)
+		return -EPERM;
+
 	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
 	if (ret)
 		return ret;
@@ -120,7 +124,11 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
 		goto out;
 	}
 
-	ret = nouveau_vma_new(nvbo, vmm, &vma);
+	/* only create a VMA on binding */
+	if (!nouveau_cli_uvmm(cli))
+		ret = nouveau_vma_new(nvbo, vmm, &vma);
+	else
+		ret = 0;
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
 out:
@@ -180,13 +188,16 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv)
 	struct nouveau_bo *nvbo = nouveau_gem_object(gem);
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	struct device *dev = drm->dev->dev;
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : & cli->vmm;
+	struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
 	struct nouveau_vma *vma;
 	int ret;
 
 	if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
 		return;
 
+	if (nouveau_cli_uvmm(cli))
+		return;
+
 	ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
 	if (ret)
 		return;
@@ -209,6 +220,7 @@ const struct drm_gem_object_funcs nouveau_gem_object_funcs = {
 	.free = nouveau_gem_object_del,
 	.open = nouveau_gem_object_open,
 	.close = nouveau_gem_object_close,
+	.export = nouveau_gem_prime_export,
 	.pin = nouveau_gem_prime_pin,
 	.unpin = nouveau_gem_prime_unpin,
 	.get_sg_table = nouveau_gem_prime_get_sg_table,
@@ -224,18 +236,28 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		struct nouveau_bo **pnvbo)
 {
 	struct nouveau_drm *drm = cli->drm;
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
+	struct dma_resv *resv = NULL;
 	struct nouveau_bo *nvbo;
 	int ret;
 
+	if (domain & NOUVEAU_GEM_DOMAIN_NO_SHARE) {
+		if (unlikely(!uvmm))
+			return -EINVAL;
+
+		resv = &uvmm->resv;
+	}
+
 	if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART)))
 		domain |= NOUVEAU_GEM_DOMAIN_CPU;
 
 	nvbo = nouveau_bo_alloc(cli, &size, &align, domain, tile_mode,
-				tile_flags);
+				tile_flags, false);
 	if (IS_ERR(nvbo))
 		return PTR_ERR(nvbo);
 
 	nvbo->bo.base.funcs = &nouveau_gem_object_funcs;
+	nvbo->no_share = domain & NOUVEAU_GEM_DOMAIN_NO_SHARE;
 
 	/* Initialize the embedded gem-object. We return a single gem-reference
 	 * to the caller, instead of a normal nouveau_bo ttm reference. */
@@ -246,7 +268,14 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		return ret;
 	}
 
-	ret = nouveau_bo_init(nvbo, size, align, domain, NULL, NULL);
+	if (resv)
+		dma_resv_lock(resv, NULL);
+
+	ret = nouveau_bo_init(nvbo, size, align, domain, NULL, resv);
+
+	if (resv)
+		dma_resv_unlock(resv);
+
 	if (ret)
 		return ret;
 
@@ -269,7 +298,7 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem,
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_bo *nvbo = nouveau_gem_object(gem);
-	struct nouveau_vmm *vmm = cli->svm.cli ? &cli->svm : &cli->vmm;
+	struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
 	struct nouveau_vma *vma;
 
 	if (is_power_of_2(nvbo->valid_domains))
@@ -279,13 +308,15 @@ nouveau_gem_info(struct drm_file *file_priv, struct drm_gem_object *gem,
 	else
 		rep->domain = NOUVEAU_GEM_DOMAIN_VRAM;
 	rep->offset = nvbo->offset;
-	if (vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50) {
+	if (vmm->vmm.object.oclass >= NVIF_CLASS_VMM_NV50 &&
+	    !nouveau_cli_uvmm(cli)) {
 		vma = nouveau_vma_find(nvbo, vmm);
 		if (!vma)
 			return -EINVAL;
 
 		rep->offset = vma->addr;
-	}
+	} else
+		rep->offset = 0;
 
 	rep->size = nvbo->bo.base.size;
 	rep->map_handle = drm_vma_node_offset_addr(&nvbo->bo.base.vma_node);
@@ -310,6 +341,11 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 	struct nouveau_bo *nvbo = NULL;
 	int ret = 0;
 
+	/* If uvmm wasn't initialized until now disable it completely to prevent
+	 * userspace from mixing up UAPIs.
+	 */
+	nouveau_cli_disable_uvmm_noinit(cli);
+
 	ret = nouveau_gem_new(cli, req->info.size, req->align,
 			      req->info.domain, req->info.tile_mode,
 			      req->info.tile_flags, &nvbo);
@@ -613,32 +649,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 	return 0;
 }
 
-static inline void
-u_free(void *addr)
-{
-	kvfree(addr);
-}
-
-static inline void *
-u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
-{
-	void *mem;
-	void __user *userptr = (void __force __user *)(uintptr_t)user;
-
-	size *= nmemb;
-
-	mem = kvmalloc(size, GFP_KERNEL);
-	if (!mem)
-		return ERR_PTR(-ENOMEM);
-
-	if (copy_from_user(mem, userptr, size)) {
-		u_free(mem);
-		return ERR_PTR(-EFAULT);
-	}
-
-	return mem;
-}
-
 static int
 nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				struct drm_nouveau_gem_pushbuf *req,
@@ -747,6 +757,9 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	if (unlikely(!abi16))
 		return -ENOMEM;
 
+	if (unlikely(nouveau_cli_uvmm(cli)))
+		return -ENOSYS;
+
 	list_for_each_entry(temp, &abi16->channels, head) {
 		if (temp->chan->chid == req->channel) {
 			chan = temp->chan;
@@ -843,9 +856,11 @@ revalidate:
 		for (i = 0; i < req->nr_push; i++) {
 			struct nouveau_vma *vma = (void *)(unsigned long)
 				bo[push[i].bo_index].user_priv;
+			u64 addr = vma->addr + push[i].offset;
+			u32 length = push[i].length & ~NOUVEAU_GEM_PUSHBUF_NO_PREFETCH;
+			bool no_prefetch = push[i].length & NOUVEAU_GEM_PUSHBUF_NO_PREFETCH;
 
-			nv50_dma_push(chan, vma->addr + push[i].offset,
-				      push[i].length);
+			nv50_dma_push(chan, addr, length, no_prefetch);
 		}
 	} else
 	if (drm->client.device.info.chipset >= 0x25) {
@@ -899,8 +914,11 @@ revalidate:
 		}
 	}
 
-	ret = nouveau_fence_new(chan, false, &fence);
+	ret = nouveau_fence_new(&fence);
+	if (!ret)
+		ret = nouveau_fence_emit(fence, chan);
 	if (ret) {
+		nouveau_fence_unref(&fence);
 		NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
 		WIND_RING(chan);
 		goto out;
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h
index 3b919c7c931c..10814d446435 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.h
@@ -37,5 +37,6 @@ extern void nouveau_gem_prime_unpin(struct drm_gem_object *);
 extern struct sg_table *nouveau_gem_prime_get_sg_table(struct drm_gem_object *);
 extern struct drm_gem_object *nouveau_gem_prime_import_sg_table(
 	struct drm_device *, struct dma_buf_attachment *, struct sg_table *);
-
+struct dma_buf *nouveau_gem_prime_export(struct drm_gem_object *gobj,
+					 int flags);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.h b/drivers/gpu/drm/nouveau/nouveau_mem.h
index 76c86d8bb01e..5365a3d3a17f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.h
@@ -35,4 +35,9 @@ int nouveau_mem_vram(struct ttm_resource *, bool contig, u8 page);
 int nouveau_mem_host(struct ttm_resource *, struct ttm_tt *);
 void nouveau_mem_fini(struct nouveau_mem *);
 int nouveau_mem_map(struct nouveau_mem *, struct nvif_vmm *, struct nvif_vma *);
+int
+nouveau_mem_map_fixed(struct nouveau_mem *mem,
+		      struct nvif_vmm *vmm,
+		      u8 kind, u64 addr,
+		      u64 offset, u64 range);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index f42c2b1b0363..1b2ff0c40fc1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -50,7 +50,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 
 	dma_resv_lock(robj, NULL);
 	nvbo = nouveau_bo_alloc(&drm->client, &size, &align,
-				NOUVEAU_GEM_DOMAIN_GART, 0, 0);
+				NOUVEAU_GEM_DOMAIN_GART, 0, 0, true);
 	if (IS_ERR(nvbo)) {
 		obj = ERR_CAST(nvbo);
 		goto unlock;
@@ -102,3 +102,14 @@ void nouveau_gem_prime_unpin(struct drm_gem_object *obj)
 
 	nouveau_bo_unpin(nvbo);
 }
+
+struct dma_buf *nouveau_gem_prime_export(struct drm_gem_object *gobj,
+					 int flags)
+{
+	struct nouveau_bo *nvbo = nouveau_gem_object(gobj);
+
+	if (nvbo->no_share)
+		return ERR_PTR(-EPERM);
+
+	return drm_gem_prime_export(gobj, flags);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
new file mode 100644
index 000000000000..88217185e0f3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: MIT
+
+#include <linux/slab.h>
+#include <drm/gpu_scheduler.h>
+#include <drm/drm_syncobj.h>
+
+#include "nouveau_drv.h"
+#include "nouveau_gem.h"
+#include "nouveau_mem.h"
+#include "nouveau_dma.h"
+#include "nouveau_exec.h"
+#include "nouveau_abi16.h"
+#include "nouveau_sched.h"
+
+/* FIXME
+ *
+ * We want to make sure that jobs currently executing can't be deferred by
+ * other jobs competing for the hardware. Otherwise we might end up with job
+ * timeouts just because of too many clients submitting too many jobs. We don't
+ * want jobs to time out because of system load, but because of the job being
+ * too bulky.
+ *
+ * For now allow for up to 16 concurrent jobs in flight until we know how many
+ * rings the hardware can process in parallel.
+ */
+#define NOUVEAU_SCHED_HW_SUBMISSIONS		16
+#define NOUVEAU_SCHED_JOB_TIMEOUT_MS		10000
+
+/*
+ * Fill @job from @args: copy the in/out sync arrays, allocate the out-sync
+ * slots and init the drm_sched_job; frees its own allocations on failure.
+ */
+int
+nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args)
+{
+	struct nouveau_sched_entity *entity = args->sched_entity;
+	int ret;
+
+	job->file_priv = args->file_priv;
+	job->cli = nouveau_cli(args->file_priv);
+	job->entity = entity;
+	job->sync = args->sync;
+	job->resv_usage = args->resv_usage;
+	job->ops = args->ops;
+
+	job->in_sync.count = args->in_sync.count;
+	if (job->in_sync.count) {
+		if (job->sync) /* synchronous jobs take no in-syncs */
+			return -EINVAL;
+		/* array_size() saturates, so an overflowing count fails here */
+		job->in_sync.data = kmemdup(args->in_sync.s,
+					    array_size(args->in_sync.count,
+						       sizeof(*args->in_sync.s)),
+					    GFP_KERNEL);
+		if (!job->in_sync.data)
+			return -ENOMEM;
+	}
+
+	job->out_sync.count = args->out_sync.count;
+	if (job->out_sync.count) {
+		if (job->sync) {
+			ret = -EINVAL;
+			goto err_free_in_sync;
+		}
+
+		job->out_sync.data = kmemdup(args->out_sync.s,
+					     array_size(args->out_sync.count,
+							sizeof(*args->out_sync.s)),
+					     GFP_KERNEL);
+		if (!job->out_sync.data) {
+			ret = -ENOMEM;
+			goto err_free_in_sync;
+		}
+
+		job->out_sync.objs = kcalloc(job->out_sync.count,
+					     sizeof(*job->out_sync.objs),
+					     GFP_KERNEL);
+		if (!job->out_sync.objs) {
+			ret = -ENOMEM;
+			goto err_free_out_sync;
+		}
+
+		job->out_sync.chains = kcalloc(job->out_sync.count,
+					       sizeof(*job->out_sync.chains),
+					       GFP_KERNEL);
+		if (!job->out_sync.chains) {
+			ret = -ENOMEM;
+			goto err_free_objs;
+		}
+	}
+
+	ret = drm_sched_job_init(&job->base, &entity->base, NULL);
+	if (ret)
+		goto err_free_chains;
+
+	job->state = NOUVEAU_JOB_INITIALIZED;
+
+	return 0;
+
+err_free_chains:
+	kfree(job->out_sync.chains);
+err_free_objs:
+	kfree(job->out_sync.objs);
+err_free_out_sync:
+	kfree(job->out_sync.data);
+err_free_in_sync:
+	kfree(job->in_sync.data);
+	return ret;
+}
+
+void
+nouveau_job_free(struct nouveau_job *job)
+{
+	kfree(job->in_sync.data);
+	kfree(job->out_sync.data);
+	kfree(job->out_sync.objs);
+	kfree(job->out_sync.chains);
+}
+
+void nouveau_job_fini(struct nouveau_job *job)
+{
+	dma_fence_put(job->done_fence);
+	drm_sched_job_cleanup(&job->base);
+	job->ops->free(job);
+}
+
+/*
+ * Resolve @sync to a dma_fence in @fence. Timeline syncobjs are looked up at
+ * sync->timeline_value, plain syncobjs at point 0; other types: -EOPNOTSUPP.
+ */
+static int
+sync_find_fence(struct nouveau_job *job, struct drm_nouveau_sync *sync,
+		struct dma_fence **fence)
+{
+	u64 point;
+
+	switch (sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK) {
+	case DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ:
+		point = sync->timeline_value;
+		break;
+	case DRM_NOUVEAU_SYNC_SYNCOBJ:
+		point = 0;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return drm_syncobj_find_fence(job->file_priv, sync->handle, point,
+				      0 /* flags */, fence);
+}
+
+static int
+nouveau_job_add_deps(struct nouveau_job *job)
+{
+	struct dma_fence *in_fence = NULL;
+	int ret, i;
+
+	for (i = 0; i < job->in_sync.count; i++) {
+		struct drm_nouveau_sync *sync = &job->in_sync.data[i];
+
+		ret = sync_find_fence(job, sync, &in_fence);
+		if (ret) {
+			NV_PRINTK(warn, job->cli,
+				  "Failed to find syncobj (-> in): handle=%d\n",
+				  sync->handle);
+			return ret;
+		}
+
+		ret = drm_sched_job_add_dependency(&job->base, in_fence);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static void
+nouveau_job_fence_attach_cleanup(struct nouveau_job *job)
+{
+	int i;
+
+	for (i = 0; i < job->out_sync.count; i++) {
+		struct drm_syncobj *obj = job->out_sync.objs[i];
+		struct dma_fence_chain *chain = job->out_sync.chains[i];
+
+		if (obj)
+			drm_syncobj_put(obj);
+
+		if (chain)
+			dma_fence_chain_free(chain);
+	}
+}
+
+static int
+nouveau_job_fence_attach_prepare(struct nouveau_job *job)
+{
+	int i, ret;
+
+	for (i = 0; i < job->out_sync.count; i++) {
+		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
+		struct drm_syncobj **pobj = &job->out_sync.objs[i];
+		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
+		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
+
+		if (stype != DRM_NOUVEAU_SYNC_SYNCOBJ &&
+		    stype != DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
+			ret = -EINVAL;
+			goto err_sync_cleanup;
+		}
+
+		*pobj = drm_syncobj_find(job->file_priv, sync->handle);
+		if (!*pobj) {
+			NV_PRINTK(warn, job->cli,
+				  "Failed to find syncobj (-> out): handle=%d\n",
+				  sync->handle);
+			ret = -ENOENT;
+			goto err_sync_cleanup;
+		}
+
+		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
+			*pchain = dma_fence_chain_alloc();
+			if (!*pchain) {
+				ret = -ENOMEM;
+				goto err_sync_cleanup;
+			}
+		}
+	}
+
+	return 0;
+
+err_sync_cleanup:
+	nouveau_job_fence_attach_cleanup(job);
+	return ret;
+}
+
+static void
+nouveau_job_fence_attach(struct nouveau_job *job)
+{
+	struct dma_fence *fence = job->done_fence;
+	int i;
+
+	for (i = 0; i < job->out_sync.count; i++) {
+		struct drm_nouveau_sync *sync = &job->out_sync.data[i];
+		struct drm_syncobj **pobj = &job->out_sync.objs[i];
+		struct dma_fence_chain **pchain = &job->out_sync.chains[i];
+		u32 stype = sync->flags & DRM_NOUVEAU_SYNC_TYPE_MASK;
+
+		if (stype == DRM_NOUVEAU_SYNC_TIMELINE_SYNCOBJ) {
+			drm_syncobj_add_point(*pobj, *pchain, fence,
+					      sync->timeline_value);
+		} else {
+			drm_syncobj_replace_fence(*pobj, fence);
+		}
+
+		drm_syncobj_put(*pobj);
+		*pobj = NULL;
+		*pchain = NULL;
+	}
+}
+
+int
+nouveau_job_submit(struct nouveau_job *job)
+{
+	struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
+	struct dma_fence *done_fence = NULL;
+	int ret;
+
+	ret = nouveau_job_add_deps(job);
+	if (ret)
+		goto err;
+
+	ret = nouveau_job_fence_attach_prepare(job);
+	if (ret)
+		goto err;
+
+	/* Make sure the job appears on the sched_entity's queue in the same
+	 * order as it was submitted.
+	 */
+	mutex_lock(&entity->mutex);
+
+	/* Guarantee we won't fail after the submit() callback returned
+	 * successfully.
+	 */
+	if (job->ops->submit) {
+		ret = job->ops->submit(job);
+		if (ret)
+			goto err_cleanup;
+	}
+
+	drm_sched_job_arm(&job->base);
+	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
+	if (job->sync)
+		done_fence = dma_fence_get(job->done_fence);
+
+	/* If a sched job depends on a dma-fence from a job from the same GPU
+	 * scheduler instance, but a different scheduler entity, the GPU
+	 * scheduler does only wait for the particular job to be scheduled,
+	 * rather than for the job to fully complete. This is due to the GPU
+	 * scheduler assuming that there is a scheduler instance per ring.
+	 * However, the current implementation, in order to avoid arbitrary
+	 * amounts of kthreads, has a single scheduler instance while scheduler
+	 * entities represent rings.
+	 *
+	 * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
+	 * out-fences in order to force the scheduler to wait for full job
+	 * completion for dependent jobs from different entities and same
+	 * scheduler instance.
+	 *
+	 * There is some work in progress [1] to address the issues of firmware
+	 * schedulers; once it is in-tree the scheduler topology in Nouveau
+	 * should be re-worked accordingly.
+	 *
+	 * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
+	 */
+	set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
+
+	if (job->ops->armed_submit)
+		job->ops->armed_submit(job);
+
+	nouveau_job_fence_attach(job);
+
+	/* Set job state before pushing the job to the scheduler,
+	 * such that we do not overwrite the job state set in run().
+	 */
+	job->state = NOUVEAU_JOB_SUBMIT_SUCCESS;
+
+	drm_sched_entity_push_job(&job->base);
+
+	mutex_unlock(&entity->mutex);
+
+	if (done_fence) {
+		dma_fence_wait(done_fence, true);
+		dma_fence_put(done_fence);
+	}
+
+	return 0;
+
+err_cleanup:
+	mutex_unlock(&entity->mutex);
+	nouveau_job_fence_attach_cleanup(job);
+err:
+	job->state = NOUVEAU_JOB_SUBMIT_FAILED;
+	return ret;
+}
+
+bool
+nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
+			   struct work_struct *work)
+{
+	return queue_work(entity->sched_wq, work);
+}
+
+static struct dma_fence *
+nouveau_job_run(struct nouveau_job *job)
+{
+	struct dma_fence *fence;
+
+	fence = job->ops->run(job);
+	if (IS_ERR(fence))
+		job->state = NOUVEAU_JOB_RUN_FAILED;
+	else
+		job->state = NOUVEAU_JOB_RUN_SUCCESS;
+
+	return fence;
+}
+
+static struct dma_fence *
+nouveau_sched_run_job(struct drm_sched_job *sched_job)
+{
+	struct nouveau_job *job = to_nouveau_job(sched_job);
+
+	return nouveau_job_run(job);
+}
+
+static enum drm_gpu_sched_stat
+nouveau_sched_timedout_job(struct drm_sched_job *sched_job)
+{
+	struct nouveau_job *job = to_nouveau_job(sched_job);
+
+	NV_PRINTK(warn, job->cli, "Job timed out.\n");
+
+	if (job->ops->timeout)
+		return job->ops->timeout(job);
+
+	return DRM_GPU_SCHED_STAT_ENODEV;
+}
+
+static void
+nouveau_sched_free_job(struct drm_sched_job *sched_job)
+{
+	struct nouveau_job *job = to_nouveau_job(sched_job);
+
+	nouveau_job_fini(job);
+}
+
+int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
+			      struct drm_gpu_scheduler *sched,
+			      struct workqueue_struct *sched_wq)
+{
+	mutex_init(&entity->mutex);
+	spin_lock_init(&entity->job.list.lock);
+	INIT_LIST_HEAD(&entity->job.list.head);
+	init_waitqueue_head(&entity->job.wq);
+
+	entity->sched_wq = sched_wq;
+	return drm_sched_entity_init(&entity->base,
+				     DRM_SCHED_PRIORITY_NORMAL,
+				     &sched, 1, NULL);
+}
+
+void
+nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
+{
+	drm_sched_entity_destroy(&entity->base);
+}
+
+static const struct drm_sched_backend_ops nouveau_sched_ops = {
+	.run_job = nouveau_sched_run_job,
+	.timedout_job = nouveau_sched_timedout_job,
+	.free_job = nouveau_sched_free_job,
+};
+
+int nouveau_sched_init(struct nouveau_drm *drm)
+{
+	struct drm_gpu_scheduler *sched = &drm->sched;
+	long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
+
+	drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
+	if (!drm->sched_wq)
+		return -ENOMEM;
+
+	return drm_sched_init(sched, &nouveau_sched_ops,
+			      NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
+			      NULL, NULL, "nouveau_sched", drm->dev->dev);
+}
+
+void nouveau_sched_fini(struct nouveau_drm *drm)
+{
+	destroy_workqueue(drm->sched_wq);
+	drm_sched_fini(&drm->sched);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
new file mode 100644
index 000000000000..27ac19792597
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef NOUVEAU_SCHED_H
+#define NOUVEAU_SCHED_H
+
+#include <linux/types.h>
+
+#include <drm/drm_exec.h>
+#include <drm/gpu_scheduler.h>
+
+#include "nouveau_drv.h"
+
+#define to_nouveau_job(sched_job)		\
+		container_of((sched_job), struct nouveau_job, base)
+
+struct nouveau_job_ops;
+
+enum nouveau_job_state {
+	NOUVEAU_JOB_UNINITIALIZED = 0,
+	NOUVEAU_JOB_INITIALIZED,
+	NOUVEAU_JOB_SUBMIT_SUCCESS,
+	NOUVEAU_JOB_SUBMIT_FAILED,
+	NOUVEAU_JOB_RUN_SUCCESS,
+	NOUVEAU_JOB_RUN_FAILED,
+};
+
+struct nouveau_job_args {
+	struct drm_file *file_priv;
+	struct nouveau_sched_entity *sched_entity;
+
+	enum dma_resv_usage resv_usage;
+	bool sync;
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} in_sync;
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} out_sync;
+
+	struct nouveau_job_ops *ops;
+};
+
+struct nouveau_job {
+	struct drm_sched_job base;
+
+	enum nouveau_job_state state;
+
+	struct nouveau_sched_entity *entity;
+
+	struct drm_file *file_priv;
+	struct nouveau_cli *cli;
+
+	struct drm_exec exec;
+	enum dma_resv_usage resv_usage;
+	struct dma_fence *done_fence;
+
+	bool sync;
+
+	struct {
+		struct drm_nouveau_sync *data;
+		u32 count;
+	} in_sync;
+
+	struct {
+		struct drm_nouveau_sync *data;
+		struct drm_syncobj **objs;
+		struct dma_fence_chain **chains;
+		u32 count;
+	} out_sync;
+
+	struct nouveau_job_ops {
+		/* If .submit() returns without any error, it is guaranteed that
+		 * armed_submit() is called.
+		 */
+		int (*submit)(struct nouveau_job *);
+		void (*armed_submit)(struct nouveau_job *);
+		struct dma_fence *(*run)(struct nouveau_job *);
+		void (*free)(struct nouveau_job *);
+		enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *);
+	} *ops;
+};
+
+int nouveau_job_ucopy_syncs(struct nouveau_job_args *args,
+			    u32 inc, u64 ins,
+			    u32 outc, u64 outs);
+
+int nouveau_job_init(struct nouveau_job *job,
+		     struct nouveau_job_args *args);
+void nouveau_job_free(struct nouveau_job *job);
+
+int nouveau_job_submit(struct nouveau_job *job);
+void nouveau_job_fini(struct nouveau_job *job);
+
+#define to_nouveau_sched_entity(entity)		\
+		container_of((entity), struct nouveau_sched_entity, base)
+
+struct nouveau_sched_entity {
+	struct drm_sched_entity base;
+	struct mutex mutex;
+
+	struct workqueue_struct *sched_wq;
+
+	struct {
+		struct {
+			struct list_head head;
+			spinlock_t lock;
+		} list;
+		struct wait_queue_head wq;
+	} job;
+};
+
+int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
+			      struct drm_gpu_scheduler *sched,
+			      struct workqueue_struct *sched_wq);
+void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity);
+
+bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
+				struct work_struct *work);
+
+int nouveau_sched_init(struct nouveau_drm *drm);
+void nouveau_sched_fini(struct nouveau_drm *drm);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index a74ba8d84ba7..186351ecf72f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -350,7 +350,7 @@ nouveau_svmm_init(struct drm_device *dev, void *data,
 	 * VMM instead of the standard one.
 	 */
 	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
-			    cli->vmm.vmm.object.oclass, true,
+			    cli->vmm.vmm.object.oclass, MANAGED,
 			    args->unmanaged_addr, args->unmanaged_size,
 			    &(struct gp100_vmm_v0) {
 				.fault_replay = true,
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
new file mode 100644
index 000000000000..aae780e4a4aa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -0,0 +1,1917 @@
+// SPDX-License-Identifier: MIT
+
+/*
+ * Locking:
+ *
+ * The uvmm mutex protects any operations on the GPU VA space provided by the
+ * DRM GPU VA manager.
+ *
+ * The GEM's dma_resv lock protects the GEM's GPUVA list, hence link/unlink of
+ * a mapping to its backing GEM must be performed under this lock.
+ *
+ * Actual map/unmap operations within the fence signalling critical path are
+ * protected by installing DMA fences to the corresponding GEMs' DMA
+ * reservations, such that BO moves, which themselves walk the GEM's GPUVA
+ * list in order to map/unmap its entries, can't occur concurrently.
+ *
+ * Accessing the DRM_GPUVA_INVALIDATED flag doesn't need any separate
+ * protection, since there are no accesses other than from BO move callbacks
+ * and from the fence signalling critical path, which are already protected by
+ * the corresponding GEMs DMA reservation fence.
+ */
+
+#include "nouveau_drv.h"
+#include "nouveau_gem.h"
+#include "nouveau_mem.h"
+#include "nouveau_uvmm.h"
+
+#include <nvif/vmm.h>
+#include <nvif/mem.h>
+
+#include <nvif/class.h>
+#include <nvif/if000c.h>
+#include <nvif/if900d.h>
+
+#define NOUVEAU_VA_SPACE_BITS		47 /* FIXME */
+#define NOUVEAU_VA_SPACE_START		0x0
+#define NOUVEAU_VA_SPACE_END		(1ULL << NOUVEAU_VA_SPACE_BITS)
+
+#define list_last_op(_ops) list_last_entry(_ops, struct bind_job_op, entry)
+#define list_prev_op(_op) list_prev_entry(_op, entry)
+#define list_for_each_op(_op, _ops) list_for_each_entry(_op, _ops, entry)
+#define list_for_each_op_from_reverse(_op, _ops) \
+	list_for_each_entry_from_reverse(_op, _ops, entry)
+#define list_for_each_op_safe(_op, _n, _ops) list_for_each_entry_safe(_op, _n, _ops, entry)
+
+enum vm_bind_op {
+	OP_MAP = DRM_NOUVEAU_VM_BIND_OP_MAP,
+	OP_UNMAP = DRM_NOUVEAU_VM_BIND_OP_UNMAP,
+	OP_MAP_SPARSE,
+	OP_UNMAP_SPARSE,
+};
+
+struct nouveau_uvma_prealloc {
+	struct nouveau_uvma *map;
+	struct nouveau_uvma *prev;
+	struct nouveau_uvma *next;
+};
+
+struct bind_job_op {
+	struct list_head entry;
+
+	enum vm_bind_op op;
+	u32 flags;
+
+	struct {
+		u64 addr;
+		u64 range;
+	} va;
+
+	struct {
+		u32 handle;
+		u64 offset;
+		struct drm_gem_object *obj;
+	} gem;
+
+	struct nouveau_uvma_region *reg;
+	struct nouveau_uvma_prealloc new;
+	struct drm_gpuva_ops *ops;
+};
+
+struct uvmm_map_args {
+	struct nouveau_uvma_region *region;
+	u64 addr;
+	u64 range;
+	u8 kind;
+};
+
+static int
+nouveau_uvmm_vmm_sparse_ref(struct nouveau_uvmm *uvmm,
+			    u64 addr, u64 range)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+
+	return nvif_vmm_raw_sparse(vmm, addr, range, true);
+}
+
+static int
+nouveau_uvmm_vmm_sparse_unref(struct nouveau_uvmm *uvmm,
+			      u64 addr, u64 range)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+
+	return nvif_vmm_raw_sparse(vmm, addr, range, false);
+}
+
+static int
+nouveau_uvmm_vmm_get(struct nouveau_uvmm *uvmm,
+		     u64 addr, u64 range)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+
+	return nvif_vmm_raw_get(vmm, addr, range, PAGE_SHIFT);
+}
+
+static int
+nouveau_uvmm_vmm_put(struct nouveau_uvmm *uvmm,
+		     u64 addr, u64 range)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+
+	return nvif_vmm_raw_put(vmm, addr, range, PAGE_SHIFT);
+}
+
+static int
+nouveau_uvmm_vmm_unmap(struct nouveau_uvmm *uvmm,
+		       u64 addr, u64 range, bool sparse)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+
+	return nvif_vmm_raw_unmap(vmm, addr, range, PAGE_SHIFT, sparse);
+}
+
+static int
+nouveau_uvmm_vmm_map(struct nouveau_uvmm *uvmm,
+		     u64 addr, u64 range,
+		     u64 bo_offset, u8 kind,
+		     struct nouveau_mem *mem)
+{
+	struct nvif_vmm *vmm = &uvmm->vmm.vmm;
+	union {
+		struct gf100_vmm_map_v0 gf100;
+	} args;
+	u32 argc = 0;
+
+	switch (vmm->object.oclass) {
+	case NVIF_CLASS_VMM_GF100:
+	case NVIF_CLASS_VMM_GM200:
+	case NVIF_CLASS_VMM_GP100:
+		args.gf100.version = 0;
+		if (mem->mem.type & NVIF_MEM_VRAM)
+			args.gf100.vol = 0;
+		else
+			args.gf100.vol = 1;
+		args.gf100.ro = 0;
+		args.gf100.priv = 0;
+		args.gf100.kind = kind;
+		argc = sizeof(args.gf100);
+		break;
+	default:
+		WARN_ON(1);
+		return -ENOSYS;
+	}
+
+	return nvif_vmm_raw_map(vmm, addr, range, PAGE_SHIFT,
+				&args, argc,
+				&mem->mem, bo_offset);
+}
+
+static int
+nouveau_uvma_region_sparse_unref(struct nouveau_uvma_region *reg)
+{
+	u64 addr = reg->va.addr;
+	u64 range = reg->va.range;
+
+	return nouveau_uvmm_vmm_sparse_unref(reg->uvmm, addr, range);
+}
+
+static int
+nouveau_uvma_vmm_put(struct nouveau_uvma *uvma)
+{
+	u64 addr = uvma->va.va.addr;
+	u64 range = uvma->va.va.range;
+
+	return nouveau_uvmm_vmm_put(to_uvmm(uvma), addr, range);
+}
+
+static int
+nouveau_uvma_map(struct nouveau_uvma *uvma,
+		 struct nouveau_mem *mem)
+{
+	u64 addr = uvma->va.va.addr;
+	u64 offset = uvma->va.gem.offset;
+	u64 range = uvma->va.va.range;
+
+	return nouveau_uvmm_vmm_map(to_uvmm(uvma), addr, range,
+				    offset, uvma->kind, mem);
+}
+
+static int
+nouveau_uvma_unmap(struct nouveau_uvma *uvma)
+{
+	u64 addr = uvma->va.va.addr;
+	u64 range = uvma->va.va.range;
+	bool sparse = !!uvma->region;
+
+	if (drm_gpuva_invalidated(&uvma->va))
+		return 0;
+
+	return nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range, sparse);
+}
+
+static int
+nouveau_uvma_alloc(struct nouveau_uvma **puvma)
+{
+	*puvma = kzalloc(sizeof(**puvma), GFP_KERNEL);
+	if (!*puvma)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void
+nouveau_uvma_free(struct nouveau_uvma *uvma)
+{
+	kfree(uvma);
+}
+
+static void
+nouveau_uvma_gem_get(struct nouveau_uvma *uvma)
+{
+	drm_gem_object_get(uvma->va.gem.obj);
+}
+
+/* Drop a reference on the GEM object backing @uvma. */
+static void
+nouveau_uvma_gem_put(struct nouveau_uvma *uvma)
+{
+	struct drm_gem_object *obj = uvma->va.gem.obj;
+
+	drm_gem_object_put(obj);
+}
+
+/*
+ * Allocate a zeroed region object with an initialized reference count and
+ * store it in @preg.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation failed (in which case
+ * *@preg is NULL).
+ */
+static int
+nouveau_uvma_region_alloc(struct nouveau_uvma_region **preg)
+{
+	struct nouveau_uvma_region *reg;
+
+	reg = kzalloc(sizeof(*reg), GFP_KERNEL);
+	*preg = reg;
+	if (!reg)
+		return -ENOMEM;
+
+	kref_init(&reg->kref);
+	return 0;
+}
+
+/* kref release callback: free the region embedding @kref. */
+static void
+nouveau_uvma_region_free(struct kref *kref)
+{
+	kfree(container_of(kref, struct nouveau_uvma_region, kref));
+}
+
+/* Take an additional reference on @reg. */
+static void
+nouveau_uvma_region_get(struct nouveau_uvma_region *reg)
+{
+	kref_get(&reg->kref);
+}
+
+/*
+ * Drop a reference on @reg; nouveau_uvma_region_free() is the release
+ * callback passed to kref_put().
+ */
+static void
+nouveau_uvma_region_put(struct nouveau_uvma_region *reg)
+{
+	kref_put(&reg->kref, nouveau_uvma_region_free);
+}
+
+/*
+ * Insert @reg into the region maple tree of @uvmm, covering the range
+ * described by reg->va.
+ *
+ * Returns 0 on success, -EEXIST if the range is (partially) occupied
+ * already, or the error returned by mas_store_gfp() if storing the entry
+ * failed. reg->uvmm is only set once the region was actually stored.
+ */
+static int
+__nouveau_uvma_region_insert(struct nouveau_uvmm *uvmm,
+			     struct nouveau_uvma_region *reg)
+{
+	u64 addr = reg->va.addr;
+	u64 range = reg->va.range;
+	u64 last = addr + range - 1;
+	MA_STATE(mas, &uvmm->region_mt, addr, addr);
+	int ret;
+
+	/* An entry found at @addr means the start is taken already. */
+	if (unlikely(mas_walk(&mas)))
+		return -EEXIST;
+
+	/* The free range found by the walk must reach up to @last. */
+	if (unlikely(mas.last < last))
+		return -EEXIST;
+
+	mas.index = addr;
+	mas.last = last;
+
+	/* Don't report success if the tree could not store the entry. */
+	ret = mas_store_gfp(&mas, reg, GFP_KERNEL);
+	if (unlikely(ret))
+		return ret;
+
+	reg->uvmm = uvmm;
+
+	return 0;
+}
+
+/*
+ * Initialize @reg with @uvmm and the given address range, then insert it
+ * into the region tree. Returns the result of
+ * __nouveau_uvma_region_insert().
+ */
+static int
+nouveau_uvma_region_insert(struct nouveau_uvmm *uvmm,
+			   struct nouveau_uvma_region *reg,
+			   u64 addr, u64 range)
+{
+	reg->uvmm = uvmm;
+	reg->va.addr = addr;
+	reg->va.range = range;
+
+	return __nouveau_uvma_region_insert(uvmm, reg);
+}
+
+/*
+ * Erase the entry found at the region's start address from the region
+ * maple tree of the uvmm the region belongs to. The region object itself
+ * is not freed here.
+ */
+static void
+nouveau_uvma_region_remove(struct nouveau_uvma_region *reg)
+{
+	struct nouveau_uvmm *uvmm = reg->uvmm;
+	MA_STATE(mas, &uvmm->region_mt, reg->va.addr, 0);
+
+	mas_erase(&mas);
+}
+
+/*
+ * Create a region covering [@addr, @addr + @range).
+ *
+ * The interval must not contain any GPU VA mappings (-ENOSPC otherwise).
+ * The region is allocated, inserted into the region tree and finally
+ * nouveau_uvmm_vmm_sparse_ref() is called for the range; each step is
+ * rolled back if a later one fails.
+ */
+static int
+nouveau_uvma_region_create(struct nouveau_uvmm *uvmm,
+			   u64 addr, u64 range)
+{
+	struct nouveau_uvma_region *reg;
+	int ret;
+
+	if (!drm_gpuva_interval_empty(&uvmm->umgr, addr, range))
+		return -ENOSPC;
+
+	ret = nouveau_uvma_region_alloc(&reg);
+	if (ret)
+		return ret;
+
+	ret = nouveau_uvma_region_insert(uvmm, reg, addr, range);
+	if (!ret) {
+		ret = nouveau_uvmm_vmm_sparse_ref(uvmm, addr, range);
+		if (!ret)
+			return 0;
+
+		/* Inserted, but taking the sparse reference failed. */
+		nouveau_uvma_region_remove(reg);
+	}
+
+	nouveau_uvma_region_put(reg);
+	return ret;
+}
+
+/*
+ * Return the result of a mas_find() on the region tree, starting at @addr
+ * and limited to the last address of the given range.
+ */
+static struct nouveau_uvma_region *
+nouveau_uvma_region_find_first(struct nouveau_uvmm *uvmm,
+			       u64 addr, u64 range)
+{
+	u64 last = addr + range - 1;
+	MA_STATE(mas, &uvmm->region_mt, addr, 0);
+
+	return mas_find(&mas, last);
+}
+
+/*
+ * Look up the region matching exactly @addr and @range. Returns NULL if
+ * no region is found or if the first region found has a different start
+ * address or size.
+ */
+static struct nouveau_uvma_region *
+nouveau_uvma_region_find(struct nouveau_uvmm *uvmm,
+			 u64 addr, u64 range)
+{
+	struct nouveau_uvma_region *reg;
+
+	reg = nouveau_uvma_region_find_first(uvmm, addr, range);
+	if (reg && reg->va.addr == addr && reg->va.range == range)
+		return reg;
+
+	return NULL;
+}
+
+/* Check whether the GPU VA interval covered by @reg holds no mappings. */
+static bool
+nouveau_uvma_region_empty(struct nouveau_uvma_region *reg)
+{
+	u64 addr = reg->va.addr;
+	u64 range = reg->va.range;
+
+	return drm_gpuva_interval_empty(&reg->uvmm->umgr, addr, range);
+}
+
+/*
+ * Tear down @reg: remove it from the region tree, call
+ * nouveau_uvmm_vmm_sparse_unref() for its range and drop a reference.
+ *
+ * Returns -EBUSY, without touching the region, if it still contains
+ * mappings.
+ */
+static int
+__nouveau_uvma_region_destroy(struct nouveau_uvma_region *reg)
+{
+	if (!nouveau_uvma_region_empty(reg))
+		return -EBUSY;
+
+	nouveau_uvma_region_remove(reg);
+	/* Read the range before dropping the reference below. */
+	nouveau_uvmm_vmm_sparse_unref(reg->uvmm, reg->va.addr, reg->va.range);
+	nouveau_uvma_region_put(reg);
+
+	return 0;
+}
+
+/*
+ * Destroy the region matching exactly @addr and @range. Returns -ENOENT
+ * if there is no such region, otherwise the result of
+ * __nouveau_uvma_region_destroy().
+ */
+static int
+nouveau_uvma_region_destroy(struct nouveau_uvmm *uvmm,
+			    u64 addr, u64 range)
+{
+	struct nouveau_uvma_region *reg =
+		nouveau_uvma_region_find(uvmm, addr, range);
+
+	return reg ? __nouveau_uvma_region_destroy(reg) : -ENOENT;
+}
+
+/*
+ * Flag @reg as dirty and (re)initialize the completion that waiters use
+ * to wait for the region; see nouveau_uvma_region_complete().
+ */
+static void
+nouveau_uvma_region_dirty(struct nouveau_uvma_region *reg)
+{
+	init_completion(&reg->complete);
+	reg->dirty = true;
+}
+
+/* Wake up everyone waiting on the completion of @reg. */
+static void
+nouveau_uvma_region_complete(struct nouveau_uvma_region *reg)
+{
+	complete_all(&reg->complete);
+}
+
+/*
+ * Undo op_map_prepare(): drop the GEM reference, remove the mapping from
+ * the GPU VA space and free it.
+ */
+static void
+op_map_prepare_unwind(struct nouveau_uvma *uvma)
+{
+	/* Must happen before the uvma is freed, it holds the GEM pointer. */
+	nouveau_uvma_gem_put(uvma);
+	drm_gpuva_remove(&uvma->va);
+	nouveau_uvma_free(uvma);
+}
+
+/* Undo op_unmap_prepare(): re-insert @va into the manager it refers to. */
+static void
+op_unmap_prepare_unwind(struct drm_gpuva *va)
+{
+	drm_gpuva_insert(va->mgr, va);
+}
+
+/*
+ * Unwind the work of nouveau_uvmm_sm_prepare() for all operations up to and
+ * including @last.
+ *
+ * First the GPU VA space changes are reverted walking the operations
+ * backwards starting at @last. Afterwards, for map requests only (@args is
+ * non-NULL), the operations are walked forward again up to @last, using the
+ * same range tracking as nouveau_uvmm_sm_prepare(), but calling
+ * nouveau_uvmm_vmm_put() where the prepare step called
+ * nouveau_uvmm_vmm_get().
+ */
+static void
+nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
+			       struct nouveau_uvma_prealloc *new,
+			       struct drm_gpuva_ops *ops,
+			       struct drm_gpuva_op *last,
+			       struct uvmm_map_args *args)
+{
+	struct drm_gpuva_op *op = last;
+	u64 vmm_get_start = args ? args->addr : 0;
+	u64 vmm_get_end = args ? args->addr + args->range : 0;
+
+	/* Unwind GPUVA space. */
+	drm_gpuva_for_each_op_from_reverse(op, ops) {
+		switch (op->op) {
+		case DRM_GPUVA_OP_MAP:
+			op_map_prepare_unwind(new->map);
+			break;
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva_op_remap *r = &op->remap;
+
+			/* Drop the new split mappings before re-inserting
+			 * the original one.
+			 */
+			if (r->next)
+				op_map_prepare_unwind(new->next);
+
+			if (r->prev)
+				op_map_prepare_unwind(new->prev);
+
+			op_unmap_prepare_unwind(r->unmap->va);
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+			op_unmap_prepare_unwind(op->unmap.va);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Unmap operations don't allocate page tables, hence skip the
+	 * following page table unwind.
+	 */
+	if (!args)
+		return;
+
+	drm_gpuva_for_each_op(op, ops) {
+		switch (op->op) {
+		case DRM_GPUVA_OP_MAP: {
+			u64 vmm_get_range = vmm_get_end - vmm_get_start;
+
+			if (vmm_get_range)
+				nouveau_uvmm_vmm_put(uvmm, vmm_get_start,
+						     vmm_get_range);
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva_op_remap *r = &op->remap;
+			struct drm_gpuva *va = r->unmap->va;
+			u64 ustart = va->va.addr;
+			u64 urange = va->va.range;
+			u64 uend = ustart + urange;
+
+			if (r->prev)
+				vmm_get_start = uend;
+
+			if (r->next)
+				vmm_get_end = ustart;
+
+			/* Split on both sides: nothing left to put. */
+			if (r->prev && r->next)
+				vmm_get_start = vmm_get_end = 0;
+
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP: {
+			struct drm_gpuva_op_unmap *u = &op->unmap;
+			struct drm_gpuva *va = u->va;
+			u64 ustart = va->va.addr;
+			u64 urange = va->va.range;
+			u64 uend = ustart + urange;
+
+			/* Nothing to do for mappings we merge with. */
+			if (uend == vmm_get_start ||
+			    ustart == vmm_get_end)
+				break;
+
+			if (ustart > vmm_get_start) {
+				u64 vmm_get_range = ustart - vmm_get_start;
+
+				nouveau_uvmm_vmm_put(uvmm, vmm_get_start,
+						     vmm_get_range);
+			}
+			vmm_get_start = uend;
+			break;
+		}
+		default:
+			break;
+		}
+
+		/* Operations after @last were never prepared. */
+		if (op == last)
+			break;
+	}
+}
+
+/*
+ * Unwind a fully prepared map request, i.e. all operations of @ops,
+ * including the page table ranges requested for [@addr, @addr + @range).
+ */
+static void
+nouveau_uvmm_sm_map_prepare_unwind(struct nouveau_uvmm *uvmm,
+				   struct nouveau_uvma_prealloc *new,
+				   struct drm_gpuva_ops *ops,
+				   u64 addr, u64 range)
+{
+	struct uvmm_map_args map_args = {
+		.addr = addr,
+		.range = range,
+	};
+
+	nouveau_uvmm_sm_prepare_unwind(uvmm, new, ops,
+				       drm_gpuva_last_op(ops), &map_args);
+}
+
+/*
+ * Unwind a fully prepared unmap request, i.e. all operations of @ops.
+ * No map arguments are passed, hence no page table unwind takes place.
+ */
+static void
+nouveau_uvmm_sm_unmap_prepare_unwind(struct nouveau_uvmm *uvmm,
+				     struct nouveau_uvma_prealloc *new,
+				     struct drm_gpuva_ops *ops)
+{
+	nouveau_uvmm_sm_prepare_unwind(uvmm, new, ops,
+				       drm_gpuva_last_op(ops), NULL);
+}
+
+/*
+ * Allocate a new mapping for the map operation @op, initialize region and
+ * kind from @args, insert it into the GPU VA space of @uvmm and take a
+ * reference on its backing GEM object.
+ *
+ * On success the new mapping is returned through @puvma. On allocation
+ * failure the error is returned and @puvma is left untouched.
+ */
+static int
+op_map_prepare(struct nouveau_uvmm *uvmm,
+	       struct nouveau_uvma **puvma,
+	       struct drm_gpuva_op_map *op,
+	       struct uvmm_map_args *args)
+{
+	struct nouveau_uvma *uvma;
+	int ret = nouveau_uvma_alloc(&uvma);
+
+	if (ret)
+		return ret;
+
+	uvma->kind = args->kind;
+	uvma->region = args->region;
+
+	drm_gpuva_map(&uvmm->umgr, &uvma->va, op);
+
+	/* This reference is held until the uvma is destroyed. */
+	nouveau_uvma_gem_get(uvma);
+
+	*puvma = uvma;
+
+	return 0;
+}
+
+/*
+ * Prepare an unmap operation by handing it to drm_gpuva_unmap();
+ * op_unmap_prepare_unwind() is the counterpart.
+ */
+static void
+op_unmap_prepare(struct drm_gpuva_op_unmap *u)
+{
+	drm_gpuva_unmap(u);
+}
+
+/*
+ * Prepare the split/merge operations @ops: apply them to the GPU VA space
+ * and, for map requests (@args is non-NULL), request the page table ranges
+ * that are not covered by existing mappings via nouveau_uvmm_vmm_get().
+ *
+ * @new receives the mappings created for map and remap operations.
+ *
+ * On failure everything done so far is reverted. The failing operation
+ * undoes its own partial work; all previous operations are handled by
+ * nouveau_uvmm_sm_prepare_unwind(), which is started at the operation
+ * preceding the failing one.
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int
+nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
+			struct nouveau_uvma_prealloc *new,
+			struct drm_gpuva_ops *ops,
+			struct uvmm_map_args *args)
+{
+	struct drm_gpuva_op *op;
+	u64 vmm_get_start = args ? args->addr : 0;
+	u64 vmm_get_end = args ? args->addr + args->range : 0;
+	int ret;
+
+	drm_gpuva_for_each_op(op, ops) {
+		switch (op->op) {
+		case DRM_GPUVA_OP_MAP: {
+			u64 vmm_get_range = vmm_get_end - vmm_get_start;
+
+			ret = op_map_prepare(uvmm, &new->map, &op->map, args);
+			if (ret)
+				goto unwind;
+
+			if (args && vmm_get_range) {
+				ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start,
+							   vmm_get_range);
+				if (ret) {
+					op_map_prepare_unwind(new->map);
+					goto unwind;
+				}
+			}
+			break;
+		}
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva_op_remap *r = &op->remap;
+			struct drm_gpuva *va = r->unmap->va;
+			struct uvmm_map_args remap_args = {
+				.kind = uvma_from_va(va)->kind,
+				.region = uvma_from_va(va)->region,
+			};
+			u64 ustart = va->va.addr;
+			u64 urange = va->va.range;
+			u64 uend = ustart + urange;
+
+			op_unmap_prepare(r->unmap);
+
+			if (r->prev) {
+				ret = op_map_prepare(uvmm, &new->prev, r->prev,
+						     &remap_args);
+				if (ret) {
+					/* The generic unwind starts at the
+					 * previous op, hence re-insert the
+					 * mapping removed above right here.
+					 */
+					op_unmap_prepare_unwind(va);
+					goto unwind;
+				}
+
+				if (args)
+					vmm_get_start = uend;
+			}
+
+			if (r->next) {
+				ret = op_map_prepare(uvmm, &new->next, r->next,
+						     &remap_args);
+				if (ret) {
+					/* Remove the new prev mapping before
+					 * re-inserting the original one it
+					 * overlaps with.
+					 */
+					if (r->prev)
+						op_map_prepare_unwind(new->prev);
+					op_unmap_prepare_unwind(va);
+					goto unwind;
+				}
+
+				if (args)
+					vmm_get_end = ustart;
+			}
+
+			/* Split on both sides: no page tables to request. */
+			if (args && (r->prev && r->next))
+				vmm_get_start = vmm_get_end = 0;
+
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP: {
+			struct drm_gpuva_op_unmap *u = &op->unmap;
+			struct drm_gpuva *va = u->va;
+			u64 ustart = va->va.addr;
+			u64 urange = va->va.range;
+			u64 uend = ustart + urange;
+
+			op_unmap_prepare(u);
+
+			if (!args)
+				break;
+
+			/* Nothing to do for mappings we merge with. */
+			if (uend == vmm_get_start ||
+			    ustart == vmm_get_end)
+				break;
+
+			if (ustart > vmm_get_start) {
+				u64 vmm_get_range = ustart - vmm_get_start;
+
+				ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start,
+							   vmm_get_range);
+				if (ret) {
+					op_unmap_prepare_unwind(va);
+					goto unwind;
+				}
+			}
+			vmm_get_start = uend;
+
+			break;
+		}
+		default:
+			ret = -EINVAL;
+			goto unwind;
+		}
+	}
+
+	return 0;
+
+unwind:
+	/* Nothing to unwind if the very first operation failed. */
+	if (op != drm_gpuva_first_op(ops))
+		nouveau_uvmm_sm_prepare_unwind(uvmm, new, ops,
+					       drm_gpuva_prev_op(op),
+					       args);
+	return ret;
+}
+
+/*
+ * Prepare the operations of a map request: bundle region, address range
+ * and kind into the map arguments and run the common prepare step.
+ */
+static int
+nouveau_uvmm_sm_map_prepare(struct nouveau_uvmm *uvmm,
+			    struct nouveau_uvma_prealloc *new,
+			    struct nouveau_uvma_region *region,
+			    struct drm_gpuva_ops *ops,
+			    u64 addr, u64 range, u8 kind)
+{
+	struct uvmm_map_args map_args = {
+		.addr = addr,
+		.range = range,
+		.kind = kind,
+		.region = region,
+	};
+
+	return nouveau_uvmm_sm_prepare(uvmm, new, ops, &map_args);
+}
+
+/*
+ * Prepare the operations of an unmap request. No map arguments are passed
+ * to the common prepare step, hence no page table ranges are requested.
+ */
+static int
+nouveau_uvmm_sm_unmap_prepare(struct nouveau_uvmm *uvmm,
+			      struct nouveau_uvma_prealloc *new,
+			      struct drm_gpuva_ops *ops)
+{
+	return nouveau_uvmm_sm_prepare(uvmm, new, ops, NULL);
+}
+
+/*
+ * Return the GEM object a GPU VA operation refers to, or NULL (with a
+ * warning) for unknown operation types.
+ */
+static struct drm_gem_object *
+op_gem_obj(struct drm_gpuva_op *op)
+{
+	struct drm_gem_object *obj = NULL;
+
+	switch (op->op) {
+	case DRM_GPUVA_OP_MAP:
+		obj = op->map.gem.obj;
+		break;
+	case DRM_GPUVA_OP_REMAP:
+		/* What we really want are the GEMs backing remap.prev and
+		 * remap.next; for a remap those are the same GEM as the one
+		 * backing the GPUVA being unmapped.
+		 */
+		obj = op->remap.unmap->va->gem.obj;
+		break;
+	case DRM_GPUVA_OP_UNMAP:
+		obj = op->unmap.va->gem.obj;
+		break;
+	default:
+		WARN(1, "Unknown operation.\n");
+		break;
+	}
+
+	return obj;
+}
+
+/*
+ * Map @uvma using the current resource of the buffer object behind its
+ * GEM object. The result of nouveau_uvma_map() is not evaluated.
+ */
+static void
+op_map(struct nouveau_uvma *uvma)
+{
+	struct nouveau_bo *nvbo = nouveau_gem_object(uvma->va.gem.obj);
+	struct nouveau_mem *mem = nouveau_mem(nvbo->bo.resource);
+
+	nouveau_uvma_map(uvma, mem);
+}
+
+/*
+ * Execute an unmap operation, unless the operation is flagged to keep the
+ * mapping in place.
+ */
+static void
+op_unmap(struct drm_gpuva_op_unmap *u)
+{
+	if (u->keep)
+		return;
+
+	/* nouveau_uvma_unmap() does not unmap if backing BO is evicted. */
+	nouveau_uvma_unmap(uvma_from_va(u->va));
+}
+
+/*
+ * Unmap [@addr, @addr + @range) on behalf of the mapping referenced by @u.
+ * Nothing is done if that mapping is flagged as invalidated.
+ */
+static void
+op_unmap_range(struct drm_gpuva_op_unmap *u,
+	       u64 addr, u64 range)
+{
+	struct nouveau_uvma *uvma = uvma_from_va(u->va);
+
+	if (drm_gpuva_invalidated(u->va))
+		return;
+
+	nouveau_uvmm_vmm_unmap(to_uvmm(uvma), addr, range,
+			       uvma->region != NULL);
+}
+
+/*
+ * Execute a remap operation: unmap the part of the original mapping that
+ * is not covered by the remaining prev and/or next mappings.
+ *
+ * The range is derived from start and end address, such that it is also
+ * correct when only a prev mapping remains. Computing it from the original
+ * range would unmap beyond the end of the original mapping in this case.
+ *
+ * @new is not used here.
+ */
+static void
+op_remap(struct drm_gpuva_op_remap *r,
+	 struct nouveau_uvma_prealloc *new)
+{
+	struct drm_gpuva_op_unmap *u = r->unmap;
+	struct nouveau_uvma *uvma = uvma_from_va(u->va);
+	u64 addr = uvma->va.va.addr;
+	u64 end = addr + uvma->va.va.range;
+
+	/* Start unmapping right behind the remaining prev mapping. */
+	if (r->prev)
+		addr = r->prev->va.addr + r->prev->va.range;
+
+	/* Stop unmapping right in front of the remaining next mapping. */
+	if (r->next)
+		end = r->next->va.addr;
+
+	op_unmap_range(u, addr, end - addr);
+}
+
+/*
+ * Execute all operations of @ops; operations of unknown type are skipped.
+ * Always returns 0.
+ */
+static int
+nouveau_uvmm_sm(struct nouveau_uvmm *uvmm,
+		struct nouveau_uvma_prealloc *new,
+		struct drm_gpuva_ops *ops)
+{
+	struct drm_gpuva_op *op;
+
+	drm_gpuva_for_each_op(op, ops) {
+		if (op->op == DRM_GPUVA_OP_MAP)
+			op_map(new->map);
+		else if (op->op == DRM_GPUVA_OP_REMAP)
+			op_remap(&op->remap, new);
+		else if (op->op == DRM_GPUVA_OP_UNMAP)
+			op_unmap(&op->unmap);
+	}
+
+	return 0;
+}
+
+/* Execute the prepared operations of a map request. */
+static int
+nouveau_uvmm_sm_map(struct nouveau_uvmm *uvmm,
+		    struct nouveau_uvma_prealloc *new,
+		    struct drm_gpuva_ops *ops)
+{
+	return nouveau_uvmm_sm(uvmm, new, ops);
+}
+
+/* Execute the prepared operations of an unmap request. */
+static int
+nouveau_uvmm_sm_unmap(struct nouveau_uvmm *uvmm,
+		      struct nouveau_uvma_prealloc *new,
+		      struct drm_gpuva_ops *ops)
+{
+	return nouveau_uvmm_sm(uvmm, new, ops);
+}
+
+/*
+ * Clean up after the operations of @ops: for every remap and unmap
+ * operation drop the GEM reference of the mapping that got unmapped and
+ * free it.
+ *
+ * If @unmap is true, nouveau_uvmm_vmm_put() is called additionally for
+ * the range that actually got unmapped; for a remap this is the original
+ * range without the parts covered by the remaining prev/next mappings.
+ */
+static void
+nouveau_uvmm_sm_cleanup(struct nouveau_uvmm *uvmm,
+			struct nouveau_uvma_prealloc *new,
+			struct drm_gpuva_ops *ops, bool unmap)
+{
+	struct drm_gpuva_op *op;
+
+	drm_gpuva_for_each_op(op, ops) {
+		switch (op->op) {
+		case DRM_GPUVA_OP_MAP:
+			break;
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva_op_remap *r = &op->remap;
+			struct drm_gpuva_op_map *p = r->prev;
+			struct drm_gpuva_op_map *n = r->next;
+			struct drm_gpuva *va = r->unmap->va;
+			struct nouveau_uvma *uvma = uvma_from_va(va);
+
+			if (unmap) {
+				u64 addr = va->va.addr;
+				u64 end = addr + va->va.range;
+
+				/* Skip the part kept by the prev mapping. */
+				if (p)
+					addr = p->va.addr + p->va.range;
+
+				/* Skip the part kept by the next mapping. */
+				if (n)
+					end = n->va.addr;
+
+				nouveau_uvmm_vmm_put(uvmm, addr, end - addr);
+			}
+
+			nouveau_uvma_gem_put(uvma);
+			nouveau_uvma_free(uvma);
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP: {
+			struct drm_gpuva_op_unmap *u = &op->unmap;
+			struct drm_gpuva *va = u->va;
+			struct nouveau_uvma *uvma = uvma_from_va(va);
+
+			if (unmap)
+				nouveau_uvma_vmm_put(uvma);
+
+			nouveau_uvma_gem_put(uvma);
+			nouveau_uvma_free(uvma);
+			break;
+		}
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Clean up after a map request; no nouveau_uvmm_vmm_put() calls are made
+ * for the replaced mappings (unmap == false).
+ */
+static void
+nouveau_uvmm_sm_map_cleanup(struct nouveau_uvmm *uvmm,
+			    struct nouveau_uvma_prealloc *new,
+			    struct drm_gpuva_ops *ops)
+{
+	nouveau_uvmm_sm_cleanup(uvmm, new, ops, false);
+}
+
+/*
+ * Clean up after an unmap request; nouveau_uvmm_vmm_put() is called for
+ * the unmapped ranges (unmap == true).
+ */
+static void
+nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm,
+			      struct nouveau_uvma_prealloc *new,
+			      struct drm_gpuva_ops *ops)
+{
+	nouveau_uvmm_sm_cleanup(uvmm, new, ops, true);
+}
+
+/*
+ * Validate a VA range: it must be page aligned, non-empty, not wrap
+ * around, lie within the nouveau VA space and not intersect with the
+ * kernel managed area of @uvmm.
+ *
+ * Returns 0 if the range is valid, -EINVAL otherwise.
+ */
+static int
+nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range)
+{
+	u64 kernel_start = uvmm->kernel_managed_addr;
+	u64 kernel_end = kernel_start + uvmm->kernel_managed_size;
+	u64 end = addr + range;
+
+	/* Address and range both need to be page aligned. */
+	if ((addr | range) & ~PAGE_MASK)
+		return -EINVAL;
+
+	/* Catches zero sized ranges as well as an overflowing end. */
+	if (end <= addr)
+		return -EINVAL;
+
+	if (addr < NOUVEAU_VA_SPACE_START || end > NOUVEAU_VA_SPACE_END)
+		return -EINVAL;
+
+	/* No overlap with the kernel managed area. */
+	if (addr < kernel_end && end > kernel_start)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Allocate a zeroed bind job with an initialized reference count and
+ * store it in @pjob.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation failed (in which case
+ * *@pjob is NULL).
+ */
+static int
+nouveau_uvmm_bind_job_alloc(struct nouveau_uvmm_bind_job **pjob)
+{
+	struct nouveau_uvmm_bind_job *job;
+
+	job = kzalloc(sizeof(*job), GFP_KERNEL);
+	*pjob = job;
+	if (!job)
+		return -ENOMEM;
+
+	kref_init(&job->kref);
+	return 0;
+}
+
+/*
+ * kref release callback: release the base job via nouveau_job_free() and
+ * free the bind job embedding @kref.
+ */
+static void
+nouveau_uvmm_bind_job_free(struct kref *kref)
+{
+	struct nouveau_uvmm_bind_job *job =
+		container_of(kref, struct nouveau_uvmm_bind_job, kref);
+
+	nouveau_job_free(&job->base);
+	kfree(job);
+}
+
+/* Take an additional reference on @job. */
+static void
+nouveau_uvmm_bind_job_get(struct nouveau_uvmm_bind_job *job)
+{
+	kref_get(&job->kref);
+}
+
+/*
+ * Drop a reference on @job; nouveau_uvmm_bind_job_free() is the release
+ * callback passed to kref_put().
+ */
+static void
+nouveau_uvmm_bind_job_put(struct nouveau_uvmm_bind_job *job)
+{
+	kref_put(&job->kref, nouveau_uvmm_bind_job_free);
+}
+
+/*
+ * Validate a single bind operation.
+ *
+ * For OP_MAP the GEM offset must be page aligned and lie within the GEM
+ * object, and the VA range must not exceed what is left of the object
+ * behind the offset. For all operations the VA range itself is checked
+ * with nouveau_uvmm_validate_range().
+ */
+static int
+bind_validate_op(struct nouveau_job *job,
+		 struct bind_job_op *op)
+{
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
+
+	if (op->op == OP_MAP) {
+		struct drm_gem_object *obj = op->gem.obj;
+
+		if ((op->gem.offset & ~PAGE_MASK) ||
+		    obj->size <= op->gem.offset ||
+		    op->va.range > (obj->size - op->gem.offset))
+			return -EINVAL;
+	}
+
+	return nouveau_uvmm_validate_range(uvmm, op->va.addr, op->va.range);
+}
+
+/*
+ * Wait for all bind jobs on the entity's job list that contain an OP_UNMAP
+ * operation overlapping [@addr, @addr + @range).
+ *
+ * The job list is walked under its spinlock. For an overlapping job a
+ * reference is taken, the lock is dropped, the job's completion is awaited
+ * and the walk is restarted from scratch. The function returns once a full
+ * walk finds no overlapping unmap operation.
+ */
+static void
+bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
+{
+	struct nouveau_uvmm_bind_job *bind_job;
+	struct nouveau_sched_entity *entity = job->entity;
+	struct bind_job_op *op;
+	u64 end = addr + range;
+
+again:
+	spin_lock(&entity->job.list.lock);
+	list_for_each_entry(bind_job, &entity->job.list.head, entry) {
+		list_for_each_op(op, &bind_job->ops) {
+			if (op->op == OP_UNMAP) {
+				u64 op_addr = op->va.addr;
+				u64 op_end = op_addr + op->va.range;
+
+				/* Do the two ranges intersect? */
+				if (!(end <= op_addr || addr >= op_end)) {
+					/* Keep the job alive while waiting
+					 * without holding the list lock.
+					 */
+					nouveau_uvmm_bind_job_get(bind_job);
+					spin_unlock(&entity->job.list.lock);
+					wait_for_completion(&bind_job->complete);
+					nouveau_uvmm_bind_job_put(bind_job);
+					goto again;
+				}
+			}
+		}
+	}
+	spin_unlock(&entity->job.list.lock);
+}
+
+/*
+ * Validate the range of a map operation against existing regions.
+ *
+ * If the first region found for the range is dirty, its completion is
+ * awaited (with a region reference held and the uvmm lock dropped) and the
+ * lookup is repeated.
+ *
+ * Returns 0 if no region is found for the range or, for non-sparse
+ * mappings, if the range is fully enclosed by the region found. Returns
+ * -ENOSPC if a sparse mapping hits an existing region or a non-sparse
+ * mapping is only partially covered by the region.
+ */
+static int
+bind_validate_map_common(struct nouveau_job *job, u64 addr, u64 range,
+			 bool sparse)
+{
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
+	struct nouveau_uvma_region *reg;
+	u64 reg_addr, reg_end;
+	u64 end = addr + range;
+
+again:
+	nouveau_uvmm_lock(uvmm);
+	reg = nouveau_uvma_region_find_first(uvmm, addr, range);
+	if (!reg) {
+		nouveau_uvmm_unlock(uvmm);
+		return 0;
+	}
+
+	/* Generally, job submits are serialized, hence only
+	 * dirty regions can be modified concurrently.
+	 */
+	if (reg->dirty) {
+		nouveau_uvma_region_get(reg);
+		nouveau_uvmm_unlock(uvmm);
+		wait_for_completion(&reg->complete);
+		nouveau_uvma_region_put(reg);
+		goto again;
+	}
+	nouveau_uvmm_unlock(uvmm);
+
+	/* A new sparse region must not hit an existing region. */
+	if (sparse)
+		return -ENOSPC;
+
+	reg_addr = reg->va.addr;
+	reg_end = reg_addr + reg->va.range;
+
+	/* Make sure the mapping is either outside of a
+	 * region or fully enclosed by a region.
+	 */
+	if (reg_addr > addr || reg_end < end)
+		return -ENOSPC;
+
+	return 0;
+}
+
+/*
+ * Validate all map operations (OP_MAP and OP_MAP_SPARSE) of a bind job
+ * against existing regions; other operations are skipped.
+ *
+ * For sparse mappings pending overlapping unmaps are awaited first.
+ * Returns the first error of bind_validate_map_common(), 0 otherwise.
+ */
+static int
+bind_validate_region(struct nouveau_job *job)
+{
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+	struct bind_job_op *op;
+	int ret;
+
+	list_for_each_op(op, &bind_job->ops) {
+		bool sparse = op->op == OP_MAP_SPARSE;
+		u64 op_addr = op->va.addr;
+		u64 op_range = op->va.range;
+
+		if (!sparse && op->op != OP_MAP)
+			continue;
+
+		if (sparse)
+			bind_validate_map_sparse(job, op_addr, op_range);
+
+		ret = bind_validate_map_common(job, op_addr, op_range, sparse);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Walk the operations of @ops and link the newly created mappings of map
+ * and remap operations, while unlinking the mappings that remap and unmap
+ * operations replace or remove.
+ */
+static void
+bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
+{
+	struct drm_gpuva_op *op;
+
+	drm_gpuva_for_each_op(op, ops) {
+		switch (op->op) {
+		case DRM_GPUVA_OP_MAP:
+			drm_gpuva_link(&new->map->va);
+			break;
+		case DRM_GPUVA_OP_REMAP: {
+			struct drm_gpuva_op_remap *r = &op->remap;
+
+			if (r->prev)
+				drm_gpuva_link(&new->prev->va);
+			if (r->next)
+				drm_gpuva_link(&new->next->va);
+			drm_gpuva_unlink(r->unmap->va);
+			break;
+		}
+		case DRM_GPUVA_OP_UNMAP:
+			drm_gpuva_unlink(op->unmap.va);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+/*
+ * Submit callback of a bind job.
+ *
+ * 1. Look up the GEM objects of all OP_MAP operations and validate every
+ *    operation.
+ * 2. Validate map operations against existing regions, waiting for dirty
+ *    regions if necessary.
+ * 3. With the uvmm lock held, apply all GPU VA space changes (regions and
+ *    split/merge operations).
+ * 4. Lock and validate the GEM objects involved and (un)link the GPUVAs.
+ * 5. Add the job to the entity's job list.
+ *
+ * If step 3 or 4 fails, the GPU VA space changes of all operations handled
+ * so far are unwound in reverse order. Per-operation state (op->ops,
+ * op->reg) that must not be cleaned up by the job's free callback is reset.
+ *
+ * Returns 0 on success, a negative error code otherwise.
+ */
+static int
+nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
+{
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+	struct nouveau_sched_entity *entity = job->entity;
+	struct drm_exec *exec = &job->exec;
+	struct bind_job_op *op;
+	int ret;
+
+	list_for_each_op(op, &bind_job->ops) {
+		if (op->op == OP_MAP) {
+			op->gem.obj = drm_gem_object_lookup(job->file_priv,
+							    op->gem.handle);
+			if (!op->gem.obj)
+				return -ENOENT;
+		}
+
+		ret = bind_validate_op(job, op);
+		if (ret)
+			return ret;
+	}
+
+	/* If a sparse region or mapping overlaps a dirty region, we need to
+	 * wait for the region to complete the unbind process. This is due to
+	 * how page table management is currently implemented. A future
+	 * implementation might change this.
+	 */
+	ret = bind_validate_region(job);
+	if (ret)
+		return ret;
+
+	/* Once we start modifying the GPU VA space we need to keep holding the
+	 * uvmm lock until we can't fail anymore. This is because the set of
+	 * GPU VA space changes must appear atomically and we need to be able
+	 * to unwind all GPU VA space changes on failure.
+	 */
+	nouveau_uvmm_lock(uvmm);
+	list_for_each_op(op, &bind_job->ops) {
+		switch (op->op) {
+		case OP_MAP_SPARSE:
+			ret = nouveau_uvma_region_create(uvmm,
+							 op->va.addr,
+							 op->va.range);
+			if (ret)
+				goto unwind_continue;
+
+			break;
+		case OP_UNMAP_SPARSE:
+			op->reg = nouveau_uvma_region_find(uvmm, op->va.addr,
+							   op->va.range);
+			if (!op->reg || op->reg->dirty) {
+				/* A dirty region is torn down by another job
+				 * already; don't let our free callback touch
+				 * it.
+				 */
+				op->reg = NULL;
+				ret = -ENOENT;
+				goto unwind_continue;
+			}
+
+			op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr,
+								op->va.addr,
+								op->va.range);
+			if (IS_ERR(op->ops)) {
+				ret = PTR_ERR(op->ops);
+				/* The region stays intact, hence it must not
+				 * be cleaned up by the free callback.
+				 */
+				op->reg = NULL;
+				goto unwind_continue;
+			}
+
+			ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new,
+							    op->ops);
+			if (ret) {
+				drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+				op->ops = NULL;
+				op->reg = NULL;
+				goto unwind_continue;
+			}
+
+			nouveau_uvma_region_dirty(op->reg);
+
+			break;
+		case OP_MAP: {
+			struct nouveau_uvma_region *reg;
+
+			reg = nouveau_uvma_region_find_first(uvmm,
+							     op->va.addr,
+							     op->va.range);
+			if (reg) {
+				u64 reg_addr = reg->va.addr;
+				u64 reg_end = reg_addr + reg->va.range;
+				u64 op_addr = op->va.addr;
+				u64 op_end = op_addr + op->va.range;
+
+				if (unlikely(reg->dirty)) {
+					ret = -EINVAL;
+					goto unwind_continue;
+				}
+
+				/* Make sure the mapping is either outside of a
+				 * region or fully enclosed by a region.
+				 */
+				if (reg_addr > op_addr || reg_end < op_end) {
+					ret = -ENOSPC;
+					goto unwind_continue;
+				}
+			}
+
+			op->ops = drm_gpuva_sm_map_ops_create(&uvmm->umgr,
+							      op->va.addr,
+							      op->va.range,
+							      op->gem.obj,
+							      op->gem.offset);
+			if (IS_ERR(op->ops)) {
+				ret = PTR_ERR(op->ops);
+				goto unwind_continue;
+			}
+
+			ret = nouveau_uvmm_sm_map_prepare(uvmm, &op->new,
+							  reg, op->ops,
+							  op->va.addr,
+							  op->va.range,
+							  op->flags & 0xff);
+			if (ret) {
+				drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+				op->ops = NULL;
+				goto unwind_continue;
+			}
+
+			break;
+		}
+		case OP_UNMAP:
+			op->ops = drm_gpuva_sm_unmap_ops_create(&uvmm->umgr,
+								op->va.addr,
+								op->va.range);
+			if (IS_ERR(op->ops)) {
+				ret = PTR_ERR(op->ops);
+				goto unwind_continue;
+			}
+
+			ret = nouveau_uvmm_sm_unmap_prepare(uvmm, &op->new,
+							    op->ops);
+			if (ret) {
+				drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+				op->ops = NULL;
+				goto unwind_continue;
+			}
+
+			break;
+		default:
+			ret = -EINVAL;
+			goto unwind_continue;
+		}
+	}
+
+	drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
+			    DRM_EXEC_IGNORE_DUPLICATES);
+	drm_exec_until_all_locked(exec) {
+		list_for_each_op(op, &bind_job->ops) {
+			struct drm_gpuva_op *va_op;
+
+			if (IS_ERR_OR_NULL(op->ops))
+				continue;
+
+			drm_gpuva_for_each_op(va_op, op->ops) {
+				struct drm_gem_object *obj = op_gem_obj(va_op);
+
+				if (unlikely(!obj))
+					continue;
+
+				ret = drm_exec_prepare_obj(exec, obj, 1);
+				drm_exec_retry_on_contention(exec);
+				if (ret) {
+					/* All ops are prepared; unwind them
+					 * all.
+					 */
+					op = list_last_op(&bind_job->ops);
+					goto unwind;
+				}
+			}
+		}
+	}
+
+	list_for_each_op(op, &bind_job->ops) {
+		struct drm_gpuva_op *va_op;
+
+		if (IS_ERR_OR_NULL(op->ops))
+			continue;
+
+		drm_gpuva_for_each_op(va_op, op->ops) {
+			struct drm_gem_object *obj = op_gem_obj(va_op);
+
+			if (unlikely(!obj))
+				continue;
+
+			/* Don't validate GEMs backing mappings we're about to
+			 * unmap, it's not worth the effort.
+			 */
+			if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP))
+				continue;
+
+			ret = nouveau_bo_validate(nouveau_gem_object(obj),
+						  true, false);
+			if (ret) {
+				op = list_last_op(&bind_job->ops);
+				goto unwind;
+			}
+		}
+	}
+
+	/* Link and unlink GPUVAs while holding the dma_resv lock.
+	 *
+	 * As long as we validate() all GEMs and add fences to all GEMs DMA
+	 * reservations backing map and remap operations we can be sure there
+	 * won't be any concurrent (in)validations during job execution, hence
+	 * we're safe to check drm_gpuva_invalidated() within the fence
+	 * signalling critical path without holding a separate lock.
+	 *
+	 * GPUVAs about to be unmapped are safe as well, since they're unlinked
+	 * already.
+	 *
+	 * GEMs from map and remap operations must be validated before linking
+	 * their corresponding mappings to prevent the actual PT update to
+	 * happen right away in validate() rather than asynchronously as
+	 * intended.
+	 *
+	 * Note that after linking and unlinking the GPUVAs in this loop this
+	 * function cannot fail anymore, hence there is no need for an unwind
+	 * path.
+	 */
+	list_for_each_op(op, &bind_job->ops) {
+		switch (op->op) {
+		case OP_UNMAP_SPARSE:
+		case OP_MAP:
+		case OP_UNMAP:
+			bind_link_gpuvas(op->ops, &op->new);
+			break;
+		default:
+			break;
+		}
+	}
+	nouveau_uvmm_unlock(uvmm);
+
+	spin_lock(&entity->job.list.lock);
+	list_add(&bind_job->entry, &entity->job.list.head);
+	spin_unlock(&entity->job.list.lock);
+
+	return 0;
+
+unwind_continue:
+	/* The failing op cleaned up after itself; start with the one before. */
+	op = list_prev_op(op);
+unwind:
+	list_for_each_op_from_reverse(op, &bind_job->ops) {
+		switch (op->op) {
+		case OP_MAP_SPARSE:
+			nouveau_uvma_region_destroy(uvmm, op->va.addr,
+						    op->va.range);
+			break;
+		case OP_UNMAP_SPARSE:
+			__nouveau_uvma_region_insert(uvmm, op->reg);
+			nouveau_uvmm_sm_unmap_prepare_unwind(uvmm, &op->new,
+							     op->ops);
+			break;
+		case OP_MAP:
+			nouveau_uvmm_sm_map_prepare_unwind(uvmm, &op->new,
+							   op->ops,
+							   op->va.addr,
+							   op->va.range);
+			break;
+		case OP_UNMAP:
+			nouveau_uvmm_sm_unmap_prepare_unwind(uvmm, &op->new,
+							     op->ops);
+			break;
+		}
+
+		/* Make sure the free callback skips this op's cleanup. */
+		drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+		op->ops = NULL;
+		op->reg = NULL;
+	}
+
+	nouveau_uvmm_unlock(uvmm);
+	drm_exec_fini(exec);
+	return ret;
+}
+
+/*
+ * Armed-submit callback of a bind job: add the job's done fence, with the
+ * job's reservation usage, to the reservation object of every GEM object
+ * locked through the job's drm_exec context and finalize that context
+ * afterwards.
+ */
+static void
+nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job)
+{
+	struct drm_exec *exec = &job->exec;
+	struct drm_gem_object *obj;
+	unsigned long index;
+
+	drm_exec_for_each_locked_object(exec, index, obj)
+		dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
+
+	drm_exec_fini(exec);
+}
+
+/*
+ * Run callback of a bind job: execute the prepared operations in order
+ * and stop at the first failure.
+ *
+ * Returns ERR_PTR() of the result, i.e. NULL on success; a failure is
+ * logged as well.
+ */
+static struct dma_fence *
+nouveau_uvmm_bind_job_run(struct nouveau_job *job)
+{
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
+	struct bind_job_op *op;
+	int ret = 0;
+
+	list_for_each_op(op, &bind_job->ops) {
+		switch (op->op) {
+		case OP_MAP:
+			ret = nouveau_uvmm_sm_map(uvmm, &op->new, op->ops);
+			break;
+		case OP_UNMAP_SPARSE:
+		case OP_UNMAP:
+			ret = nouveau_uvmm_sm_unmap(uvmm, &op->new, op->ops);
+			break;
+		default:
+			/* OP_MAP_SPARSE has nothing to execute. */
+			break;
+		}
+
+		if (ret)
+			break;
+	}
+
+	if (ret)
+		NV_PRINTK(err, job->cli, "bind job failed: %d\n", ret);
+
+	return ERR_PTR(ret);
+}
+
+/*
+ * Work function freeing a bind job.
+ *
+ * For every operation the per-operation state is cleaned up: the
+ * split/merge operations (if any), the region of sparse unmaps and the GEM
+ * object reference. Afterwards the job is removed from the entity's job
+ * list, waiters are woken up, the operations are freed and a job reference
+ * is dropped.
+ */
+static void
+nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
+{
+	struct nouveau_uvmm_bind_job *bind_job =
+		container_of(work, struct nouveau_uvmm_bind_job, work);
+	struct nouveau_job *job = &bind_job->base;
+	struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
+	struct nouveau_sched_entity *entity = job->entity;
+	struct bind_job_op *op, *next;
+
+	list_for_each_op(op, &bind_job->ops) {
+		struct drm_gem_object *obj = op->gem.obj;
+
+		/* When nouveau_uvmm_bind_job_submit() fails op->ops and op->reg
+		 * will be NULL, hence skip the cleanup.
+		 */
+		switch (op->op) {
+		case OP_MAP_SPARSE:
+			/* noop */
+			break;
+		case OP_UNMAP_SPARSE:
+			if (!IS_ERR_OR_NULL(op->ops))
+				nouveau_uvmm_sm_unmap_cleanup(uvmm, &op->new,
+							      op->ops);
+
+			if (op->reg) {
+				/* Tear down the region and wake up everyone
+				 * waiting for it before dropping a reference.
+				 */
+				nouveau_uvma_region_sparse_unref(op->reg);
+				nouveau_uvmm_lock(uvmm);
+				nouveau_uvma_region_remove(op->reg);
+				nouveau_uvmm_unlock(uvmm);
+				nouveau_uvma_region_complete(op->reg);
+				nouveau_uvma_region_put(op->reg);
+			}
+
+			break;
+		case OP_MAP:
+			if (!IS_ERR_OR_NULL(op->ops))
+				nouveau_uvmm_sm_map_cleanup(uvmm, &op->new,
+							    op->ops);
+			break;
+		case OP_UNMAP:
+			if (!IS_ERR_OR_NULL(op->ops))
+				nouveau_uvmm_sm_unmap_cleanup(uvmm, &op->new,
+							      op->ops);
+			break;
+		}
+
+		if (!IS_ERR_OR_NULL(op->ops))
+			drm_gpuva_ops_free(&uvmm->umgr, op->ops);
+
+		/* Drop the reference taken by the lookup at submit time. */
+		if (obj)
+			drm_gem_object_put(obj);
+	}
+
+	spin_lock(&entity->job.list.lock);
+	list_del(&bind_job->entry);
+	spin_unlock(&entity->job.list.lock);
+
+	complete_all(&bind_job->complete);
+	wake_up(&entity->job.wq);
+
+	/* Remove and free ops after removing the bind job from the job list to
+	 * avoid races against bind_validate_map_sparse().
+	 */
+	list_for_each_op_safe(op, next, &bind_job->ops) {
+		list_del(&op->entry);
+		kfree(op);
+	}
+
+	nouveau_uvmm_bind_job_put(bind_job);
+}
+
+/*
+ * Free callback of a bind job: queue the job's free work on the
+ * scheduler entity rather than freeing the job directly.
+ */
+static void
+nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job)
+{
+	struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+
+	nouveau_sched_entity_qwork(job->entity, &bind_job->work);
+}
+
+/* Job callbacks of bind jobs. */
+static struct nouveau_job_ops nouveau_bind_job_ops = {
+	.submit = nouveau_uvmm_bind_job_submit,
+	.armed_submit = nouveau_uvmm_bind_job_armed_submit,
+	.run = nouveau_uvmm_bind_job_run,
+	.free = nouveau_uvmm_bind_job_free_qwork,
+};
+
+/*
+ * Allocate a bind job operation and initialize it from the userspace
+ * operation @uop.
+ *
+ * Map and unmap operations with DRM_NOUVEAU_VM_BIND_SPARSE set are
+ * translated into their sparse variants; any other operation code is
+ * taken over as is.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation failed (in which case
+ * *@pop is NULL).
+ */
+static int
+bind_job_op_from_uop(struct bind_job_op **pop,
+		     struct drm_nouveau_vm_bind_op *uop)
+{
+	struct bind_job_op *op = kzalloc(sizeof(*op), GFP_KERNEL);
+	bool sparse;
+
+	*pop = op;
+	if (!op)
+		return -ENOMEM;
+
+	sparse = uop->flags & DRM_NOUVEAU_VM_BIND_SPARSE;
+
+	switch (uop->op) {
+	case OP_MAP:
+		op->op = sparse ? OP_MAP_SPARSE : OP_MAP;
+		break;
+	case OP_UNMAP:
+		op->op = sparse ? OP_UNMAP_SPARSE : OP_UNMAP;
+		break;
+	default:
+		op->op = uop->op;
+		break;
+	}
+
+	op->flags = uop->flags;
+	op->va.addr = uop->addr;
+	op->va.range = uop->range;
+	op->gem.handle = uop->handle;
+	op->gem.offset = uop->bo_offset;
+
+	return 0;
+}
+
+/* Remove all bind job operations from the list @ops and free them. */
+static void
+bind_job_ops_free(struct list_head *ops)
+{
+	struct bind_job_op *op, *next;
+
+	/* The _safe variant is needed since entries are deleted on the fly. */
+	list_for_each_op_safe(op, next, ops) {
+		list_del(&op->entry);
+		kfree(op);
+	}
+}
+
+/*
+ * Allocate and initialize a bind job from @__args.
+ *
+ * One bind job operation is created per userspace operation and the base
+ * job is initialized with the entity, file, syncs and flags given.
+ *
+ * On success the job is returned through @pjob. On failure all operations
+ * created so far and the job are freed, *@pjob is set to NULL and a
+ * negative error code is returned. If allocating the job itself fails,
+ * @pjob is left untouched.
+ */
+static int
+nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
+			   struct nouveau_uvmm_bind_job_args *__args)
+{
+	struct nouveau_uvmm_bind_job *job;
+	struct nouveau_job_args args = {
+		.sched_entity = __args->sched_entity,
+		.file_priv = __args->file_priv,
+		.in_sync.count = __args->in_sync.count,
+		.in_sync.s = __args->in_sync.s,
+		.out_sync.count = __args->out_sync.count,
+		.out_sync.s = __args->out_sync.s,
+		.sync = !(__args->flags & DRM_NOUVEAU_VM_BIND_RUN_ASYNC),
+		.ops = &nouveau_bind_job_ops,
+		.resv_usage = DMA_RESV_USAGE_BOOKKEEP,
+	};
+	struct bind_job_op *op;
+	int i, ret;
+
+	ret = nouveau_uvmm_bind_job_alloc(&job);
+	if (ret)
+		return ret;
+
+	INIT_LIST_HEAD(&job->ops);
+	INIT_LIST_HEAD(&job->entry);
+
+	for (i = 0; i < __args->op.count; i++) {
+		ret = bind_job_op_from_uop(&op, &__args->op.s[i]);
+		if (ret)
+			goto err_free;
+
+		list_add_tail(&op->entry, &job->ops);
+	}
+
+	init_completion(&job->complete);
+	INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn);
+
+	ret = nouveau_job_init(&job->base, &args);
+	if (ret)
+		goto err_free;
+
+	*pjob = job;
+	return 0;
+
+err_free:
+	bind_job_ops_free(&job->ops);
+	kfree(job);
+	*pjob = NULL;
+
+	return ret;
+}
+
+int /* VM_INIT ioctl: passes the requested kernel-managed range to nouveau_uvmm_init() */
+nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
+			   void *data,
+			   struct drm_file *file_priv)
+{
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct drm_nouveau_vm_init *init = data;
+
+	return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr,
+				 init->kernel_managed_size);
+}
+
+/* Create a bind job from @args and submit it; a job that fails to submit is
+ * handed to nouveau_job_fini() before the error is returned.
+ */
+static int
+nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args)
+{
+	struct nouveau_uvmm_bind_job *bind_job;
+	int err;
+
+	err = nouveau_uvmm_bind_job_init(&bind_job, args);
+	if (err)
+		return err;
+
+	err = nouveau_job_submit(&bind_job->base);
+	if (!err)
+		return 0;
+
+	nouveau_job_fini(&bind_job->base);
+	return err;
+}
+
+/* Copy the op, wait-sync and signal-sync arrays described by @req from
+ * userspace into @args; arrays already copied are freed again on failure.
+ */
+static int
+nouveau_uvmm_vm_bind_ucopy(struct nouveau_uvmm_bind_job_args *args,
+			   struct drm_nouveau_vm_bind *req)
+{
+	struct drm_nouveau_sync **s;
+	u32 inc = req->wait_count;
+	u64 ins = req->wait_ptr;
+	u32 outc = req->sig_count;
+	u64 outs = req->sig_ptr;
+	u32 opc = req->op_count;
+	u64 ops = req->op_ptr;
+	int ret;
+
+	args->flags = req->flags;
+
+	if (opc) {
+		args->op.count = opc;
+		args->op.s = u_memcpya(ops, opc, sizeof(*args->op.s));
+		if (IS_ERR(args->op.s))
+			return PTR_ERR(args->op.s);
+	}
+
+	if (inc) {
+		s = &args->in_sync.s;
+		args->in_sync.count = inc;
+		*s = u_memcpya(ins, inc, sizeof(**s));
+		if (IS_ERR(*s)) {
+			ret = PTR_ERR(*s);
+			goto err_free_ops;
+		}
+	}
+
+	if (outc) {
+		s = &args->out_sync.s;
+		args->out_sync.count = outc;
+		*s = u_memcpya(outs, outc, sizeof(**s));
+		if (IS_ERR(*s)) {
+			ret = PTR_ERR(*s);
+			goto err_free_ins;
+		}
+	}
+
+	return 0;
+
+err_free_ins: /* out_sync copy failed: in_sync.s is valid, out_sync.s is an ERR_PTR */
+	u_free(args->in_sync.s);
+err_free_ops: /* in_sync copy failed: only op.s may be freed, in_sync.s is an ERR_PTR */
+	u_free(args->op.s);
+	return ret;
+}
+
+static void
+nouveau_uvmm_vm_bind_ufree(struct nouveau_uvmm_bind_job_args *args)
+{ /* releases the op, in_sync and out_sync arrays held by @args */
+	u_free(args->op.s);
+	u_free(args->in_sync.s);
+	u_free(args->out_sync.s);
+}
+
+/* VM_BIND ioctl: copy the request's arrays from userspace, run them through
+ * nouveau_uvmm_vm_bind() and free the copies again whatever the result.
+ */
+int
+nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
+			   void *data,
+			   struct drm_file *file_priv)
+{
+	struct nouveau_uvmm_bind_job_args args = {};
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct drm_nouveau_vm_bind *req = data;
+	int ret;
+
+	if (unlikely(!nouveau_cli_uvmm_locked(cli)))
+		return -ENOSYS;
+
+	ret = nouveau_uvmm_vm_bind_ucopy(&args, req);
+	if (ret)
+		return ret;
+
+	args.file_priv = file_priv;
+	args.sched_entity = &cli->sched_entity;
+
+	ret = nouveau_uvmm_vm_bind(&args);
+	nouveau_uvmm_vm_bind_ufree(&args);
+
+	return ret;
+}
+
+void /* maps @mem for every GPU VA linked to @nvbo's GEM object */
+nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem)
+{
+	struct drm_gem_object *obj = &nvbo->bo.base;
+	struct drm_gpuva *va;
+
+	dma_resv_assert_held(obj->resv); /* caller must hold the object's reservation lock */
+
+	drm_gem_for_each_gpuva(va, obj) {
+		struct nouveau_uvma *uvma = uvma_from_va(va);
+
+		nouveau_uvma_map(uvma, mem);
+		drm_gpuva_invalidate(va, false);
+	}
+}
+
+void /* unmaps every GPU VA linked to @nvbo's GEM object and flags it invalidated */
+nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
+{
+	struct drm_gem_object *obj = &nvbo->bo.base;
+	struct drm_gpuva *va;
+
+	dma_resv_assert_held(obj->resv); /* caller must hold the object's reservation lock */
+
+	drm_gem_for_each_gpuva(va, obj) {
+		struct nouveau_uvma *uvma = uvma_from_va(va);
+
+		nouveau_uvma_unmap(uvma);
+		drm_gpuva_invalidate(va, true);
+	}
+}
+
+int /* sets up @uvmm for @cli with the given kernel-managed VA range; 0 or negative errno */
+nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
+		  u64 kernel_managed_addr, u64 kernel_managed_size)
+{
+	int ret;
+	u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
+
+	mutex_init(&uvmm->mutex);
+	dma_resv_init(&uvmm->resv);
+	mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
+	mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+
+	mutex_lock(&cli->mutex);
+
+	if (unlikely(cli->uvmm.disabled)) {
+		ret = -ENOSYS;
+		goto out_unlock;
+	}
+
+	if (kernel_managed_end <= kernel_managed_addr) { /* zero size or u64 wrap-around */
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (kernel_managed_end > NOUVEAU_VA_SPACE_END) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	uvmm->kernel_managed_addr = kernel_managed_addr;
+	uvmm->kernel_managed_size = kernel_managed_size;
+
+	drm_gpuva_manager_init(&uvmm->umgr, cli->name,
+			       NOUVEAU_VA_SPACE_START,
+			       NOUVEAU_VA_SPACE_END,
+			       kernel_managed_addr, kernel_managed_size,
+			       NULL);
+
+	ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
+			    cli->vmm.vmm.object.oclass, RAW,
+			    kernel_managed_addr, kernel_managed_size,
+			    NULL, 0, &cli->uvmm.vmm.vmm); /* NOTE(review): assumes uvmm == &cli->uvmm */
+	if (ret)
+		goto out_free_gpuva_mgr;
+
+	cli->uvmm.vmm.cli = cli; /* nouveau_uvmm_fini() returns early while vmm.cli is NULL */
+	mutex_unlock(&cli->mutex);
+
+	return 0;
+
+out_free_gpuva_mgr:
+	drm_gpuva_manager_destroy(&uvmm->umgr);
+out_unlock: /* NOTE(review): dma_resv_init()/mt_init_flags() above are not undone here */
+	mutex_unlock(&cli->mutex);
+	return ret;
+}
+
+/* Wait until the client's job list is empty, then release and destroy @uvmm. */
+void
+nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
+{
+	MA_STATE(mas, &uvmm->region_mt, 0, 0);
+	struct nouveau_uvma_region *reg;
+	struct nouveau_cli *cli = uvmm->vmm.cli;
+	struct nouveau_sched_entity *entity;
+	struct drm_gpuva *va, *next;
+
+	if (!cli)
+		return;
+
+	entity = &cli->sched_entity; /* formed only after the NULL check on cli */
+	rmb(); /* for list_empty to work without lock */
+	wait_event(entity->job.wq, list_empty(&entity->job.list.head));
+
+	nouveau_uvmm_lock(uvmm);
+	drm_gpuva_for_each_va_safe(va, next, &uvmm->umgr) {
+		struct nouveau_uvma *uvma = uvma_from_va(va);
+		struct drm_gem_object *obj = va->gem.obj;
+
+		if (unlikely(va == &uvmm->umgr.kernel_alloc_node))
+			continue;
+
+		drm_gpuva_remove(va);
+		dma_resv_lock(obj->resv, NULL);
+		drm_gpuva_unlink(va);
+		dma_resv_unlock(obj->resv);
+
+		nouveau_uvma_unmap(uvma);
+		nouveau_uvma_vmm_put(uvma);
+		nouveau_uvma_gem_put(uvma);
+		nouveau_uvma_free(uvma);
+	}
+
+	mas_for_each(&mas, reg, ULONG_MAX) {
+		mas_erase(&mas);
+		nouveau_uvma_region_sparse_unref(reg);
+		nouveau_uvma_region_put(reg);
+	}
+
+	WARN(!mtree_empty(&uvmm->region_mt),
+	     "nouveau_uvma_region tree not empty, potentially leaking memory.");
+	__mt_destroy(&uvmm->region_mt);
+	nouveau_uvmm_unlock(uvmm);
+
+	mutex_lock(&cli->mutex);
+	nouveau_vmm_fini(&uvmm->vmm);
+	drm_gpuva_manager_destroy(&uvmm->umgr);
+	mutex_unlock(&cli->mutex);
+
+	dma_resv_fini(&uvmm->resv);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
new file mode 100644
index 000000000000..fc7f6fd2a4e1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef __NOUVEAU_UVMM_H__
+#define __NOUVEAU_UVMM_H__
+
+#include <drm/drm_gpuva_mgr.h>
+
+#include "nouveau_drv.h"
+
+struct nouveau_uvmm {
+	struct nouveau_vmm vmm;
+	struct drm_gpuva_manager umgr;
+	struct maple_tree region_mt;
+	struct mutex mutex;
+	struct dma_resv resv;
+
+	u64 kernel_managed_addr;
+	u64 kernel_managed_size;
+
+	bool disabled;
+};
+
+struct nouveau_uvma_region {
+	struct nouveau_uvmm *uvmm;
+
+	struct {
+		u64 addr;
+		u64 range;
+	} va;
+
+	struct kref kref;
+
+	struct completion complete;
+	bool dirty;
+};
+
+struct nouveau_uvma {
+	struct drm_gpuva va;
+
+	struct nouveau_uvma_region *region;
+	u8 kind;
+};
+
+#define uvmm_from_mgr(x) container_of((x), struct nouveau_uvmm, umgr)
+#define uvma_from_va(x) container_of((x), struct nouveau_uvma, va)
+
+#define to_uvmm(x) uvmm_from_mgr((x)->va.mgr)
+
+struct nouveau_uvmm_bind_job {
+	struct nouveau_job base;
+
+	struct kref kref;
+	struct list_head entry;
+	struct work_struct work;
+	struct completion complete;
+
+	/* struct bind_job_op */
+	struct list_head ops;
+};
+
+struct nouveau_uvmm_bind_job_args {
+	struct drm_file *file_priv;
+	struct nouveau_sched_entity *sched_entity;
+
+	unsigned int flags;
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} in_sync;
+
+	struct {
+		struct drm_nouveau_sync *s;
+		u32 count;
+	} out_sync;
+
+	struct {
+		struct drm_nouveau_vm_bind_op *s;
+		u32 count;
+	} op;
+};
+
+#define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base)
+
+int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
+		      u64 kernel_managed_addr, u64 kernel_managed_size);
+void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm);
+
+void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem);
+void nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo);
+
+int nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
+
+int nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
+
+static inline void nouveau_uvmm_lock(struct nouveau_uvmm *uvmm)
+{
+	mutex_lock(&uvmm->mutex);
+}
+
+static inline void nouveau_uvmm_unlock(struct nouveau_uvmm *uvmm)
+{
+	mutex_unlock(&uvmm->mutex);
+}
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 67d6619fcd5e..a6602c012671 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -128,8 +128,8 @@ nouveau_vmm_fini(struct nouveau_vmm *vmm)
 int
 nouveau_vmm_init(struct nouveau_cli *cli, s32 oclass, struct nouveau_vmm *vmm)
 {
-	int ret = nvif_vmm_ctor(&cli->mmu, "drmVmm", oclass, false, PAGE_SIZE,
-				0, NULL, 0, &vmm->vmm);
+	int ret = nvif_vmm_ctor(&cli->mmu, "drmVmm", oclass, UNMANAGED,
+				PAGE_SIZE, 0, NULL, 0, &vmm->vmm);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvif/mmu.c b/drivers/gpu/drm/nouveau/nvif/mmu.c
index 3709cbbc19a1..c9dd3cff49a0 100644
--- a/drivers/gpu/drm/nouveau/nvif/mmu.c
+++ b/drivers/gpu/drm/nouveau/nvif/mmu.c
@@ -27,6 +27,9 @@
 void
 nvif_mmu_dtor(struct nvif_mmu *mmu)
 {
+	if (!nvif_object_constructed(&mmu->object))
+		return;
+
 	kfree(mmu->kind);
 	kfree(mmu->type);
 	kfree(mmu->heap);
diff --git a/drivers/gpu/drm/nouveau/nvif/vmm.c b/drivers/gpu/drm/nouveau/nvif/vmm.c
index 6053d6dc2184..99296f03371a 100644
--- a/drivers/gpu/drm/nouveau/nvif/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvif/vmm.c
@@ -104,6 +104,90 @@ nvif_vmm_get(struct nvif_vmm *vmm, enum nvif_vmm_get type, bool sparse,
 	return ret;
 }
 
+int /* issues a NVIF_VMM_RAW_V0_GET request on @vmm; returns nvif_object_mthd()'s result */
+nvif_vmm_raw_get(struct nvif_vmm *vmm, u64 addr, u64 size,
+		 u8 shift)
+{
+	struct nvif_vmm_raw_v0 args = {
+		.version = 0,
+		.op = NVIF_VMM_RAW_V0_GET,
+		.addr = addr,
+		.size = size,
+		.shift = shift,
+	};
+
+	return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_RAW,
+				&args, sizeof(args));
+}
+
+int /* issues a NVIF_VMM_RAW_V0_PUT request on @vmm; returns nvif_object_mthd()'s result */
+nvif_vmm_raw_put(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift)
+{
+	struct nvif_vmm_raw_v0 args = {
+		.version = 0,
+		.op = NVIF_VMM_RAW_V0_PUT,
+		.addr = addr,
+		.size = size,
+		.shift = shift,
+	};
+
+	return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_RAW,
+				&args, sizeof(args));
+}
+
+int /* issues a NVIF_VMM_RAW_V0_MAP request for @mem at @offset on @vmm */
+nvif_vmm_raw_map(struct nvif_vmm *vmm, u64 addr, u64 size, u8 shift,
+		 void *argv, u32 argc, struct nvif_mem *mem, u64 offset)
+{
+	struct nvif_vmm_raw_v0 args = {
+		.version = 0,
+		.op = NVIF_VMM_RAW_V0_MAP,
+		.addr = addr,
+		.size = size,
+		.shift = shift,
+		.memory = nvif_handle(&mem->object),
+		.offset = offset,
+		.argv = (u64)(uintptr_t)argv,
+		.argc = argc,
+	};
+
+	/* @argv is passed by address (widened to u64), with its length in argc. */
+	return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_RAW,
+				&args, sizeof(args));
+}
+
+int /* issues a NVIF_VMM_RAW_V0_UNMAP request on @vmm, forwarding @sparse */
+nvif_vmm_raw_unmap(struct nvif_vmm *vmm, u64 addr, u64 size,
+		   u8 shift, bool sparse)
+{
+	struct nvif_vmm_raw_v0 args = {
+		.version = 0,
+		.op = NVIF_VMM_RAW_V0_UNMAP,
+		.addr = addr,
+		.size = size,
+		.shift = shift,
+		.sparse = sparse,
+	};
+
+	return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_RAW,
+				&args, sizeof(args));
+}
+
+int /* issues a NVIF_VMM_RAW_V0_SPARSE request on @vmm, forwarding @ref */
+nvif_vmm_raw_sparse(struct nvif_vmm *vmm, u64 addr, u64 size, bool ref)
+{
+	struct nvif_vmm_raw_v0 args = {
+		.version = 0,
+		.op = NVIF_VMM_RAW_V0_SPARSE,
+		.addr = addr,
+		.size = size,
+		.ref = ref,
+	};
+
+	return nvif_object_mthd(&vmm->object, NVIF_VMM_V0_RAW,
+				&args, sizeof(args));
+}
+
 void
 nvif_vmm_dtor(struct nvif_vmm *vmm)
 {
@@ -112,8 +196,9 @@ nvif_vmm_dtor(struct nvif_vmm *vmm)
 }
 
 int
-nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, bool managed,
-	      u64 addr, u64 size, void *argv, u32 argc, struct nvif_vmm *vmm)
+nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass,
+	      enum nvif_vmm_type type, u64 addr, u64 size, void *argv, u32 argc,
+	      struct nvif_vmm *vmm)
 {
 	struct nvif_vmm_v0 *args;
 	u32 argn = sizeof(*args) + argc;
@@ -125,9 +210,20 @@ nvif_vmm_ctor(struct nvif_mmu *mmu, const char *name, s32 oclass, bool managed,
 	if (!(args = kmalloc(argn, GFP_KERNEL)))
 		return -ENOMEM;
 	args->version = 0;
-	args->managed = managed;
 	args->addr = addr;
 	args->size = size;
+
+	switch (type) {
+	case UNMANAGED: args->type = NVIF_VMM_V0_TYPE_UNMANAGED; break;
+	case MANAGED: args->type = NVIF_VMM_V0_TYPE_MANAGED; break;
+	case RAW: args->type = NVIF_VMM_V0_TYPE_RAW; break;
+	default:
+		/* Unknown type: free the kmalloc()ed args before bailing out. */
+		WARN_ON(1);
+		kfree(args);
+		return -EINVAL;
+	}
+
 	memcpy(args->data, argv, argc);
 
 	ret = nvif_object_ctor(&mmu->object, name ? name : "nvifVmm", 0,
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/intr.c b/drivers/gpu/drm/nouveau/nvkm/core/intr.c
index e20b7ca218c3..36a747f0039e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/intr.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/intr.c
@@ -212,8 +212,8 @@ nvkm_intr(int irq, void *arg)
 		list_for_each_entry(intr, &device->intr.intr, head) {
 			for (leaf = 0; leaf < intr->leaves; leaf++) {
 				if (intr->stat[leaf]) {
-					nvkm_warn(intr->subdev, "intr%d: %08x\n",
-						  leaf, intr->stat[leaf]);
+					nvkm_debug(intr->subdev, "intr%d: %08x\n",
+						   leaf, intr->stat[leaf]);
 					nvkm_intr_block_locked(intr, leaf, intr->stat[leaf]);
 				}
 			}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 301a5e5b5f7f..7c554c14e884 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -185,7 +185,7 @@ nvkm_object_fini(struct nvkm_object *object, bool suspend)
 
 	nvif_debug(object, "%s children...\n", action);
 	time = ktime_to_us(ktime_get());
-	list_for_each_entry(child, &object->tree, head) {
+	list_for_each_entry_reverse(child, &object->tree, head) {
 		ret = nvkm_object_fini(child, suspend);
 		if (ret && suspend)
 			goto fail_child;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
index 6648ed62daa6..315a69f7fdd1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
@@ -36,6 +36,15 @@ ga100_ce_intr(struct nvkm_inth *inth)
 }
 
 int
+ga100_ce_nonstall(struct nvkm_engine *engine)
+{
+	struct nvkm_subdev *subdev = &engine->subdev;
+	struct nvkm_device *device = subdev->device;
+
+	return nvkm_rd32(device, 0x104424 + (subdev->inst * 0x80)) & 0x00000fff; /* per-instance register, low 12 bits */
+}
+
+int
 ga100_ce_fini(struct nvkm_engine *engine, bool suspend)
 {
 	nvkm_inth_block(&engine->subdev.inth);
@@ -67,6 +76,7 @@ ga100_ce = {
 	.oneinit = ga100_ce_oneinit,
 	.init = ga100_ce_init,
 	.fini = ga100_ce_fini,
+	.nonstall = ga100_ce_nonstall,
 	.cclass = &gv100_ce_cclass,
 	.sclass = {
 		{ -1, -1, AMPERE_DMA_COPY_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
index 9f3448ad625f..461b73c7e2e0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
@@ -28,6 +28,7 @@ ga102_ce = {
 	.oneinit = ga100_ce_oneinit,
 	.init = ga100_ce_init,
 	.fini = ga100_ce_fini,
+	.nonstall = ga100_ce_nonstall,
 	.cclass = &gv100_ce_cclass,
 	.sclass = {
 		{ -1, -1, AMPERE_DMA_COPY_A },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
index c4c046916fa6..0be72c463b21 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
@@ -3,7 +3,7 @@
 #define __NVKM_CE_PRIV_H__
 #include <engine/ce.h>
 
-void gt215_ce_intr(struct nvkm_falcon *, struct nvkm_fifo_chan *);
+void gt215_ce_intr(struct nvkm_falcon *, struct nvkm_chan *);
 void gk104_ce_intr(struct nvkm_engine *);
 void gp100_ce_intr(struct nvkm_engine *);
 
@@ -12,4 +12,5 @@ extern const struct nvkm_object_func gv100_ce_cclass;
 int ga100_ce_oneinit(struct nvkm_engine *);
 int ga100_ce_init(struct nvkm_engine *);
 int ga100_ce_fini(struct nvkm_engine *, bool);
+int ga100_ce_nonstall(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
index 40c8ea43c42f..b8ac66b4a2c4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
@@ -26,6 +26,8 @@
 #include "head.h"
 #include "ior.h"
 
+#include <drm/display/drm_dp.h>
+
 #include <subdev/bios.h>
 #include <subdev/bios/init.h>
 #include <subdev/gpio.h>
@@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
 	return outp->dp.rates != 0;
 }
 
+/* XXX: This is a big fat hack, and this is just drm_dp_read_dpcd_caps()
+ * converted to work inside nvkm. This is a temporary holdover until we start
+ * passing the drm_dp_aux device through NVKM
+ */
+static int
+nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp)
+{
+	struct nvkm_i2c_aux *aux = outp->dp.aux;
+	u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
+	int ret;
+
+	ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE);
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * Prior to DP1.3 the bit represented by
+	 * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved.
+	 * If it is set DP_DPCD_REV at 0000h could be at a value less than
+	 * the true capability of the panel. The only way to check is to
+	 * then compare 0000h and 2200h.
+	 */
+	if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+	      DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT))
+		return 0;
+
+	ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext));
+	if (ret < 0)
+		return ret;
+
+	if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
+		OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n",
+			 outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]);
+		return 0; /* keep the base caps already stored in outp->dp.dpcd */
+	}
+
+	if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)))
+		return 0; /* extended caps equal the base caps: nothing to copy */
+
+	memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)); /* replace base caps with the extended ones */
+
+	return 0;
+}
+
 void
 nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 {
@@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 			memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr));
 		}
 
-		if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) {
+		if (!nvkm_dp_read_dpcd_caps(outp)) {
 			const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 };
 			const u8 *rate;
 			int rate_max;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
index a4853c4e5ee3..67ef889a0c5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g94.c
@@ -295,6 +295,7 @@ g94_sor = {
 	.clock = nv50_sor_clock,
 	.war_2 = g94_sor_war_2,
 	.war_3 = g94_sor_war_3,
+	.hdmi = &g84_sor_hdmi,
 	.dp = &g94_sor_dp,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
index a2c7c6f83dcd..506ffbe7b842 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
@@ -125,7 +125,7 @@ gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 siz
 	pack_hdmi_infoframe(&avi, data, size);
 
 	nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
-	if (size)
+	if (!size)
 		return;
 
 	nvkm_wr32(device, 0x61c528 + soff, avi.header);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
index dad942be6679..46b057fe1412 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
@@ -81,20 +81,29 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_
 		return -ENOSYS;
 
 	list_for_each_entry(outp, &conn->disp->outps, head) {
-		if (outp->info.connector == conn->index && outp->dp.aux) {
-			if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_I2C_PLUG;
-			if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
-			if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ   ) bits |= NVKM_I2C_IRQ;
+		if (outp->info.connector == conn->index)
+			break;
+	}
 
-			return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
-					       nvkm_uconn_uevent_aux);
-		}
+	if (&outp->head == &conn->disp->outps)
+		return -EINVAL;
+
+	if (outp->dp.aux && !outp->info.location) {
+		if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_I2C_PLUG;
+		if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
+		if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ   ) bits |= NVKM_I2C_IRQ;
+
+		return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
+				       nvkm_uconn_uevent_aux);
 	}
 
 	if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_GPIO_HI;
 	if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO;
-	if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ)
-		return -EINVAL;
+	if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) {
+		/* TODO: support DP IRQ on ANX9805 and remove this hack. */
+		if (!outp->info.location)
+			return -EINVAL;
+	}
 
 	return nvkm_uevent_add(uevent, &device->gpio->event, conn->info.hpd, bits,
 			       nvkm_uconn_uevent_gpio);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 5ea9a2ff0663..5db37247dc29 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -283,11 +283,21 @@ nvkm_fifo_oneinit(struct nvkm_engine *engine)
 	}
 
 	/* Initialise non-stall intr handling. */
-	if (fifo->func->nonstall_ctor) {
-		ret = fifo->func->nonstall_ctor(fifo);
-		if (ret) {
-			nvkm_error(subdev, "nonstall %d\n", ret);
+	if (fifo->func->nonstall) {
+		if (fifo->func->nonstall_ctor) {
+			ret = fifo->func->nonstall_ctor(fifo);
+			if (ret < 0) {
+				nvkm_error(subdev, "nonstall %d\n", ret);
+				return ret;
+			}
+		} else {
+			ret = 1;
 		}
+
+		ret = nvkm_event_init(fifo->func->nonstall, &fifo->engine.subdev, 1, ret,
+				      &fifo->nonstall.event);
+		if (ret)
+			return ret;
 	}
 
 	/* Allocate USERD + BAR1 polling area. */
@@ -358,7 +368,6 @@ nvkm_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 	       enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo)
 {
 	struct nvkm_fifo *fifo;
-	int ret;
 
 	if (!(fifo = *pfifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
 		return -ENOMEM;
@@ -374,16 +383,5 @@ nvkm_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 	spin_lock_init(&fifo->lock);
 	mutex_init(&fifo->mutex);
 
-	ret = nvkm_engine_ctor(&nvkm_fifo, device, type, inst, true, &fifo->engine);
-	if (ret)
-		return ret;
-
-	if (func->nonstall) {
-		ret = nvkm_event_init(func->nonstall, &fifo->engine.subdev, 1, 1,
-				      &fifo->nonstall.event);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
+	return nvkm_engine_ctor(&nvkm_fifo, device, type, inst, true, &fifo->engine);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
index 12a5d99d5e77..c56d2a839efb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
@@ -32,9 +32,6 @@
 
 #include <nvif/class.h>
 
-/*TODO: allocate? */
-#define GA100_FIFO_NONSTALL_VECTOR 0
-
 static u32
 ga100_chan_doorbell_handle(struct nvkm_chan *chan)
 {
@@ -83,7 +80,7 @@ ga100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm,
 	nvkm_wo32(chan->inst, 0x0e4, priv ? 0x00000020 : 0x00000000);
 	nvkm_wo32(chan->inst, 0x0e8, chan->id);
 	nvkm_wo32(chan->inst, 0x0f4, 0x00001000 | (priv ? 0x00000100 : 0x00000000));
-	nvkm_wo32(chan->inst, 0x0f8, 0x80000000 | GA100_FIFO_NONSTALL_VECTOR);
+	nvkm_wo32(chan->inst, 0x0f8, 0x80000000 | chan->cgrp->runl->nonstall.vector);
 	nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000);
 	nvkm_done(chan->inst);
 	return 0;
@@ -148,8 +145,20 @@ ga100_engn_cxid(struct nvkm_engn *engn, bool *cgid)
 	return -ENODEV;
 }
 
+static int /* forwards to the engine's own nonstall() hook */
+ga100_engn_nonstall(struct nvkm_engn *engn)
+{
+	struct nvkm_engine *engine = engn->engine;
+
+	if (WARN_ON(!engine->func->nonstall)) /* hook is optional in the func table */
+		return -EINVAL;
+
+	return engine->func->nonstall(engine);
+}
+
 const struct nvkm_engn_func
 ga100_engn = {
+	.nonstall = ga100_engn_nonstall,
 	.cxid = ga100_engn_cxid,
 	.ctor = gk104_ectx_ctor,
 	.bind = gv100_ectx_bind,
@@ -157,6 +166,7 @@ ga100_engn = {
 
 const struct nvkm_engn_func
 ga100_engn_ce = {
+	.nonstall = ga100_engn_nonstall,
 	.cxid = ga100_engn_cxid,
 	.ctor = gv100_ectx_ce_ctor,
 	.bind = gv100_ectx_ce_bind,
@@ -429,7 +439,9 @@ static int
 ga100_runl_new(struct nvkm_fifo *fifo, int id, u32 addr, struct nvkm_runl **prunl)
 {
 	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_top_device *tdev;
 	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
 	u32 chcfg  = nvkm_rd32(device, addr + 0x004);
 	u32 chnum  = 1 << (chcfg & 0x0000000f);
 	u32 chaddr = (chcfg & 0xfffffff0);
@@ -437,26 +449,55 @@ ga100_runl_new(struct nvkm_fifo *fifo, int id, u32 addr, struct nvkm_runl **prun
 	u32 vector = nvkm_rd32(device, addr + 0x160);
 	int i, ret;
 
-	runl = *prunl = nvkm_runl_new(fifo, id, addr, chnum);
+	runl = nvkm_runl_new(fifo, id, addr, chnum);
 	if (IS_ERR(runl))
 		return PTR_ERR(runl);
 
+	*prunl = runl;
+
 	for (i = 0; i < 2; i++) {
 		u32 pbcfg = nvkm_rd32(device, addr + 0x010 + (i * 0x04));
 		if (pbcfg & 0x80000000) {
 			runl->runq[runl->runq_nr] =
 				nvkm_runq_new(fifo, ((pbcfg & 0x03fffc00) - 0x040000) / 0x800);
-			if (!runl->runq[runl->runq_nr])
+			if (!runl->runq[runl->runq_nr]) {
+				RUNL_ERROR(runl, "runq %d", runl->runq_nr);
 				return -ENOMEM;
+			}
 
 			runl->runq_nr++;
 		}
 	}
 
+	nvkm_list_foreach(tdev, &device->top->device, head, tdev->runlist == runl->addr) {
+		if (tdev->engine < 0) {
+			RUNL_DEBUG(runl, "engn !top");
+			return -EINVAL;
+		}
+
+		engn = nvkm_runl_add(runl, tdev->engine, (tdev->type == NVKM_ENGINE_CE) ?
+				     fifo->func->engn_ce : fifo->func->engn,
+				     tdev->type, tdev->inst);
+		if (!engn)
+			return -EINVAL;
+
+		if (!engn->engine->func->nonstall) {
+			RUNL_DEBUG(runl, "engn %s !nonstall", engn->engine->subdev.name);
+			return -EINVAL;
+		}
+	}
+
+	if (list_empty(&runl->engns)) {
+		RUNL_DEBUG(runl, "!engns");
+		return -EINVAL;
+	}
+
 	ret = nvkm_inth_add(&device->vfn->intr, vector & 0x00000fff, NVKM_INTR_PRIO_NORMAL,
 			    &fifo->engine.subdev, ga100_runl_intr, &runl->inth);
-	if (ret)
+	if (ret) {
+		RUNL_ERROR(runl, "inth %d", ret);
 		return ret;
+	}
 
 	runl->chan = chaddr;
 	runl->doorbell = dbcfg >> 16;
@@ -466,9 +507,9 @@ ga100_runl_new(struct nvkm_fifo *fifo, int id, u32 addr, struct nvkm_runl **prun
 static irqreturn_t
 ga100_fifo_nonstall_intr(struct nvkm_inth *inth)
 {
-	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), nonstall.intr);
+	struct nvkm_runl *runl = container_of(inth, typeof(*runl), nonstall.inth);
 
-	nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
+	nvkm_event_ntfy(&runl->fifo->nonstall.event, runl->id, NVKM_FIFO_NONSTALL_EVENT);
 	return IRQ_HANDLED;
 }
 
@@ -476,16 +517,18 @@ static void
 ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
 {
 	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
 
-	nvkm_inth_block(&fifo->nonstall.intr);
+	nvkm_inth_block(&runl->nonstall.inth);
 }
 
 static void
 ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
 {
 	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
 
-	nvkm_inth_allow(&fifo->nonstall.intr);
+	nvkm_inth_allow(&runl->nonstall.inth);
 }
 
 const struct nvkm_event_func
@@ -497,9 +540,29 @@ ga100_fifo_nonstall = {
 int
 ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
 {
-	return nvkm_inth_add(&fifo->engine.subdev.device->vfn->intr, GA100_FIFO_NONSTALL_VECTOR,
-			     NVKM_INTR_PRIO_NORMAL, &fifo->engine.subdev, ga100_fifo_nonstall_intr,
-			     &fifo->nonstall.intr);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	struct nvkm_vfn *vfn = subdev->device->vfn;
+	struct nvkm_runl *runl;
+	int ret, nr = 0;
+
+	nvkm_runl_foreach(runl, fifo) {
+		struct nvkm_engn *engn = list_first_entry(&runl->engns, typeof(*engn), head);
+
+		runl->nonstall.vector = engn->func->nonstall(engn);
+		if (runl->nonstall.vector < 0) {
+			RUNL_ERROR(runl, "nonstall %d", runl->nonstall.vector);
+			return runl->nonstall.vector;
+		}
+
+		ret = nvkm_inth_add(&vfn->intr, runl->nonstall.vector, NVKM_INTR_PRIO_NORMAL,
+				    subdev, ga100_fifo_nonstall_intr, &runl->nonstall.inth);
+		if (ret)
+			return ret;
+
+		nr = max(nr, runl->id + 1);
+	}
+
+	return nr;
 }
 
 int
@@ -514,15 +577,13 @@ ga100_fifo_runl_ctor(struct nvkm_fifo *fifo)
 		runl = nvkm_runl_get(fifo, -1, tdev->runlist);
 		if (!runl) {
 			ret = ga100_runl_new(fifo, id++, tdev->runlist, &runl);
-			if (ret)
-				return ret;
-		}
-
-		if (tdev->engine < 0)
-			continue;
+			if (ret) {
+				if (runl)
+					nvkm_runl_del(runl);
 
-		nvkm_runl_add(runl, tdev->engine, (tdev->type == NVKM_ENGINE_CE) ?
-			      fifo->func->engn_ce : fifo->func->engn, tdev->type, tdev->inst);
+				continue;
+			}
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c
index 93d628d7d508..454a481a0aef 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c
@@ -399,7 +399,7 @@ nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
 	int ret;
 
 	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	runl->func = fifo->func->runl;
 	runl->fifo = fifo;
@@ -419,7 +419,7 @@ nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
 		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
 			RUNL_ERROR(runl, "cgid/chid: %d", ret);
 			nvkm_runl_del(runl);
-			return NULL;
+			return ERR_PTR(ret);
 		}
 	} else {
 		runl->cgid = nvkm_chid_ref(fifo->cgid);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h
index c93d21bb7bd5..5421321f8e85 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h
@@ -11,6 +11,7 @@ enum nvkm_subdev_type;
 
 struct nvkm_engn {
 	const struct nvkm_engn_func {
+		int (*nonstall)(struct nvkm_engn *);
 		bool (*chsw)(struct nvkm_engn *);
 		int (*cxid)(struct nvkm_engn *, bool *cgid);
 		void (*mmu_fault_trigger)(struct nvkm_engn *);
@@ -69,6 +70,11 @@ struct nvkm_runl {
 
 	struct nvkm_inth inth;
 
+	struct {
+		int vector;
+		struct nvkm_inth inth;
+	} nonstall;
+
 	struct list_head cgrps;
 	int cgrp_nr;
 	int chan_nr;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c
index 1dac95ae7b43..04140e0110be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c
@@ -52,7 +52,7 @@ nvkm_uchan_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_
 
 	switch (args->v0.type) {
 	case NVIF_CHAN_EVENT_V0_NON_STALL_INTR:
-		return nvkm_uevent_add(uevent, &runl->fifo->nonstall.event, 0,
+		return nvkm_uevent_add(uevent, &runl->fifo->nonstall.event, runl->id,
 				       NVKM_FIFO_NONSTALL_EVENT, NULL);
 	case NVIF_CHAN_EVENT_V0_KILLED:
 		return nvkm_uevent_add(uevent, &runl->chid->event, chan->id,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 71b824e6da9d..0096ad401b15 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -109,8 +109,7 @@ nvkm_gr_oclass_get(struct nvkm_oclass *oclass, int index)
 }
 
 static int
-nvkm_gr_cclass_new(struct nvkm_fifo_chan *chan,
-		   const struct nvkm_oclass *oclass,
+nvkm_gr_cclass_new(struct nvkm_chan *chan, const struct nvkm_oclass *oclass,
 		   struct nvkm_object **pobject)
 {
 	struct nvkm_gr *gr = nvkm_gr(oclass->engine);
@@ -127,6 +126,17 @@ nvkm_gr_intr(struct nvkm_engine *engine)
 }
 
 static int
+nvkm_gr_nonstall(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+
+	/* Delegate to the gr implementation's hook; -EINVAL when it has none. */
+	if (!gr->func->nonstall)
+		return -EINVAL;
+	return gr->func->nonstall(gr);
+}
+
+static int
 nvkm_gr_oneinit(struct nvkm_engine *engine)
 {
 	struct nvkm_gr *gr = nvkm_gr(engine);
@@ -178,6 +188,7 @@ nvkm_gr = {
 	.init = nvkm_gr_init,
 	.fini = nvkm_gr_fini,
 	.reset = nvkm_gr_reset,
+	.nonstall = nvkm_gr_nonstall,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
 	.chsw_load = nvkm_gr_chsw_load,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 00dbeda7e346..de161e7a04aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110_grctx;
 void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 94233d0119df..52a234b1ef01 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -906,7 +906,9 @@ static void
 gk104_grctx_generate_r419f78(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+	/* bit 3 set disables loads in fp helper invocations, we need it enabled */
+	nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 4391458e1fb2..3acdd9eeb74a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
 	nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
 }
 
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+	/*
+	 * Bit 3 set disables loads in fp helper invocations; we need them enabled.
+	 */
+	nvkm_mask(gr->base.engine.subdev.device, 0x419f78, 0x00000008, 0x00000000);
+}
+
 const struct gf100_grctx_func
 gk110_grctx = {
 	.main  = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
 	.r419eb0 = gk110_grctx_generate_r419eb0,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index 7b9a34f9ec3c..5597e87624ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -103,4 +103,5 @@ gk110b_grctx = {
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
 	.r419eb0 = gk110_grctx_generate_r419eb0,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index c78d07a8bb7d..612656496541 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -568,4 +568,5 @@ gk208_grctx = {
 	.dist_skip_table = gf117_grctx_generate_dist_skip_table,
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r418800 = gk104_grctx_generate_r418800,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index beac66eb2a80..9906974ac3f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -988,4 +988,5 @@ gm107_grctx = {
 	.r406500 = gm107_grctx_generate_r406500,
 	.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
 	.r419e00 = gm107_grctx_generate_r419e00,
+	.r419f78 = gk110_grctx_generate_r419f78,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c
index a5b5ac2755a2..00cd70abad67 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c
@@ -137,8 +137,15 @@ ga102_gr_oneinit_intr(struct gf100_gr *gr, enum nvkm_intr_type *pvector)
 	return &device->vfn->intr;
 }
 
+static int ga102_gr_nonstall(struct gf100_gr *gr)
+{
+	/* Low 12 bits of 0x400160; presumably the non-stall vector (confirm). */
+	return nvkm_rd32(gr->base.engine.subdev.device, 0x400160) & 0x00000fff;
+}
+
 static const struct gf100_gr_func
 ga102_gr = {
+	.nonstall = ga102_gr_nonstall,
 	.oneinit_intr = ga102_gr_oneinit_intr,
 	.oneinit_tiles = gm200_gr_oneinit_tiles,
 	.oneinit_sm_id = gv100_gr_oneinit_sm_id,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 5f20079c3660..3648868bb9fc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -374,7 +374,7 @@ gf100_gr_chan = {
 };
 
 static int
-gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		  const struct nvkm_oclass *oclass,
 		  struct nvkm_object **pobject)
 {
@@ -2494,12 +2494,24 @@ gf100_gr_gpccs_ucode = {
 	.data.size = sizeof(gf100_grgpc_data),
 };
 
+static int
+gf100_gr_nonstall(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+
+	/*
+	 * Use the per-implementation hook if present, otherwise fail with -EINVAL.
+	 */
+	return gr->func->nonstall ? gr->func->nonstall(gr) : -EINVAL;
+}
+
 static const struct nvkm_gr_func
 gf100_gr_ = {
 	.dtor = gf100_gr_dtor,
 	.oneinit = gf100_gr_oneinit,
 	.init = gf100_gr_init_,
 	.fini = gf100_gr_fini,
+	.nonstall = gf100_gr_nonstall,
 	.reset = gf100_gr_reset,
 	.units = gf100_gr_units,
 	.chan_new = gf100_gr_chan_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 94ca7ac16acf..54f686ba39ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -147,6 +147,7 @@ struct gf100_gr_func_zbc {
 };
 
 struct gf100_gr_func {
+	int (*nonstall)(struct gf100_gr *);
 	struct nvkm_intr *(*oneinit_intr)(struct gf100_gr *, enum nvkm_intr_type *);
 	void (*oneinit_tiles)(struct gf100_gr *);
 	int (*oneinit_sm_id)(struct gf100_gr *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
index 81bd682c2102..ca822f07b63e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
@@ -1181,7 +1181,7 @@ nv04_gr_chan = {
 };
 
 static int
-nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv04_gr *gr = nv04_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
index 7fe6e58f6bab..92ef7c9b2910 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
@@ -999,7 +999,7 @@ nv10_gr_chan = {
 	} while (0)
 
 int
-nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv10_gr *gr = nv10_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h
index 5cfe927c9123..b86090c08060 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.h
@@ -9,6 +9,6 @@ int nv10_gr_init(struct nvkm_gr *);
 void nv10_gr_intr(struct nvkm_gr *);
 void nv10_gr_tile(struct nvkm_gr *, int, struct nvkm_fb_tile *);
 
-int nv10_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *,
+int nv10_gr_chan_new(struct nvkm_gr *, struct nvkm_chan *,
 		     const struct nvkm_oclass *, struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
index 75434f5de7ad..02a8c62a0a32 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
@@ -72,7 +72,7 @@ nv20_gr_chan = {
 };
 
 static int
-nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
index 94685e4d4f87..d6bc6904dcc8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
@@ -18,7 +18,7 @@ nv25_gr_chan = {
 };
 
 static int
-nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
index 2d6273675291..e5a351b51eb9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
@@ -18,7 +18,7 @@ nv2a_gr_chan = {
 };
 
 static int
-nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
index 647bd6fede04..80370323755e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
@@ -19,7 +19,7 @@ nv30_gr_chan = {
 };
 
 static int
-nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
index 2eae3fe4ef4e..cdf043bbdd59 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
@@ -18,7 +18,7 @@ nv34_gr_chan = {
 };
 
 static int
-nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
index 657d7cdba369..fa5a6ccb871d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
@@ -18,7 +18,7 @@ nv35_gr_chan = {
 };
 
 static int
-nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv20_gr *gr = nv20_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index d2df097a6cf6..a5e1f02791b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -145,7 +145,7 @@ nv40_gr_chan = {
 };
 
 int
-nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv40_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv40_gr *gr = nv40_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h
index f3d3d3a5ae5b..84fbc99139e5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.h
@@ -22,12 +22,12 @@ u64 nv40_gr_units(struct nvkm_gr *);
 struct nv40_gr_chan {
 	struct nvkm_object object;
 	struct nv40_gr *gr;
-	struct nvkm_fifo_chan *fifo;
+	struct nvkm_chan *fifo;
 	u32 inst;
 	struct list_head head;
 };
 
-int nv40_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *,
+int nv40_gr_chan_new(struct nvkm_gr *, struct nvkm_chan *,
 		     const struct nvkm_oclass *, struct nvkm_object **);
 
 extern const struct nvkm_object_func nv40_gr_object;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index 1ba18a8e380f..c8a0288c092d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -86,7 +86,7 @@ nv50_gr_chan = {
 };
 
 int
-nv50_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
+nv50_gr_chan_new(struct nvkm_gr *base, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv50_gr *gr = nv50_gr(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h
index 84388c42e5c6..97ead0042357 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.h
@@ -27,7 +27,7 @@ struct nv50_gr_chan {
 	struct nv50_gr *gr;
 };
 
-int nv50_gr_chan_new(struct nvkm_gr *, struct nvkm_fifo_chan *,
+int nv50_gr_chan_new(struct nvkm_gr *, struct nvkm_chan *,
 		     const struct nvkm_oclass *, struct nvkm_object **);
 
 extern const struct nvkm_object_func nv50_gr_object;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
index 08d5c96e6458..0884abc73a9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
@@ -5,7 +5,7 @@
 #include <engine/gr.h>
 #include <core/enum.h>
 struct nvkm_fb_tile;
-struct nvkm_fifo_chan;
+struct nvkm_chan;
 
 int nvkm_gr_ctor(const struct nvkm_gr_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
 		 bool enable, struct nvkm_gr *);
@@ -18,10 +18,11 @@ struct nvkm_gr_func {
 	int (*init)(struct nvkm_gr *);
 	int (*fini)(struct nvkm_gr *, bool);
 	int (*reset)(struct nvkm_gr *);
+	int (*nonstall)(struct nvkm_gr *);
 	void (*intr)(struct nvkm_gr *);
 	void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *);
 	int (*tlb_flush)(struct nvkm_gr *);
-	int (*chan_new)(struct nvkm_gr *, struct nvkm_fifo_chan *,
+	int (*chan_new)(struct nvkm_gr *, struct nvkm_chan *,
 			const struct nvkm_oclass *, struct nvkm_object **);
 	int (*object_get)(struct nvkm_gr *, int, struct nvkm_sclass *);
 	/* Returns chipset-specific counts of units packed into an u64.
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
index 3b6c8100a242..a7775aa18541 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
@@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
 	return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
 }
 
-int
-tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
-{
-	int ret;
-
-	ret = gm200_gr_load(gr, ver, fwif);
-	if (ret)
-		return ret;
-
-	return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
-				 &gr->bundle_veid);
-}
-
 static const struct gf100_gr_fwif
 tu102_gr_fwif[] = {
 	{  0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
index cb0c3991b2ad..db9fc1ecae0d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
@@ -81,8 +81,7 @@ nv31_mpeg_chan = {
 };
 
 int
-nv31_mpeg_chan_new(struct nvkm_fifo_chan *fifoch,
-		   const struct nvkm_oclass *oclass,
+nv31_mpeg_chan_new(struct nvkm_chan *fifoch, const struct nvkm_oclass *oclass,
 		   struct nvkm_object **pobject)
 {
 	struct nv31_mpeg *mpeg = nv31_mpeg(oclass->engine);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h
index 9f30aaaf809e..251d659565de 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.h
@@ -24,9 +24,9 @@ struct nv31_mpeg_func {
 struct nv31_mpeg_chan {
 	struct nvkm_object object;
 	struct nv31_mpeg *mpeg;
-	struct nvkm_fifo_chan *fifo;
+	struct nvkm_chan *fifo;
 };
 
-int nv31_mpeg_chan_new(struct nvkm_fifo_chan *, const struct nvkm_oclass *,
+int nv31_mpeg_chan_new(struct nvkm_chan *, const struct nvkm_oclass *,
 		       struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
index 0890a279458e..4b1374adbda3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
@@ -43,7 +43,7 @@ struct nv44_mpeg {
 struct nv44_mpeg_chan {
 	struct nvkm_object object;
 	struct nv44_mpeg *mpeg;
-	struct nvkm_fifo_chan *fifo;
+	struct nvkm_chan *fifo;
 	struct list_head head;
 	u32 inst;
 };
@@ -100,8 +100,7 @@ nv44_mpeg_chan = {
 };
 
 static int
-nv44_mpeg_chan_new(struct nvkm_fifo_chan *fifoch,
-		   const struct nvkm_oclass *oclass,
+nv44_mpeg_chan_new(struct nvkm_chan *fifoch, const struct nvkm_oclass *oclass,
 		   struct nvkm_object **pobject)
 {
 	struct nv44_mpeg *mpeg = nv44_mpeg(oclass->engine);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h
index 667a2d05dd89..044ff4133874 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/priv.h
@@ -2,7 +2,7 @@
 #ifndef __NVKM_MPEG_PRIV_H__
 #define __NVKM_MPEG_PRIV_H__
 #include <engine/mpeg.h>
-struct nvkm_fifo_chan;
+struct nvkm_chan;
 
 int nv31_mpeg_init(struct nvkm_engine *);
 void nv31_mpeg_tile(struct nvkm_engine *, int, struct nvkm_fb_tile *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
index a9d464db6974..20220d6d4a13 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
@@ -74,8 +74,7 @@ nvkm_sw_oclass_get(struct nvkm_oclass *oclass, int index)
 }
 
 static int
-nvkm_sw_cclass_get(struct nvkm_fifo_chan *fifoch,
-		   const struct nvkm_oclass *oclass,
+nvkm_sw_cclass_get(struct nvkm_chan *fifoch, const struct nvkm_oclass *oclass,
 		   struct nvkm_object **pobject)
 {
 	struct nvkm_sw *sw = nvkm_sw(oclass->engine);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
index 834b8cbed51d..2bf45141de60 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
@@ -74,7 +74,7 @@ nvkm_sw_chan = {
 
 int
 nvkm_sw_chan_ctor(const struct nvkm_sw_chan_func *func, struct nvkm_sw *sw,
-		  struct nvkm_fifo_chan *fifo, const struct nvkm_oclass *oclass,
+		  struct nvkm_chan *fifo, const struct nvkm_oclass *oclass,
 		  struct nvkm_sw_chan *chan)
 {
 	unsigned long flags;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
index 67b2e5ea93d9..c313aea16a17 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
@@ -11,7 +11,7 @@ struct nvkm_sw_chan {
 	const struct nvkm_sw_chan_func *func;
 	struct nvkm_object object;
 	struct nvkm_sw *sw;
-	struct nvkm_fifo_chan *fifo;
+	struct nvkm_chan *fifo;
 	struct list_head head;
 
 #define NVKM_SW_CHAN_EVENT_PAGE_FLIP BIT(0)
@@ -24,7 +24,7 @@ struct nvkm_sw_chan_func {
 };
 
 int nvkm_sw_chan_ctor(const struct nvkm_sw_chan_func *, struct nvkm_sw *,
-		      struct nvkm_fifo_chan *, const struct nvkm_oclass *,
+		      struct nvkm_chan *, const struct nvkm_oclass *,
 		      struct nvkm_sw_chan *);
 bool nvkm_sw_chan_mthd(struct nvkm_sw_chan *, int subc, u32 mthd, u32 data);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
index c3cf6f2ff86c..a0273baf4c67 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
@@ -102,7 +102,7 @@ gf100_sw_chan = {
 };
 
 static int
-gf100_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch,
+gf100_sw_chan_new(struct nvkm_sw *sw, struct nvkm_chan *fifoch,
 		  const struct nvkm_oclass *oclass,
 		  struct nvkm_object **pobject)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c
index 4aa57573869c..8a1d112da894 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv04.c
@@ -106,7 +106,7 @@ nv04_sw_chan = {
 };
 
 static int
-nv04_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifo,
+nv04_sw_chan_new(struct nvkm_sw *sw, struct nvkm_chan *fifo,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nv04_sw_chan *chan;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c
index e79e640ae535..742c75859569 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv10.c
@@ -36,7 +36,7 @@ nv10_sw_chan = {
 };
 
 static int
-nv10_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifo,
+nv10_sw_chan_new(struct nvkm_sw *sw, struct nvkm_chan *fifo,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nvkm_sw_chan *chan;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
index 9d7a9b7d5be3..99476d32c5af 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
@@ -99,7 +99,7 @@ nv50_sw_chan = {
 };
 
 static int
-nv50_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch,
+nv50_sw_chan_new(struct nvkm_sw *sw, struct nvkm_chan *fifoch,
 		 const struct nvkm_oclass *oclass, struct nvkm_object **pobject)
 {
 	struct nvkm_disp *disp = sw->engine.subdev.device->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h
index d9d83b1b8849..8015afaba947 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/priv.h
@@ -15,7 +15,7 @@ struct nvkm_sw_chan_sclass {
 };
 
 struct nvkm_sw_func {
-	int (*chan_new)(struct nvkm_sw *, struct nvkm_fifo_chan *,
+	int (*chan_new)(struct nvkm_sw *, struct nvkm_chan *,
 			const struct nvkm_oclass *, struct nvkm_object **);
 	const struct nvkm_sw_chan_sclass sclass[];
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
index 795f3a649b12..9b8ca4e898f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
@@ -224,7 +224,7 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
 	u64 falcons;
 	int ret, i;
 
-	if (list_empty(&acr->hsfw)) {
+	if (list_empty(&acr->hsfw) || !acr->func || !acr->func->wpr_layout) {
 		nvkm_debug(subdev, "No HSFW(s)\n");
 		nvkm_acr_cleanup(acr);
 		return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 6ba5120a2ebe..394c305e759a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -55,7 +55,7 @@ nvkm-y += nvkm/subdev/fb/ramgk104.o
 nvkm-y += nvkm/subdev/fb/ramgm107.o
 nvkm-y += nvkm/subdev/fb/ramgm200.o
 nvkm-y += nvkm/subdev/fb/ramgp100.o
-nvkm-y += nvkm/subdev/fb/ramga102.o
+nvkm-y += nvkm/subdev/fb/ramgp102.o
 nvkm-y += nvkm/subdev/fb/sddr2.o
 nvkm-y += nvkm/subdev/fb/sddr3.o
 nvkm-y += nvkm/subdev/fb/gddr3.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index 0955340cc421..8a286a9349ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -174,6 +174,18 @@ nvkm_fb_mem_unlock(struct nvkm_fb *fb)
 	return 0;
 }
 
+u64
+nvkm_fb_vidmem_size(struct nvkm_device *device)
+{
+	struct nvkm_fb *fb = device->fb;
+
+	/* Without an fb, or without a vidmem.size hook, warn and return 0. */
+	if (WARN_ON(!fb || !fb->func->vidmem.size))
+		return 0;
+
+	return fb->func->vidmem.size(fb);
+}
+
 static int
 nvkm_fb_init(struct nvkm_subdev *subdev)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
index a7456e786463..12037fd4fdf2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
@@ -30,7 +30,8 @@ ga100_fb = {
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
 	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
-	.ram_new = gp100_ram_new,
+	.vidmem.size = gp102_fb_vidmem_size,
+	.ram_new = gp102_ram_new,
 	.default_bigpage = 16,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
index dd476e079fe1..76f6877b54c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
@@ -24,6 +24,12 @@
 
 #include <engine/nvdec.h>
 
+static u64 ga102_fb_vidmem_size(struct nvkm_fb *fb)
+{
+	/* The value read from 0x1183a4 is widened to 64 bits, then shifted by 20. */
+	return (u64)nvkm_rd32(fb->subdev.device, 0x1183a4) << 20;
+}
+
 static int
 ga102_fb_oneinit(struct nvkm_fb *fb)
 {
@@ -43,7 +49,8 @@ ga102_fb = {
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
 	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
-	.ram_new = ga102_ram_new,
+	.vidmem.size = ga102_fb_vidmem_size,
+	.ram_new = gp102_ram_new,
 	.default_bigpage = 16,
 	.vpr.scrub_required = tu102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
index 14d942e8b857..534553c64805 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
@@ -40,6 +40,20 @@ gp102_fb_vpr_scrub_required(struct nvkm_fb *fb)
 	return (nvkm_rd32(device, 0x100cd0) & 0x00000010) != 0;
 }
 
+u64
+gp102_fb_vidmem_size(struct nvkm_fb *fb)
+{
+	const u32 reg = nvkm_rd32(fb->subdev.device, 0x100ce0);
+	const u32 mag = (reg >> 4) & 0x0000003f;	/* bits 9:4 */
+	const u32 sca = reg & 0x0000000f;		/* bits 3:0 */
+	u64 size = (u64)mag << (sca + 20);
+
+	/* With bit 30 set, only 15/16ths of that size is returned. */
+	if (reg & 0x40000000)
+		size = size / 16 * 15;
+	return size;
+}
+
 int
 gp102_fb_oneinit(struct nvkm_fb *fb)
 {
@@ -59,9 +73,10 @@ gp102_fb = {
 	.init_remapper = gp100_fb_init_remapper,
 	.init_page = gm200_fb_init_page,
 	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
+	.vidmem.size = gp102_fb_vidmem_size,
 	.vpr.scrub_required = gp102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
-	.ram_new = gp100_ram_new,
+	.ram_new = gp102_ram_new,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
index 4d8a286a7a34..f422564bee5b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -36,9 +36,10 @@ gv100_fb = {
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
 	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
+	.vidmem.size = gp102_fb_vidmem_size,
 	.vpr.scrub_required = gp102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
-	.ram_new = gp100_ram_new,
+	.ram_new = gp102_ram_new,
 	.default_bigpage = 16,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index 726c30c8bf95..77d6a8c10829 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -20,6 +20,10 @@ struct nvkm_fb_func {
 		void (*flush_page_init)(struct nvkm_fb *);
 	} sysmem;
 
+	struct nvkm_fb_func_vidmem {
+		u64 (*size)(struct nvkm_fb *);
+	} vidmem;
+
 	struct {
 		bool (*scrub_required)(struct nvkm_fb *);
 		int (*scrub)(struct nvkm_fb *);
@@ -84,6 +88,7 @@ void gp100_fb_init_remapper(struct nvkm_fb *);
 void gp100_fb_init_unkn(struct nvkm_fb *);
 
 int gp102_fb_oneinit(struct nvkm_fb *);
+u64 gp102_fb_vidmem_size(struct nvkm_fb *);
 bool gp102_fb_vpr_scrub_required(struct nvkm_fb *);
 int gp102_fb_vpr_scrub(struct nvkm_fb *);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index ea7d66f3dd82..50f0c1914f58 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -70,5 +70,5 @@ int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm200_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
-int ga102_ram_new(struct nvkm_fb *, struct nvkm_ram **);
+int gp102_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c
deleted file mode 100644
index 298c136cefe0..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramga102.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright 2021 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "ram.h"
-
-#include <subdev/bios.h>
-#include <subdev/bios/init.h>
-#include <subdev/bios/rammap.h>
-
-static const struct nvkm_ram_func
-ga102_ram = {
-};
-
-int
-ga102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
-{
-	struct nvkm_device *device = fb->subdev.device;
-	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
-	u32 size = nvkm_rd32(device, 0x1183a4);
-
-	return nvkm_ram_new_(&ga102_ram, fb, type, (u64)size << 20, pram);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c
new file mode 100644
index 000000000000..8550f5e47347
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp102.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: MIT
+#include "ram.h"
+
+#include <subdev/bios.h>
+
+static const struct nvkm_ram_func
+gp102_ram = { /* empty: no nvkm_ram_func members are set for this backend */
+};
+
+/* New RAM object whose vram allocator excludes the reserved head and tail. */
+int
+gp102_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
+{
+	enum nvkm_ram_type type = nvkm_fb_bios_memtype(fb->subdev.device->bios);
+	const u64 size = fb->func->vidmem.size(fb);
+	const u32 head = 256 * 1024;	/* vga memory, skipped at the start */
+	const u32 tail = 1024 * 1024;	/* vbios etc, skipped at the end */
+	int ret;
+
+	ret = nvkm_ram_new_(&gp102_ram, fb, type, size, pram);
+	if (ret)
+		return ret;
+
+	/* Rebuild the allocator over [head, size - tail), in MM-shift units. */
+	nvkm_mm_fini(&(*pram)->vram);
+	return nvkm_mm_init(&(*pram)->vram, NVKM_RAM_MM_NORMAL,
+			    head >> NVKM_RAM_MM_SHIFT,
+			    (size - head - tail) >> NVKM_RAM_MM_SHIFT,
+			    1);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c
index b8803c124c3b..bcc23d4c8115 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/tu102.c
@@ -36,9 +36,10 @@ tu102_fb = {
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
 	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
+	.vidmem.size = gp102_fb_vidmem_size,
 	.vpr.scrub_required = tu102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
-	.ram_new = gp100_ram_new,
+	.ram_new = gp102_ram_new,
 	.default_bigpage = 16,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
index 976539de4220..731b2f68d3db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
@@ -260,10 +260,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
 {
 	struct nvkm_bios *bios = device->bios;
 	struct nvkm_i2c *i2c;
+	struct nvkm_i2c_aux *aux;
 	struct dcb_i2c_entry ccbE;
 	struct dcb_output dcbE;
 	u8 ver, hdr;
-	int ret, i;
+	int ret, i, ids;
 
 	if (!(i2c = *pi2c = kzalloc(sizeof(*i2c), GFP_KERNEL)))
 		return -ENOMEM;
@@ -406,5 +407,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
 		}
 	}
 
-	return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, i, &i2c->event);
+	ids = 0;
+	list_for_each_entry(aux, &i2c->aux, head)
+		ids = max(ids, aux->id + 1);
+	if (!ids)
+		return 0;
+
+	return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, ids, &i2c->event);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
index 524cd3c0e3fe..8e459d88ff8f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
@@ -58,10 +58,13 @@ nvkm_uvmm_mthd_pfnclr(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
+	if (nvkm_vmm_in_managed_range(vmm, addr, size) && vmm->managed.raw)
+		return -EINVAL;
+
 	if (size) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		ret = nvkm_vmm_pfn_unmap(vmm, addr, size);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 	}
 
 	return ret;
@@ -88,10 +91,13 @@ nvkm_uvmm_mthd_pfnmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
+	if (nvkm_vmm_in_managed_range(vmm, addr, size) && vmm->managed.raw)
+		return -EINVAL;
+
 	if (size) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		ret = nvkm_vmm_pfn_map(vmm, page, addr, size, phys);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 	}
 
 	return ret;
@@ -113,7 +119,10 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
-	mutex_lock(&vmm->mutex);
+	if (nvkm_vmm_in_managed_range(vmm, addr, 0) && vmm->managed.raw)
+		return -EINVAL;
+
+	mutex_lock(&vmm->mutex.vmm);
 	vma = nvkm_vmm_node_search(vmm, addr);
 	if (ret = -ENOENT, !vma || vma->addr != addr) {
 		VMM_DEBUG(vmm, "lookup %016llx: %016llx",
@@ -134,7 +143,7 @@ nvkm_uvmm_mthd_unmap(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	nvkm_vmm_unmap_locked(vmm, vma, false);
 	ret = 0;
 done:
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 	return ret;
 }
 
@@ -159,13 +168,16 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
+	if (nvkm_vmm_in_managed_range(vmm, addr, size) && vmm->managed.raw)
+		return -EINVAL;
+
 	memory = nvkm_umem_search(client, handle);
 	if (IS_ERR(memory)) {
 		VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory));
 		return PTR_ERR(memory);
 	}
 
-	mutex_lock(&vmm->mutex);
+	mutex_lock(&vmm->mutex.vmm);
 	if (ret = -ENOENT, !(vma = nvkm_vmm_node_search(vmm, addr))) {
 		VMM_DEBUG(vmm, "lookup %016llx", addr);
 		goto fail;
@@ -198,7 +210,7 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 		}
 	}
 	vma->busy = true;
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 
 	ret = nvkm_memory_map(memory, offset, vmm, vma, argv, argc);
 	if (ret == 0) {
@@ -207,11 +219,11 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 		return 0;
 	}
 
-	mutex_lock(&vmm->mutex);
+	mutex_lock(&vmm->mutex.vmm);
 	vma->busy = false;
 	nvkm_vmm_unmap_region(vmm, vma);
 fail:
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 	nvkm_memory_unref(&memory);
 	return ret;
 }
@@ -232,7 +244,7 @@ nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
-	mutex_lock(&vmm->mutex);
+	mutex_lock(&vmm->mutex.vmm);
 	vma = nvkm_vmm_node_search(vmm, args->v0.addr);
 	if (ret = -ENOENT, !vma || vma->addr != addr || vma->part) {
 		VMM_DEBUG(vmm, "lookup %016llx: %016llx %d", addr,
@@ -248,7 +260,7 @@ nvkm_uvmm_mthd_put(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	nvkm_vmm_put_locked(vmm, vma);
 	ret = 0;
 done:
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 	return ret;
 }
 
@@ -275,10 +287,10 @@ nvkm_uvmm_mthd_get(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	} else
 		return ret;
 
-	mutex_lock(&vmm->mutex);
+	mutex_lock(&vmm->mutex.vmm);
 	ret = nvkm_vmm_get_locked(vmm, getref, mapref, sparse,
 				  page, align, size, &vma);
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 	if (ret)
 		return ret;
 
@@ -314,6 +326,168 @@ nvkm_uvmm_mthd_page(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 	return 0;
 }
 
+/* Look up @shift in vmm->func->page[] and return its index through @refd. */
+static inline int
+nvkm_uvmm_page_index(struct nvkm_uvmm *uvmm, u64 size, u8 shift, u8 *refd)
+{
+	struct nvkm_vmm *vmm = uvmm->vmm;
+	const struct nvkm_vmm_page *pg = vmm->func->page;
+
+	if (unlikely(!shift))
+		return -EINVAL;
+
+	/* The array ends at the first entry whose shift is zero. */
+	while (pg->shift && pg->shift != shift)
+		pg++;
+
+	/* Unknown page size, or @size is not a multiple of it. */
+	if (!pg->shift || !IS_ALIGNED(size, 1ULL << pg->shift)) {
+		VMM_DEBUG(vmm, "page %d %016llx", shift, size);
+		return -EINVAL;
+	}
+
+	*refd = pg - vmm->func->page;
+	return 0;
+}
+
+/* RAW_GET: reference the PTEs backing a range of a raw-mode VMM. */
+static int
+nvkm_uvmm_mthd_raw_get(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args)
+{
+	const u64 addr = args->addr, size = args->size;
+	u8 index;
+	int err;
+
+	if (!nvkm_vmm_in_managed_range(uvmm->vmm, addr, size))
+		return -EINVAL;
+
+	err = nvkm_uvmm_page_index(uvmm, size, args->shift, &index);
+	if (err)
+		return err;
+	return nvkm_vmm_raw_get(uvmm->vmm, addr, size, index);
+}
+
+/* RAW_PUT: drop the PTE references held on a range of a raw-mode VMM. */
+static int
+nvkm_uvmm_mthd_raw_put(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args)
+{
+	const u64 addr = args->addr, size = args->size;
+	u8 index;
+	int err;
+
+	if (!nvkm_vmm_in_managed_range(uvmm->vmm, addr, size))
+		return -EINVAL;
+
+	err = nvkm_uvmm_page_index(uvmm, size, args->shift, &index);
+	if (err)
+		return err;
+
+	nvkm_vmm_raw_put(uvmm->vmm, addr, size, index);
+	return 0;
+}
+
+/* RAW_MAP: map part of a memory object into a range of a raw-mode VMM. */
+static int
+nvkm_uvmm_mthd_raw_map(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args)
+{
+	struct nvkm_client *client = uvmm->object.client;
+	struct nvkm_vmm *vmm = uvmm->vmm;
+	void *argv = (void *)(uintptr_t)args->argv;
+	unsigned int argc = args->argc;
+	u64 handle = args->memory;
+	struct nvkm_memory *memory;
+	struct nvkm_vma vma = {};
+	u8 index;
+	int err;
+
+	if (!nvkm_vmm_in_managed_range(vmm, args->addr, args->size))
+		return -EINVAL;
+
+	err = nvkm_uvmm_page_index(uvmm, args->size, args->shift, &index);
+	if (err)
+		return err;
+
+	/* Temporary on-stack VMA describing the range for nvkm_memory_map(). */
+	vma.addr = args->addr;
+	vma.size = args->size;
+	vma.used = true;
+	vma.no_comp = true;
+	vma.page = vma.refd = index;
+
+	memory = nvkm_umem_search(client, handle);
+	if (IS_ERR(memory)) {
+		VMM_DEBUG(vmm, "memory %016llx %ld\n", handle, PTR_ERR(memory));
+		return PTR_ERR(memory);
+	}
+
+	err = nvkm_memory_map(memory, args->offset, vmm, &vma, argv, argc);
+
+	nvkm_memory_unref(&vma.memory);
+	nvkm_memory_unref(&memory);
+	return err;
+}
+
+/* RAW_UNMAP: unmap a range of a raw-mode VMM, honouring args->sparse. */
+static int
+nvkm_uvmm_mthd_raw_unmap(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args)
+{
+	const u64 addr = args->addr, size = args->size;
+	u8 index;
+	int err;
+
+	if (!nvkm_vmm_in_managed_range(uvmm->vmm, addr, size))
+		return -EINVAL;
+
+	err = nvkm_uvmm_page_index(uvmm, size, args->shift, &index);
+	if (err)
+		return err;
+
+	/* nvkm_vmm_raw_unmap() returns void, so success is unconditional here. */
+	nvkm_vmm_raw_unmap(uvmm->vmm, addr, size, args->sparse, index);
+	return 0;
+}
+
+/* RAW_SPARSE: forward a range and args->ref to nvkm_vmm_raw_sparse(). */
+static int
+nvkm_uvmm_mthd_raw_sparse(struct nvkm_uvmm *uvmm, struct nvif_vmm_raw_v0 *args)
+{
+	const u64 addr = args->addr, size = args->size;
+
+	if (!nvkm_vmm_in_managed_range(uvmm->vmm, addr, size))
+		return -EINVAL;
+	return nvkm_vmm_raw_sparse(uvmm->vmm, addr, size, args->ref);
+}
+
+/* NVIF_VMM_V0_RAW: unpack a raw request and dispatch on its op code. */
+static int
+nvkm_uvmm_mthd_raw(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
+{
+	union {
+		struct nvif_vmm_raw_v0 v0;
+	} *args = argv;
+	int ret = -ENOSYS;
+
+	if (!uvmm->vmm->managed.raw) /* raw ops need a raw-type VMM */
+		return -EINVAL;
+	if ((ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, true)))
+		return ret;
+
+	switch (args->v0.op) {
+	case NVIF_VMM_RAW_V0_GET:
+		return nvkm_uvmm_mthd_raw_get(uvmm, &args->v0);
+	case NVIF_VMM_RAW_V0_PUT:
+		return nvkm_uvmm_mthd_raw_put(uvmm, &args->v0);
+	case NVIF_VMM_RAW_V0_MAP:
+		return nvkm_uvmm_mthd_raw_map(uvmm, &args->v0);
+	case NVIF_VMM_RAW_V0_UNMAP:
+		return nvkm_uvmm_mthd_raw_unmap(uvmm, &args->v0);
+	case NVIF_VMM_RAW_V0_SPARSE:
+		return nvkm_uvmm_mthd_raw_sparse(uvmm, &args->v0);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int
 nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
 {
@@ -326,6 +500,7 @@ nvkm_uvmm_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
 	case NVIF_VMM_V0_UNMAP : return nvkm_uvmm_mthd_unmap (uvmm, argv, argc);
 	case NVIF_VMM_V0_PFNMAP: return nvkm_uvmm_mthd_pfnmap(uvmm, argv, argc);
 	case NVIF_VMM_V0_PFNCLR: return nvkm_uvmm_mthd_pfnclr(uvmm, argv, argc);
+	case NVIF_VMM_V0_RAW   : return nvkm_uvmm_mthd_raw   (uvmm, argv, argc);
 	case NVIF_VMM_V0_MTHD(0x00) ... NVIF_VMM_V0_MTHD(0x7f):
 		if (uvmm->vmm->func->mthd) {
 			return uvmm->vmm->func->mthd(uvmm->vmm,
@@ -366,10 +541,11 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
 	struct nvkm_uvmm *uvmm;
 	int ret = -ENOSYS;
 	u64 addr, size;
-	bool managed;
+	bool managed, raw;
 
 	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, more))) {
-		managed = args->v0.managed != 0;
+		managed = args->v0.type == NVIF_VMM_V0_TYPE_MANAGED;
+		raw = args->v0.type == NVIF_VMM_V0_TYPE_RAW;
 		addr = args->v0.addr;
 		size = args->v0.size;
 	} else
@@ -377,12 +553,13 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
 
 	if (!(uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL)))
 		return -ENOMEM;
+
 	nvkm_object_ctor(&nvkm_uvmm, oclass, &uvmm->object);
 	*pobject = &uvmm->object;
 
 	if (!mmu->vmm) {
-		ret = mmu->func->vmm.ctor(mmu, managed, addr, size, argv, argc,
-					  NULL, "user", &uvmm->vmm);
+		ret = mmu->func->vmm.ctor(mmu, managed || raw, addr, size,
+					  argv, argc, NULL, "user", &uvmm->vmm);
 		if (ret)
 			return ret;
 
@@ -393,6 +570,7 @@ nvkm_uvmm_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
 
 		uvmm->vmm = nvkm_vmm_ref(mmu->vmm);
 	}
+	uvmm->vmm->managed.raw = raw;
 
 	page = uvmm->vmm->func->page;
 	args->v0.page_nr = 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index ae793f400ba1..eb5fcadcb39a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -677,40 +677,17 @@ nvkm_vmm_ptes_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
 }
 
 static void
-nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
-			u64 addr, u64 size, bool sparse, bool pfn)
-{
-	const struct nvkm_vmm_desc_func *func = page->desc->func;
-	nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref",
-		      false, pfn, nvkm_vmm_unref_ptes, NULL, NULL,
-		      sparse ? func->sparse : func->invalid ? func->invalid :
-							      func->unmap);
-}
-
-static int
-nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
-		      u64 addr, u64 size, struct nvkm_vmm_map *map,
-		      nvkm_vmm_pte_func func)
-{
-	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
-				 false, nvkm_vmm_ref_ptes, func, map, NULL);
-	if (fail != ~0ULL) {
-		if ((size = fail - addr))
-			nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, false, false);
-		return -ENOMEM;
-	}
-	return 0;
-}
-
-static void
 nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 		    u64 addr, u64 size, bool sparse, bool pfn)
 {
 	const struct nvkm_vmm_desc_func *func = page->desc->func;
+
+	mutex_lock(&vmm->mutex.map);
 	nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, pfn,
 		      NULL, NULL, NULL,
 		      sparse ? func->sparse : func->invalid ? func->invalid :
 							      func->unmap);
+	mutex_unlock(&vmm->mutex.map);
 }
 
 static void
@@ -718,33 +695,108 @@ nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 		  u64 addr, u64 size, struct nvkm_vmm_map *map,
 		  nvkm_vmm_pte_func func)
 {
+	mutex_lock(&vmm->mutex.map);
 	nvkm_vmm_iter(vmm, page, addr, size, "map", false, false,
 		      NULL, func, map, NULL);
+	mutex_unlock(&vmm->mutex.map);
 }
 
 static void
-nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
-		  u64 addr, u64 size)
+nvkm_vmm_ptes_put_locked(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+			 u64 addr, u64 size)
 {
 	nvkm_vmm_iter(vmm, page, addr, size, "unref", false, false,
 		      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
 }
 
+static void
+nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		  u64 addr, u64 size)
+{
+	mutex_lock(&vmm->mutex.ref); /* held across the unref walk below */
+	nvkm_vmm_ptes_put_locked(vmm, page, addr, size);
+	mutex_unlock(&vmm->mutex.ref);
+}
+
 static int
 nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
 		  u64 addr, u64 size)
 {
-	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, false,
-				 nvkm_vmm_ref_ptes, NULL, NULL, NULL);
+	u64 fail;
+
+	mutex_lock(&vmm->mutex.ref);
+	fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true, false,
+			     nvkm_vmm_ref_ptes, NULL, NULL, NULL);
 	if (fail != ~0ULL) {
 		if (fail != addr)
-			nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
+			nvkm_vmm_ptes_put_locked(vmm, page, addr, fail - addr);
+		mutex_unlock(&vmm->mutex.ref);
+		return -ENOMEM;
+	}
+	mutex_unlock(&vmm->mutex.ref);
+	return 0;
+}
+
+/* Unmap and unreference PTEs in one page-table walk; takes no mutex itself. */
+static void
+__nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+			  u64 addr, u64 size, bool sparse, bool pfn)
+{
+	const struct nvkm_vmm_desc_func *ops = page->desc->func;
+
+	nvkm_vmm_iter(vmm, page, addr, size, "unmap + unref", false, pfn,
+		      nvkm_vmm_unref_ptes, NULL, NULL,
+		      sparse ? ops->sparse : ops->invalid ?: ops->unmap);
+}
+
+static void
+nvkm_vmm_ptes_unmap_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+			u64 addr, u64 size, bool sparse, bool pfn)
+{
+	if (!vmm->managed.raw) { /* fused unmap + unref in a single walk */
+		__nvkm_vmm_ptes_unmap_put(vmm, page, addr, size, sparse, pfn);
+	} else { /* raw: two steps, each helper taking its own mutex */
+		nvkm_vmm_ptes_unmap(vmm, page, addr, size, sparse, pfn);
+		nvkm_vmm_ptes_put(vmm, page, addr, size);
+	}
+}
+
+static int
+__nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+			u64 addr, u64 size, struct nvkm_vmm_map *map,
+			nvkm_vmm_pte_func func)
+{
+	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref + map", true,
+				 false, nvkm_vmm_ref_ptes, func, map, NULL);
+	if (fail == ~0ULL) /* the walk reported no failing address */
+		return 0;
+	if (fail != addr) /* undo [addr, fail) before bailing out */
+		nvkm_vmm_ptes_unmap_put(vmm, page, addr, fail - addr,
+					false, false);
+	return -ENOMEM;
+}
 
-static inline struct nvkm_vma *
+/* Reference and map PTEs; raw VMMs do it in two steps, others in one walk. */
+static int
+nvkm_vmm_ptes_get_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
+		      u64 addr, u64 size, struct nvkm_vmm_map *map,
+		      nvkm_vmm_pte_func func)
+{
+	int ret;
+
+	if (!vmm->managed.raw)
+		return __nvkm_vmm_ptes_get_map(vmm, page, addr, size, map, func);
+
+	/* Each helper below takes its own mutex (ref, then map). */
+	ret = nvkm_vmm_ptes_get(vmm, page, addr, size);
+	if (ret)
+		return ret;
+
+	nvkm_vmm_ptes_map(vmm, page, addr, size, map, func);
+	return 0;
+}
+
+struct nvkm_vma *
 nvkm_vma_new(u64 addr, u64 size)
 {
 	struct nvkm_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
@@ -1045,7 +1097,9 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 	vmm->debug = mmu->subdev.debug;
 	kref_init(&vmm->kref);
 
-	__mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);
+	__mutex_init(&vmm->mutex.vmm, "&vmm->mutex.vmm", key ? key : &_key);
+	mutex_init(&vmm->mutex.ref);
+	mutex_init(&vmm->mutex.map);
 
 	/* Locate the smallest page size supported by the backend, it will
 	 * have the deepest nesting of page tables.
@@ -1101,6 +1155,9 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 		if (addr && (ret = nvkm_vmm_ctor_managed(vmm, 0, addr)))
 			return ret;
 
+		vmm->managed.p.addr = 0;
+		vmm->managed.p.size = addr;
+
 		/* NVKM-managed area. */
 		if (size) {
 			if (!(vma = nvkm_vma_new(addr, size)))
@@ -1114,6 +1171,9 @@ nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 		size = vmm->limit - addr;
 		if (size && (ret = nvkm_vmm_ctor_managed(vmm, addr, size)))
 			return ret;
+
+		vmm->managed.n.addr = addr;
+		vmm->managed.n.size = size;
 	} else {
 		/* Address-space fully managed by NVKM, requiring calls to
 		 * nvkm_vmm_get()/nvkm_vmm_put() to allocate address-space.
@@ -1362,9 +1422,9 @@ void
 nvkm_vmm_unmap(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 {
 	if (vma->memory) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		nvkm_vmm_unmap_locked(vmm, vma, false);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 	}
 }
 
@@ -1423,6 +1483,8 @@ nvkm_vmm_map_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma,
 	nvkm_vmm_pte_func func;
 	int ret;
 
+	map->no_comp = vma->no_comp;
+
 	/* Make sure we won't overrun the end of the memory object. */
 	if (unlikely(nvkm_memory_size(map->memory) < map->offset + vma->size)) {
 		VMM_DEBUG(vmm, "overrun %016llx %016llx %016llx",
@@ -1507,10 +1569,15 @@ nvkm_vmm_map(struct nvkm_vmm *vmm, struct nvkm_vma *vma, void *argv, u32 argc,
 	     struct nvkm_vmm_map *map)
 {
 	int ret;
-	mutex_lock(&vmm->mutex);
+
+	if (nvkm_vmm_in_managed_range(vmm, vma->addr, vma->size) &&
+	    vmm->managed.raw)
+		return nvkm_vmm_map_locked(vmm, vma, argv, argc, map);
+
+	mutex_lock(&vmm->mutex.vmm);
 	ret = nvkm_vmm_map_locked(vmm, vma, argv, argc, map);
 	vma->busy = false;
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
 	return ret;
 }
 
@@ -1620,9 +1687,9 @@ nvkm_vmm_put(struct nvkm_vmm *vmm, struct nvkm_vma **pvma)
 {
 	struct nvkm_vma *vma = *pvma;
 	if (vma) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		nvkm_vmm_put_locked(vmm, vma);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 		*pvma = NULL;
 	}
 }
@@ -1769,9 +1836,49 @@ int
 nvkm_vmm_get(struct nvkm_vmm *vmm, u8 page, u64 size, struct nvkm_vma **pvma)
 {
 	int ret;
-	mutex_lock(&vmm->mutex);
+	mutex_lock(&vmm->mutex.vmm);
 	ret = nvkm_vmm_get_locked(vmm, false, true, false, page, 0, size, pvma);
-	mutex_unlock(&vmm->mutex);
+	mutex_unlock(&vmm->mutex.vmm);
+	return ret;
+}
+
+/* Unmap the PTEs covering [addr, addr + size) for page index @refd. */
+void
+nvkm_vmm_raw_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size,
+		   bool sparse, u8 refd)
+{
+	nvkm_vmm_ptes_unmap(vmm, &vmm->func->page[refd], addr, size,
+			    sparse, false);
+}
+
+/* Unreference the PTEs covering [addr, addr + size) for page index @refd. */
+void
+nvkm_vmm_raw_put(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd)
+{
+	/* @refd selects the entry of vmm->func->page[] to operate with. */
+	nvkm_vmm_ptes_put(vmm, &vmm->func->page[refd], addr, size);
+}
+
+/* Reference the PTEs covering [addr, addr + size) for page index @refd. */
+int
+nvkm_vmm_raw_get(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd)
+{
+	/* An empty range is rejected rather than treated as a no-op. */
+	if (unlikely(!size))
+		return -EINVAL;
+
+	return nvkm_vmm_ptes_get(vmm, &vmm->func->page[refd], addr, size);
+}
+
+int
+nvkm_vmm_raw_sparse(struct nvkm_vmm *vmm, u64 addr, u64 size, bool ref)
+{
+	int ret;
+
+	mutex_lock(&vmm->mutex.ref);
+	ret = nvkm_vmm_ptes_sparse(vmm, addr, size, ref);
+	mutex_unlock(&vmm->mutex.ref);
+
 	return ret;
 }
 
@@ -1779,9 +1886,9 @@ void
 nvkm_vmm_part(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 {
 	if (inst && vmm && vmm->func->part) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		vmm->func->part(vmm, inst);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 	}
 }
 
@@ -1790,9 +1897,9 @@ nvkm_vmm_join(struct nvkm_vmm *vmm, struct nvkm_memory *inst)
 {
 	int ret = 0;
 	if (vmm->func->join) {
-		mutex_lock(&vmm->mutex);
+		mutex_lock(&vmm->mutex.vmm);
 		ret = vmm->func->join(vmm, inst);
-		mutex_unlock(&vmm->mutex);
+		mutex_unlock(&vmm->mutex.vmm);
 	}
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index f6188aa9171c..f9bc30cdb2b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -163,6 +163,7 @@ int nvkm_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *,
 		  u32 pd_header, bool managed, u64 addr, u64 size,
 		  struct lock_class_key *, const char *name,
 		  struct nvkm_vmm **);
+struct nvkm_vma *nvkm_vma_new(u64 addr, u64 size);
 struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr);
 struct nvkm_vma *nvkm_vmm_node_split(struct nvkm_vmm *, struct nvkm_vma *,
 				     u64 addr, u64 size);
@@ -173,6 +174,30 @@ void nvkm_vmm_put_locked(struct nvkm_vmm *, struct nvkm_vma *);
 void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *, bool pfn);
 void nvkm_vmm_unmap_region(struct nvkm_vmm *, struct nvkm_vma *);
 
+int nvkm_vmm_raw_get(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd);
+void nvkm_vmm_raw_put(struct nvkm_vmm *vmm, u64 addr, u64 size, u8 refd);
+void nvkm_vmm_raw_unmap(struct nvkm_vmm *vmm, u64 addr, u64 size,
+			bool sparse, u8 refd);
+int nvkm_vmm_raw_sparse(struct nvkm_vmm *, u64 addr, u64 size, bool ref);
+
+/*
+ * True if [start, start + size) lies entirely inside one of the two regions
+ * recorded in vmm->managed.p / .n; a range that wraps past 2^64 never does.
+ */
+static inline bool
+nvkm_vmm_in_managed_range(struct nvkm_vmm *vmm, u64 start, u64 size)
+{
+	u64 p_end = vmm->managed.p.addr + vmm->managed.p.size;
+	u64 n_end = vmm->managed.n.addr + vmm->managed.n.size;
+	u64 end = start + size;
+
+	if (end < start)
+		return false;
+	if (start >= vmm->managed.p.addr && end <= p_end)
+		return true;
+	return start >= vmm->managed.n.addr && end <= n_end;
+}
+
 #define NVKM_VMM_PFN_ADDR                                 0xfffffffffffff000ULL
 #define NVKM_VMM_PFN_ADDR_SHIFT                                              12
 #define NVKM_VMM_PFN_APER                                 0x00000000000000f0ULL
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c
index 5438384d9a67..5e857c02e9aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgf100.c
@@ -287,15 +287,17 @@ gf100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
 			return -EINVAL;
 		}
 
-		ret = nvkm_memory_tags_get(memory, device, tags,
-					   nvkm_ltc_tags_clear,
-					   &map->tags);
-		if (ret) {
-			VMM_DEBUG(vmm, "comp %d", ret);
-			return ret;
+		if (!map->no_comp) {
+			ret = nvkm_memory_tags_get(memory, device, tags,
+						   nvkm_ltc_tags_clear,
+						   &map->tags);
+			if (ret) {
+				VMM_DEBUG(vmm, "comp %d", ret);
+				return ret;
+			}
 		}
 
-		if (map->tags->mn) {
+		if (!map->no_comp && map->tags->mn) {
 			u64 tags = map->tags->mn->offset + (map->offset >> 17);
 			if (page->shift == 17 || !gm20x) {
 				map->type |= tags << 44;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
index 17899fc95b2d..f3630d0e0d55 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
@@ -453,15 +453,17 @@ gp100_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
 			return -EINVAL;
 		}
 
-		ret = nvkm_memory_tags_get(memory, device, tags,
-					   nvkm_ltc_tags_clear,
-					   &map->tags);
-		if (ret) {
-			VMM_DEBUG(vmm, "comp %d", ret);
-			return ret;
+		if (!map->no_comp) {
+			ret = nvkm_memory_tags_get(memory, device, tags,
+						   nvkm_ltc_tags_clear,
+						   &map->tags);
+			if (ret) {
+				VMM_DEBUG(vmm, "comp %d", ret);
+				return ret;
+			}
 		}
 
-		if (map->tags->mn) {
+		if (!map->no_comp && map->tags->mn) {
 			tags = map->tags->mn->offset + (map->offset >> 16);
 			map->ctag |= ((1ULL << page->shift) >> 16) << 36;
 			map->type |= tags << 36;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
index b7548dcd72c7..ff08ad5005a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmnv50.c
@@ -296,19 +296,22 @@ nv50_vmm_valid(struct nvkm_vmm *vmm, void *argv, u32 argc,
 			return -EINVAL;
 		}
 
-		ret = nvkm_memory_tags_get(memory, device, tags, NULL,
-					   &map->tags);
-		if (ret) {
-			VMM_DEBUG(vmm, "comp %d", ret);
-			return ret;
-		}
+		if (!map->no_comp) {
+			ret = nvkm_memory_tags_get(memory, device, tags, NULL,
+						   &map->tags);
+			if (ret) {
+				VMM_DEBUG(vmm, "comp %d", ret);
+				return ret;
+			}
 
-		if (map->tags->mn) {
-			u32 tags = map->tags->mn->offset + (map->offset >> 16);
-			map->ctag |= (u64)comp << 49;
-			map->type |= (u64)comp << 47;
-			map->type |= (u64)tags << 49;
-			map->next |= map->ctag;
+			if (map->tags->mn) {
+				u32 tags = map->tags->mn->offset +
+					   (map->offset >> 16);
+				map->ctag |= (u64)comp << 49;
+				map->type |= (u64)comp << 47;
+				map->type |= (u64)tags << 49;
+				map->next |= map->ctag;
+			}
 		}
 	}