357 files changed, 16590 insertions, 7100 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b066bb3ca01a..e3b4b0f02b3d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -8,6 +8,7 @@ menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
 	depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && MMU && HAS_DMA
 	select HDMI
+	select FB_CMDLINE
 	select I2C
 	select I2C_ALGOBIT
 	select DMA_SHARED_BUFFER
@@ -24,12 +25,6 @@ config DRM_MIPI_DSI
 	bool
 	depends on DRM
 
-config DRM_USB
-	tristate
-	depends on DRM
-	depends on USB_SUPPORT && USB_ARCH_HAS_HCD
-	select USB
-
 config DRM_KMS_HELPER
 	tristate
 	depends on DRM
@@ -115,6 +110,7 @@ config DRM_RADEON
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
 	select INTERVAL_TREE
+	select MMU_NOTIFIER
 	help
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  are both PCI and AGP versions.  You don't need to choose this to
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 4a55d59ccd22..9292a761ea6d 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -4,7 +4,7 @@
 
 ccflags-y := -Iinclude/drm
 
-drm-y       :=	drm_auth.o drm_buffer.o drm_bufs.o drm_cache.o \
+drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_context.o drm_dma.o \
 		drm_fops.o drm_gem.o drm_ioctl.o drm_irq.o \
 		drm_lock.o drm_memory.o drm_drv.o drm_vm.o \
@@ -22,8 +22,6 @@ drm-$(CONFIG_PCI) += ati_pcigart.o
 drm-$(CONFIG_DRM_PANEL) += drm_panel.o
 drm-$(CONFIG_OF) += drm_of.o
 
-drm-usb-y   := drm_usb.o
-
 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
 		drm_plane_helper.o drm_dp_mst_topology.o
 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
@@ -36,7 +34,6 @@ CFLAGS_drm_trace_points.o := -I$(src)
 
 obj-$(CONFIG_DRM)	+= drm.o
 obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
-obj-$(CONFIG_DRM_USB)   += drm_usb.o
 obj-$(CONFIG_DRM_TTM)	+= ttm/
 obj-$(CONFIG_DRM_TDFX)	+= tdfx/
 obj-$(CONFIG_DRM_R128)	+= r128/
diff --git a/drivers/gpu/drm/armada/armada_drv.c b/drivers/gpu/drm/armada/armada_drv.c
index e2d5792b140f..f672e6ad8afa 100644
--- a/drivers/gpu/drm/armada/armada_drv.c
+++ b/drivers/gpu/drm/armada/armada_drv.c
@@ -308,6 +308,7 @@ static struct drm_driver armada_drm_driver = {
 	.postclose		= NULL,
 	.lastclose		= armada_drm_lastclose,
 	.unload			= armada_drm_unload,
+	.set_busid		= drm_platform_set_busid,
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= armada_drm_enable_vblank,
 	.disable_vblank		= armada_drm_disable_vblank,
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index f19682a93c24..9a32d9dfdd26 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -199,6 +199,7 @@ static struct drm_driver driver = {
 
 	.load = ast_driver_load,
 	.unload = ast_driver_unload,
+	.set_busid = drm_pci_set_busid,
 
 	.fops = &ast_fops,
 	.name = DRIVER_NAME,
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 957d4fabf1e1..cb91c2acc3cb 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -316,7 +316,7 @@ struct ast_bo {
 	struct ttm_placement placement;
 	struct ttm_bo_kmap_obj kmap;
 	struct drm_gem_object gem;
-	u32 placements[3];
+	struct ttm_place placements[3];
 	int pin_count;
 };
 #define gem_to_ast_bo(gobj) container_of((gobj), struct ast_bo, gem)
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index b8246227bab0..5098c7dd435c 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -293,18 +293,22 @@ void ast_mm_fini(struct ast_private *ast)
 void ast_ttm_placement(struct ast_bo *bo, int domain)
 {
 	u32 c = 0;
-	bo->placement.fpfn = 0;
-	bo->placement.lpfn = 0;
+	unsigned i;
+
 	bo->placement.placement = bo->placements;
 	bo->placement.busy_placement = bo->placements;
 	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
 	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
 	if (!c)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
 	bo->placement.num_placement = c;
 	bo->placement.num_busy_placement = c;
+	for (i = 0; i < c; ++i) {
+		bo->placements[i].fpfn = 0;
+		bo->placements[i].lpfn = 0;
+	}
 }
 
 int ast_bo_create(struct drm_device *dev, int size, int align,
@@ -360,7 +364,7 @@ int ast_bo_pin(struct ast_bo *bo, u32 pl_flag, u64 *gpu_addr)
 
 	ast_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -383,7 +387,7 @@ int ast_bo_unpin(struct ast_bo *bo)
 		return 0;
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -407,7 +411,7 @@ int ast_bo_push_sysram(struct ast_bo *bo)
 
 	ast_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
 	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c
index c399dea27a3b..6c4d4b6eba80 100644
--- a/drivers/gpu/drm/ati_pcigart.c
+++ b/drivers/gpu/drm/ati_pcigart.c
@@ -34,6 +34,8 @@
 #include <linux/export.h>
 #include <drm/drmP.h>
 
+#include <drm/ati_pcigart.h>
+
 # define ATI_PCIGART_PAGE_SIZE		4096	/**< PCI GART page size */
 
 static int drm_ati_alloc_pcigart_table(struct drm_device *dev,
diff --git a/drivers/gpu/drm/bochs/bochs.h b/drivers/gpu/drm/bochs/bochs.h
index 7eb52dd44b01..4f6e7b3a3635 100644
--- a/drivers/gpu/drm/bochs/bochs.h
+++ b/drivers/gpu/drm/bochs/bochs.h
@@ -99,7 +99,7 @@ struct bochs_bo {
 	struct ttm_placement placement;
 	struct ttm_bo_kmap_obj kmap;
 	struct drm_gem_object gem;
-	u32 placements[3];
+	struct ttm_place placements[3];
 	int pin_count;
 };
 
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index 9738e9b14708..98837bde2d25 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -82,6 +82,7 @@ static struct drm_driver bochs_driver = {
 	.driver_features	= DRIVER_GEM | DRIVER_MODESET,
 	.load			= bochs_load,
 	.unload			= bochs_unload,
+	.set_busid		= drm_pci_set_busid,
 	.fops			= &bochs_fops,
 	.name			= "bochs-drm",
 	.desc			= "bochs dispi vga interface (qemu stdvga)",
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 1728a1b0b813..2af30e7607d7 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -257,20 +257,26 @@ void bochs_mm_fini(struct bochs_device *bochs)
 
 static void bochs_ttm_placement(struct bochs_bo *bo, int domain)
 {
+	unsigned i;
 	u32 c = 0;
-	bo->placement.fpfn = 0;
-	bo->placement.lpfn = 0;
 	bo->placement.placement = bo->placements;
 	bo->placement.busy_placement = bo->placements;
 	if (domain & TTM_PL_FLAG_VRAM) {
-		bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED
+		bo->placements[c++].flags = TTM_PL_FLAG_WC
+			| TTM_PL_FLAG_UNCACHED
 			| TTM_PL_FLAG_VRAM;
 	}
 	if (domain & TTM_PL_FLAG_SYSTEM) {
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING
+			| TTM_PL_FLAG_SYSTEM;
 	}
 	if (!c) {
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING
+			| TTM_PL_FLAG_SYSTEM;
+	}
+	for (i = 0; i < c; ++i) {
+		bo->placements[i].fpfn = 0;
+		bo->placements[i].lpfn = 0;
 	}
 	bo->placement.num_placement = c;
 	bo->placement.num_busy_placement = c;
@@ -294,7 +300,7 @@ int bochs_bo_pin(struct bochs_bo *bo, u32 pl_flag, u64 *gpu_addr)
 
 	bochs_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -319,7 +325,7 @@ int bochs_bo_unpin(struct bochs_bo *bo)
 		return 0;
 
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.c b/drivers/gpu/drm/cirrus/cirrus_drv.c
index 919c73b94447..e705335101a5 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.c
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.c
@@ -128,6 +128,7 @@ static struct drm_driver driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM,
 	.load = cirrus_driver_load,
 	.unload = cirrus_driver_unload,
+	.set_busid = drm_pci_set_busid,
 	.fops = &cirrus_driver_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/cirrus/cirrus_drv.h b/drivers/gpu/drm/cirrus/cirrus_drv.h
index 401c890b6c6a..dd2cfc9024aa 100644
--- a/drivers/gpu/drm/cirrus/cirrus_drv.h
+++ b/drivers/gpu/drm/cirrus/cirrus_drv.h
@@ -163,7 +163,7 @@ struct cirrus_bo {
 	struct ttm_placement placement;
 	struct ttm_bo_kmap_obj kmap;
 	struct drm_gem_object gem;
-	u32 placements[3];
+	struct ttm_place placements[3];
 	int pin_count;
 };
 #define gem_to_cirrus_bo(gobj) container_of((gobj), struct cirrus_bo, gem)
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 92e6b7786097..3e7d758330a9 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -298,18 +298,21 @@ void cirrus_mm_fini(struct cirrus_device *cirrus)
 void cirrus_ttm_placement(struct cirrus_bo *bo, int domain)
 {
 	u32 c = 0;
-	bo->placement.fpfn = 0;
-	bo->placement.lpfn = 0;
+	unsigned i;
 	bo->placement.placement = bo->placements;
 	bo->placement.busy_placement = bo->placements;
 	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
 	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	if (!c)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	bo->placement.num_placement = c;
 	bo->placement.num_busy_placement = c;
+	for (i = 0; i < c; ++i) {
+		bo->placements[i].fpfn = 0;
+		bo->placements[i].lpfn = 0;
+	}
 }
 
 int cirrus_bo_create(struct drm_device *dev, int size, int align,
@@ -365,7 +368,7 @@ int cirrus_bo_pin(struct cirrus_bo *bo, u32 pl_flag, u64 *gpu_addr)
 
 	cirrus_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -392,7 +395,7 @@ int cirrus_bo_push_sysram(struct cirrus_bo *bo)
 
 	cirrus_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
 	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
diff --git a/drivers/gpu/drm/drm_agpsupport.c b/drivers/gpu/drm/drm_agpsupport.c
index dde205cef384..4b2b4aa5033b 100644
--- a/drivers/gpu/drm/drm_agpsupport.c
+++ b/drivers/gpu/drm/drm_agpsupport.c
@@ -34,6 +34,7 @@
 #include <drm/drmP.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include "drm_legacy.h"
 
 #if __OS_HAS_AGP
 
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index 3cedae12b3c1..fc8e8aaa34fb 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -34,6 +34,13 @@
  */
 
 #include <drm/drmP.h>
+#include "drm_internal.h"
+
+struct drm_magic_entry {
+	struct list_head head;
+	struct drm_hash_item hash_item;
+	struct drm_file *priv;
+};
 
 /**
  * Find the file with the given magic number.
diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c
index 61acb8f6756d..569064a00693 100644
--- a/drivers/gpu/drm/drm_bufs.c
+++ b/drivers/gpu/drm/drm_bufs.c
@@ -1,18 +1,13 @@
-/**
- * \file drm_bufs.c
- * Generic buffer template
- *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
- * \author Gareth Hughes <gareth@valinux.com>
- */
-
 /*
- * Created: Thu Nov 23 03:10:50 2000 by gareth@valinux.com
+ * Legacy: Generic DRM Buffer Management
  *
  * Copyright 1999, 2000 Precision Insight, Inc., Cedar Park, Texas.
  * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
  * All Rights Reserved.
  *
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
+ * Author: Gareth Hughes <gareth@valinux.com>
+ *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
@@ -39,6 +34,7 @@
 #include <linux/export.h>
 #include <asm/shmparam.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 static struct drm_map_list *drm_find_matching_map(struct drm_device *dev,
 						  struct drm_local_map *map)
@@ -365,9 +361,9 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset,
 	return 0;
 }
 
-int drm_addmap(struct drm_device * dev, resource_size_t offset,
-	       unsigned int size, enum drm_map_type type,
-	       enum drm_map_flags flags, struct drm_local_map ** map_ptr)
+int drm_legacy_addmap(struct drm_device * dev, resource_size_t offset,
+		      unsigned int size, enum drm_map_type type,
+		      enum drm_map_flags flags, struct drm_local_map **map_ptr)
 {
 	struct drm_map_list *list;
 	int rc;
@@ -377,8 +373,7 @@ int drm_addmap(struct drm_device * dev, resource_size_t offset,
 		*map_ptr = list->map;
 	return rc;
 }
-
-EXPORT_SYMBOL(drm_addmap);
+EXPORT_SYMBOL(drm_legacy_addmap);
 
 /**
  * Ioctl to specify a range of memory that is available for mapping by a
@@ -391,8 +386,8 @@ EXPORT_SYMBOL(drm_addmap);
  * \return zero on success or a negative value on error.
  *
  */
-int drm_addmap_ioctl(struct drm_device *dev, void *data,
-		     struct drm_file *file_priv)
+int drm_legacy_addmap_ioctl(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv)
 {
 	struct drm_map *map = data;
 	struct drm_map_list *maplist;
@@ -429,9 +424,9 @@ int drm_addmap_ioctl(struct drm_device *dev, void *data,
  * its being used, and free any associate resource (such as MTRR's) if it's not
  * being on use.
  *
- * \sa drm_addmap
+ * \sa drm_legacy_addmap
  */
-int drm_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
+int drm_legacy_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
 {
 	struct drm_map_list *r_list = NULL, *list_t;
 	drm_dma_handle_t dmah;
@@ -478,26 +473,26 @@ int drm_rmmap_locked(struct drm_device *dev, struct drm_local_map *map)
 		dmah.vaddr = map->handle;
 		dmah.busaddr = map->offset;
 		dmah.size = map->size;
-		__drm_pci_free(dev, &dmah);
+		__drm_legacy_pci_free(dev, &dmah);
 		break;
 	}
 	kfree(map);
 
 	return 0;
 }
-EXPORT_SYMBOL(drm_rmmap_locked);
+EXPORT_SYMBOL(drm_legacy_rmmap_locked);
 
-int drm_rmmap(struct drm_device *dev, struct drm_local_map *map)
+int drm_legacy_rmmap(struct drm_device *dev, struct drm_local_map *map)
 {
 	int ret;
 
 	mutex_lock(&dev->struct_mutex);
-	ret = drm_rmmap_locked(dev, map);
+	ret = drm_legacy_rmmap_locked(dev, map);
 	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
 }
-EXPORT_SYMBOL(drm_rmmap);
+EXPORT_SYMBOL(drm_legacy_rmmap);
 
 /* The rmmap ioctl appears to be unnecessary.  All mappings are torn down on
  * the last close of the device, and this is necessary for cleanup when things
@@ -514,8 +509,8 @@ EXPORT_SYMBOL(drm_rmmap);
  * \param arg pointer to a struct drm_map structure.
  * \return zero on success or a negative value on error.
  */
-int drm_rmmap_ioctl(struct drm_device *dev, void *data,
-		    struct drm_file *file_priv)
+int drm_legacy_rmmap_ioctl(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
 {
 	struct drm_map *request = data;
 	struct drm_local_map *map = NULL;
@@ -546,7 +541,7 @@ int drm_rmmap_ioctl(struct drm_device *dev, void *data,
 		return 0;
 	}
 
-	ret = drm_rmmap_locked(dev, map);
+	ret = drm_legacy_rmmap_locked(dev, map);
 
 	mutex_unlock(&dev->struct_mutex);
 
@@ -599,7 +594,8 @@ static void drm_cleanup_buf_error(struct drm_device * dev,
  * reallocates the buffer list of the same size order to accommodate the new
  * buffers.
  */
-int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
+int drm_legacy_addbufs_agp(struct drm_device *dev,
+			   struct drm_buf_desc *request)
 {
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf_entry *entry;
@@ -759,10 +755,11 @@ int drm_addbufs_agp(struct drm_device * dev, struct drm_buf_desc * request)
 	atomic_dec(&dev->buf_alloc);
 	return 0;
 }
-EXPORT_SYMBOL(drm_addbufs_agp);
+EXPORT_SYMBOL(drm_legacy_addbufs_agp);
 #endif				/* __OS_HAS_AGP */
 
-int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
+int drm_legacy_addbufs_pci(struct drm_device *dev,
+			   struct drm_buf_desc *request)
 {
 	struct drm_device_dma *dma = dev->dma;
 	int count;
@@ -964,9 +961,10 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request)
 	return 0;
 
 }
-EXPORT_SYMBOL(drm_addbufs_pci);
+EXPORT_SYMBOL(drm_legacy_addbufs_pci);
 
-static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request)
+static int drm_legacy_addbufs_sg(struct drm_device *dev,
+				 struct drm_buf_desc *request)
 {
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf_entry *entry;
@@ -1135,8 +1133,8 @@ static int drm_addbufs_sg(struct drm_device * dev, struct drm_buf_desc * request
  * addbufs_sg() or addbufs_pci() for AGP, scatter-gather or consistent
  * PCI memory respectively.
  */
-int drm_addbufs(struct drm_device *dev, void *data,
-		struct drm_file *file_priv)
+int drm_legacy_addbufs(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
 {
 	struct drm_buf_desc *request = data;
 	int ret;
@@ -1149,15 +1147,15 @@ int drm_addbufs(struct drm_device *dev, void *data,
 
 #if __OS_HAS_AGP
 	if (request->flags & _DRM_AGP_BUFFER)
-		ret = drm_addbufs_agp(dev, request);
+		ret = drm_legacy_addbufs_agp(dev, request);
 	else
 #endif
 	if (request->flags & _DRM_SG_BUFFER)
-		ret = drm_addbufs_sg(dev, request);
+		ret = drm_legacy_addbufs_sg(dev, request);
 	else if (request->flags & _DRM_FB_BUFFER)
 		ret = -EINVAL;
 	else
-		ret = drm_addbufs_pci(dev, request);
+		ret = drm_legacy_addbufs_pci(dev, request);
 
 	return ret;
 }
@@ -1179,8 +1177,8 @@ int drm_addbufs(struct drm_device *dev, void *data,
  * lock, preventing of allocating more buffers after this call. Information
  * about each requested buffer is then copied into user space.
  */
-int drm_infobufs(struct drm_device *dev, void *data,
-		 struct drm_file *file_priv)
+int drm_legacy_infobufs(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
 {
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf_info *request = data;
@@ -1260,8 +1258,8 @@ int drm_infobufs(struct drm_device *dev, void *data,
  *
  * \note This ioctl is deprecated and mostly never used.
  */
-int drm_markbufs(struct drm_device *dev, void *data,
-		 struct drm_file *file_priv)
+int drm_legacy_markbufs(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
 {
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf_desc *request = data;
@@ -1307,8 +1305,8 @@ int drm_markbufs(struct drm_device *dev, void *data,
  * Calls free_buffer() for each used buffer.
  * This function is primarily used for debugging.
  */
-int drm_freebufs(struct drm_device *dev, void *data,
-		 struct drm_file *file_priv)
+int drm_legacy_freebufs(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
 {
 	struct drm_device_dma *dma = dev->dma;
 	struct drm_buf_free *request = data;
@@ -1340,7 +1338,7 @@ int drm_freebufs(struct drm_device *dev, void *data,
 				  task_pid_nr(current));
 			return -EINVAL;
 		}
-		drm_free_buffer(dev, buf);
+		drm_legacy_free_buffer(dev, buf);
 	}
 
 	return 0;
@@ -1360,8 +1358,8 @@ int drm_freebufs(struct drm_device *dev, void *data,
  * offset equal to 0, which drm_mmap() interpretes as PCI buffers and calls
  * drm_mmap_dma().
  */
-int drm_mapbufs(struct drm_device *dev, void *data,
-	        struct drm_file *file_priv)
+int drm_legacy_mapbufs(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
 {
 	struct drm_device_dma *dma = dev->dma;
 	int retcode = 0;
@@ -1448,7 +1446,7 @@ int drm_mapbufs(struct drm_device *dev, void *data,
 	return retcode;
 }
 
-int drm_dma_ioctl(struct drm_device *dev, void *data,
+int drm_legacy_dma_ioctl(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
@@ -1460,7 +1458,7 @@ int drm_dma_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 }
 
-struct drm_local_map *drm_getsarea(struct drm_device *dev)
+struct drm_local_map *drm_legacy_getsarea(struct drm_device *dev)
 {
 	struct drm_map_list *entry;
 
@@ -1472,4 +1470,4 @@ struct drm_local_map *drm_getsarea(struct drm_device *dev)
 	}
 	return NULL;
 }
-EXPORT_SYMBOL(drm_getsarea);
+EXPORT_SYMBOL(drm_legacy_getsarea);
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index 90e773019eac..ecd4e0b4c525 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -40,106 +40,12 @@
 #include <drm/drm_modeset_lock.h>
 
 #include "drm_crtc_internal.h"
+#include "drm_internal.h"
 
 static struct drm_framebuffer *add_framebuffer_internal(struct drm_device *dev,
 							struct drm_mode_fb_cmd2 *r,
 							struct drm_file *file_priv);
 
-/**
- * drm_modeset_lock_all - take all modeset locks
- * @dev: drm device
- *
- * This function takes all modeset locks, suitable where a more fine-grained
- * scheme isn't (yet) implemented. Locks must be dropped with
- * drm_modeset_unlock_all.
- */
-void drm_modeset_lock_all(struct drm_device *dev)
-{
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_modeset_acquire_ctx *ctx;
-	int ret;
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (WARN_ON(!ctx))
-		return;
-
-	mutex_lock(&config->mutex);
-
-	drm_modeset_acquire_init(ctx, 0);
-
-retry:
-	ret = drm_modeset_lock(&config->connection_mutex, ctx);
-	if (ret)
-		goto fail;
-	ret = drm_modeset_lock_all_crtcs(dev, ctx);
-	if (ret)
-		goto fail;
-
-	WARN_ON(config->acquire_ctx);
-
-	/* now we hold the locks, so now that it is safe, stash the
-	 * ctx for drm_modeset_unlock_all():
-	 */
-	config->acquire_ctx = ctx;
-
-	drm_warn_on_modeset_not_all_locked(dev);
-
-	return;
-
-fail:
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(ctx);
-		goto retry;
-	}
-}
-EXPORT_SYMBOL(drm_modeset_lock_all);
-
-/**
- * drm_modeset_unlock_all - drop all modeset locks
- * @dev: device
- *
- * This function drop all modeset locks taken by drm_modeset_lock_all.
- */
-void drm_modeset_unlock_all(struct drm_device *dev)
-{
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
-
-	if (WARN_ON(!ctx))
-		return;
-
-	config->acquire_ctx = NULL;
-	drm_modeset_drop_locks(ctx);
-	drm_modeset_acquire_fini(ctx);
-
-	kfree(ctx);
-
-	mutex_unlock(&dev->mode_config.mutex);
-}
-EXPORT_SYMBOL(drm_modeset_unlock_all);
-
-/**
- * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked
- * @dev: device
- *
- * Useful as a debug assert.
- */
-void drm_warn_on_modeset_not_all_locked(struct drm_device *dev)
-{
-	struct drm_crtc *crtc;
-
-	/* Locking is currently fubar in the panic handler. */
-	if (oops_in_progress)
-		return;
-
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
-		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
-
-	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
-	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
-}
-EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
-
 /* Avoid boilerplate.  I'm tired of typing. */
 #define DRM_ENUM_NAME_FN(fnname, list)				\
 	const char *fnname(int val)				\
@@ -515,9 +421,6 @@ int drm_framebuffer_init(struct drm_device *dev, struct drm_framebuffer *fb,
 	if (ret)
 		goto out;
 
-	/* Grab the idr reference. */
-	drm_framebuffer_reference(fb);
-
 	dev->mode_config.num_fb++;
 	list_add(&fb->head, &dev->mode_config.fb_list);
 out:
@@ -527,10 +430,34 @@ out:
 }
 EXPORT_SYMBOL(drm_framebuffer_init);
 
+/* dev->mode_config.fb_lock must be held! */
+static void __drm_framebuffer_unregister(struct drm_device *dev,
+					 struct drm_framebuffer *fb)
+{
+	mutex_lock(&dev->mode_config.idr_mutex);
+	idr_remove(&dev->mode_config.crtc_idr, fb->base.id);
+	mutex_unlock(&dev->mode_config.idr_mutex);
+
+	fb->base.id = 0;
+}
+
 static void drm_framebuffer_free(struct kref *kref)
 {
 	struct drm_framebuffer *fb =
 			container_of(kref, struct drm_framebuffer, refcount);
+	struct drm_device *dev = fb->dev;
+
+	/*
+	 * The lookup idr holds a weak reference, which has not necessarily been
+	 * removed at this point. Check for that.
+	 */
+	mutex_lock(&dev->mode_config.fb_lock);
+	if (fb->base.id) {
+		/* Mark fb as reaped and drop idr ref. */
+		__drm_framebuffer_unregister(dev, fb);
+	}
+	mutex_unlock(&dev->mode_config.fb_lock);
+
 	fb->funcs->destroy(fb);
 }
 
@@ -567,8 +494,10 @@ struct drm_framebuffer *drm_framebuffer_lookup(struct drm_device *dev,
 
 	mutex_lock(&dev->mode_config.fb_lock);
 	fb = __drm_framebuffer_lookup(dev, id);
-	if (fb)
-		drm_framebuffer_reference(fb);
+	if (fb) {
+		if (!kref_get_unless_zero(&fb->refcount))
+			fb = NULL;
+	}
 	mutex_unlock(&dev->mode_config.fb_lock);
 
 	return fb;
@@ -612,19 +541,6 @@ static void __drm_framebuffer_unreference(struct drm_framebuffer *fb)
 	kref_put(&fb->refcount, drm_framebuffer_free_bug);
 }
 
-/* dev->mode_config.fb_lock must be held! */
-static void __drm_framebuffer_unregister(struct drm_device *dev,
-					 struct drm_framebuffer *fb)
-{
-	mutex_lock(&dev->mode_config.idr_mutex);
-	idr_remove(&dev->mode_config.crtc_idr, fb->base.id);
-	mutex_unlock(&dev->mode_config.idr_mutex);
-
-	fb->base.id = 0;
-
-	__drm_framebuffer_unreference(fb);
-}
-
 /**
  * drm_framebuffer_unregister_private - unregister a private fb from the lookup idr
  * @fb: fb to unregister
@@ -764,11 +680,7 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 	crtc->funcs = funcs;
 	crtc->invert_dimensions = false;
 
-	drm_modeset_lock_all(dev);
 	drm_modeset_lock_init(&crtc->mutex);
-	/* dropped by _unlock_all(): */
-	drm_modeset_lock(&crtc->mutex, config->acquire_ctx);
-
 	ret = drm_mode_object_get(dev, &crtc->base, DRM_MODE_OBJECT_CRTC);
 	if (ret)
 		goto out;
@@ -786,7 +698,6 @@ int drm_crtc_init_with_planes(struct drm_device *dev, struct drm_crtc *crtc,
 		cursor->possible_crtcs = 1 << drm_crtc_index(crtc);
 
  out:
-	drm_modeset_unlock_all(dev);
 
 	return ret;
 }
@@ -853,6 +764,59 @@ static void drm_mode_remove(struct drm_connector *connector,
 }
 
 /**
+ * drm_connector_get_cmdline_mode - reads the user's cmdline mode
+ * @connector: connector to quwery
+ * @mode: returned mode
+ *
+ * The kernel supports per-connector configration of its consoles through
+ * use of the video= parameter. This function parses that option and
+ * extracts the user's specified mode (or enable/disable status) for a
+ * particular connector. This is typically only used during the early fbdev
+ * setup.
+ */
+static void drm_connector_get_cmdline_mode(struct drm_connector *connector)
+{
+	struct drm_cmdline_mode *mode = &connector->cmdline_mode;
+	char *option = NULL;
+
+	if (fb_get_options(connector->name, &option))
+		return;
+
+	if (!drm_mode_parse_command_line_for_connector(option,
+						       connector,
+						       mode))
+		return;
+
+	if (mode->force) {
+		const char *s;
+
+		switch (mode->force) {
+		case DRM_FORCE_OFF:
+			s = "OFF";
+			break;
+		case DRM_FORCE_ON_DIGITAL:
+			s = "ON - dig";
+			break;
+		default:
+		case DRM_FORCE_ON:
+			s = "ON";
+			break;
+		}
+
+		DRM_INFO("forcing %s connector %s\n", connector->name, s);
+		connector->force = mode->force;
+	}
+
+	DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
+		      connector->name,
+		      mode->xres, mode->yres,
+		      mode->refresh_specified ? mode->refresh : 60,
+		      mode->rb ? " reduced blanking" : "",
+		      mode->margins ? " with margins" : "",
+		      mode->interlace ?  " interlaced" : "");
+}
+
+/**
  * drm_connector_init - Init a preallocated connector
  * @dev: DRM device
  * @connector: the connector to init
@@ -904,6 +868,8 @@ int drm_connector_init(struct drm_device *dev,
 	connector->edid_blob_ptr = NULL;
 	connector->status = connector_status_unknown;
 
+	drm_connector_get_cmdline_mode(connector);
+
 	list_add_tail(&connector->head, &dev->mode_config.connector_list);
 	dev->mode_config.num_connector++;
 
@@ -957,6 +923,29 @@ void drm_connector_cleanup(struct drm_connector *connector)
 EXPORT_SYMBOL(drm_connector_cleanup);
 
 /**
+ * drm_connector_index - find the index of a registered connector
+ * @connector: connector to find index for
+ *
+ * Given a registered connector, return the index of that connector within a DRM
+ * device's list of connectors.
+ */
+unsigned int drm_connector_index(struct drm_connector *connector)
+{
+	unsigned int index = 0;
+	struct drm_connector *tmp;
+
+	list_for_each_entry(tmp, &connector->dev->mode_config.connector_list, head) {
+		if (tmp == connector)
+			return index;
+
+		index++;
+	}
+
+	BUG();
+}
+EXPORT_SYMBOL(drm_connector_index);
+
+/**
  * drm_connector_register - register a connector
  * @connector: the connector to register
  *
@@ -1261,6 +1250,29 @@ void drm_plane_cleanup(struct drm_plane *plane)
 EXPORT_SYMBOL(drm_plane_cleanup);
 
 /**
+ * drm_plane_index - find the index of a registered plane
+ * @plane: plane to find index for
+ *
+ * Given a registered plane, return the index of that CRTC within a DRM
+ * device's list of planes.
+ */
+unsigned int drm_plane_index(struct drm_plane *plane)
+{
+	unsigned int index = 0;
+	struct drm_plane *tmp;
+
+	list_for_each_entry(tmp, &plane->dev->mode_config.plane_list, head) {
+		if (tmp == plane)
+			return index;
+
+		index++;
+	}
+
+	BUG();
+}
+EXPORT_SYMBOL(drm_plane_index);
+
+/**
  * drm_plane_force_disable - Forcibly disable a plane
  * @plane: plane to disable
  *
@@ -1271,19 +1283,21 @@ EXPORT_SYMBOL(drm_plane_cleanup);
  */
 void drm_plane_force_disable(struct drm_plane *plane)
 {
-	struct drm_framebuffer *old_fb = plane->fb;
 	int ret;
 
-	if (!old_fb)
+	if (!plane->fb)
 		return;
 
+	plane->old_fb = plane->fb;
 	ret = plane->funcs->disable_plane(plane);
 	if (ret) {
 		DRM_ERROR("failed to disable plane with busy fb\n");
+		plane->old_fb = NULL;
 		return;
 	}
 	/* disconnect the plane from the fb and crtc: */
-	__drm_framebuffer_unreference(old_fb);
+	__drm_framebuffer_unreference(plane->old_fb);
+	plane->old_fb = NULL;
 	plane->fb = NULL;
 	plane->crtc = NULL;
 }
@@ -2259,23 +2273,21 @@ static int setplane_internal(struct drm_plane *plane,
 			     uint32_t src_w, uint32_t src_h)
 {
 	struct drm_device *dev = plane->dev;
-	struct drm_framebuffer *old_fb = NULL;
 	int ret = 0;
 	unsigned int fb_width, fb_height;
 	int i;
 
+	drm_modeset_lock_all(dev);
 	/* No fb means shut it down */
 	if (!fb) {
-		drm_modeset_lock_all(dev);
-		old_fb = plane->fb;
+		plane->old_fb = plane->fb;
 		ret = plane->funcs->disable_plane(plane);
 		if (!ret) {
 			plane->crtc = NULL;
 			plane->fb = NULL;
 		} else {
-			old_fb = NULL;
+			plane->old_fb = NULL;
 		}
-		drm_modeset_unlock_all(dev);
 		goto out;
 	}
 
@@ -2315,8 +2327,7 @@ static int setplane_internal(struct drm_plane *plane,
 		goto out;
 	}
 
-	drm_modeset_lock_all(dev);
-	old_fb = plane->fb;
+	plane->old_fb = plane->fb;
 	ret = plane->funcs->update_plane(plane, crtc, fb,
 					 crtc_x, crtc_y, crtc_w, crtc_h,
 					 src_x, src_y, src_w, src_h);
@@ -2325,15 +2336,16 @@ static int setplane_internal(struct drm_plane *plane,
 		plane->fb = fb;
 		fb = NULL;
 	} else {
-		old_fb = NULL;
+		plane->old_fb = NULL;
 	}
-	drm_modeset_unlock_all(dev);
 
 out:
 	if (fb)
 		drm_framebuffer_unreference(fb);
-	if (old_fb)
-		drm_framebuffer_unreference(old_fb);
+	if (plane->old_fb)
+		drm_framebuffer_unreference(plane->old_fb);
+	plane->old_fb = NULL;
+	drm_modeset_unlock_all(dev);
 
 	return ret;
 
@@ -2440,7 +2452,7 @@ int drm_mode_set_config_internal(struct drm_mode_set *set)
 	 * crtcs. Atomic modeset will have saner semantics ...
 	 */
 	list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head)
-		tmp->old_fb = tmp->primary->fb;
+		tmp->primary->old_fb = tmp->primary->fb;
 
 	fb = set->fb;
 
@@ -2453,8 +2465,9 @@ int drm_mode_set_config_internal(struct drm_mode_set *set)
 	list_for_each_entry(tmp, &crtc->dev->mode_config.crtc_list, head) {
 		if (tmp->primary->fb)
 			drm_framebuffer_reference(tmp->primary->fb);
-		if (tmp->old_fb)
-			drm_framebuffer_unreference(tmp->old_fb);
+		if (tmp->primary->old_fb)
+			drm_framebuffer_unreference(tmp->primary->old_fb);
+		tmp->primary->old_fb = NULL;
 	}
 
 	return ret;
@@ -2785,7 +2798,7 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 	if (crtc->cursor)
 		return drm_mode_cursor_universal(crtc, req, file_priv);
 
-	drm_modeset_lock(&crtc->mutex, NULL);
+	drm_modeset_lock_crtc(crtc);
 	if (req->flags & DRM_MODE_CURSOR_BO) {
 		if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) {
 			ret = -ENXIO;
@@ -2809,7 +2822,7 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 		}
 	}
 out:
-	drm_modeset_unlock(&crtc->mutex);
+	drm_modeset_unlock_crtc(crtc);
 
 	return ret;
 
@@ -3495,9 +3508,10 @@ EXPORT_SYMBOL(drm_property_create_enum);
  * @flags: flags specifying the property type
  * @name: name of the property
  * @props: enumeration lists with property bitflags
- * @num_values: number of pre-defined values
+ * @num_props: size of the @props array
+ * @supported_bits: bitmask of all supported enumeration values
  *
- * This creates a new generic drm property which can then be attached to a drm
+ * This creates a new bitmask drm property which can then be attached to a drm
  * object with drm_object_attach_property. The returned property object must be
  * freed with drm_property_destroy.
  *
@@ -4157,12 +4171,25 @@ static int drm_mode_crtc_set_obj_prop(struct drm_mode_object *obj,
 	return ret;
 }
 
-static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj,
-				      struct drm_property *property,
-				      uint64_t value)
+/**
+ * drm_mode_plane_set_obj_prop - set the value of a property
+ * @plane: drm plane object to set property value for
+ * @property: property to set
+ * @value: value the property should be set to
+ *
+ * This functions sets a given property on a given plane object. This function
+ * calls the driver's ->set_property callback and changes the software state of
+ * the property if the callback succeeds.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_mode_plane_set_obj_prop(struct drm_plane *plane,
+				struct drm_property *property,
+				uint64_t value)
 {
 	int ret = -EINVAL;
-	struct drm_plane *plane = obj_to_plane(obj);
+	struct drm_mode_object *obj = &plane->base;
 
 	if (plane->funcs->set_property)
 		ret = plane->funcs->set_property(plane, property, value);
@@ -4171,6 +4198,7 @@ static int drm_mode_plane_set_obj_prop(struct drm_mode_object *obj,
 
 	return ret;
 }
+EXPORT_SYMBOL(drm_mode_plane_set_obj_prop);
 
 /**
  * drm_mode_getproperty_ioctl - get the current value of a object's property
@@ -4309,7 +4337,8 @@ int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data,
 		ret = drm_mode_crtc_set_obj_prop(arg_obj, property, arg->value);
 		break;
 	case DRM_MODE_OBJECT_PLANE:
-		ret = drm_mode_plane_set_obj_prop(arg_obj, property, arg->value);
+		ret = drm_mode_plane_set_obj_prop(obj_to_plane(arg_obj),
+						  property, arg->value);
 		break;
 	}
 
@@ -4529,7 +4558,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 {
 	struct drm_mode_crtc_page_flip *page_flip = data;
 	struct drm_crtc *crtc;
-	struct drm_framebuffer *fb = NULL, *old_fb = NULL;
+	struct drm_framebuffer *fb = NULL;
 	struct drm_pending_vblank_event *e = NULL;
 	unsigned long flags;
 	int ret = -EINVAL;
@@ -4545,7 +4574,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 	if (!crtc)
 		return -ENOENT;
 
-	drm_modeset_lock(&crtc->mutex, NULL);
+	drm_modeset_lock_crtc(crtc);
 	if (crtc->primary->fb == NULL) {
 		/* The framebuffer is currently unbound, presumably
 		 * due to a hotplug event, that userspace has not
@@ -4601,7 +4630,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			(void (*) (struct drm_pending_event *)) kfree;
 	}
 
-	old_fb = crtc->primary->fb;
+	crtc->primary->old_fb = crtc->primary->fb;
 	ret = crtc->funcs->page_flip(crtc, fb, e, page_flip->flags);
 	if (ret) {
 		if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) {
@@ -4611,7 +4640,7 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			kfree(e);
 		}
 		/* Keep the old fb, don't unref it. */
-		old_fb = NULL;
+		crtc->primary->old_fb = NULL;
 	} else {
 		/*
 		 * Warn if the driver hasn't properly updated the crtc->fb
@@ -4627,9 +4656,10 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev,
 out:
 	if (fb)
 		drm_framebuffer_unreference(fb);
-	if (old_fb)
-		drm_framebuffer_unreference(old_fb);
-	drm_modeset_unlock(&crtc->mutex);
+	if (crtc->primary->old_fb)
+		drm_framebuffer_unreference(crtc->primary->old_fb);
+	crtc->primary->old_fb = NULL;
+	drm_modeset_unlock_crtc(crtc);
 
 	return ret;
 }
@@ -4645,9 +4675,14 @@ out:
 void drm_mode_config_reset(struct drm_device *dev)
 {
 	struct drm_crtc *crtc;
+	struct drm_plane *plane;
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
 
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head)
+		if (plane->funcs->reset)
+			plane->funcs->reset(plane);
+
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		if (crtc->funcs->reset)
 			crtc->funcs->reset(crtc);
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 13bd42923dd4..3bcf8e6a85b3 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -36,6 +36,7 @@
 #include <linux/export.h>
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
+#include "drm_internal.h"
 
 #if defined(CONFIG_DEBUG_FS)
 
@@ -49,9 +50,7 @@ static const struct drm_info_list drm_debugfs_list[] = {
 	{"clients", drm_clients_info, 0},
 	{"bufs", drm_bufs_info, 0},
 	{"gem_names", drm_gem_name_info, DRIVER_GEM},
-#if DRM_DEBUG_CODE
 	{"vma", drm_vma_info, 0},
-#endif
 };
 #define DRM_DEBUGFS_ENTRIES ARRAY_SIZE(drm_debugfs_list)
 
diff --git a/drivers/gpu/drm/drm_dma.c b/drivers/gpu/drm/drm_dma.c
index 8a140a953754..ea481800ef56 100644
--- a/drivers/gpu/drm/drm_dma.c
+++ b/drivers/gpu/drm/drm_dma.c
@@ -35,6 +35,7 @@
 
 #include <linux/export.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 /**
  * Initialize the DMA data.
@@ -124,7 +125,7 @@ void drm_legacy_dma_takedown(struct drm_device *dev)
  *
  * Resets the fields of \p buf.
  */
-void drm_free_buffer(struct drm_device *dev, struct drm_buf * buf)
+void drm_legacy_free_buffer(struct drm_device *dev, struct drm_buf * buf)
 {
 	if (!buf)
 		return;
@@ -142,8 +143,8 @@ void drm_free_buffer(struct drm_device *dev, struct drm_buf * buf)
  *
  * Frees each buffer associated with \p file_priv not already on the hardware.
  */
-void drm_core_reclaim_buffers(struct drm_device *dev,
-			      struct drm_file *file_priv)
+void drm_legacy_reclaim_buffers(struct drm_device *dev,
+				struct drm_file *file_priv)
 {
 	struct drm_device_dma *dma = dev->dma;
 	int i;
@@ -154,7 +155,7 @@ void drm_core_reclaim_buffers(struct drm_device *dev,
 		if (dma->buflist[i]->file_priv == file_priv) {
 			switch (dma->buflist[i]->list) {
 			case DRM_LIST_NONE:
-				drm_free_buffer(dev, dma->buflist[i]);
+				drm_legacy_free_buffer(dev, dma->buflist[i]);
 				break;
 			case DRM_LIST_WAIT:
 				dma->buflist[i]->list = DRM_LIST_RECLAIM;
@@ -166,5 +167,3 @@ void drm_core_reclaim_buffers(struct drm_device *dev,
 		}
 	}
 }
-
-EXPORT_SYMBOL(drm_core_reclaim_buffers);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index ac3c2738db94..b3adf1445020 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1772,7 +1772,7 @@ static int drm_dp_get_vc_payload_bw(int dp_link_bw, int dp_link_count)
 	case DP_LINK_BW_5_4:
 		return 10 * dp_link_count;
 	}
-	return 0;
+	BUG();
 }
 
 /**
@@ -2071,6 +2071,7 @@ static int drm_dp_mst_handle_up_req(struct drm_dp_mst_topology_mgr *mgr)
  * drm_dp_mst_hpd_irq() - MST hotplug IRQ notify
  * @mgr: manager to notify irq for.
  * @esi: 4 bytes from SINK_COUNT_ESI
+ * @handled: whether the hpd interrupt was consumed or not
  *
  * This should be called from the driver when it detects a short IRQ,
  * along with the value of the DEVICE_SERVICE_IRQ_VECTOR_ESI0. The
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3242e208c0d0..6a119026a76b 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -35,32 +35,20 @@
 #include <drm/drmP.h>
 #include <drm/drm_core.h>
 #include "drm_legacy.h"
+#include "drm_internal.h"
 
 unsigned int drm_debug = 0;	/* 1 to enable debug output */
 EXPORT_SYMBOL(drm_debug);
 
-unsigned int drm_vblank_offdelay = 5000;    /* Default to 5000 msecs. */
-
-unsigned int drm_timestamp_precision = 20;  /* Default to 20 usecs. */
-
-/*
- * Default to use monotonic timestamps for wait-for-vblank and page-flip
- * complete events.
- */
-unsigned int drm_timestamp_monotonic = 1;
-
 MODULE_AUTHOR(CORE_AUTHOR);
 MODULE_DESCRIPTION(CORE_DESC);
 MODULE_LICENSE("GPL and additional rights");
 MODULE_PARM_DESC(debug, "Enable debug output");
-MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs]");
+MODULE_PARM_DESC(vblankoffdelay, "Delay until vblank irq auto-disable [msecs] (0: never disable, <0: disable immediately)");
 MODULE_PARM_DESC(timestamp_precision_usec, "Max. error on timestamps [usecs]");
 MODULE_PARM_DESC(timestamp_monotonic, "Use monotonic timestamps");
 
 module_param_named(debug, drm_debug, int, 0600);
-module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
-module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
-module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600);
 
 static DEFINE_SPINLOCK(drm_minor_lock);
 static struct idr drm_minors_idr;
@@ -102,6 +90,8 @@ void drm_ut_debug_printk(const char *function_name, const char *format, ...)
 }
 EXPORT_SYMBOL(drm_ut_debug_printk);
 
+#define DRM_MAGIC_HASH_ORDER  4  /**< Size of key hash table. Must be power of 2. */
+
 struct drm_master *drm_master_create(struct drm_minor *minor)
 {
 	struct drm_master *master;
@@ -133,7 +123,6 @@ EXPORT_SYMBOL(drm_master_get);
 static void drm_master_destroy(struct kref *kref)
 {
 	struct drm_master *master = container_of(kref, struct drm_master, refcount);
-	struct drm_magic_entry *pt, *next;
 	struct drm_device *dev = master->minor->dev;
 	struct drm_map_list *r_list, *list_temp;
 
@@ -143,7 +132,7 @@ static void drm_master_destroy(struct kref *kref)
 
 	list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head) {
 		if (r_list->master == master) {
-			drm_rmmap_locked(dev, r_list->map);
+			drm_legacy_rmmap_locked(dev, r_list->map);
 			r_list = NULL;
 		}
 	}
@@ -154,12 +143,6 @@ static void drm_master_destroy(struct kref *kref)
 		master->unique_len = 0;
 	}
 
-	list_for_each_entry_safe(pt, next, &master->magicfree, head) {
-		list_del(&pt->head);
-		drm_ht_remove_item(&master->magiclist, &pt->hash_item);
-		kfree(pt);
-	}
-
 	drm_ht_remove(&master->magiclist);
 
 	mutex_unlock(&dev->struct_mutex);
@@ -779,7 +762,7 @@ void drm_dev_unregister(struct drm_device *dev)
 	drm_vblank_cleanup(dev);
 
 	list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head)
-		drm_rmmap(dev, r_list->map);
+		drm_legacy_rmmap(dev, r_list->map);
 
 	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
 	drm_minor_unregister(dev, DRM_MINOR_RENDER);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 1dbf3bc4c6a3..1bdbfd0e0033 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -632,27 +632,27 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC |
 			DRM_MODE_FLAG_INTERLACE),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 6 - 1440x480i@60Hz */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 6 - 720(1440)x480i@60Hz */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 7 - 1440x480i@60Hz */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 7 - 720(1440)x480i@60Hz */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 8 - 1440x240@60Hz */
-	{ DRM_MODE("1440x240", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
-		   1602, 1716, 0, 240, 244, 247, 262, 0,
+	/* 8 - 720(1440)x240@60Hz */
+	{ DRM_MODE("720x240", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
+		   801, 858, 0, 240, 244, 247, 262, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 9 - 1440x240@60Hz */
-	{ DRM_MODE("1440x240", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1478,
-		   1602, 1716, 0, 240, 244, 247, 262, 0,
+	/* 9 - 720(1440)x240@60Hz */
+	{ DRM_MODE("720x240", DRM_MODE_TYPE_DRIVER, 13500, 720, 739,
+		   801, 858, 0, 240, 244, 247, 262, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 60, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -714,27 +714,27 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC |
 			DRM_MODE_FLAG_INTERLACE),
 	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 21 - 1440x576i@50Hz */
-	{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 21 - 720(1440)x576i@50Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 22 - 1440x576i@50Hz */
-	{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 22 - 720(1440)x576i@50Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 23 - 1440x288@50Hz */
-	{ DRM_MODE("1440x288", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
-		   1590, 1728, 0, 288, 290, 293, 312, 0,
+	/* 23 - 720(1440)x288@50Hz */
+	{ DRM_MODE("720x288", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
+		   795, 864, 0, 288, 290, 293, 312, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 24 - 1440x288@50Hz */
-	{ DRM_MODE("1440x288", DRM_MODE_TYPE_DRIVER, 27000, 1440, 1464,
-		   1590, 1728, 0, 288, 290, 293, 312, 0,
+	/* 24 - 720(1440)x288@50Hz */
+	{ DRM_MODE("720x288", DRM_MODE_TYPE_DRIVER, 13500, 720, 732,
+		   795, 864, 0, 288, 290, 293, 312, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 50, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -837,15 +837,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   796, 864, 0, 576, 581, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
 	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 44 - 1440x576i@100Hz */
-	{ DRM_MODE("1440x576", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 44 - 720(1440)x576i@100Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 27000, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 45 - 1440x576i@100Hz */
-	{ DRM_MODE("1440x576", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 45 - 720(1440)x576i@100Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 27000, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 100, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -870,15 +870,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   798, 858, 0, 480, 489, 495, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
 	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 50 - 1440x480i@120Hz */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 50 - 720(1440)x480i@120Hz */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 27000, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 51 - 1440x480i@120Hz */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 54000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 51 - 720(1440)x480i@120Hz */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 27000, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 120, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -892,15 +892,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   796, 864, 0, 576, 581, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
 	  .vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 54 - 1440x576i@200Hz */
-	{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 54 - 720(1440)x576i@200Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 54000, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 55 - 1440x576i@200Hz */
-	{ DRM_MODE("1440x576i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1464,
-		   1590, 1728, 0, 576, 580, 586, 625, 0,
+	/* 55 - 720(1440)x576i@200Hz */
+	{ DRM_MODE("720x576i", DRM_MODE_TYPE_DRIVER, 54000, 720, 732,
+		   795, 864, 0, 576, 580, 586, 625, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 200, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -914,15 +914,15 @@ static const struct drm_display_mode edid_cea_modes[] = {
 		   798, 858, 0, 480, 489, 495, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
 	  .vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
-	/* 58 - 1440x480i@240 */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 58 - 720(1440)x480i@240 */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 54000, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3, },
-	/* 59 - 1440x480i@240 */
-	{ DRM_MODE("1440x480i", DRM_MODE_TYPE_DRIVER, 108000, 1440, 1478,
-		   1602, 1716, 0, 480, 488, 494, 525, 0,
+	/* 59 - 720(1440)x480i@240 */
+	{ DRM_MODE("720x480i", DRM_MODE_TYPE_DRIVER, 54000, 720, 739,
+		   801, 858, 0, 480, 488, 494, 525, 0,
 		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC |
 			DRM_MODE_FLAG_INTERLACE | DRM_MODE_FLAG_DBLCLK),
 	  .vrefresh = 240, .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9, },
@@ -2103,7 +2103,8 @@ static int
 add_inferred_modes(struct drm_connector *connector, struct edid *edid)
 {
 	struct detailed_mode_closure closure = {
-		connector, edid, 0, 0, 0
+		.connector = connector,
+		.edid = edid,
 	};
 
 	if (version_greater(edid, 1, 0))
@@ -2169,7 +2170,8 @@ add_established_modes(struct drm_connector *connector, struct edid *edid)
 		((edid->established_timings.mfg_rsvd & 0x80) << 9);
 	int i, modes = 0;
 	struct detailed_mode_closure closure = {
-		connector, edid, 0, 0, 0
+		.connector = connector,
+		.edid = edid,
 	};
 
 	for (i = 0; i <= EDID_EST_TIMINGS; i++) {
@@ -2227,7 +2229,8 @@ add_standard_modes(struct drm_connector *connector, struct edid *edid)
 {
 	int i, modes = 0;
 	struct detailed_mode_closure closure = {
-		connector, edid, 0, 0, 0
+		.connector = connector,
+		.edid = edid,
 	};
 
 	for (i = 0; i < EDID_STD_TIMINGS; i++) {
@@ -2313,7 +2316,8 @@ static int
 add_cvt_modes(struct drm_connector *connector, struct edid *edid)
 {	
 	struct detailed_mode_closure closure = {
-		connector, edid, 0, 0, 0
+		.connector = connector,
+		.edid = edid,
 	};
 
 	if (version_greater(edid, 1, 2))
@@ -2357,11 +2361,10 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
 		   u32 quirks)
 {
 	struct detailed_mode_closure closure = {
-		connector,
-		edid,
-		1,
-		quirks,
-		0
+		.connector = connector,
+		.edid = edid,
+		.preferred = 1,
+		.quirks = quirks,
 	};
 
 	if (closure.preferred && !version_greater(edid, 1, 3))
@@ -3433,10 +3436,10 @@ EXPORT_SYMBOL(drm_rgb_quant_range_selectable);
 /**
  * drm_assign_hdmi_deep_color_info - detect whether monitor supports
  * hdmi deep color modes and update drm_display_info if so.
- *
  * @edid: monitor EDID information
  * @info: Updated with maximum supported deep color bpc and color format
  *        if deep color supported.
+ * @connector: DRM connector, used only for debug output
  *
  * Parse the CEA extension according to CEA-861-B.
  * Return true if HDMI deep color supported, false if not or unknown.
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 3144db9dc0f1..0c0c39bac23d 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -126,7 +126,7 @@ int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_
 
 	WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex));
 	if (fb_helper->connector_count + 1 > fb_helper->connector_info_alloc_count) {
-		temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector) * (fb_helper->connector_count + 1), GFP_KERNEL);
+		temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector *) * (fb_helper->connector_count + 1), GFP_KERNEL);
 		if (!temp)
 			return -ENOMEM;
 
@@ -171,60 +171,6 @@ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 }
 EXPORT_SYMBOL(drm_fb_helper_remove_one_connector);
 
-static int drm_fb_helper_parse_command_line(struct drm_fb_helper *fb_helper)
-{
-	struct drm_fb_helper_connector *fb_helper_conn;
-	int i;
-
-	for (i = 0; i < fb_helper->connector_count; i++) {
-		struct drm_cmdline_mode *mode;
-		struct drm_connector *connector;
-		char *option = NULL;
-
-		fb_helper_conn = fb_helper->connector_info[i];
-		connector = fb_helper_conn->connector;
-		mode = &fb_helper_conn->cmdline_mode;
-
-		/* do something on return - turn off connector maybe */
-		if (fb_get_options(connector->name, &option))
-			continue;
-
-		if (drm_mode_parse_command_line_for_connector(option,
-							      connector,
-							      mode)) {
-			if (mode->force) {
-				const char *s;
-				switch (mode->force) {
-				case DRM_FORCE_OFF:
-					s = "OFF";
-					break;
-				case DRM_FORCE_ON_DIGITAL:
-					s = "ON - dig";
-					break;
-				default:
-				case DRM_FORCE_ON:
-					s = "ON";
-					break;
-				}
-
-				DRM_INFO("forcing %s connector %s\n",
-					 connector->name, s);
-				connector->force = mode->force;
-			}
-
-			DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
-				      connector->name,
-				      mode->xres, mode->yres,
-				      mode->refresh_specified ? mode->refresh : 60,
-				      mode->rb ? " reduced blanking" : "",
-				      mode->margins ? " with margins" : "",
-				      mode->interlace ?  " interlaced" : "");
-		}
-
-	}
-	return 0;
-}
-
 static void drm_fb_helper_save_lut_atomic(struct drm_crtc *crtc, struct drm_fb_helper *helper)
 {
 	uint16_t *r_base, *g_base, *b_base;
@@ -345,10 +291,17 @@ static bool restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 
 	drm_warn_on_modeset_not_all_locked(dev);
 
-	list_for_each_entry(plane, &dev->mode_config.plane_list, head)
+	list_for_each_entry(plane, &dev->mode_config.plane_list, head) {
 		if (plane->type != DRM_PLANE_TYPE_PRIMARY)
 			drm_plane_force_disable(plane);
 
+		if (dev->mode_config.rotation_property) {
+			drm_mode_plane_set_obj_prop(plane,
+						    dev->mode_config.rotation_property,
+						    BIT(DRM_ROTATE_0));
+		}
+	}
+
 	for (i = 0; i < fb_helper->crtc_count; i++) {
 		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
 		struct drm_crtc *crtc = mode_set->crtc;
@@ -419,11 +372,11 @@ static bool drm_fb_helper_force_kernel_mode(void)
 		if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 			continue;
 
-		/* NOTE: we use lockless flag below to avoid grabbing other
-		 * modeset locks.  So just trylock the underlying mutex
-		 * directly:
+		/*
+		 * NOTE: Use trylock mode to avoid deadlocks and sleeping in
+		 * panic context.
 		 */
-		if (!mutex_trylock(&dev->mode_config.mutex)) {
+		if (__drm_modeset_lock_all(dev, true) != 0) {
 			error = true;
 			continue;
 		}
@@ -432,7 +385,7 @@ static bool drm_fb_helper_force_kernel_mode(void)
 		if (ret)
 			error = true;
 
-		mutex_unlock(&dev->mode_config.mutex);
+		drm_modeset_unlock_all(dev);
 	}
 	return error;
 }
@@ -1013,7 +966,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
 		struct drm_cmdline_mode *cmdline_mode;
 
-		cmdline_mode = &fb_helper_conn->cmdline_mode;
+		cmdline_mode = &fb_helper_conn->connector->cmdline_mode;
 
 		if (cmdline_mode->bpp_specified) {
 			switch (cmdline_mode->bpp) {
@@ -1260,9 +1213,7 @@ EXPORT_SYMBOL(drm_has_preferred_mode);
 
 static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector)
 {
-	struct drm_cmdline_mode *cmdline_mode;
-	cmdline_mode = &fb_connector->cmdline_mode;
-	return cmdline_mode->specified;
+	return fb_connector->connector->cmdline_mode.specified;
 }
 
 struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn,
@@ -1272,7 +1223,7 @@ struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *f
 	struct drm_display_mode *mode = NULL;
 	bool prefer_non_interlace;
 
-	cmdline_mode = &fb_helper_conn->cmdline_mode;
+	cmdline_mode = &fb_helper_conn->connector->cmdline_mode;
 	if (cmdline_mode->specified == false)
 		return mode;
 
@@ -1657,8 +1608,6 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel)
 	struct drm_device *dev = fb_helper->dev;
 	int count = 0;
 
-	drm_fb_helper_parse_command_line(fb_helper);
-
 	mutex_lock(&dev->mode_config.mutex);
 	count = drm_fb_helper_probe_connector_modes(fb_helper,
 						    dev->mode_config.max_width,
diff --git a/drivers/gpu/drm/drm_fops.c b/drivers/gpu/drm/drm_fops.c
index 79d5221c6e41..3e6694633f42 100644
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@@ -39,10 +39,10 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include "drm_legacy.h"
+#include "drm_internal.h"
 
 /* from BKL pushdown */
 DEFINE_MUTEX(drm_global_mutex);
-EXPORT_SYMBOL(drm_global_mutex);
 
 static int drm_open_helper(struct file *filp, struct drm_minor *minor);
 
@@ -268,11 +268,11 @@ static void drm_master_release(struct drm_device *dev, struct file *filp)
 {
 	struct drm_file *file_priv = filp->private_data;
 
-	if (drm_i_have_hw_lock(dev, file_priv)) {
+	if (drm_legacy_i_have_hw_lock(dev, file_priv)) {
 		DRM_DEBUG("File %p released, freeing lock for context %d\n",
 			  filp, _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
-		drm_lock_free(&file_priv->master->lock,
-			      _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
+		drm_legacy_lock_free(&file_priv->master->lock,
+				     _DRM_LOCKING_CONTEXT(file_priv->master->lock.hw_lock->lock));
 	}
 }
 
@@ -330,8 +330,6 @@ static void drm_legacy_dev_reinit(struct drm_device *dev)
  */
 int drm_lastclose(struct drm_device * dev)
 {
-	struct drm_vma_entry *vma, *vma_temp;
-
 	DRM_DEBUG("\n");
 
 	if (dev->driver->lastclose)
@@ -346,13 +344,7 @@ int drm_lastclose(struct drm_device * dev)
 	drm_agp_clear(dev);
 
 	drm_legacy_sg_cleanup(dev);
-
-	/* Clear vma list (only built for debugging) */
-	list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) {
-		list_del(&vma->head);
-		kfree(vma);
-	}
-
+	drm_legacy_vma_flush(dev);
 	drm_legacy_dma_takedown(dev);
 
 	mutex_unlock(&dev->struct_mutex);
@@ -412,7 +404,7 @@ int drm_release(struct inode *inode, struct file *filp)
 		drm_master_release(dev, filp);
 
 	if (drm_core_check_feature(dev, DRIVER_HAVE_DMA))
-		drm_core_reclaim_buffers(dev, file_priv);
+		drm_legacy_reclaim_buffers(dev, file_priv);
 
 	drm_events_release(file_priv);
 
@@ -464,6 +456,8 @@ int drm_release(struct inode *inode, struct file *filp)
 	if (drm_core_check_feature(dev, DRIVER_PRIME))
 		drm_prime_destroy_file_private(&file_priv->prime);
 
+	WARN_ON(!list_empty(&file_priv->event_list));
+
 	put_pid(file_priv->pid);
 	kfree(file_priv);
 
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 6adee4c2afc0..eb5dd67153e4 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -38,6 +38,7 @@
 #include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include <drm/drm_vma_manager.h>
+#include "drm_internal.h"
 
 /** @file drm_gem.c
  *
@@ -146,7 +147,7 @@ int drm_gem_object_init(struct drm_device *dev,
 EXPORT_SYMBOL(drm_gem_object_init);
 
 /**
- * drm_gem_object_init - initialize an allocated private GEM object
+ * drm_gem_private_object_init - initialize an allocated private GEM object
  * @dev: drm_device the object should be initialized for
  * @obj: drm_gem_object to initialize
  * @size: object size
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index ecaf0fa2eec8..0780541f7935 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -35,6 +35,7 @@
 
 #include <linux/seq_file.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 /**
  * Called when "/proc/dri/.../name" is read.
@@ -183,15 +184,32 @@ int drm_clients_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_file *priv;
 
+	seq_printf(m,
+		   "%20s %5s %3s master a %5s %10s\n",
+		   "command",
+		   "pid",
+		   "dev",
+		   "uid",
+		   "magic");
+
+	/* dev->filelist is sorted youngest first, but we want to present
+	 * oldest first (i.e. kernel, servers, clients), so walk backwardss.
+	 */
 	mutex_lock(&dev->struct_mutex);
-	seq_printf(m, "a dev	pid    uid	magic\n\n");
-	list_for_each_entry(priv, &dev->filelist, lhead) {
-		seq_printf(m, "%c %3d %5d %5d %10u\n",
-			   priv->authenticated ? 'y' : 'n',
-			   priv->minor->index,
+	list_for_each_entry_reverse(priv, &dev->filelist, lhead) {
+		struct task_struct *task;
+
+		rcu_read_lock(); /* locks pid_task()->comm */
+		task = pid_task(priv->pid, PIDTYPE_PID);
+		seq_printf(m, "%20s %5d %3d   %c    %c %5d %10u\n",
+			   task ? task->comm : "<unknown>",
 			   pid_vnr(priv->pid),
+			   priv->minor->index,
+			   priv->is_master ? 'y' : 'n',
+			   priv->authenticated ? 'y' : 'n',
 			   from_kuid_munged(seq_user_ns(m), priv->uid),
 			   priv->magic);
+		rcu_read_unlock();
 	}
 	mutex_unlock(&dev->struct_mutex);
 	return 0;
@@ -223,62 +241,3 @@ int drm_gem_name_info(struct seq_file *m, void *data)
 
 	return 0;
 }
-
-#if DRM_DEBUG_CODE
-
-int drm_vma_info(struct seq_file *m, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct drm_vma_entry *pt;
-	struct vm_area_struct *vma;
-	unsigned long vma_count = 0;
-#if defined(__i386__)
-	unsigned int pgprot;
-#endif
-
-	mutex_lock(&dev->struct_mutex);
-	list_for_each_entry(pt, &dev->vmalist, head)
-		vma_count++;
-
-	seq_printf(m, "vma use count: %lu, high_memory = %pK, 0x%pK\n",
-		   vma_count, high_memory,
-		   (void *)(unsigned long)virt_to_phys(high_memory));
-
-	list_for_each_entry(pt, &dev->vmalist, head) {
-		vma = pt->vma;
-		if (!vma)
-			continue;
-		seq_printf(m,
-			   "\n%5d 0x%pK-0x%pK %c%c%c%c%c%c 0x%08lx000",
-			   pt->pid,
-			   (void *)vma->vm_start, (void *)vma->vm_end,
-			   vma->vm_flags & VM_READ ? 'r' : '-',
-			   vma->vm_flags & VM_WRITE ? 'w' : '-',
-			   vma->vm_flags & VM_EXEC ? 'x' : '-',
-			   vma->vm_flags & VM_MAYSHARE ? 's' : 'p',
-			   vma->vm_flags & VM_LOCKED ? 'l' : '-',
-			   vma->vm_flags & VM_IO ? 'i' : '-',
-			   vma->vm_pgoff);
-
-#if defined(__i386__)
-		pgprot = pgprot_val(vma->vm_page_prot);
-		seq_printf(m, " %c%c%c%c%c%c%c%c%c",
-			   pgprot & _PAGE_PRESENT ? 'p' : '-',
-			   pgprot & _PAGE_RW ? 'w' : 'r',
-			   pgprot & _PAGE_USER ? 'u' : 's',
-			   pgprot & _PAGE_PWT ? 't' : 'b',
-			   pgprot & _PAGE_PCD ? 'u' : 'c',
-			   pgprot & _PAGE_ACCESSED ? 'a' : '-',
-			   pgprot & _PAGE_DIRTY ? 'd' : '-',
-			   pgprot & _PAGE_PSE ? 'm' : 'k',
-			   pgprot & _PAGE_GLOBAL ? 'g' : 'l');
-#endif
-		seq_printf(m, "\n");
-	}
-	mutex_unlock(&dev->struct_mutex);
-	return 0;
-}
-
-#endif
-
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
new file mode 100644
index 000000000000..7e459bf38c26
--- /dev/null
+++ b/drivers/gpu/drm/drm_internal.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *   Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* drm_irq.c */
+extern unsigned int drm_timestamp_monotonic;
+
+/* drm_fops.c */
+extern struct mutex drm_global_mutex;
+int drm_lastclose(struct drm_device *dev);
+
+/* drm_pci.c */
+int drm_pci_set_unique(struct drm_device *dev,
+		       struct drm_master *master,
+		       struct drm_unique *u);
+int drm_irq_by_busid(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv);
+
+/* drm_vm.c */
+int drm_vma_info(struct seq_file *m, void *data);
+int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma);
+void drm_vm_close_locked(struct drm_device *dev, struct vm_area_struct *vma);
+
+/* drm_prime.c */
+int drm_prime_handle_to_fd_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+
+void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv);
+void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv);
+void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpriv,
+					struct dma_buf *dma_buf);
+
+/* drm_info.c */
+int drm_name_info(struct seq_file *m, void *data);
+int drm_vm_info(struct seq_file *m, void *data);
+int drm_bufs_info(struct seq_file *m, void *data);
+int drm_vblank_info(struct seq_file *m, void *data);
+int drm_clients_info(struct seq_file *m, void* data);
+int drm_gem_name_info(struct seq_file *m, void *data);
+
+/* drm_irq.c */
+int drm_control(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* drm_auth.c */
+int drm_getmagic(struct drm_device *dev, void *data,
+		 struct drm_file *file_priv);
+int drm_authmagic(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv);
+int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
+
+/* drm_sysfs.c */
+extern struct class *drm_class;
+
+struct class *drm_sysfs_create(struct module *owner, char *name);
+void drm_sysfs_destroy(void);
+struct device *drm_sysfs_minor_alloc(struct drm_minor *minor);
+int drm_sysfs_connector_add(struct drm_connector *connector);
+void drm_sysfs_connector_remove(struct drm_connector *connector);
+
+/* drm_gem.c */
+int drm_gem_init(struct drm_device *dev);
+void drm_gem_destroy(struct drm_device *dev);
+int drm_gem_handle_create_tail(struct drm_file *file_priv,
+			       struct drm_gem_object *obj,
+			       u32 *handlep);
+int drm_gem_close_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int drm_gem_flink_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int drm_gem_open_ioctl(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+void drm_gem_open(struct drm_device *dev, struct drm_file *file_private);
+void drm_gem_release(struct drm_device *dev, struct drm_file *file_private);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 40be746b7e68..00587a1e3c83 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -31,6 +31,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_core.h>
 #include "drm_legacy.h"
+#include "drm_internal.h"
 
 #include <linux/pci.h>
 #include <linux/export.h>
@@ -41,121 +42,6 @@
 static int drm_version(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
 
-#define DRM_IOCTL_DEF(ioctl, _func, _flags) \
-	[DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
-
-/** Ioctl table */
-static const struct drm_ioctl_desc drm_ioctls[] = {
-	DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0),
-	DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_getmap, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, 0),
-	DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_setunique, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_BLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_MASTER),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_rmmap_ioctl, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_GET_CTX, drm_legacy_getctx, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_SWITCH_CTX, drm_legacy_switchctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_legacy_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_legacy_resctx, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_LOCK, drm_lock, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_unlock, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_ADD_BUFS, drm_addbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_MARK_BUFS, drm_markbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_INFO_BUFS, drm_infobufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_MAP_BUFS, drm_mapbufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_FREE_BUFS, drm_freebufs, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_DMA, drm_dma_ioctl, DRM_AUTH),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_CONTROL, drm_control, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
-#if __OS_HAS_AGP
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ACQUIRE, drm_agp_acquire_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_RELEASE, drm_agp_release_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ENABLE, drm_agp_enable_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_INFO, drm_agp_info_ioctl, DRM_AUTH),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ALLOC, drm_agp_alloc_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_FREE, drm_agp_free_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_BIND, drm_agp_bind_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_AGP_UNBIND, drm_agp_unbind_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-#endif
-
-	DRM_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_sg_alloc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-	DRM_IOCTL_DEF(DRM_IOCTL_SG_FREE, drm_sg_free, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank, DRM_UNLOCKED),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_modeset_ctl, 0),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
-
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
-};
-
-#define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
-
 /**
  * Get the bus id.
  *
@@ -167,7 +53,7 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
  *
  * Copies the bus id from drm_device::unique into user space.
  */
-int drm_getunique(struct drm_device *dev, void *data,
+static int drm_getunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_unique *u = data;
@@ -189,7 +75,6 @@ drm_unset_busid(struct drm_device *dev,
 	kfree(master->unique);
 	master->unique = NULL;
 	master->unique_len = 0;
-	master->unique_size = 0;
 }
 
 /**
@@ -207,7 +92,7 @@ drm_unset_busid(struct drm_device *dev,
  * version 1.1 or greater. Also note that KMS is all version 1.1 and later and
  * UMS was only ever supported on pci devices.
  */
-int drm_setunique(struct drm_device *dev, void *data,
+static int drm_setunique(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_unique *u = data;
@@ -245,15 +130,15 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
 	if (master->unique != NULL)
 		drm_unset_busid(dev, master);
 
-	if (dev->driver->bus && dev->driver->bus->set_busid) {
-		ret = dev->driver->bus->set_busid(dev, master);
+	if (dev->driver->set_busid) {
+		ret = dev->driver->set_busid(dev, master);
 		if (ret) {
 			drm_unset_busid(dev, master);
 			return ret;
 		}
 	} else {
 		if (WARN(dev->unique == NULL,
-			 "No drm_bus.set_busid() implementation provided by "
+			 "No drm_driver.set_busid() implementation provided by "
 			 "%ps. Use drm_dev_set_unique() to set the unique "
 			 "name explicitly.", dev->driver))
 			return -EINVAL;
@@ -279,7 +164,7 @@ static int drm_set_busid(struct drm_device *dev, struct drm_file *file_priv)
  * Searches for the mapping with the specified offset and copies its information
  * into userspace
  */
-int drm_getmap(struct drm_device *dev, void *data,
+static int drm_getmap(struct drm_device *dev, void *data,
 	       struct drm_file *file_priv)
 {
 	struct drm_map *map = data;
@@ -340,7 +225,7 @@ int drm_getmap(struct drm_device *dev, void *data,
  * Searches for the client with the specified index and copies its information
  * into userspace
  */
-int drm_getclient(struct drm_device *dev, void *data,
+static int drm_getclient(struct drm_device *dev, void *data,
 		  struct drm_file *file_priv)
 {
 	struct drm_client *client = data;
@@ -380,7 +265,7 @@ int drm_getclient(struct drm_device *dev, void *data,
  *
  * \return zero on success or a negative number on failure.
  */
-int drm_getstats(struct drm_device *dev, void *data,
+static int drm_getstats(struct drm_device *dev, void *data,
 		 struct drm_file *file_priv)
 {
 	struct drm_stats *stats = data;
@@ -394,7 +279,7 @@ int drm_getstats(struct drm_device *dev, void *data,
 /**
  * Get device/driver capabilities
  */
-int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
+static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_get_cap *req = data;
 
@@ -444,7 +329,7 @@ int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 /**
  * Set device/driver capabilities
  */
-int
+static int
 drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_set_client_cap *req = data;
@@ -478,7 +363,7 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
  *
  * Sets the requested interface version
  */
-int drm_setversion(struct drm_device *dev, void *data, struct drm_file *file_priv)
+static int drm_setversion(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_set_version *sv = data;
 	int if_version, retcode = 0;
@@ -624,6 +509,121 @@ static int drm_ioctl_permit(u32 flags, struct drm_file *file_priv)
 	return 0;
 }
 
+#define DRM_IOCTL_DEF(ioctl, _func, _flags) \
+	[DRM_IOCTL_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
+
+/** Ioctl table */
+static const struct drm_ioctl_desc drm_ioctls[] = {
+	DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, 0),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, 0),
+	DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_MAP, drm_getmap, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_CLIENT, drm_getclient, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_STATS, drm_getstats, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_CAP, drm_getcap, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_CLIENT_CAP, drm_setclientcap, 0),
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_VERSION, drm_setversion, DRM_MASTER),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_UNIQUE, drm_setunique, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_BLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_UNBLOCK, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AUTH_MAGIC, drm_authmagic, DRM_AUTH|DRM_MASTER),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_ADD_MAP, drm_legacy_addmap_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_RM_MAP, drm_legacy_rmmap_ioctl, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_SAREA_CTX, drm_legacy_setsareactx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_SAREA_CTX, drm_legacy_getsareactx, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_SET_MASTER, drm_setmaster_ioctl, DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_DROP_MASTER, drm_dropmaster_ioctl, DRM_ROOT_ONLY),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_ADD_CTX, drm_legacy_addctx, DRM_AUTH|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_RM_CTX, drm_legacy_rmctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_MOD_CTX, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_GET_CTX, drm_legacy_getctx, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_SWITCH_CTX, drm_legacy_switchctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_NEW_CTX, drm_legacy_newctx, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_RES_CTX, drm_legacy_resctx, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_ADD_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_RM_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_LOCK, drm_legacy_lock, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_UNLOCK, drm_legacy_unlock, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_FINISH, drm_noop, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_ADD_BUFS, drm_legacy_addbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_MARK_BUFS, drm_legacy_markbufs, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_INFO_BUFS, drm_legacy_infobufs, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_MAP_BUFS, drm_legacy_mapbufs, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_FREE_BUFS, drm_legacy_freebufs, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_DMA, drm_legacy_dma_ioctl, DRM_AUTH),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_CONTROL, drm_control, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+
+#if __OS_HAS_AGP
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ACQUIRE, drm_agp_acquire_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_RELEASE, drm_agp_release_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ENABLE, drm_agp_enable_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_INFO, drm_agp_info_ioctl, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_ALLOC, drm_agp_alloc_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_FREE, drm_agp_free_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_BIND, drm_agp_bind_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_AGP_UNBIND, drm_agp_unbind_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+#endif
+
+	DRM_IOCTL_DEF(DRM_IOCTL_SG_ALLOC, drm_legacy_sg_alloc, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_IOCTL_SG_FREE, drm_legacy_sg_free, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_WAIT_VBLANK, drm_wait_vblank, DRM_UNLOCKED),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODESET_CTL, drm_modeset_ctl, 0),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_UPDATE_DRAW, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+	DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+};
+
+#define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
+
 /**
  * Called whenever a process performs an ioctl on /dev/drm.
  *
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 08ba1209228e..80ff94ada75e 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -34,6 +34,7 @@
 
 #include <drm/drmP.h>
 #include "drm_trace.h"
+#include "drm_internal.h"
 
 #include <linux/interrupt.h>	/* For task queue support */
 #include <linux/slab.h>
@@ -55,12 +56,88 @@
  */
 #define DRM_REDUNDANT_VBLIRQ_THRESH_NS 1000000
 
+static bool
+drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
+			  struct timeval *tvblank, unsigned flags);
+
+static unsigned int drm_timestamp_precision = 20;  /* Default to 20 usecs. */
+
 /*
- * Clear vblank timestamp buffer for a crtc.
+ * Default to use monotonic timestamps for wait-for-vblank and page-flip
+ * complete events.
+ */
+unsigned int drm_timestamp_monotonic = 1;
+
+static int drm_vblank_offdelay = 5000;    /* Default to 5000 msecs. */
+
+module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
+module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
+module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600);
+
+/**
+ * drm_update_vblank_count - update the master vblank counter
+ * @dev: DRM device
+ * @crtc: counter to update
+ *
+ * Call back into the driver to update the appropriate vblank counter
+ * (specified by @crtc).  Deal with wraparound, if it occurred, and
+ * update the last read value so we can deal with wraparound on the next
+ * call if necessary.
+ *
+ * Only necessary when going from off->on, to account for frames we
+ * didn't get an interrupt for.
+ *
+ * Note: caller must hold dev->vbl_lock since this reads & writes
+ * device vblank fields.
  */
-static void clear_vblank_timestamps(struct drm_device *dev, int crtc)
+static void drm_update_vblank_count(struct drm_device *dev, int crtc)
 {
-	memset(dev->vblank[crtc].time, 0, sizeof(dev->vblank[crtc].time));
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
+	u32 cur_vblank, diff, tslot;
+	bool rc;
+	struct timeval t_vblank;
+
+	/*
+	 * Interrupts were disabled prior to this call, so deal with counter
+	 * wrap if needed.
+	 * NOTE!  It's possible we lost a full dev->max_vblank_count events
+	 * here if the register is small or we had vblank interrupts off for
+	 * a long time.
+	 *
+	 * We repeat the hardware vblank counter & timestamp query until
+	 * we get consistent results. This to prevent races between gpu
+	 * updating its hardware counter while we are retrieving the
+	 * corresponding vblank timestamp.
+	 */
+	do {
+		cur_vblank = dev->driver->get_vblank_counter(dev, crtc);
+		rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0);
+	} while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc));
+
+	/* Deal with counter wrap */
+	diff = cur_vblank - vblank->last;
+	if (cur_vblank < vblank->last) {
+		diff += dev->max_vblank_count;
+
+		DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n",
+			  crtc, vblank->last, cur_vblank, diff);
+	}
+
+	DRM_DEBUG("updating vblank count on crtc %d, missed %d\n",
+		  crtc, diff);
+
+	/* Reinitialize corresponding vblank timestamp if high-precision query
+	 * available. Skip this step if query unsupported or failed. Will
+	 * reinitialize delayed at next vblank interrupt in that case.
+	 */
+	if (rc) {
+		tslot = atomic_read(&vblank->count) + diff;
+		vblanktimestamp(dev, crtc, tslot) = t_vblank;
+	}
+
+	smp_mb__before_atomic();
+	atomic_add(diff, &vblank->count);
+	smp_mb__after_atomic();
 }
 
 /*
@@ -71,10 +148,11 @@ static void clear_vblank_timestamps(struct drm_device *dev, int crtc)
  */
 static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	unsigned long irqflags;
 	u32 vblcount;
 	s64 diff_ns;
-	int vblrc;
+	bool vblrc;
 	struct timeval tvblank;
 	int count = DRM_TIMESTAMP_MAXRETRIES;
 
@@ -84,8 +162,28 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 	 */
 	spin_lock_irqsave(&dev->vblank_time_lock, irqflags);
 
+	/*
+	 * If the vblank interrupt was already disbled update the count
+	 * and timestamp to maintain the appearance that the counter
+	 * has been ticking all along until this time. This makes the
+	 * count account for the entire time between drm_vblank_on() and
+	 * drm_vblank_off().
+	 *
+	 * But only do this if precise vblank timestamps are available.
+	 * Otherwise we might read a totally bogus timestamp since drivers
+	 * lacking precise timestamp support rely upon sampling the system clock
+	 * at vblank interrupt time. Which obviously won't work out well if the
+	 * vblank interrupt is disabled.
+	 */
+	if (!vblank->enabled &&
+	    drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0)) {
+		drm_update_vblank_count(dev, crtc);
+		spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
+		return;
+	}
+
 	dev->driver->disable_vblank(dev, crtc);
-	dev->vblank[crtc].enabled = false;
+	vblank->enabled = false;
 
 	/* No further vblank irq's will be processed after
 	 * this point. Get current hardware vblank count and
@@ -100,9 +198,9 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 	 * delayed gpu counter increment.
 	 */
 	do {
-		dev->vblank[crtc].last = dev->driver->get_vblank_counter(dev, crtc);
+		vblank->last = dev->driver->get_vblank_counter(dev, crtc);
 		vblrc = drm_get_last_vbltimestamp(dev, crtc, &tvblank, 0);
-	} while (dev->vblank[crtc].last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc);
+	} while (vblank->last != dev->driver->get_vblank_counter(dev, crtc) && (--count) && vblrc);
 
 	if (!count)
 		vblrc = 0;
@@ -110,7 +208,7 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 	/* Compute time difference to stored timestamp of last vblank
 	 * as updated by last invocation of drm_handle_vblank() in vblank irq.
 	 */
-	vblcount = atomic_read(&dev->vblank[crtc].count);
+	vblcount = atomic_read(&vblank->count);
 	diff_ns = timeval_to_ns(&tvblank) -
 		  timeval_to_ns(&vblanktimestamp(dev, crtc, vblcount));
 
@@ -126,14 +224,18 @@ static void vblank_disable_and_save(struct drm_device *dev, int crtc)
 	 * available. In that case we can't account for this and just
 	 * hope for the best.
 	 */
-	if ((vblrc > 0) && (abs64(diff_ns) > 1000000)) {
-		atomic_inc(&dev->vblank[crtc].count);
+	if (vblrc && (abs64(diff_ns) > 1000000)) {
+		/* Store new timestamp in ringbuffer. */
+		vblanktimestamp(dev, crtc, vblcount + 1) = tvblank;
+
+		/* Increment cooked vblank count. This also atomically commits
+		 * the timestamp computed above.
+		 */
+		smp_mb__before_atomic();
+		atomic_inc(&vblank->count);
 		smp_mb__after_atomic();
 	}
 
-	/* Invalidate all timestamps while vblank irq's are off. */
-	clear_vblank_timestamps(dev, crtc);
-
 	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
 }
 
@@ -164,14 +266,20 @@ static void vblank_disable_fn(unsigned long arg)
 void drm_vblank_cleanup(struct drm_device *dev)
 {
 	int crtc;
+	unsigned long irqflags;
 
 	/* Bail if the driver didn't call drm_vblank_init() */
 	if (dev->num_crtcs == 0)
 		return;
 
 	for (crtc = 0; crtc < dev->num_crtcs; crtc++) {
-		del_timer_sync(&dev->vblank[crtc].disable_timer);
-		vblank_disable_fn((unsigned long)&dev->vblank[crtc]);
+		struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
+
+		del_timer_sync(&vblank->disable_timer);
+
+		spin_lock_irqsave(&dev->vbl_lock, irqflags);
+		vblank_disable_and_save(dev, crtc);
+		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 	}
 
 	kfree(dev->vblank);
@@ -204,11 +312,13 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 		goto err;
 
 	for (i = 0; i < num_crtcs; i++) {
-		dev->vblank[i].dev = dev;
-		dev->vblank[i].crtc = i;
-		init_waitqueue_head(&dev->vblank[i].queue);
-		setup_timer(&dev->vblank[i].disable_timer, vblank_disable_fn,
-			    (unsigned long)&dev->vblank[i]);
+		struct drm_vblank_crtc *vblank = &dev->vblank[i];
+
+		vblank->dev = dev;
+		vblank->crtc = i;
+		init_waitqueue_head(&vblank->queue);
+		setup_timer(&vblank->disable_timer, vblank_disable_fn,
+			    (unsigned long)vblank);
 	}
 
 	DRM_INFO("Supports vblank timestamp caching Rev 2 (21.10.2013).\n");
@@ -224,7 +334,7 @@ int drm_vblank_init(struct drm_device *dev, int num_crtcs)
 	return 0;
 
 err:
-	drm_vblank_cleanup(dev);
+	dev->num_crtcs = 0;
 	return ret;
 }
 EXPORT_SYMBOL(drm_vblank_init);
@@ -360,9 +470,11 @@ int drm_irq_uninstall(struct drm_device *dev)
 	if (dev->num_crtcs) {
 		spin_lock_irqsave(&dev->vbl_lock, irqflags);
 		for (i = 0; i < dev->num_crtcs; i++) {
-			wake_up(&dev->vblank[i].queue);
-			dev->vblank[i].enabled = false;
-			dev->vblank[i].last =
+			struct drm_vblank_crtc *vblank = &dev->vblank[i];
+
+			wake_up(&vblank->queue);
+			vblank->enabled = false;
+			vblank->last =
 				dev->driver->get_vblank_counter(dev, i);
 		}
 		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
@@ -617,7 +729,7 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
 	 * within vblank area, counting down the number of lines until
 	 * start of scanout.
 	 */
-	invbl = vbl_status & DRM_SCANOUTPOS_INVBL;
+	invbl = vbl_status & DRM_SCANOUTPOS_IN_VBLANK;
 
 	/* Convert scanout position into elapsed time at raw_time query
 	 * since start of scanout at first display scanline. delta_ns
@@ -647,7 +759,7 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
 
 	vbl_status = DRM_VBLANKTIME_SCANOUTPOS_METHOD;
 	if (invbl)
-		vbl_status |= DRM_VBLANKTIME_INVBL;
+		vbl_status |= DRM_VBLANKTIME_IN_VBLANK;
 
 	return vbl_status;
 }
@@ -679,10 +791,11 @@ static struct timeval get_drm_timestamp(void)
  * call, i.e., it isn't very precisely locked to the true vblank.
  *
  * Returns:
- * Non-zero if timestamp is considered to be very precise, zero otherwise.
+ * True if timestamp is considered to be very precise, false otherwise.
  */
-u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
-			      struct timeval *tvblank, unsigned flags)
+static bool
+drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
+			  struct timeval *tvblank, unsigned flags)
 {
 	int ret;
 
@@ -694,7 +807,7 @@ u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 		ret = dev->driver->get_vblank_timestamp(dev, crtc, &max_error,
 							tvblank, flags);
 		if (ret > 0)
-			return (u32) ret;
+			return true;
 	}
 
 	/* GPU high precision timestamp query unsupported or failed.
@@ -702,9 +815,8 @@ u32 drm_get_last_vbltimestamp(struct drm_device *dev, int crtc,
 	 */
 	*tvblank = get_drm_timestamp();
 
-	return 0;
+	return false;
 }
-EXPORT_SYMBOL(drm_get_last_vbltimestamp);
 
 /**
  * drm_vblank_count - retrieve "cooked" vblank counter value
@@ -720,7 +832,11 @@ EXPORT_SYMBOL(drm_get_last_vbltimestamp);
  */
 u32 drm_vblank_count(struct drm_device *dev, int crtc)
 {
-	return atomic_read(&dev->vblank[crtc].count);
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
+
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return 0;
+	return atomic_read(&vblank->count);
 }
 EXPORT_SYMBOL(drm_vblank_count);
 
@@ -740,18 +856,22 @@ EXPORT_SYMBOL(drm_vblank_count);
 u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 			      struct timeval *vblanktime)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	u32 cur_vblank;
 
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return 0;
+
 	/* Read timestamp from slot of _vblank_time ringbuffer
 	 * that corresponds to current vblank count. Retry if
 	 * count has incremented during readout. This works like
 	 * a seqlock.
 	 */
 	do {
-		cur_vblank = atomic_read(&dev->vblank[crtc].count);
+		cur_vblank = atomic_read(&vblank->count);
 		*vblanktime = vblanktimestamp(dev, crtc, cur_vblank);
 		smp_rmb();
-	} while (cur_vblank != atomic_read(&dev->vblank[crtc].count));
+	} while (cur_vblank != atomic_read(&vblank->count));
 
 	return cur_vblank;
 }
@@ -800,83 +920,20 @@ void drm_send_vblank_event(struct drm_device *dev, int crtc,
 EXPORT_SYMBOL(drm_send_vblank_event);
 
 /**
- * drm_update_vblank_count - update the master vblank counter
- * @dev: DRM device
- * @crtc: counter to update
- *
- * Call back into the driver to update the appropriate vblank counter
- * (specified by @crtc).  Deal with wraparound, if it occurred, and
- * update the last read value so we can deal with wraparound on the next
- * call if necessary.
- *
- * Only necessary when going from off->on, to account for frames we
- * didn't get an interrupt for.
- *
- * Note: caller must hold dev->vbl_lock since this reads & writes
- * device vblank fields.
- */
-static void drm_update_vblank_count(struct drm_device *dev, int crtc)
-{
-	u32 cur_vblank, diff, tslot, rc;
-	struct timeval t_vblank;
-
-	/*
-	 * Interrupts were disabled prior to this call, so deal with counter
-	 * wrap if needed.
-	 * NOTE!  It's possible we lost a full dev->max_vblank_count events
-	 * here if the register is small or we had vblank interrupts off for
-	 * a long time.
-	 *
-	 * We repeat the hardware vblank counter & timestamp query until
-	 * we get consistent results. This to prevent races between gpu
-	 * updating its hardware counter while we are retrieving the
-	 * corresponding vblank timestamp.
-	 */
-	do {
-		cur_vblank = dev->driver->get_vblank_counter(dev, crtc);
-		rc = drm_get_last_vbltimestamp(dev, crtc, &t_vblank, 0);
-	} while (cur_vblank != dev->driver->get_vblank_counter(dev, crtc));
-
-	/* Deal with counter wrap */
-	diff = cur_vblank - dev->vblank[crtc].last;
-	if (cur_vblank < dev->vblank[crtc].last) {
-		diff += dev->max_vblank_count;
-
-		DRM_DEBUG("last_vblank[%d]=0x%x, cur_vblank=0x%x => diff=0x%x\n",
-			  crtc, dev->vblank[crtc].last, cur_vblank, diff);
-	}
-
-	DRM_DEBUG("enabling vblank interrupts on crtc %d, missed %d\n",
-		  crtc, diff);
-
-	/* Reinitialize corresponding vblank timestamp if high-precision query
-	 * available. Skip this step if query unsupported or failed. Will
-	 * reinitialize delayed at next vblank interrupt in that case.
-	 */
-	if (rc) {
-		tslot = atomic_read(&dev->vblank[crtc].count) + diff;
-		vblanktimestamp(dev, crtc, tslot) = t_vblank;
-	}
-
-	smp_mb__before_atomic();
-	atomic_add(diff, &dev->vblank[crtc].count);
-	smp_mb__after_atomic();
-}
-
-/**
  * drm_vblank_enable - enable the vblank interrupt on a CRTC
  * @dev: DRM device
  * @crtc: CRTC in question
  */
 static int drm_vblank_enable(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	int ret = 0;
 
 	assert_spin_locked(&dev->vbl_lock);
 
 	spin_lock(&dev->vblank_time_lock);
 
-	if (!dev->vblank[crtc].enabled) {
+	if (!vblank->enabled) {
 		/*
 		 * Enable vblank irqs under vblank_time_lock protection.
 		 * All vblank count & timestamp updates are held off
@@ -887,9 +944,9 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc)
 		ret = dev->driver->enable_vblank(dev, crtc);
 		DRM_DEBUG("enabling vblank on crtc %d, ret: %d\n", crtc, ret);
 		if (ret)
-			atomic_dec(&dev->vblank[crtc].refcount);
+			atomic_dec(&vblank->refcount);
 		else {
-			dev->vblank[crtc].enabled = true;
+			vblank->enabled = true;
 			drm_update_vblank_count(dev, crtc);
 		}
 	}
@@ -914,16 +971,20 @@ static int drm_vblank_enable(struct drm_device *dev, int crtc)
  */
 int drm_vblank_get(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	unsigned long irqflags;
 	int ret = 0;
 
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return -EINVAL;
+
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
 	/* Going from 0->1 means we have to enable interrupts again */
-	if (atomic_add_return(1, &dev->vblank[crtc].refcount) == 1) {
+	if (atomic_add_return(1, &vblank->refcount) == 1) {
 		ret = drm_vblank_enable(dev, crtc);
 	} else {
-		if (!dev->vblank[crtc].enabled) {
-			atomic_dec(&dev->vblank[crtc].refcount);
+		if (!vblank->enabled) {
+			atomic_dec(&vblank->refcount);
 			ret = -EINVAL;
 		}
 	}
@@ -963,13 +1024,23 @@ EXPORT_SYMBOL(drm_crtc_vblank_get);
  */
 void drm_vblank_put(struct drm_device *dev, int crtc)
 {
-	BUG_ON(atomic_read(&dev->vblank[crtc].refcount) == 0);
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
+
+	BUG_ON(atomic_read(&vblank->refcount) == 0);
+
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return;
 
 	/* Last user schedules interrupt disable */
-	if (atomic_dec_and_test(&dev->vblank[crtc].refcount) &&
-	    (drm_vblank_offdelay > 0))
-		mod_timer(&dev->vblank[crtc].disable_timer,
-			  jiffies + ((drm_vblank_offdelay * HZ)/1000));
+	if (atomic_dec_and_test(&vblank->refcount)) {
+		if (drm_vblank_offdelay == 0)
+			return;
+		else if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0)
+			vblank_disable_fn((unsigned long)vblank);
+		else
+			mod_timer(&vblank->disable_timer,
+				  jiffies + ((drm_vblank_offdelay * HZ)/1000));
+	}
 }
 EXPORT_SYMBOL(drm_vblank_put);
 
@@ -989,6 +1060,50 @@ void drm_crtc_vblank_put(struct drm_crtc *crtc)
 EXPORT_SYMBOL(drm_crtc_vblank_put);
 
 /**
+ * drm_wait_one_vblank - wait for one vblank
+ * @dev: DRM device
+ * @crtc: crtc index
+ *
+ * This waits for one vblank to pass on @crtc, using the irq driver interfaces.
+ * It is a failure to call this when the vblank irq for @crtc is disabled, e.g.
+ * due to lack of driver support or because the crtc is off.
+ */
+void drm_wait_one_vblank(struct drm_device *dev, int crtc)
+{
+	int ret;
+	u32 last;
+
+	ret = drm_vblank_get(dev, crtc);
+	if (WARN_ON(ret))
+		return;
+
+	last = drm_vblank_count(dev, crtc);
+
+	ret = wait_event_timeout(dev->vblank[crtc].queue,
+				 last != drm_vblank_count(dev, crtc),
+				 msecs_to_jiffies(100));
+
+	WARN_ON(ret == 0);
+
+	drm_vblank_put(dev, crtc);
+}
+EXPORT_SYMBOL(drm_wait_one_vblank);
+
+/**
+ * drm_crtc_wait_one_vblank - wait for one vblank
+ * @crtc: DRM crtc
+ *
+ * This waits for one vblank to pass on @crtc, using the irq driver interfaces.
+ * It is a failure to call this when the vblank irq for @crtc is disabled, e.g.
+ * due to lack of driver support or because the crtc is off.
+ */
+void drm_crtc_wait_one_vblank(struct drm_crtc *crtc)
+{
+	drm_wait_one_vblank(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_wait_one_vblank);
+
+/**
  * drm_vblank_off - disable vblank events on a CRTC
  * @dev: DRM device
  * @crtc: CRTC in question
@@ -1004,19 +1119,34 @@ EXPORT_SYMBOL(drm_crtc_vblank_put);
  */
 void drm_vblank_off(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	struct drm_pending_vblank_event *e, *t;
 	struct timeval now;
 	unsigned long irqflags;
 	unsigned int seq;
 
-	spin_lock_irqsave(&dev->vbl_lock, irqflags);
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return;
+
+	spin_lock_irqsave(&dev->event_lock, irqflags);
+
+	spin_lock(&dev->vbl_lock);
 	vblank_disable_and_save(dev, crtc);
-	wake_up(&dev->vblank[crtc].queue);
+	wake_up(&vblank->queue);
+
+	/*
+	 * Prevent subsequent drm_vblank_get() from re-enabling
+	 * the vblank interrupt by bumping the refcount.
+	 */
+	if (!vblank->inmodeset) {
+		atomic_inc(&vblank->refcount);
+		vblank->inmodeset = 1;
+	}
+	spin_unlock(&dev->vbl_lock);
 
 	/* Send any queued vblank events, lest the natives grow disquiet */
 	seq = drm_vblank_count_and_time(dev, crtc, &now);
 
-	spin_lock(&dev->event_lock);
 	list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) {
 		if (e->pipe != crtc)
 			continue;
@@ -1027,9 +1157,7 @@ void drm_vblank_off(struct drm_device *dev, int crtc)
 		drm_vblank_put(dev, e->pipe);
 		send_vblank_event(dev, e, seq, &now);
 	}
-	spin_unlock(&dev->event_lock);
-
-	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
+	spin_unlock_irqrestore(&dev->event_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_vblank_off);
 
@@ -1066,11 +1194,35 @@ EXPORT_SYMBOL(drm_crtc_vblank_off);
  */
 void drm_vblank_on(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	unsigned long irqflags;
 
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return;
+
 	spin_lock_irqsave(&dev->vbl_lock, irqflags);
-	/* re-enable interrupts if there's are users left */
-	if (atomic_read(&dev->vblank[crtc].refcount) != 0)
+	/* Drop our private "prevent drm_vblank_get" refcount */
+	if (vblank->inmodeset) {
+		atomic_dec(&vblank->refcount);
+		vblank->inmodeset = 0;
+	}
+
+	/*
+	 * sample the current counter to avoid random jumps
+	 * when drm_vblank_enable() applies the diff
+	 *
+	 * -1 to make sure user will never see the same
+	 * vblank counter value before and after a modeset
+	 */
+	vblank->last =
+		(dev->driver->get_vblank_counter(dev, crtc) - 1) &
+		dev->max_vblank_count;
+	/*
+	 * re-enable interrupts if there are users left, or the
+	 * user wishes vblank interrupts to be enabled all the time.
+	 */
+	if (atomic_read(&vblank->refcount) != 0 ||
+	    (!dev->vblank_disable_immediate && drm_vblank_offdelay == 0))
 		WARN_ON(drm_vblank_enable(dev, crtc));
 	spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 }
@@ -1118,9 +1270,15 @@ EXPORT_SYMBOL(drm_crtc_vblank_on);
  */
 void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
+
 	/* vblank is not initialized (IRQ not installed ?), or has been freed */
 	if (!dev->num_crtcs)
 		return;
+
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return;
+
 	/*
 	 * To avoid all the problems that might happen if interrupts
 	 * were enabled/disabled around or between these calls, we just
@@ -1128,10 +1286,10 @@ void drm_vblank_pre_modeset(struct drm_device *dev, int crtc)
 	 * to avoid corrupting the count if multiple, mismatch calls occur),
 	 * so that interrupts remain enabled in the interim.
 	 */
-	if (!dev->vblank[crtc].inmodeset) {
-		dev->vblank[crtc].inmodeset = 0x1;
+	if (!vblank->inmodeset) {
+		vblank->inmodeset = 0x1;
 		if (drm_vblank_get(dev, crtc) == 0)
-			dev->vblank[crtc].inmodeset |= 0x2;
+			vblank->inmodeset |= 0x2;
 	}
 }
 EXPORT_SYMBOL(drm_vblank_pre_modeset);
@@ -1146,21 +1304,22 @@ EXPORT_SYMBOL(drm_vblank_pre_modeset);
  */
 void drm_vblank_post_modeset(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	unsigned long irqflags;
 
 	/* vblank is not initialized (IRQ not installed ?), or has been freed */
 	if (!dev->num_crtcs)
 		return;
 
-	if (dev->vblank[crtc].inmodeset) {
+	if (vblank->inmodeset) {
 		spin_lock_irqsave(&dev->vbl_lock, irqflags);
 		dev->vblank_disable_allowed = true;
 		spin_unlock_irqrestore(&dev->vbl_lock, irqflags);
 
-		if (dev->vblank[crtc].inmodeset & 0x2)
+		if (vblank->inmodeset & 0x2)
 			drm_vblank_put(dev, crtc);
 
-		dev->vblank[crtc].inmodeset = 0;
+		vblank->inmodeset = 0;
 	}
 }
 EXPORT_SYMBOL(drm_vblank_post_modeset);
@@ -1212,6 +1371,7 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 				  union drm_wait_vblank *vblwait,
 				  struct drm_file *file_priv)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
 	struct drm_pending_vblank_event *e;
 	struct timeval now;
 	unsigned long flags;
@@ -1235,6 +1395,18 @@ static int drm_queue_vblank_event(struct drm_device *dev, int pipe,
 
 	spin_lock_irqsave(&dev->event_lock, flags);
 
+	/*
+	 * drm_vblank_off() might have been called after we called
+	 * drm_vblank_get(). drm_vblank_off() holds event_lock
+	 * around the vblank disable, so no need for further locking.
+	 * The reference from drm_vblank_get() protects against
+	 * vblank disable from another source.
+	 */
+	if (!vblank->enabled) {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
 	if (file_priv->event_space < sizeof e->event) {
 		ret = -EBUSY;
 		goto err_unlock;
@@ -1295,6 +1467,7 @@ err_put:
 int drm_wait_vblank(struct drm_device *dev, void *data,
 		    struct drm_file *file_priv)
 {
+	struct drm_vblank_crtc *vblank;
 	union drm_wait_vblank *vblwait = data;
 	int ret;
 	unsigned int flags, seq, crtc, high_crtc;
@@ -1324,6 +1497,8 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 	if (crtc >= dev->num_crtcs)
 		return -EINVAL;
 
+	vblank = &dev->vblank[crtc];
+
 	ret = drm_vblank_get(dev, crtc);
 	if (ret) {
 		DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
@@ -1356,11 +1531,11 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 
 	DRM_DEBUG("waiting on vblank count %d, crtc %d\n",
 		  vblwait->request.sequence, crtc);
-	dev->vblank[crtc].last_wait = vblwait->request.sequence;
-	DRM_WAIT_ON(ret, dev->vblank[crtc].queue, 3 * HZ,
+	vblank->last_wait = vblwait->request.sequence;
+	DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
 		    (((drm_vblank_count(dev, crtc) -
 		       vblwait->request.sequence) <= (1 << 23)) ||
-		     !dev->vblank[crtc].enabled ||
+		     !vblank->enabled ||
 		     !dev->irq_enabled));
 
 	if (ret != -EINTR) {
@@ -1385,12 +1560,11 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc)
 {
 	struct drm_pending_vblank_event *e, *t;
 	struct timeval now;
-	unsigned long flags;
 	unsigned int seq;
 
-	seq = drm_vblank_count_and_time(dev, crtc, &now);
+	assert_spin_locked(&dev->event_lock);
 
-	spin_lock_irqsave(&dev->event_lock, flags);
+	seq = drm_vblank_count_and_time(dev, crtc, &now);
 
 	list_for_each_entry_safe(e, t, &dev->vblank_event_list, base.link) {
 		if (e->pipe != crtc)
@@ -1406,8 +1580,6 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc)
 		send_vblank_event(dev, e, seq, &now);
 	}
 
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-
 	trace_drm_vblank_event(crtc, seq);
 }
 
@@ -1421,6 +1593,7 @@ static void drm_handle_vblank_events(struct drm_device *dev, int crtc)
  */
 bool drm_handle_vblank(struct drm_device *dev, int crtc)
 {
+	struct drm_vblank_crtc *vblank = &dev->vblank[crtc];
 	u32 vblcount;
 	s64 diff_ns;
 	struct timeval tvblank;
@@ -1429,15 +1602,21 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
 	if (!dev->num_crtcs)
 		return false;
 
+	if (WARN_ON(crtc >= dev->num_crtcs))
+		return false;
+
+	spin_lock_irqsave(&dev->event_lock, irqflags);
+
 	/* Need timestamp lock to prevent concurrent execution with
 	 * vblank enable/disable, as this would cause inconsistent
 	 * or corrupted timestamps and vblank counts.
 	 */
-	spin_lock_irqsave(&dev->vblank_time_lock, irqflags);
+	spin_lock(&dev->vblank_time_lock);
 
 	/* Vblank irq handling disabled. Nothing to do. */
-	if (!dev->vblank[crtc].enabled) {
-		spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
+	if (!vblank->enabled) {
+		spin_unlock(&dev->vblank_time_lock);
+		spin_unlock_irqrestore(&dev->event_lock, irqflags);
 		return false;
 	}
 
@@ -1446,7 +1625,7 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
 	 */
 
 	/* Get current timestamp and count. */
-	vblcount = atomic_read(&dev->vblank[crtc].count);
+	vblcount = atomic_read(&vblank->count);
 	drm_get_last_vbltimestamp(dev, crtc, &tvblank, DRM_CALLED_FROM_VBLIRQ);
 
 	/* Compute time difference to timestamp of last vblank */
@@ -1470,17 +1649,20 @@ bool drm_handle_vblank(struct drm_device *dev, int crtc)
 		 * the timestamp computed above.
 		 */
 		smp_mb__before_atomic();
-		atomic_inc(&dev->vblank[crtc].count);
+		atomic_inc(&vblank->count);
 		smp_mb__after_atomic();
 	} else {
 		DRM_DEBUG("crtc %d: Redundant vblirq ignored. diff_ns = %d\n",
 			  crtc, (int) diff_ns);
 	}
 
-	wake_up(&dev->vblank[crtc].queue);
+	spin_unlock(&dev->vblank_time_lock);
+
+	wake_up(&vblank->queue);
 	drm_handle_vblank_events(dev, crtc);
 
-	spin_unlock_irqrestore(&dev->vblank_time_lock, irqflags);
+	spin_unlock_irqrestore(&dev->event_lock, irqflags);
+
 	return true;
 }
 EXPORT_SYMBOL(drm_handle_vblank);
diff --git a/drivers/gpu/drm/drm_legacy.h b/drivers/gpu/drm/drm_legacy.h
index d34f20a79b7c..c1dc61473db5 100644
--- a/drivers/gpu/drm/drm_legacy.h
+++ b/drivers/gpu/drm/drm_legacy.h
@@ -23,6 +23,15 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+/*
+ * This file contains legacy interfaces that modern drm drivers
+ * should no longer be using. They cannot be removed as legacy
+ * drivers use them, and removing them are API breaks.
+ */
+#include <linux/list.h>
+#include <drm/drm_legacy.h>
+
+struct agp_memory;
 struct drm_device;
 struct drm_file;
 
@@ -48,4 +57,57 @@ int drm_legacy_rmctx(struct drm_device *d, void *v, struct drm_file *f);
 int drm_legacy_setsareactx(struct drm_device *d, void *v, struct drm_file *f);
 int drm_legacy_getsareactx(struct drm_device *d, void *v, struct drm_file *f);
 
+/*
+ * Generic Buffer Management
+ */
+
+#define DRM_MAP_HASH_OFFSET 0x10000000
+
+int drm_legacy_addmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_rmmap_ioctl(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_addbufs(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_infobufs(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_markbufs(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_freebufs(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_mapbufs(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_dma_ioctl(struct drm_device *d, void *v, struct drm_file *f);
+
+void drm_legacy_vma_flush(struct drm_device *d);
+
+/*
+ * AGP Support
+ */
+
+struct drm_agp_mem {
+	unsigned long handle;
+	struct agp_memory *memory;
+	unsigned long bound;
+	int pages;
+	struct list_head head;
+};
+
+/*
+ * Generic Userspace Locking-API
+ */
+
+int drm_legacy_i_have_hw_lock(struct drm_device *d, struct drm_file *f);
+int drm_legacy_lock(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_unlock(struct drm_device *d, void *v, struct drm_file *f);
+int drm_legacy_lock_free(struct drm_lock_data *lock, unsigned int ctx);
+
+/* DMA support */
+int drm_legacy_dma_setup(struct drm_device *dev);
+void drm_legacy_dma_takedown(struct drm_device *dev);
+void drm_legacy_free_buffer(struct drm_device *dev,
+			    struct drm_buf * buf);
+void drm_legacy_reclaim_buffers(struct drm_device *dev,
+				struct drm_file *filp);
+
+/* Scatter Gather Support */
+void drm_legacy_sg_cleanup(struct drm_device *dev);
+int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
+			struct drm_file *file_priv);
+int drm_legacy_sg_free(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv);
+
 #endif /* __DRM_LEGACY_H__ */
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index e26b59e385ff..f861361a635e 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -36,6 +36,7 @@
 #include <linux/export.h>
 #include <drm/drmP.h>
 #include "drm_legacy.h"
+#include "drm_internal.h"
 
 static int drm_notifier(void *priv);
 
@@ -52,7 +53,8 @@ static int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);
  *
  * Add the current task to the lock wait queue, and attempt to take to lock.
  */
-int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
+int drm_legacy_lock(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
 {
 	DECLARE_WAITQUEUE(entry, current);
 	struct drm_lock *lock = data;
@@ -120,7 +122,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 		sigaddset(&dev->sigmask, SIGTTOU);
 		dev->sigdata.context = lock->context;
 		dev->sigdata.lock = master->lock.hw_lock;
-		block_all_signals(drm_notifier, &dev->sigdata, &dev->sigmask);
+		block_all_signals(drm_notifier, dev, &dev->sigmask);
 	}
 
 	if (dev->driver->dma_quiescent && (lock->flags & _DRM_LOCK_QUIESCENT))
@@ -146,7 +148,7 @@ int drm_lock(struct drm_device *dev, void *data, struct drm_file *file_priv)
  *
  * Transfer and free the lock.
  */
-int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
+int drm_legacy_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 {
 	struct drm_lock *lock = data;
 	struct drm_master *master = file_priv->master;
@@ -157,7 +159,7 @@ int drm_unlock(struct drm_device *dev, void *data, struct drm_file *file_priv)
 		return -EINVAL;
 	}
 
-	if (drm_lock_free(&master->lock, lock->context)) {
+	if (drm_legacy_lock_free(&master->lock, lock->context)) {
 		/* FIXME: Should really bail out here. */
 	}
 
@@ -250,7 +252,7 @@ static int drm_lock_transfer(struct drm_lock_data *lock_data,
  * Marks the lock as not held, via the \p cmpxchg instruction. Wakes any task
  * waiting on the lock queue.
  */
-int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context)
+int drm_legacy_lock_free(struct drm_lock_data *lock_data, unsigned int context)
 {
 	unsigned int old, new, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
@@ -286,26 +288,27 @@ int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context)
  * If the lock is not held, then let the signal proceed as usual.  If the lock
  * is held, then set the contended flag and keep the signal blocked.
  *
- * \param priv pointer to a drm_sigdata structure.
+ * \param priv pointer to a drm_device structure.
  * \return one if the signal should be delivered normally, or zero if the
  * signal should be blocked.
  */
 static int drm_notifier(void *priv)
 {
-	struct drm_sigdata *s = (struct drm_sigdata *) priv;
+	struct drm_device *dev = priv;
+	struct drm_hw_lock *lock = dev->sigdata.lock;
 	unsigned int old, new, prev;
 
 	/* Allow signal delivery if lock isn't held */
-	if (!s->lock || !_DRM_LOCK_IS_HELD(s->lock->lock)
-	    || _DRM_LOCKING_CONTEXT(s->lock->lock) != s->context)
+	if (!lock || !_DRM_LOCK_IS_HELD(lock->lock)
+	    || _DRM_LOCKING_CONTEXT(lock->lock) != dev->sigdata.context)
 		return 1;
 
 	/* Otherwise, set flag to force call to
 	   drmUnlock */
 	do {
-		old = s->lock->lock;
+		old = lock->lock;
 		new = old | _DRM_LOCK_CONT;
-		prev = cmpxchg(&s->lock->lock, old, new);
+		prev = cmpxchg(&lock->lock, old, new);
 	} while (prev != old);
 	return 0;
 }
@@ -323,7 +326,7 @@ static int drm_notifier(void *priv)
  * having to worry about starvation.
  */
 
-void drm_idlelock_take(struct drm_lock_data *lock_data)
+void drm_legacy_idlelock_take(struct drm_lock_data *lock_data)
 {
 	int ret;
 
@@ -340,9 +343,9 @@ void drm_idlelock_take(struct drm_lock_data *lock_data)
 	}
 	spin_unlock_bh(&lock_data->spinlock);
 }
-EXPORT_SYMBOL(drm_idlelock_take);
+EXPORT_SYMBOL(drm_legacy_idlelock_take);
 
-void drm_idlelock_release(struct drm_lock_data *lock_data)
+void drm_legacy_idlelock_release(struct drm_lock_data *lock_data)
 {
 	unsigned int old, prev;
 	volatile unsigned int *lock = &lock_data->hw_lock->lock;
@@ -360,9 +363,10 @@ void drm_idlelock_release(struct drm_lock_data *lock_data)
 	}
 	spin_unlock_bh(&lock_data->spinlock);
 }
-EXPORT_SYMBOL(drm_idlelock_release);
+EXPORT_SYMBOL(drm_legacy_idlelock_release);
 
-int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv)
+int drm_legacy_i_have_hw_lock(struct drm_device *dev,
+			      struct drm_file *file_priv)
 {
 	struct drm_master *master = file_priv->master;
 	return (file_priv->lock_count && master->lock.hw_lock &&
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 00c67c0f2381..a521ef6ff807 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -36,8 +36,20 @@
 #include <linux/highmem.h>
 #include <linux/export.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 #if __OS_HAS_AGP
+
+#ifdef HAVE_PAGE_AGP
+# include <asm/agp.h>
+#else
+# ifdef __powerpc__
+#  define PAGE_AGP	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
+# else
+#  define PAGE_AGP	PAGE_KERNEL
+# endif
+#endif
+
 static void *agp_remap(unsigned long offset, unsigned long size,
 		       struct drm_device * dev)
 {
@@ -108,25 +120,25 @@ static inline void *agp_remap(unsigned long offset, unsigned long size,
 
 #endif				/* agp */
 
-void drm_core_ioremap(struct drm_local_map *map, struct drm_device *dev)
+void drm_legacy_ioremap(struct drm_local_map *map, struct drm_device *dev)
 {
 	if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
 		map->handle = agp_remap(map->offset, map->size, dev);
 	else
 		map->handle = ioremap(map->offset, map->size);
 }
-EXPORT_SYMBOL(drm_core_ioremap);
+EXPORT_SYMBOL(drm_legacy_ioremap);
 
-void drm_core_ioremap_wc(struct drm_local_map *map, struct drm_device *dev)
+void drm_legacy_ioremap_wc(struct drm_local_map *map, struct drm_device *dev)
 {
 	if (dev->agp && dev->agp->cant_use_aperture && map->type == _DRM_AGP)
 		map->handle = agp_remap(map->offset, map->size, dev);
 	else
 		map->handle = ioremap_wc(map->offset, map->size);
 }
-EXPORT_SYMBOL(drm_core_ioremap_wc);
+EXPORT_SYMBOL(drm_legacy_ioremap_wc);
 
-void drm_core_ioremapfree(struct drm_local_map *map, struct drm_device *dev)
+void drm_legacy_ioremapfree(struct drm_local_map *map, struct drm_device *dev)
 {
 	if (!map->handle || !map->size)
 		return;
@@ -136,4 +148,4 @@ void drm_core_ioremapfree(struct drm_local_map *map, struct drm_device *dev)
 	else
 		iounmap(map->handle);
 }
-EXPORT_SYMBOL(drm_core_ioremapfree);
+EXPORT_SYMBOL(drm_legacy_ioremapfree);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index bedf1894e17e..d1b7d2006529 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1259,6 +1259,7 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev,
 	if (!mode)
 		return NULL;
 
+	mode->type |= DRM_MODE_TYPE_USERDEF;
 	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
 	return mode;
 }
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index 0dc57d5ecd10..8749fc06570e 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -57,6 +57,212 @@
 
 
 /**
+ * __drm_modeset_lock_all - internal helper to grab all modeset locks
+ * @dev: DRM device
+ * @trylock: trylock mode for atomic contexts
+ *
+ * This is a special version of drm_modeset_lock_all() which can also be used in
+ * atomic contexts. Then @trylock must be set to true.
+ *
+ * Returns:
+ * 0 on success or negative error code on failure.
+ */
+int __drm_modeset_lock_all(struct drm_device *dev,
+			   bool trylock)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx),
+		      trylock ? GFP_ATOMIC : GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	if (trylock) {
+		if (!mutex_trylock(&config->mutex))
+			return -EBUSY;
+	} else {
+		mutex_lock(&config->mutex);
+	}
+
+	drm_modeset_acquire_init(ctx, 0);
+	ctx->trylock_only = trylock;
+
+retry:
+	ret = drm_modeset_lock(&config->connection_mutex, ctx);
+	if (ret)
+		goto fail;
+	ret = drm_modeset_lock_all_crtcs(dev, ctx);
+	if (ret)
+		goto fail;
+
+	WARN_ON(config->acquire_ctx);
+
+	/* now we hold the locks, so now that it is safe, stash the
+	 * ctx for drm_modeset_unlock_all():
+	 */
+	config->acquire_ctx = ctx;
+
+	drm_warn_on_modeset_not_all_locked(dev);
+
+	return 0;
+
+fail:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(__drm_modeset_lock_all);
+
+/**
+ * drm_modeset_lock_all - take all modeset locks
+ * @dev: drm device
+ *
+ * This function takes all modeset locks, suitable where a more fine-grained
+ * scheme isn't (yet) implemented. Locks must be dropped with
+ * drm_modeset_unlock_all.
+ */
+void drm_modeset_lock_all(struct drm_device *dev)
+{
+	WARN_ON(__drm_modeset_lock_all(dev, false) != 0);
+}
+EXPORT_SYMBOL(drm_modeset_lock_all);
+
+/**
+ * drm_modeset_unlock_all - drop all modeset locks
+ * @dev: device
+ *
+ * This function drop all modeset locks taken by drm_modeset_lock_all.
+ */
+void drm_modeset_unlock_all(struct drm_device *dev)
+{
+	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx *ctx = config->acquire_ctx;
+
+	if (WARN_ON(!ctx))
+		return;
+
+	config->acquire_ctx = NULL;
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+
+	kfree(ctx);
+
+	mutex_unlock(&dev->mode_config.mutex);
+}
+EXPORT_SYMBOL(drm_modeset_unlock_all);
+
+/**
+ * drm_modeset_lock_crtc - lock crtc with hidden acquire ctx
+ * @crtc: drm crtc
+ *
+ * This function locks the given crtc using a hidden acquire context. This is
+ * necessary so that drivers internally using the atomic interfaces can grab
+ * further locks with the lock acquire context.
+ */
+void drm_modeset_lock_crtc(struct drm_crtc *crtc)
+{
+	struct drm_modeset_acquire_ctx *ctx;
+	int ret;
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (WARN_ON(!ctx))
+		return;
+
+	drm_modeset_acquire_init(ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&crtc->mutex, ctx);
+	if (ret)
+		goto fail;
+
+	WARN_ON(crtc->acquire_ctx);
+
+	/* now we hold the locks, so now that it is safe, stash the
+	 * ctx for drm_modeset_unlock_crtc():
+	 */
+	crtc->acquire_ctx = ctx;
+
+	return;
+
+fail:
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(ctx);
+		goto retry;
+	}
+}
+EXPORT_SYMBOL(drm_modeset_lock_crtc);
+
+/**
+ * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls
+ * @crtc: drm crtc
+ *
+ * Legacy ioctl operations like cursor updates or page flips only have per-crtc
+ * locking, and store the acquire ctx in the corresponding crtc. All other
+ * legacy operations take all locks and use a global acquire context. This
+ * function grabs the right one.
+ */
+struct drm_modeset_acquire_ctx *
+drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc)
+{
+	if (crtc->acquire_ctx)
+		return crtc->acquire_ctx;
+
+	WARN_ON(!crtc->dev->mode_config.acquire_ctx);
+
+	return crtc->dev->mode_config.acquire_ctx;
+}
+EXPORT_SYMBOL(drm_modeset_legacy_acquire_ctx);
+
+/**
+ * drm_modeset_unlock_crtc - drop crtc lock
+ * @crtc: drm crtc
+ *
+ * This drops the crtc lock acquire with drm_modeset_lock_crtc() and all other
+ * locks acquired through the hidden context.
+ */
+void drm_modeset_unlock_crtc(struct drm_crtc *crtc)
+{
+	struct drm_modeset_acquire_ctx *ctx = crtc->acquire_ctx;
+
+	if (WARN_ON(!ctx))
+		return;
+
+	crtc->acquire_ctx = NULL;
+	drm_modeset_drop_locks(ctx);
+	drm_modeset_acquire_fini(ctx);
+
+	kfree(ctx);
+}
+EXPORT_SYMBOL(drm_modeset_unlock_crtc);
+
+/**
+ * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked
+ * @dev: device
+ *
+ * Useful as a debug assert.
+ */
+void drm_warn_on_modeset_not_all_locked(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+
+	/* Locking is currently fubar in the panic handler. */
+	if (oops_in_progress)
+		return;
+
+	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
+		WARN_ON(!drm_modeset_is_locked(&crtc->mutex));
+
+	WARN_ON(!drm_modeset_is_locked(&dev->mode_config.connection_mutex));
+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+}
+EXPORT_SYMBOL(drm_warn_on_modeset_not_all_locked);
+
+/**
  * drm_modeset_acquire_init - initialize acquire context
  * @ctx: the acquire context
  * @flags: for future
@@ -108,7 +314,12 @@ static inline int modeset_lock(struct drm_modeset_lock *lock,
 
 	WARN_ON(ctx->contended);
 
-	if (interruptible && slow) {
+	if (ctx->trylock_only) {
+		if (!ww_mutex_trylock(&lock->mutex))
+			return -EBUSY;
+		else
+			return 0;
+	} else if (interruptible && slow) {
 		ret = ww_mutex_lock_slow_interruptible(&lock->mutex, &ctx->ww_ctx);
 	} else if (interruptible) {
 		ret = ww_mutex_lock_interruptible(&lock->mutex, &ctx->ww_ctx);
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index 020cfd934854..fd29f03645b8 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -27,6 +27,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/export.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 /**
  * drm_pci_alloc - Allocate a PCI consistent memory block, for DMA.
@@ -81,7 +82,7 @@ EXPORT_SYMBOL(drm_pci_alloc);
  *
  * This function is for internal use in the Linux-specific DRM core code.
  */
-void __drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
+void __drm_legacy_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 {
 	unsigned long addr;
 	size_t sz;
@@ -105,7 +106,7 @@ void __drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
  */
 void drm_pci_free(struct drm_device * dev, drm_dma_handle_t * dmah)
 {
-	__drm_pci_free(dev, dmah);
+	__drm_legacy_pci_free(dev, dmah);
 	kfree(dmah);
 }
 
@@ -127,34 +128,20 @@ static int drm_get_pci_domain(struct drm_device *dev)
 	return pci_domain_nr(dev->pdev->bus);
 }
 
-static int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
+int drm_pci_set_busid(struct drm_device *dev, struct drm_master *master)
 {
-	int len, ret;
-	master->unique_len = 40;
-	master->unique_size = master->unique_len;
-	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
-	if (master->unique == NULL)
+	master->unique = kasprintf(GFP_KERNEL, "pci:%04x:%02x:%02x.%d",
+					drm_get_pci_domain(dev),
+					dev->pdev->bus->number,
+					PCI_SLOT(dev->pdev->devfn),
+					PCI_FUNC(dev->pdev->devfn));
+	if (!master->unique)
 		return -ENOMEM;
 
-
-	len = snprintf(master->unique, master->unique_len,
-		       "pci:%04x:%02x:%02x.%d",
-		       drm_get_pci_domain(dev),
-		       dev->pdev->bus->number,
-		       PCI_SLOT(dev->pdev->devfn),
-		       PCI_FUNC(dev->pdev->devfn));
-
-	if (len >= master->unique_len) {
-		DRM_ERROR("buffer overflow");
-		ret = -EINVAL;
-		goto err;
-	} else
-		master->unique_len = len;
-
+	master->unique_len = strlen(master->unique);
 	return 0;
-err:
-	return ret;
 }
+EXPORT_SYMBOL(drm_pci_set_busid);
 
 int drm_pci_set_unique(struct drm_device *dev,
 		       struct drm_master *master,
@@ -163,8 +150,7 @@ int drm_pci_set_unique(struct drm_device *dev,
 	int domain, bus, slot, func, ret;
 
 	master->unique_len = u->unique_len;
-	master->unique_size = u->unique_len + 1;
-	master->unique = kmalloc(master->unique_size, GFP_KERNEL);
+	master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL);
 	if (!master->unique) {
 		ret = -ENOMEM;
 		goto err;
@@ -269,10 +255,6 @@ void drm_pci_agp_destroy(struct drm_device *dev)
 	}
 }
 
-static struct drm_bus drm_pci_bus = {
-	.set_busid = drm_pci_set_busid,
-};
-
 /**
  * drm_get_pci_dev - Register a PCI device with the DRM subsystem
  * @pdev: PCI device
@@ -353,8 +335,6 @@ int drm_pci_init(struct drm_driver *driver, struct pci_driver *pdriver)
 
 	DRM_DEBUG("\n");
 
-	driver->bus = &drm_pci_bus;
-
 	if (driver->driver_features & DRIVER_MODESET)
 		return pci_register_driver(pdriver);
 
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index d5b76f148c12..5314c9d5fef4 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -68,42 +68,23 @@ err_free:
 	return ret;
 }
 
-static int drm_platform_set_busid(struct drm_device *dev, struct drm_master *master)
+int drm_platform_set_busid(struct drm_device *dev, struct drm_master *master)
 {
-	int len, ret, id;
-
-	master->unique_len = 13 + strlen(dev->platformdev->name);
-	master->unique_size = master->unique_len;
-	master->unique = kmalloc(master->unique_len + 1, GFP_KERNEL);
-
-	if (master->unique == NULL)
-		return -ENOMEM;
+	int id;
 
 	id = dev->platformdev->id;
-
-	/* if only a single instance of the platform device, id will be
-	 * set to -1.. use 0 instead to avoid a funny looking bus-id:
-	 */
-	if (id == -1)
+	if (id < 0)
 		id = 0;
 
-	len = snprintf(master->unique, master->unique_len,
-			"platform:%s:%02d", dev->platformdev->name, id);
-
-	if (len > master->unique_len) {
-		DRM_ERROR("Unique buffer overflowed\n");
-		ret = -EINVAL;
-		goto err;
-	}
+	master->unique = kasprintf(GFP_KERNEL, "platform:%s:%02d",
+						dev->platformdev->name, id);
+	if (!master->unique)
+		return -ENOMEM;
 
+	master->unique_len = strlen(master->unique);
 	return 0;
-err:
-	return ret;
 }
-
-static struct drm_bus drm_platform_bus = {
-	.set_busid = drm_platform_set_busid,
-};
+EXPORT_SYMBOL(drm_platform_set_busid);
 
 /**
  * drm_platform_init - Register a platform device with the DRM subsystem
@@ -120,7 +101,6 @@ int drm_platform_init(struct drm_driver *driver, struct platform_device *platfor
 {
 	DRM_DEBUG("\n");
 
-	driver->bus = &drm_platform_bus;
 	return drm_get_platform_dev(platform_device, driver);
 }
 EXPORT_SYMBOL(drm_platform_init);
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 99d578bad17e..2807a771f505 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -29,6 +29,7 @@
 #include <linux/export.h>
 #include <linux/dma-buf.h>
 #include <drm/drmP.h>
+#include "drm_internal.h"
 
 /*
  * DMA-BUF/GEM Object references and lifetime overview:
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index db7d250f7ac7..6857e9ad6339 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -82,6 +82,22 @@ static void drm_mode_validate_flag(struct drm_connector *connector,
 	return;
 }
 
+static int drm_helper_probe_add_cmdline_mode(struct drm_connector *connector)
+{
+	struct drm_display_mode *mode;
+
+	if (!connector->cmdline_mode.specified)
+		return 0;
+
+	mode = drm_mode_create_from_cmdline_mode(connector->dev,
+						 &connector->cmdline_mode);
+	if (mode == NULL)
+		return 0;
+
+	drm_mode_probed_add(connector, mode);
+	return 1;
+}
+
 static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connector *connector,
 							      uint32_t maxX, uint32_t maxY, bool merge_type_bits)
 {
@@ -141,6 +157,7 @@ static int drm_helper_probe_single_connector_modes_merge_bits(struct drm_connect
 
 	if (count == 0 && connector->status == connector_status_connected)
 		count = drm_add_modes_noedid(connector, 1024, 768);
+	count += drm_helper_probe_add_cmdline_mode(connector);
 	if (count == 0)
 		goto prune;
 
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index 1c78406f6e71..4f0f3b36d537 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -34,6 +34,7 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <drm/drmP.h>
+#include "drm_legacy.h"
 
 #define DEBUG_SCATTER 0
 
@@ -78,8 +79,8 @@ void drm_legacy_sg_cleanup(struct drm_device *dev)
 # define ScatterHandle(x) (unsigned int)(x)
 #endif
 
-int drm_sg_alloc(struct drm_device *dev, void *data,
-		 struct drm_file *file_priv)
+int drm_legacy_sg_alloc(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
@@ -194,8 +195,8 @@ int drm_sg_alloc(struct drm_device *dev, void *data,
 	return -ENOMEM;
 }
 
-int drm_sg_free(struct drm_device *dev, void *data,
-		struct drm_file *file_priv)
+int drm_legacy_sg_free(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
 {
 	struct drm_scatter_gather *request = data;
 	struct drm_sg_mem *entry;
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index ab1a5f6dde8a..cc3d6d6d67e0 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -21,6 +21,7 @@
 #include <drm/drm_sysfs.h>
 #include <drm/drm_core.h>
 #include <drm/drmP.h>
+#include "drm_internal.h"
 
 #define to_drm_minor(d) dev_get_drvdata(d)
 #define to_drm_connector(d) dev_get_drvdata(d)
diff --git a/drivers/gpu/drm/drm_usb.c b/drivers/gpu/drm/drm_usb.c
deleted file mode 100644
index f2fe94aab901..000000000000
--- a/drivers/gpu/drm/drm_usb.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include <drm/drmP.h>
-#include <drm/drm_usb.h>
-#include <linux/usb.h>
-#include <linux/module.h>
-
-int drm_get_usb_dev(struct usb_interface *interface,
-		    const struct usb_device_id *id,
-		    struct drm_driver *driver)
-{
-	struct drm_device *dev;
-	int ret;
-
-	DRM_DEBUG("\n");
-
-	dev = drm_dev_alloc(driver, &interface->dev);
-	if (!dev)
-		return -ENOMEM;
-
-	dev->usbdev = interface_to_usbdev(interface);
-	usb_set_intfdata(interface, dev);
-
-	ret = drm_dev_register(dev, 0);
-	if (ret)
-		goto err_free;
-
-	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n",
-		 driver->name, driver->major, driver->minor, driver->patchlevel,
-		 driver->date, dev->primary->index);
-
-	return 0;
-
-err_free:
-	drm_dev_unref(dev);
-	return ret;
-
-}
-EXPORT_SYMBOL(drm_get_usb_dev);
-
-static int drm_usb_set_busid(struct drm_device *dev,
-			       struct drm_master *master)
-{
-	return 0;
-}
-
-static struct drm_bus drm_usb_bus = {
-	.set_busid = drm_usb_set_busid,
-};
-
-/**
- * drm_usb_init - Register matching USB devices with the DRM subsystem
- * @driver: DRM device driver
- * @udriver: USB device driver
- *
- * Registers one or more devices matched by a USB driver with the DRM
- * subsystem.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int drm_usb_init(struct drm_driver *driver, struct usb_driver *udriver)
-{
-	int res;
-	DRM_DEBUG("\n");
-
-	driver->bus = &drm_usb_bus;
-
-	res = usb_register(udriver);
-	return res;
-}
-EXPORT_SYMBOL(drm_usb_init);
-
-/**
- * drm_usb_exit - Unregister matching USB devices from the DRM subsystem
- * @driver: DRM device driver
- * @udriver: USB device driver
- *
- * Unregisters one or more devices matched by a USB driver from the DRM
- * subsystem.
- */
-void drm_usb_exit(struct drm_driver *driver,
-		  struct usb_driver *udriver)
-{
-	usb_deregister(udriver);
-}
-EXPORT_SYMBOL(drm_usb_exit);
-
-MODULE_AUTHOR("David Airlie");
-MODULE_DESCRIPTION("USB DRM support");
-MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 24e045c4f531..06cad0323699 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -35,10 +35,19 @@
 
 #include <drm/drmP.h>
 #include <linux/export.h>
+#include <linux/seq_file.h>
 #if defined(__ia64__)
 #include <linux/efi.h>
 #include <linux/slab.h>
 #endif
+#include <asm/pgtable.h>
+#include "drm_legacy.h"
+
+struct drm_vma_entry {
+	struct list_head head;
+	struct vm_area_struct *vma;
+	pid_t pid;
+};
 
 static void drm_vm_open(struct vm_area_struct *vma);
 static void drm_vm_close(struct vm_area_struct *vma);
@@ -263,7 +272,7 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 				dmah.vaddr = map->handle;
 				dmah.busaddr = map->offset;
 				dmah.size = map->size;
-				__drm_pci_free(dev, &dmah);
+				__drm_legacy_pci_free(dev, &dmah);
 				break;
 			}
 			kfree(map);
@@ -662,3 +671,68 @@ int drm_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ret;
 }
 EXPORT_SYMBOL(drm_mmap);
+
+void drm_legacy_vma_flush(struct drm_device *dev)
+{
+	struct drm_vma_entry *vma, *vma_temp;
+
+	/* Clear vma list (only needed for legacy drivers) */
+	list_for_each_entry_safe(vma, vma_temp, &dev->vmalist, head) {
+		list_del(&vma->head);
+		kfree(vma);
+	}
+}
+
+int drm_vma_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_vma_entry *pt;
+	struct vm_area_struct *vma;
+	unsigned long vma_count = 0;
+#if defined(__i386__)
+	unsigned int pgprot;
+#endif
+
+	mutex_lock(&dev->struct_mutex);
+	list_for_each_entry(pt, &dev->vmalist, head)
+		vma_count++;
+
+	seq_printf(m, "vma use count: %lu, high_memory = %pK, 0x%pK\n",
+		   vma_count, high_memory,
+		   (void *)(unsigned long)virt_to_phys(high_memory));
+
+	list_for_each_entry(pt, &dev->vmalist, head) {
+		vma = pt->vma;
+		if (!vma)
+			continue;
+		seq_printf(m,
+			   "\n%5d 0x%pK-0x%pK %c%c%c%c%c%c 0x%08lx000",
+			   pt->pid,
+			   (void *)vma->vm_start, (void *)vma->vm_end,
+			   vma->vm_flags & VM_READ ? 'r' : '-',
+			   vma->vm_flags & VM_WRITE ? 'w' : '-',
+			   vma->vm_flags & VM_EXEC ? 'x' : '-',
+			   vma->vm_flags & VM_MAYSHARE ? 's' : 'p',
+			   vma->vm_flags & VM_LOCKED ? 'l' : '-',
+			   vma->vm_flags & VM_IO ? 'i' : '-',
+			   vma->vm_pgoff);
+
+#if defined(__i386__)
+		pgprot = pgprot_val(vma->vm_page_prot);
+		seq_printf(m, " %c%c%c%c%c%c%c%c%c",
+			   pgprot & _PAGE_PRESENT ? 'p' : '-',
+			   pgprot & _PAGE_RW ? 'w' : 'r',
+			   pgprot & _PAGE_USER ? 'u' : 's',
+			   pgprot & _PAGE_PWT ? 't' : 'b',
+			   pgprot & _PAGE_PCD ? 'u' : 'c',
+			   pgprot & _PAGE_ACCESSED ? 'a' : '-',
+			   pgprot & _PAGE_DIRTY ? 'd' : '-',
+			   pgprot & _PAGE_PSE ? 'm' : 'k',
+			   pgprot & _PAGE_GLOBAL ? 'g' : 'l');
+#endif
+		seq_printf(m, "\n");
+	}
+	mutex_unlock(&dev->struct_mutex);
+	return 0;
+}
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 0d74e9b99c4e..5aae95cf5b23 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -330,6 +330,7 @@ static struct drm_driver exynos_drm_driver = {
 	.preclose		= exynos_drm_preclose,
 	.lastclose		= exynos_drm_lastclose,
 	.postclose		= exynos_drm_postclose,
+	.set_busid		= drm_platform_set_busid,
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= exynos_drm_crtc_enable_vblank,
 	.disable_vblank		= exynos_drm_crtc_disable_vblank,
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index eec993f93b1a..6ec3a905fdd2 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -476,6 +476,7 @@ static struct drm_driver driver = {
 	.unload = psb_driver_unload,
 	.lastclose = psb_driver_lastclose,
 	.preclose = psb_driver_preclose,
+	.set_busid = drm_pci_set_busid,
 
 	.num_ioctls = ARRAY_SIZE(psb_ioctls),
 	.device_is_agp = psb_driver_device_is_agp,
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index bae897de9468..d91856779beb 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -213,7 +213,7 @@ static int i810_dma_cleanup(struct drm_device *dev)
 		    (drm_i810_private_t *) dev->dev_private;
 
 		if (dev_priv->ring.virtual_start)
-			drm_core_ioremapfree(&dev_priv->ring.map, dev);
+			drm_legacy_ioremapfree(&dev_priv->ring.map, dev);
 		if (dev_priv->hw_status_page) {
 			pci_free_consistent(dev->pdev, PAGE_SIZE,
 					    dev_priv->hw_status_page,
@@ -227,7 +227,7 @@ static int i810_dma_cleanup(struct drm_device *dev)
 			drm_i810_buf_priv_t *buf_priv = buf->dev_private;
 
 			if (buf_priv->kernel_virtual && buf->total)
-				drm_core_ioremapfree(&buf_priv->map, dev);
+				drm_legacy_ioremapfree(&buf_priv->map, dev);
 		}
 	}
 	return 0;
@@ -306,7 +306,7 @@ static int i810_freelist_init(struct drm_device *dev, drm_i810_private_t *dev_pr
 		buf_priv->map.flags = 0;
 		buf_priv->map.mtrr = 0;
 
-		drm_core_ioremap(&buf_priv->map, dev);
+		drm_legacy_ioremap(&buf_priv->map, dev);
 		buf_priv->kernel_virtual = buf_priv->map.handle;
 
 	}
@@ -334,7 +334,7 @@ static int i810_dma_initialize(struct drm_device *dev,
 		DRM_ERROR("can not find sarea!\n");
 		return -EINVAL;
 	}
-	dev_priv->mmio_map = drm_core_findmap(dev, init->mmio_offset);
+	dev_priv->mmio_map = drm_legacy_findmap(dev, init->mmio_offset);
 	if (!dev_priv->mmio_map) {
 		dev->dev_private = (void *)dev_priv;
 		i810_dma_cleanup(dev);
@@ -342,7 +342,7 @@ static int i810_dma_initialize(struct drm_device *dev,
 		return -EINVAL;
 	}
 	dev->agp_buffer_token = init->buffers_offset;
-	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
+	dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
 	if (!dev->agp_buffer_map) {
 		dev->dev_private = (void *)dev_priv;
 		i810_dma_cleanup(dev);
@@ -363,7 +363,7 @@ static int i810_dma_initialize(struct drm_device *dev,
 	dev_priv->ring.map.flags = 0;
 	dev_priv->ring.map.mtrr = 0;
 
-	drm_core_ioremap(&dev_priv->ring.map, dev);
+	drm_legacy_ioremap(&dev_priv->ring.map, dev);
 
 	if (dev_priv->ring.map.handle == NULL) {
 		dev->dev_private = (void *)dev_priv;
@@ -1215,9 +1215,9 @@ void i810_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	if (file_priv->master && file_priv->master->lock.hw_lock) {
-		drm_idlelock_take(&file_priv->master->lock);
+		drm_legacy_idlelock_take(&file_priv->master->lock);
 		i810_driver_reclaim_buffers(dev, file_priv);
-		drm_idlelock_release(&file_priv->master->lock);
+		drm_legacy_idlelock_release(&file_priv->master->lock);
 	} else {
 		/* master disappeared, clean up stuff anyway and hope nothing
 		 * goes wrong */
diff --git a/drivers/gpu/drm/i810/i810_drv.c b/drivers/gpu/drm/i810/i810_drv.c
index 441ccf8f5bdc..6cb08a1c6b62 100644
--- a/drivers/gpu/drm/i810/i810_drv.c
+++ b/drivers/gpu/drm/i810/i810_drv.c
@@ -63,6 +63,7 @@ static struct drm_driver driver = {
 	.load = i810_driver_load,
 	.lastclose = i810_driver_lastclose,
 	.preclose = i810_driver_preclose,
+	.set_busid = drm_pci_set_busid,
 	.device_is_agp = i810_driver_device_is_agp,
 	.dma_quiescent = i810_driver_dma_quiescent,
 	.ioctls = i810_ioctls,
diff --git a/drivers/gpu/drm/i810/i810_drv.h b/drivers/gpu/drm/i810/i810_drv.h
index d4d16eddd651..93ec5dc4e7d3 100644
--- a/drivers/gpu/drm/i810/i810_drv.h
+++ b/drivers/gpu/drm/i810/i810_drv.h
@@ -32,6 +32,8 @@
 #ifndef _I810_DRV_H_
 #define _I810_DRV_H_
 
+#include <drm/drm_legacy.h>
+
 /* General customization:
  */
 
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 91bd167e1cb7..c1dd485aeb6c 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -31,6 +31,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gpu_error.o \
 	  i915_irq.o \
 	  i915_trace_points.o \
+	  intel_lrc.o \
 	  intel_ringbuffer.o \
 	  intel_uncore.o
 
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index dea99d92fb4a..c45856bcc8b9 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -842,8 +842,6 @@ finish:
  */
 bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 	if (!ring->needs_cmd_parser)
 		return false;
 
@@ -852,7 +850,7 @@ bool i915_needs_cmd_parser(struct intel_engine_cs *ring)
 	 * disabled. That will cause all of the parser's PPGTT checks to
 	 * fail. For now, disable parsing when PPGTT is off.
 	 */
-	if (!dev_priv->mm.aliasing_ppgtt)
+	if (USES_PPGTT(ring->dev))
 		return false;
 
 	return (i915.enable_cmd_parser == 1);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 9e737b771c40..6c82bdaa0822 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -333,7 +333,7 @@ static int per_file_stats(int id, void *ptr, void *data)
 			}
 
 			ppgtt = container_of(vma->vm, struct i915_hw_ppgtt, base);
-			if (ppgtt->ctx && ppgtt->ctx->file_priv != stats->file_priv)
+			if (ppgtt->file_priv != stats->file_priv)
 				continue;
 
 			if (obj->ring) /* XXX per-vma statistic */
@@ -703,6 +703,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data)
 		}
 
 		for_each_pipe(pipe) {
+			if (!intel_display_power_enabled(dev_priv,
+						POWER_DOMAIN_PIPE(pipe))) {
+				seq_printf(m, "Pipe %c power disabled\n",
+					   pipe_name(pipe));
+				continue;
+			}
 			seq_printf(m, "Pipe %c IMR:\t%08x\n",
 				   pipe_name(pipe),
 				   I915_READ(GEN8_DE_PIPE_IMR(pipe)));
@@ -1433,6 +1439,47 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static int i915_fbc_fc_get(void *data, u64 *val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev))
+		return -ENODEV;
+
+	drm_modeset_lock_all(dev);
+	*val = dev_priv->fbc.false_color;
+	drm_modeset_unlock_all(dev);
+
+	return 0;
+}
+
+static int i915_fbc_fc_set(void *data, u64 val)
+{
+	struct drm_device *dev = data;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 reg;
+
+	if (INTEL_INFO(dev)->gen < 7 || !HAS_FBC(dev))
+		return -ENODEV;
+
+	drm_modeset_lock_all(dev);
+
+	reg = I915_READ(ILK_DPFC_CONTROL);
+	dev_priv->fbc.false_color = val;
+
+	I915_WRITE(ILK_DPFC_CONTROL, val ?
+		   (reg | FBC_CTL_FALSE_COLOR) :
+		   (reg & ~FBC_CTL_FALSE_COLOR));
+
+	drm_modeset_unlock_all(dev);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(i915_fbc_fc_fops,
+			i915_fbc_fc_get, i915_fbc_fc_set,
+			"%llu\n");
+
 static int i915_ips_status(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
@@ -1630,6 +1677,14 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static void describe_ctx_ringbuf(struct seq_file *m,
+				 struct intel_ringbuffer *ringbuf)
+{
+	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
+		   ringbuf->space, ringbuf->head, ringbuf->tail,
+		   ringbuf->last_retired_head);
+}
+
 static int i915_context_status(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = m->private;
@@ -1656,16 +1711,168 @@ static int i915_context_status(struct seq_file *m, void *unused)
 	}
 
 	list_for_each_entry(ctx, &dev_priv->context_list, link) {
-		if (ctx->legacy_hw_ctx.rcs_state == NULL)
+		if (!i915.enable_execlists &&
+		    ctx->legacy_hw_ctx.rcs_state == NULL)
 			continue;
 
 		seq_puts(m, "HW context ");
 		describe_ctx(m, ctx);
-		for_each_ring(ring, dev_priv, i)
+		for_each_ring(ring, dev_priv, i) {
 			if (ring->default_context == ctx)
-				seq_printf(m, "(default context %s) ", ring->name);
+				seq_printf(m, "(default context %s) ",
+					   ring->name);
+		}
+
+		if (i915.enable_execlists) {
+			seq_putc(m, '\n');
+			for_each_ring(ring, dev_priv, i) {
+				struct drm_i915_gem_object *ctx_obj =
+					ctx->engine[i].state;
+				struct intel_ringbuffer *ringbuf =
+					ctx->engine[i].ringbuf;
+
+				seq_printf(m, "%s: ", ring->name);
+				if (ctx_obj)
+					describe_obj(m, ctx_obj);
+				if (ringbuf)
+					describe_ctx_ringbuf(m, ringbuf);
+				seq_putc(m, '\n');
+			}
+		} else {
+			describe_obj(m, ctx->legacy_hw_ctx.rcs_state);
+		}
+
+		seq_putc(m, '\n');
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static int i915_dump_lrc(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring;
+	struct intel_context *ctx;
+	int ret, i;
+
+	if (!i915.enable_execlists) {
+		seq_printf(m, "Logical Ring Contexts are disabled\n");
+		return 0;
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	list_for_each_entry(ctx, &dev_priv->context_list, link) {
+		for_each_ring(ring, dev_priv, i) {
+			struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
+
+			if (ring->default_context == ctx)
+				continue;
+
+			if (ctx_obj) {
+				struct page *page = i915_gem_object_get_page(ctx_obj, 1);
+				uint32_t *reg_state = kmap_atomic(page);
+				int j;
+
+				seq_printf(m, "CONTEXT: %s %u\n", ring->name,
+						intel_execlists_ctx_id(ctx_obj));
+
+				for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) {
+					seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n",
+					i915_gem_obj_ggtt_offset(ctx_obj) + 4096 + (j * 4),
+					reg_state[j], reg_state[j + 1],
+					reg_state[j + 2], reg_state[j + 3]);
+				}
+				kunmap_atomic(reg_state);
+
+				seq_putc(m, '\n');
+			}
+		}
+	}
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static int i915_execlists(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring;
+	u32 status_pointer;
+	u8 read_pointer;
+	u8 write_pointer;
+	u32 status;
+	u32 ctx_id;
+	struct list_head *cursor;
+	int ring_id, i;
+	int ret;
+
+	if (!i915.enable_execlists) {
+		seq_puts(m, "Logical Ring Contexts are disabled\n");
+		return 0;
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	for_each_ring(ring, dev_priv, ring_id) {
+		struct intel_ctx_submit_request *head_req = NULL;
+		int count = 0;
+		unsigned long flags;
+
+		seq_printf(m, "%s\n", ring->name);
+
+		status = I915_READ(RING_EXECLIST_STATUS(ring));
+		ctx_id = I915_READ(RING_EXECLIST_STATUS(ring) + 4);
+		seq_printf(m, "\tExeclist status: 0x%08X, context: %u\n",
+			   status, ctx_id);
+
+		status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+		seq_printf(m, "\tStatus pointer: 0x%08X\n", status_pointer);
+
+		read_pointer = ring->next_context_status_buffer;
+		write_pointer = status_pointer & 0x07;
+		if (read_pointer > write_pointer)
+			write_pointer += 6;
+		seq_printf(m, "\tRead pointer: 0x%08X, write pointer 0x%08X\n",
+			   read_pointer, write_pointer);
+
+		for (i = 0; i < 6; i++) {
+			status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i);
+			ctx_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) + 8*i + 4);
+
+			seq_printf(m, "\tStatus buffer %d: 0x%08X, context: %u\n",
+				   i, status, ctx_id);
+		}
+
+		spin_lock_irqsave(&ring->execlist_lock, flags);
+		list_for_each(cursor, &ring->execlist_queue)
+			count++;
+		head_req = list_first_entry_or_null(&ring->execlist_queue,
+				struct intel_ctx_submit_request, execlist_link);
+		spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+		seq_printf(m, "\t%d requests in queue\n", count);
+		if (head_req) {
+			struct drm_i915_gem_object *ctx_obj;
+
+			ctx_obj = head_req->ctx->engine[ring_id].state;
+			seq_printf(m, "\tHead request id: %u\n",
+				   intel_execlists_ctx_id(ctx_obj));
+			seq_printf(m, "\tHead request tail: %u\n",
+				   head_req->tail);
+		}
 
-		describe_obj(m, ctx->legacy_hw_ctx.rcs_state);
 		seq_putc(m, '\n');
 	}
 
@@ -1774,7 +1981,13 @@ static int per_file_ctx(int id, void *ptr, void *data)
 {
 	struct intel_context *ctx = ptr;
 	struct seq_file *m = data;
-	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(ctx);
+	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+
+	if (!ppgtt) {
+		seq_printf(m, "  no ppgtt for context %d\n",
+			   ctx->user_handle);
+		return 0;
+	}
 
 	if (i915_gem_context_is_default(ctx))
 		seq_puts(m, "  default context:\n");
@@ -1834,8 +2047,7 @@ static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev)
 		seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset);
 
 		ppgtt->debug_dump(ppgtt, m);
-	} else
-		return;
+	}
 
 	list_for_each_entry_reverse(file, &dev->filelist, lhead) {
 		struct drm_i915_file_private *file_priv = file->driver_priv;
@@ -2667,8 +2879,7 @@ static int i9xx_pipe_crc_auto_source(struct drm_device *dev, enum pipe pipe,
 	*source = INTEL_PIPE_CRC_SOURCE_PIPE;
 
 	drm_modeset_lock_all(dev);
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (!encoder->base.crtc)
 			continue;
 
@@ -3923,6 +4134,8 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
 	{"i915_context_status", i915_context_status, 0},
+	{"i915_dump_lrc", i915_dump_lrc, 0},
+	{"i915_execlists", i915_execlists, 0},
 	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 	{"i915_swizzle_info", i915_swizzle_info, 0},
 	{"i915_ppgtt_info", i915_ppgtt_info, 0},
@@ -3957,6 +4170,7 @@ static const struct i915_debugfs_files {
 	{"i915_pri_wm_latency", &i915_pri_wm_latency_fops},
 	{"i915_spr_wm_latency", &i915_spr_wm_latency_fops},
 	{"i915_cur_wm_latency", &i915_cur_wm_latency_fops},
+	{"i915_fbc_false_color", &i915_fbc_fc_fops},
 };
 
 void intel_display_crc_init(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 9933c26017ed..272d3d16147c 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -31,6 +31,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_legacy.h>
 #include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
@@ -196,7 +197,7 @@ static int i915_initialize(struct drm_device *dev, drm_i915_init_t *init)
 	struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;
 	int ret;
 
-	master_priv->sarea = drm_getsarea(dev);
+	master_priv->sarea = drm_legacy_getsarea(dev);
 	if (master_priv->sarea) {
 		master_priv->sarea_priv = (drm_i915_sarea_t *)
 			((u8 *)master_priv->sarea->handle + init->sarea_priv_offset);
@@ -999,7 +1000,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 		value = HAS_WT(dev);
 		break;
 	case I915_PARAM_HAS_ALIASING_PPGTT:
-		value = dev_priv->mm.aliasing_ppgtt || USES_FULL_PPGTT(dev);
+		value = USES_PPGTT(dev);
 		break;
 	case I915_PARAM_HAS_WAIT_TIMEOUT:
 		value = 1;
@@ -1355,8 +1356,6 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	if (ret)
 		goto cleanup_irq;
 
-	INIT_WORK(&dev_priv->console_resume_work, intel_console_resume);
-
 	intel_modeset_gem_init(dev);
 
 	/* Always safe in the mode setting case. */
@@ -1393,7 +1392,6 @@ cleanup_gem:
 	i915_gem_cleanup_ringbuffer(dev);
 	i915_gem_context_fini(dev);
 	mutex_unlock(&dev->struct_mutex);
-	WARN_ON(dev_priv->mm.aliasing_ppgtt);
 cleanup_irq:
 	drm_irq_uninstall(dev);
 cleanup_gem_stolen:
@@ -1608,9 +1606,10 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	dev->dev_private = dev_priv;
 	dev_priv->dev = dev;
 
-	/* copy initial configuration to dev_priv->info */
+	/* Setup the write-once "constant" device info */
 	device_info = (struct intel_device_info *)&dev_priv->info;
-	*device_info = *info;
+	memcpy(device_info, info, sizeof(dev_priv->info));
+	device_info->device_id = dev->pdev->device;
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->gpu_error.lock);
@@ -1822,7 +1821,7 @@ out_mtrrfree:
 	arch_phys_wc_del(dev_priv->gtt.mtrr);
 	io_mapping_free(dev_priv->gtt.mappable);
 out_gtt:
-	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
+	i915_global_gtt_cleanup(dev);
 out_regs:
 	intel_uncore_fini(dev);
 	pci_iounmap(dev->pdev, dev_priv->regs);
@@ -1869,7 +1868,6 @@ int i915_driver_unload(struct drm_device *dev)
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		intel_fbdev_fini(dev);
 		intel_modeset_cleanup(dev);
-		cancel_work_sync(&dev_priv->console_resume_work);
 
 		/*
 		 * free the memory space allocated for the child device
@@ -1902,7 +1900,6 @@ int i915_driver_unload(struct drm_device *dev)
 		mutex_lock(&dev->struct_mutex);
 		i915_gem_cleanup_ringbuffer(dev);
 		i915_gem_context_fini(dev);
-		WARN_ON(dev_priv->mm.aliasing_ppgtt);
 		mutex_unlock(&dev->struct_mutex);
 		i915_gem_cleanup_stolen(dev);
 
@@ -1910,8 +1907,6 @@ int i915_driver_unload(struct drm_device *dev)
 			i915_free_hws(dev);
 	}
 
-	WARN_ON(!list_empty(&dev_priv->vm_list));
-
 	drm_vblank_cleanup(dev);
 
 	intel_teardown_gmbus(dev);
@@ -1921,7 +1916,7 @@ int i915_driver_unload(struct drm_device *dev)
 	destroy_workqueue(dev_priv->wq);
 	pm_qos_remove_request(&dev_priv->pm_qos);
 
-	dev_priv->gtt.base.cleanup(&dev_priv->gtt.base);
+	i915_global_gtt_cleanup(dev);
 
 	intel_uncore_fini(dev);
 	if (dev_priv->regs != NULL)
@@ -1986,6 +1981,9 @@ void i915_driver_preclose(struct drm_device *dev, struct drm_file *file)
 	i915_gem_context_close(dev, file);
 	i915_gem_release(dev, file);
 	mutex_unlock(&dev->struct_mutex);
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET))
+		intel_modeset_preclose(dev, file);
 }
 
 void i915_driver_postclose(struct drm_device *dev, struct drm_file *file)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index e27cdbe9d524..cdd95956811d 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -481,6 +481,10 @@ bool i915_semaphore_is_enabled(struct drm_device *dev)
 	if (i915.semaphores >= 0)
 		return i915.semaphores;
 
+	/* TODO: make semaphores and Execlists play nicely together */
+	if (i915.enable_execlists)
+		return false;
+
 	/* Until we get further testing... */
 	if (IS_GEN8(dev))
 		return false;
@@ -524,6 +528,10 @@ static void intel_suspend_encoders(struct drm_i915_private *dev_priv)
 	drm_modeset_unlock_all(dev);
 }
 
+static int intel_suspend_complete(struct drm_i915_private *dev_priv);
+static int intel_resume_prepare(struct drm_i915_private *dev_priv,
+				bool rpm_resume);
+
 static int i915_drm_freeze(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -591,9 +599,7 @@ static int i915_drm_freeze(struct drm_device *dev)
 	intel_uncore_forcewake_reset(dev, false);
 	intel_opregion_fini(dev);
 
-	console_lock();
-	intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED);
-	console_unlock();
+	intel_fbdev_set_suspend(dev, FBINFO_STATE_SUSPENDED, true);
 
 	dev_priv->suspend_count++;
 
@@ -632,30 +638,20 @@ int i915_suspend(struct drm_device *dev, pm_message_t state)
 	return 0;
 }
 
-void intel_console_resume(struct work_struct *work)
-{
-	struct drm_i915_private *dev_priv =
-		container_of(work, struct drm_i915_private,
-			     console_resume_work);
-	struct drm_device *dev = dev_priv->dev;
-
-	console_lock();
-	intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING);
-	console_unlock();
-}
-
 static int i915_drm_thaw_early(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
 
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-		hsw_disable_pc8(dev_priv);
+	ret = intel_resume_prepare(dev_priv, false);
+	if (ret)
+		DRM_ERROR("Resume prepare failed: %d,Continuing resume\n", ret);
 
 	intel_uncore_early_sanitize(dev, true);
 	intel_uncore_sanitize(dev);
 	intel_power_domains_init_hw(dev_priv);
 
-	return 0;
+	return ret;
 }
 
 static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings)
@@ -714,17 +710,7 @@ static int __i915_drm_thaw(struct drm_device *dev, bool restore_gtt_mappings)
 
 	intel_opregion_init(dev);
 
-	/*
-	 * The console lock can be pretty contented on resume due
-	 * to all the printk activity.  Try to keep it out of the hot
-	 * path of resume if possible.
-	 */
-	if (console_trylock()) {
-		intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING);
-		console_unlock();
-	} else {
-		schedule_work(&dev_priv->console_resume_work);
-	}
+	intel_fbdev_set_suspend(dev, FBINFO_STATE_RUNNING, false);
 
 	mutex_lock(&dev_priv->modeset_restore_lock);
 	dev_priv->modeset_restore = MODESET_DONE;
@@ -941,6 +927,7 @@ static int i915_pm_suspend_late(struct device *dev)
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
 	struct drm_i915_private *dev_priv = drm_dev->dev_private;
+	int ret;
 
 	/*
 	 * We have a suspedn ordering issue with the snd-hda driver also
@@ -954,13 +941,16 @@ static int i915_pm_suspend_late(struct device *dev)
 	if (drm_dev->switch_power_state == DRM_SWITCH_POWER_OFF)
 		return 0;
 
-	if (IS_HASWELL(drm_dev) || IS_BROADWELL(drm_dev))
-		hsw_enable_pc8(dev_priv);
+	ret = intel_suspend_complete(dev_priv);
 
-	pci_disable_device(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
+	if (ret)
+		DRM_ERROR("Suspend complete failed: %d\n", ret);
+	else {
+		pci_disable_device(pdev);
+		pci_set_power_state(pdev, PCI_D3hot);
+	}
 
-	return 0;
+	return ret;
 }
 
 static int i915_pm_resume_early(struct device *dev)
@@ -1016,23 +1006,26 @@ static int i915_pm_poweroff(struct device *dev)
 	return i915_drm_freeze(drm_dev);
 }
 
-static int hsw_runtime_suspend(struct drm_i915_private *dev_priv)
+static int hsw_suspend_complete(struct drm_i915_private *dev_priv)
 {
 	hsw_enable_pc8(dev_priv);
 
 	return 0;
 }
 
-static int snb_runtime_resume(struct drm_i915_private *dev_priv)
+static int snb_resume_prepare(struct drm_i915_private *dev_priv,
+				bool rpm_resume)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	intel_init_pch_refclk(dev);
+	if (rpm_resume)
+		intel_init_pch_refclk(dev);
 
 	return 0;
 }
 
-static int hsw_runtime_resume(struct drm_i915_private *dev_priv)
+static int hsw_resume_prepare(struct drm_i915_private *dev_priv,
+				bool rpm_resume)
 {
 	hsw_disable_pc8(dev_priv);
 
@@ -1328,7 +1321,7 @@ static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
 	I915_WRITE(VLV_GTLC_PW_STATUS, VLV_GTLC_ALLOWWAKEERR);
 }
 
-static int vlv_runtime_suspend(struct drm_i915_private *dev_priv)
+static int vlv_suspend_complete(struct drm_i915_private *dev_priv)
 {
 	u32 mask;
 	int err;
@@ -1368,7 +1361,8 @@ err1:
 	return err;
 }
 
-static int vlv_runtime_resume(struct drm_i915_private *dev_priv)
+static int vlv_resume_prepare(struct drm_i915_private *dev_priv,
+				bool rpm_resume)
 {
 	struct drm_device *dev = dev_priv->dev;
 	int err;
@@ -1393,8 +1387,10 @@ static int vlv_runtime_resume(struct drm_i915_private *dev_priv)
 
 	vlv_check_no_gt_access(dev_priv);
 
-	intel_init_clock_gating(dev);
-	i915_gem_restore_fences(dev);
+	if (rpm_resume) {
+		intel_init_clock_gating(dev);
+		i915_gem_restore_fences(dev);
+	}
 
 	return ret;
 }
@@ -1409,7 +1405,9 @@ static int intel_runtime_suspend(struct device *device)
 	if (WARN_ON_ONCE(!(dev_priv->rps.enabled && intel_enable_rc6(dev))))
 		return -ENODEV;
 
-	WARN_ON(!HAS_RUNTIME_PM(dev));
+	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev)))
+		return -ENODEV;
+
 	assert_force_wake_inactive(dev_priv);
 
 	DRM_DEBUG_KMS("Suspending device\n");
@@ -1446,17 +1444,7 @@ static int intel_runtime_suspend(struct device *device)
 	cancel_work_sync(&dev_priv->rps.work);
 	intel_runtime_pm_disable_interrupts(dev);
 
-	if (IS_GEN6(dev)) {
-		ret = 0;
-	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
-		ret = hsw_runtime_suspend(dev_priv);
-	} else if (IS_VALLEYVIEW(dev)) {
-		ret = vlv_runtime_suspend(dev_priv);
-	} else {
-		ret = -ENODEV;
-		WARN_ON(1);
-	}
-
+	ret = intel_suspend_complete(dev_priv);
 	if (ret) {
 		DRM_ERROR("Runtime suspend failed, disabling it (%d)\n", ret);
 		intel_runtime_pm_restore_interrupts(dev);
@@ -1487,24 +1475,15 @@ static int intel_runtime_resume(struct device *device)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
-	WARN_ON(!HAS_RUNTIME_PM(dev));
+	if (WARN_ON_ONCE(!HAS_RUNTIME_PM(dev)))
+		return -ENODEV;
 
 	DRM_DEBUG_KMS("Resuming device\n");
 
 	intel_opregion_notify_adapter(dev, PCI_D0);
 	dev_priv->pm.suspended = false;
 
-	if (IS_GEN6(dev)) {
-		ret = snb_runtime_resume(dev_priv);
-	} else if (IS_HASWELL(dev) || IS_BROADWELL(dev)) {
-		ret = hsw_runtime_resume(dev_priv);
-	} else if (IS_VALLEYVIEW(dev)) {
-		ret = vlv_runtime_resume(dev_priv);
-	} else {
-		WARN_ON(1);
-		ret = -ENODEV;
-	}
-
+	ret = intel_resume_prepare(dev_priv, true);
 	/*
 	 * No point of rolling back things in case of an error, as the best
 	 * we can do is to hope that things will still work (and disable RPM).
@@ -1523,6 +1502,48 @@ static int intel_runtime_resume(struct device *device)
 	return ret;
 }
 
+/*
+ * This function implements common functionality of runtime and system
+ * suspend sequence.
+ */
+static int intel_suspend_complete(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int ret;
+
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+		ret = hsw_suspend_complete(dev_priv);
+	else if (IS_VALLEYVIEW(dev))
+		ret = vlv_suspend_complete(dev_priv);
+	else
+		ret = 0;
+
+	return ret;
+}
+
+/*
+ * This function implements common functionality of runtime and system
+ * resume sequence. Variable rpm_resume used for implementing different
+ * code paths.
+ */
+static int intel_resume_prepare(struct drm_i915_private *dev_priv,
+				bool rpm_resume)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int ret;
+
+	if (IS_GEN6(dev))
+		ret = snb_resume_prepare(dev_priv, rpm_resume);
+	else if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+		ret = hsw_resume_prepare(dev_priv, rpm_resume);
+	else if (IS_VALLEYVIEW(dev))
+		ret = vlv_resume_prepare(dev_priv, rpm_resume);
+	else
+		ret = 0;
+
+	return ret;
+}
+
 static const struct dev_pm_ops i915_pm_ops = {
 	.suspend = i915_pm_suspend,
 	.suspend_late = i915_pm_suspend_late,
@@ -1572,6 +1593,7 @@ static struct drm_driver driver = {
 	.lastclose = i915_driver_lastclose,
 	.preclose = i915_driver_preclose,
 	.postclose = i915_driver_postclose,
+	.set_busid = drm_pci_set_busid,
 
 	/* Used in place of i915_pm_ops for non-DRIVER_MODESET */
 	.suspend = i915_suspend,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3524306d8cfb..357913c4f2c6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -35,11 +35,13 @@
 #include "i915_reg.h"
 #include "intel_bios.h"
 #include "intel_ringbuffer.h"
+#include "intel_lrc.h"
 #include "i915_gem_gtt.h"
 #include <linux/io-mapping.h>
 #include <linux/i2c.h>
 #include <linux/i2c-algo-bit.h>
 #include <drm/intel-gtt.h>
+#include <drm/drm_legacy.h> /* for struct drm_dma_handle */
 #include <linux/backlight.h>
 #include <linux/hashtable.h>
 #include <linux/intel-iommu.h>
@@ -53,7 +55,7 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20140725"
+#define DRIVER_DATE		"20140822"
 
 enum pipe {
 	INVALID_PIPE = -1,
@@ -171,6 +173,11 @@ enum hpd_pin {
 #define for_each_intel_crtc(dev, intel_crtc) \
 	list_for_each_entry(intel_crtc, &dev->mode_config.crtc_list, base.head)
 
+#define for_each_intel_encoder(dev, intel_encoder)		\
+	list_for_each_entry(intel_encoder,			\
+			    &(dev)->mode_config.encoder_list,	\
+			    base.head)
+
 #define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \
 	list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \
 		if ((intel_encoder)->base.crtc == (__crtc))
@@ -198,10 +205,13 @@ enum intel_dpll_id {
 #define I915_NUM_PLLS 2
 
 struct intel_dpll_hw_state {
+	/* i9xx, pch plls */
 	uint32_t dpll;
 	uint32_t dpll_md;
 	uint32_t fp0;
 	uint32_t fp1;
+
+	/* hsw, bdw */
 	uint32_t wrpll;
 };
 
@@ -277,8 +287,10 @@ struct intel_opregion {
 struct intel_overlay;
 struct intel_overlay_error_state;
 
+struct drm_local_map;
+
 struct drm_i915_master_private {
-	drm_local_map_t *sarea;
+	struct drm_local_map *sarea;
 	struct _drm_i915_sarea *sarea_priv;
 };
 #define I915_FENCE_REG_NONE -1
@@ -388,6 +400,7 @@ struct drm_i915_error_state {
 		pid_t pid;
 		char comm[TASK_COMM_LEN];
 	} ring[I915_NUM_RINGS];
+
 	struct drm_i915_error_buffer {
 		u32 size;
 		u32 name;
@@ -406,6 +419,7 @@ struct drm_i915_error_state {
 	} **active_bo, **pinned_bo;
 
 	u32 *active_bo_count, *pinned_bo_count;
+	u32 vm_count;
 };
 
 struct intel_connector;
@@ -551,6 +565,7 @@ struct intel_uncore {
 
 struct intel_device_info {
 	u32 display_mmio_offset;
+	u16 device_id;
 	u8 num_pipes:3;
 	u8 num_sprites[I915_MAX_PIPES];
 	u8 gen;
@@ -615,13 +630,20 @@ struct intel_context {
 	uint8_t remap_slice;
 	struct drm_i915_file_private *file_priv;
 	struct i915_ctx_hang_stats hang_stats;
-	struct i915_address_space *vm;
+	struct i915_hw_ppgtt *ppgtt;
 
+	/* Legacy ring buffer submission */
 	struct {
 		struct drm_i915_gem_object *rcs_state;
 		bool initialized;
 	} legacy_hw_ctx;
 
+	/* Execlists */
+	struct {
+		struct drm_i915_gem_object *state;
+		struct intel_ringbuffer *ringbuf;
+	} engine[I915_NUM_RINGS];
+
 	struct list_head link;
 };
 
@@ -635,6 +657,8 @@ struct i915_fbc {
 	struct drm_mm_node compressed_fb;
 	struct drm_mm_node *compressed_llb;
 
+	bool false_color;
+
 	struct intel_fbc_work {
 		struct delayed_work work;
 		struct drm_crtc *crtc;
@@ -1228,6 +1252,12 @@ enum modeset_restore {
 };
 
 struct ddi_vbt_port_info {
+	/*
+	 * This is an index in the HDMI/DVI DDI buffer translation table.
+	 * The special value HDMI_LEVEL_SHIFT_UNKNOWN means the VBT didn't
+	 * populate this field.
+	 */
+#define HDMI_LEVEL_SHIFT_UNKNOWN	0xff
 	uint8_t hdmi_level_shift;
 
 	uint8_t supports_dvi:1;
@@ -1421,7 +1451,7 @@ struct drm_i915_private {
 	struct drm_i915_gem_object *semaphore_obj;
 	uint32_t last_seqno, next_seqno;
 
-	drm_dma_handle_t *status_page_dmah;
+	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
 
 	/* protects the irq masks */
@@ -1561,14 +1591,9 @@ struct drm_i915_private {
 #ifdef CONFIG_DRM_I915_FBDEV
 	/* list of fbdev register on this device */
 	struct intel_fbdev *fbdev;
+	struct work_struct fbdev_suspend_work;
 #endif
 
-	/*
-	 * The console may be contended at resume, but we don't
-	 * want it to block on it.
-	 */
-	struct work_struct console_resume_work;
-
 	struct drm_property *broadcast_rgb_property;
 	struct drm_property *force_audio_property;
 
@@ -1620,6 +1645,20 @@ struct drm_i915_private {
 	/* Old ums support infrastructure, same warning applies. */
 	struct i915_ums_state ums;
 
+	/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
+	struct {
+		int (*do_execbuf)(struct drm_device *dev, struct drm_file *file,
+				  struct intel_engine_cs *ring,
+				  struct intel_context *ctx,
+				  struct drm_i915_gem_execbuffer2 *args,
+				  struct list_head *vmas,
+				  struct drm_i915_gem_object *batch_obj,
+				  u64 exec_start, u32 flags);
+		int (*init_rings)(struct drm_device *dev);
+		void (*cleanup_ring)(struct intel_engine_cs *ring);
+		void (*stop_ring)(struct intel_engine_cs *ring);
+	} gt;
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -1761,13 +1800,6 @@ struct drm_i915_gem_object {
 	 * Only honoured if hardware has relevant pte bit
 	 */
 	unsigned long gt_ro:1;
-
-	/*
-	 * Is the GPU currently using a fence to access this buffer,
-	 */
-	unsigned int pending_fenced_gpu_access:1;
-	unsigned int fenced_gpu_access:1;
-
 	unsigned int cache_level:3;
 
 	unsigned int has_aliasing_ppgtt_mapping:1;
@@ -1805,7 +1837,7 @@ struct drm_i915_gem_object {
 	struct drm_file *pin_filp;
 
 	/** for phy allocated objects */
-	drm_dma_handle_t *phys_handle;
+	struct drm_dma_handle *phys_handle;
 
 	union {
 		struct i915_gem_userptr {
@@ -1971,51 +2003,63 @@ struct drm_i915_cmd_table {
 	int count;
 };
 
-#define INTEL_INFO(dev)	(&to_i915(dev)->info)
-
-#define IS_I830(dev)		((dev)->pdev->device == 0x3577)
-#define IS_845G(dev)		((dev)->pdev->device == 0x2562)
+/* Note that the (struct drm_i915_private *) cast is just to shut up gcc. */
+#define __I915__(p) ({ \
+	struct drm_i915_private *__p; \
+	if (__builtin_types_compatible_p(typeof(*p), struct drm_i915_private)) \
+		__p = (struct drm_i915_private *)p; \
+	else if (__builtin_types_compatible_p(typeof(*p), struct drm_device)) \
+		__p = to_i915((struct drm_device *)p); \
+	else \
+		BUILD_BUG(); \
+	__p; \
+})
+#define INTEL_INFO(p) 	(&__I915__(p)->info)
+#define INTEL_DEVID(p)	(INTEL_INFO(p)->device_id)
+
+#define IS_I830(dev)		(INTEL_DEVID(dev) == 0x3577)
+#define IS_845G(dev)		(INTEL_DEVID(dev) == 0x2562)
 #define IS_I85X(dev)		(INTEL_INFO(dev)->is_i85x)
-#define IS_I865G(dev)		((dev)->pdev->device == 0x2572)
+#define IS_I865G(dev)		(INTEL_DEVID(dev) == 0x2572)
 #define IS_I915G(dev)		(INTEL_INFO(dev)->is_i915g)
-#define IS_I915GM(dev)		((dev)->pdev->device == 0x2592)
-#define IS_I945G(dev)		((dev)->pdev->device == 0x2772)
+#define IS_I915GM(dev)		(INTEL_DEVID(dev) == 0x2592)
+#define IS_I945G(dev)		(INTEL_DEVID(dev) == 0x2772)
 #define IS_I945GM(dev)		(INTEL_INFO(dev)->is_i945gm)
 #define IS_BROADWATER(dev)	(INTEL_INFO(dev)->is_broadwater)
 #define IS_CRESTLINE(dev)	(INTEL_INFO(dev)->is_crestline)
-#define IS_GM45(dev)		((dev)->pdev->device == 0x2A42)
+#define IS_GM45(dev)		(INTEL_DEVID(dev) == 0x2A42)
 #define IS_G4X(dev)		(INTEL_INFO(dev)->is_g4x)
-#define IS_PINEVIEW_G(dev)	((dev)->pdev->device == 0xa001)
-#define IS_PINEVIEW_M(dev)	((dev)->pdev->device == 0xa011)
+#define IS_PINEVIEW_G(dev)	(INTEL_DEVID(dev) == 0xa001)
+#define IS_PINEVIEW_M(dev)	(INTEL_DEVID(dev) == 0xa011)
 #define IS_PINEVIEW(dev)	(INTEL_INFO(dev)->is_pineview)
 #define IS_G33(dev)		(INTEL_INFO(dev)->is_g33)
-#define IS_IRONLAKE_M(dev)	((dev)->pdev->device == 0x0046)
+#define IS_IRONLAKE_M(dev)	(INTEL_DEVID(dev) == 0x0046)
 #define IS_IVYBRIDGE(dev)	(INTEL_INFO(dev)->is_ivybridge)
-#define IS_IVB_GT1(dev)		((dev)->pdev->device == 0x0156 || \
-				 (dev)->pdev->device == 0x0152 || \
-				 (dev)->pdev->device == 0x015a)
-#define IS_SNB_GT1(dev)		((dev)->pdev->device == 0x0102 || \
-				 (dev)->pdev->device == 0x0106 || \
-				 (dev)->pdev->device == 0x010A)
+#define IS_IVB_GT1(dev)		(INTEL_DEVID(dev) == 0x0156 || \
+				 INTEL_DEVID(dev) == 0x0152 || \
+				 INTEL_DEVID(dev) == 0x015a)
+#define IS_SNB_GT1(dev)		(INTEL_DEVID(dev) == 0x0102 || \
+				 INTEL_DEVID(dev) == 0x0106 || \
+				 INTEL_DEVID(dev) == 0x010A)
 #define IS_VALLEYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview)
 #define IS_CHERRYVIEW(dev)	(INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_HASWELL(dev)	(INTEL_INFO(dev)->is_haswell)
 #define IS_BROADWELL(dev)	(!INTEL_INFO(dev)->is_valleyview && IS_GEN8(dev))
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 #define IS_HSW_EARLY_SDV(dev)	(IS_HASWELL(dev) && \
-				 ((dev)->pdev->device & 0xFF00) == 0x0C00)
+				 (INTEL_DEVID(dev) & 0xFF00) == 0x0C00)
 #define IS_BDW_ULT(dev)		(IS_BROADWELL(dev) && \
-				 (((dev)->pdev->device & 0xf) == 0x2  || \
-				 ((dev)->pdev->device & 0xf) == 0x6 || \
-				 ((dev)->pdev->device & 0xf) == 0xe))
+				 ((INTEL_DEVID(dev) & 0xf) == 0x2  || \
+				 (INTEL_DEVID(dev) & 0xf) == 0x6 || \
+				 (INTEL_DEVID(dev) & 0xf) == 0xe))
 #define IS_HSW_ULT(dev)		(IS_HASWELL(dev) && \
-				 ((dev)->pdev->device & 0xFF00) == 0x0A00)
+				 (INTEL_DEVID(dev) & 0xFF00) == 0x0A00)
 #define IS_ULT(dev)		(IS_HSW_ULT(dev) || IS_BDW_ULT(dev))
 #define IS_HSW_GT3(dev)		(IS_HASWELL(dev) && \
-				 ((dev)->pdev->device & 0x00F0) == 0x0020)
+				 (INTEL_DEVID(dev) & 0x00F0) == 0x0020)
 /* ULX machines are also considered ULT. */
-#define IS_HSW_ULX(dev)		((dev)->pdev->device == 0x0A0E || \
-				 (dev)->pdev->device == 0x0A1E)
+#define IS_HSW_ULX(dev)		(INTEL_DEVID(dev) == 0x0A0E || \
+				 INTEL_DEVID(dev) == 0x0A1E)
 #define IS_PRELIMINARY_HW(intel_info) ((intel_info)->is_preliminary)
 
 /*
@@ -2047,10 +2091,11 @@ struct drm_i915_cmd_table {
 #define I915_NEED_GFX_HWS(dev)	(INTEL_INFO(dev)->need_gfx_hws)
 
 #define HAS_HW_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 6)
+#define HAS_LOGICAL_RING_CONTEXTS(dev)	(INTEL_INFO(dev)->gen >= 8)
 #define HAS_ALIASING_PPGTT(dev)	(INTEL_INFO(dev)->gen >= 6)
 #define HAS_PPGTT(dev)		(INTEL_INFO(dev)->gen >= 7 && !IS_GEN8(dev))
-#define USES_PPGTT(dev)		intel_enable_ppgtt(dev, false)
-#define USES_FULL_PPGTT(dev)	intel_enable_ppgtt(dev, true)
+#define USES_PPGTT(dev)		(i915.enable_ppgtt)
+#define USES_FULL_PPGTT(dev)	(i915.enable_ppgtt == 2)
 
 #define HAS_OVERLAY(dev)		(INTEL_INFO(dev)->has_overlay)
 #define OVERLAY_NEEDS_PHYSICAL(dev)	(INTEL_INFO(dev)->overlay_needs_physical)
@@ -2134,6 +2179,7 @@ struct i915_params {
 	int enable_rc6;
 	int enable_fbc;
 	int enable_ppgtt;
+	int enable_execlists;
 	int enable_psr;
 	unsigned int preliminary_hw_support;
 	int disable_power_well;
@@ -2180,8 +2226,6 @@ extern void i915_update_gfx_val(struct drm_i915_private *dev_priv);
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool on);
 void intel_hpd_cancel_work(struct drm_i915_private *dev_priv);
 
-extern void intel_console_resume(struct work_struct *work);
-
 /* i915_irq.c */
 void i915_queue_hangcheck(struct drm_device *dev);
 __printf(3, 4)
@@ -2229,6 +2273,20 @@ int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
 int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
+void i915_gem_execbuffer_move_to_active(struct list_head *vmas,
+					struct intel_engine_cs *ring);
+void i915_gem_execbuffer_retire_commands(struct drm_device *dev,
+					 struct drm_file *file,
+					 struct intel_engine_cs *ring,
+					 struct drm_i915_gem_object *obj);
+int i915_gem_ringbuffer_submission(struct drm_device *dev,
+				   struct drm_file *file,
+				   struct intel_engine_cs *ring,
+				   struct intel_context *ctx,
+				   struct drm_i915_gem_execbuffer2 *args,
+				   struct list_head *vmas,
+				   struct drm_i915_gem_object *batch_obj,
+				   u64 exec_start, u32 flags);
 int i915_gem_execbuffer(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int i915_gem_execbuffer2(struct drm_device *dev, void *data,
@@ -2381,6 +2439,7 @@ void i915_gem_reset(struct drm_device *dev);
 bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, bool force);
 int __must_check i915_gem_object_finish_gpu(struct drm_i915_gem_object *obj);
 int __must_check i915_gem_init(struct drm_device *dev);
+int i915_gem_init_rings(struct drm_device *dev);
 int __must_check i915_gem_init_hw(struct drm_device *dev);
 int i915_gem_l3_remap(struct intel_engine_cs *ring, int slice);
 void i915_gem_init_swizzling(struct drm_device *dev);
@@ -2451,7 +2510,7 @@ static inline bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) {
 }
 
 /* Some GGTT VM helpers */
-#define obj_to_ggtt(obj) \
+#define i915_obj_to_ggtt(obj) \
 	(&((struct drm_i915_private *)(obj)->base.dev->dev_private)->gtt.base)
 static inline bool i915_is_ggtt(struct i915_address_space *vm)
 {
@@ -2460,21 +2519,30 @@ static inline bool i915_is_ggtt(struct i915_address_space *vm)
 	return vm == ggtt;
 }
 
+static inline struct i915_hw_ppgtt *
+i915_vm_to_ppgtt(struct i915_address_space *vm)
+{
+	WARN_ON(i915_is_ggtt(vm));
+
+	return container_of(vm, struct i915_hw_ppgtt, base);
+}
+
+
 static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_bound(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_bound(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline unsigned long
 i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_offset(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_offset(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline unsigned long
 i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
 {
-	return i915_gem_obj_size(obj, obj_to_ggtt(obj));
+	return i915_gem_obj_size(obj, i915_obj_to_ggtt(obj));
 }
 
 static inline int __must_check
@@ -2482,7 +2550,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
 		      uint32_t alignment,
 		      unsigned flags)
 {
-	return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
+	return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
+				   alignment, flags | PIN_GLOBAL);
 }
 
 static inline int
@@ -2494,7 +2563,6 @@ i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
 void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
 
 /* i915_gem_context.c */
-#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
 int __must_check i915_gem_context_init(struct drm_device *dev);
 void i915_gem_context_fini(struct drm_device *dev);
 void i915_gem_context_reset(struct drm_device *dev);
@@ -2506,6 +2574,8 @@ int i915_switch_context(struct intel_engine_cs *ring,
 struct intel_context *
 i915_gem_context_get(struct drm_i915_file_private *file_priv, u32 id);
 void i915_gem_context_free(struct kref *ctx_ref);
+struct drm_i915_gem_object *
+i915_gem_alloc_context_obj(struct drm_device *dev, size_t size);
 static inline void i915_gem_context_reference(struct intel_context *ctx)
 {
 	kref_get(&ctx->ref);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ad55b06a3cb1..1133bb3b2766 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2163,8 +2163,6 @@ static void
 i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 			       struct intel_engine_cs *ring)
 {
-	struct drm_device *dev = obj->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 seqno = intel_ring_get_seqno(ring);
 
 	BUG_ON(ring == NULL);
@@ -2183,19 +2181,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
 	list_move_tail(&obj->ring_list, &ring->active_list);
 
 	obj->last_read_seqno = seqno;
-
-	if (obj->fenced_gpu_access) {
-		obj->last_fenced_seqno = seqno;
-
-		/* Bump MRU to take account of the delayed flush */
-		if (obj->fence_reg != I915_FENCE_REG_NONE) {
-			struct drm_i915_fence_reg *reg;
-
-			reg = &dev_priv->fence_regs[obj->fence_reg];
-			list_move_tail(&reg->lru_list,
-				       &dev_priv->mm.fence_list);
-		}
-	}
 }
 
 void i915_vma_move_to_active(struct i915_vma *vma,
@@ -2231,7 +2216,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
 	obj->base.write_domain = 0;
 
 	obj->last_fenced_seqno = 0;
-	obj->fenced_gpu_access = false;
 
 	obj->active = 0;
 	drm_gem_object_unreference(&obj->base);
@@ -2329,10 +2313,21 @@ int __i915_add_request(struct intel_engine_cs *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct drm_i915_gem_request *request;
+	struct intel_ringbuffer *ringbuf;
 	u32 request_ring_position, request_start;
 	int ret;
 
-	request_start = intel_ring_get_tail(ring->buffer);
+	request = ring->preallocated_lazy_request;
+	if (WARN_ON(request == NULL))
+		return -ENOMEM;
+
+	if (i915.enable_execlists) {
+		struct intel_context *ctx = request->ctx;
+		ringbuf = ctx->engine[ring->id].ringbuf;
+	} else
+		ringbuf = ring->buffer;
+
+	request_start = intel_ring_get_tail(ringbuf);
 	/*
 	 * Emit any outstanding flushes - execbuf can fail to emit the flush
 	 * after having emitted the batchbuffer command. Hence we need to fix
@@ -2340,24 +2335,32 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 * is that the flush _must_ happen before the next request, no matter
 	 * what.
 	 */
-	ret = intel_ring_flush_all_caches(ring);
-	if (ret)
-		return ret;
-
-	request = ring->preallocated_lazy_request;
-	if (WARN_ON(request == NULL))
-		return -ENOMEM;
+	if (i915.enable_execlists) {
+		ret = logical_ring_flush_all_caches(ringbuf);
+		if (ret)
+			return ret;
+	} else {
+		ret = intel_ring_flush_all_caches(ring);
+		if (ret)
+			return ret;
+	}
 
 	/* Record the position of the start of the request so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
 	 * position of the head.
 	 */
-	request_ring_position = intel_ring_get_tail(ring->buffer);
+	request_ring_position = intel_ring_get_tail(ringbuf);
 
-	ret = ring->add_request(ring);
-	if (ret)
-		return ret;
+	if (i915.enable_execlists) {
+		ret = ring->emit_request(ringbuf);
+		if (ret)
+			return ret;
+	} else {
+		ret = ring->add_request(ring);
+		if (ret)
+			return ret;
+	}
 
 	request->seqno = intel_ring_get_seqno(ring);
 	request->ring = ring;
@@ -2372,12 +2375,14 @@ int __i915_add_request(struct intel_engine_cs *ring,
 	 */
 	request->batch_obj = obj;
 
-	/* Hold a reference to the current context so that we can inspect
-	 * it later in case a hangcheck error event fires.
-	 */
-	request->ctx = ring->last_context;
-	if (request->ctx)
-		i915_gem_context_reference(request->ctx);
+	if (!i915.enable_execlists) {
+		/* Hold a reference to the current context so that we can inspect
+		 * it later in case a hangcheck error event fires.
+		 */
+		request->ctx = ring->last_context;
+		if (request->ctx)
+			i915_gem_context_reference(request->ctx);
+	}
 
 	request->emitted_jiffies = jiffies;
 	list_add_tail(&request->list, &ring->request_list);
@@ -2548,6 +2553,18 @@ static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
 		i915_gem_free_request(request);
 	}
 
+	while (!list_empty(&ring->execlist_queue)) {
+		struct intel_ctx_submit_request *submit_req;
+
+		submit_req = list_first_entry(&ring->execlist_queue,
+				struct intel_ctx_submit_request,
+				execlist_link);
+		list_del(&submit_req->execlist_link);
+		intel_runtime_pm_put(dev_priv);
+		i915_gem_context_unreference(submit_req->ctx);
+		kfree(submit_req);
+	}
+
 	/* These may not have been flush before the reset, do so now */
 	kfree(ring->preallocated_lazy_request);
 	ring->preallocated_lazy_request = NULL;
@@ -2632,6 +2649,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 
 	while (!list_empty(&ring->request_list)) {
 		struct drm_i915_gem_request *request;
+		struct intel_ringbuffer *ringbuf;
 
 		request = list_first_entry(&ring->request_list,
 					   struct drm_i915_gem_request,
@@ -2641,12 +2659,24 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
 			break;
 
 		trace_i915_gem_request_retire(ring, request->seqno);
+
+		/* This is one of the few common intersection points
+		 * between legacy ringbuffer submission and execlists:
+		 * we need to tell them apart in order to find the correct
+		 * ringbuffer to which the request belongs to.
+		 */
+		if (i915.enable_execlists) {
+			struct intel_context *ctx = request->ctx;
+			ringbuf = ctx->engine[ring->id].ringbuf;
+		} else
+			ringbuf = ring->buffer;
+
 		/* We know the GPU must have read the request to have
 		 * sent us the seqno + interrupt, so use the position
 		 * of tail of the request to update the last known position
 		 * of the GPU head.
 		 */
-		ring->buffer->last_retired_head = request->tail;
+		ringbuf->last_retired_head = request->tail;
 
 		i915_gem_free_request(request);
 	}
@@ -2922,9 +2952,8 @@ int i915_vma_unbind(struct i915_vma *vma)
 	vma->unbind_vma(vma);
 
 	list_del_init(&vma->mm_list);
-	/* Avoid an unnecessary call to unbind on rebind. */
 	if (i915_is_ggtt(vma->vm))
-		obj->map_and_fenceable = true;
+		obj->map_and_fenceable = false;
 
 	drm_mm_remove_node(&vma->node);
 	i915_gem_vma_destroy(vma);
@@ -3169,7 +3198,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
 		obj->last_fenced_seqno = 0;
 	}
 
-	obj->fenced_gpu_access = false;
 	return 0;
 }
 
@@ -3276,6 +3304,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
 			return 0;
 		}
 	} else if (enable) {
+		if (WARN_ON(!obj->map_and_fenceable))
+			return -EINVAL;
+
 		reg = i915_find_fence_reg(dev);
 		if (IS_ERR(reg))
 			return PTR_ERR(reg);
@@ -3586,11 +3617,12 @@ int
 i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 {
 	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+	struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
 	uint32_t old_write_domain, old_read_domains;
 	int ret;
 
 	/* Not valid to be called on unbound objects. */
-	if (!i915_gem_obj_bound_any(obj))
+	if (vma == NULL)
 		return -EINVAL;
 
 	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3632,13 +3664,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
 					    old_write_domain);
 
 	/* And bump the LRU for this access */
-	if (i915_gem_object_is_inactive(obj)) {
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
-		if (vma)
-			list_move_tail(&vma->mm_list,
-				       &dev_priv->gtt.base.inactive_list);
-
-	}
+	if (i915_gem_object_is_inactive(obj))
+		list_move_tail(&vma->mm_list,
+			       &dev_priv->gtt.base.inactive_list);
 
 	return 0;
 }
@@ -3802,9 +3830,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	if (list_empty(&obj->vma_list))
-		return false;
-
 	vma = i915_gem_obj_to_ggtt(obj);
 	if (!vma)
 		return false;
@@ -4331,8 +4356,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	obj->madv = I915_MADV_WILLNEED;
-	/* Avoid an unnecessary call to unbind on the first bind. */
-	obj->map_and_fenceable = true;
 
 	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
 }
@@ -4493,12 +4516,18 @@ struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
 
 void i915_gem_vma_destroy(struct i915_vma *vma)
 {
+	struct i915_address_space *vm = NULL;
 	WARN_ON(vma->node.allocated);
 
 	/* Keep the vma as a placeholder in the execbuffer reservation lists */
 	if (!list_empty(&vma->exec_list))
 		return;
 
+	vm = vma->vm;
+
+	if (!i915_is_ggtt(vm))
+		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
+
 	list_del(&vma->vma_link);
 
 	kfree(vma);
@@ -4512,7 +4541,7 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
-		intel_stop_ring_buffer(ring);
+		dev_priv->gt.stop_ring(ring);
 }
 
 int
@@ -4629,7 +4658,7 @@ intel_enable_blt(struct drm_device *dev)
 	return true;
 }
 
-static int i915_gem_init_rings(struct drm_device *dev)
+int i915_gem_init_rings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
@@ -4712,7 +4741,7 @@ i915_gem_init_hw(struct drm_device *dev)
 
 	i915_gem_init_swizzling(dev);
 
-	ret = i915_gem_init_rings(dev);
+	ret = dev_priv->gt.init_rings(dev);
 	if (ret)
 		return ret;
 
@@ -4730,6 +4759,14 @@ i915_gem_init_hw(struct drm_device *dev)
 	if (ret && ret != -EIO) {
 		DRM_ERROR("Context enable failed %d\n", ret);
 		i915_gem_cleanup_ringbuffer(dev);
+
+		return ret;
+	}
+
+	ret = i915_ppgtt_init_hw(dev);
+	if (ret && ret != -EIO) {
+		DRM_ERROR("PPGTT enable failed %d\n", ret);
+		i915_gem_cleanup_ringbuffer(dev);
 	}
 
 	return ret;
@@ -4740,6 +4777,9 @@ int i915_gem_init(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int ret;
 
+	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
+			i915.enable_execlists);
+
 	mutex_lock(&dev->struct_mutex);
 
 	if (IS_VALLEYVIEW(dev)) {
@@ -4750,7 +4790,24 @@ int i915_gem_init(struct drm_device *dev)
 			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
 	}
 
-	i915_gem_init_userptr(dev);
+	if (!i915.enable_execlists) {
+		dev_priv->gt.do_execbuf = i915_gem_ringbuffer_submission;
+		dev_priv->gt.init_rings = i915_gem_init_rings;
+		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
+		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
+	} else {
+		dev_priv->gt.do_execbuf = intel_execlists_submission;
+		dev_priv->gt.init_rings = intel_logical_rings_init;
+		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
+		dev_priv->gt.stop_ring = intel_logical_ring_stop;
+	}
+
+	ret = i915_gem_init_userptr(dev);
+	if (ret) {
+		mutex_unlock(&dev->struct_mutex);
+		return ret;
+	}
+
 	i915_gem_init_global_gtt(dev);
 
 	ret = i915_gem_context_init(dev);
@@ -4785,7 +4842,7 @@ i915_gem_cleanup_ringbuffer(struct drm_device *dev)
 	int i;
 
 	for_each_ring(ring, dev_priv, i)
-		intel_cleanup_ring_buffer(ring);
+		dev_priv->gt.cleanup_ring(ring);
 }
 
 int
@@ -5097,9 +5154,7 @@ unsigned long i915_gem_obj_offset(struct drm_i915_gem_object *o,
 	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
 	struct i915_vma *vma;
 
-	if (!dev_priv->mm.aliasing_ppgtt ||
-	    vm == &dev_priv->mm.aliasing_ppgtt->base)
-		vm = &dev_priv->gtt.base;
+	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 
 	list_for_each_entry(vma, &o->vma_list, vma_link) {
 		if (vma->vm == vm)
@@ -5140,9 +5195,7 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
 	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
 	struct i915_vma *vma;
 
-	if (!dev_priv->mm.aliasing_ppgtt ||
-	    vm == &dev_priv->mm.aliasing_ppgtt->base)
-		vm = &dev_priv->gtt.base;
+	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
 
 	BUG_ON(list_empty(&o->vma_list));
 
@@ -5247,14 +5300,8 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
 
-	/* This WARN has probably outlived its usefulness (callers already
-	 * WARN if they don't find the GGTT vma they expect). When removing,
-	 * remember to remove the pre-check in is_pin_display() as well */
-	if (WARN_ON(list_empty(&obj->vma_list)))
-		return NULL;
-
 	vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
-	if (vma->vm != obj_to_ggtt(obj))
+	if (vma->vm != i915_obj_to_ggtt(obj))
 		return NULL;
 
 	return vma;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 3b99390e467a..9683e62ec61a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -96,50 +96,6 @@
 #define GEN6_CONTEXT_ALIGN (64<<10)
 #define GEN7_CONTEXT_ALIGN 4096
 
-static void do_ppgtt_cleanup(struct i915_hw_ppgtt *ppgtt)
-{
-	struct drm_device *dev = ppgtt->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct i915_address_space *vm = &ppgtt->base;
-
-	if (ppgtt == dev_priv->mm.aliasing_ppgtt ||
-	    (list_empty(&vm->active_list) && list_empty(&vm->inactive_list))) {
-		ppgtt->base.cleanup(&ppgtt->base);
-		return;
-	}
-
-	/*
-	 * Make sure vmas are unbound before we take down the drm_mm
-	 *
-	 * FIXME: Proper refcounting should take care of this, this shouldn't be
-	 * needed at all.
-	 */
-	if (!list_empty(&vm->active_list)) {
-		struct i915_vma *vma;
-
-		list_for_each_entry(vma, &vm->active_list, mm_list)
-			if (WARN_ON(list_empty(&vma->vma_link) ||
-				    list_is_singular(&vma->vma_link)))
-				break;
-
-		i915_gem_evict_vm(&ppgtt->base, true);
-	} else {
-		i915_gem_retire_requests(dev);
-		i915_gem_evict_vm(&ppgtt->base, false);
-	}
-
-	ppgtt->base.cleanup(&ppgtt->base);
-}
-
-static void ppgtt_release(struct kref *kref)
-{
-	struct i915_hw_ppgtt *ppgtt =
-		container_of(kref, struct i915_hw_ppgtt, ref);
-
-	do_ppgtt_cleanup(ppgtt);
-	kfree(ppgtt);
-}
-
 static size_t get_context_alignment(struct drm_device *dev)
 {
 	if (IS_GEN6(dev))
@@ -179,24 +135,20 @@ static int get_context_size(struct drm_device *dev)
 void i915_gem_context_free(struct kref *ctx_ref)
 {
 	struct intel_context *ctx = container_of(ctx_ref,
-						   typeof(*ctx), ref);
-	struct i915_hw_ppgtt *ppgtt = NULL;
+						 typeof(*ctx), ref);
 
-	if (ctx->legacy_hw_ctx.rcs_state) {
-		/* We refcount even the aliasing PPGTT to keep the code symmetric */
-		if (USES_PPGTT(ctx->legacy_hw_ctx.rcs_state->base.dev))
-			ppgtt = ctx_to_ppgtt(ctx);
-	}
+	if (i915.enable_execlists)
+		intel_lr_context_free(ctx);
+
+	i915_ppgtt_put(ctx->ppgtt);
 
-	if (ppgtt)
-		kref_put(&ppgtt->ref, ppgtt_release);
 	if (ctx->legacy_hw_ctx.rcs_state)
 		drm_gem_object_unreference(&ctx->legacy_hw_ctx.rcs_state->base);
 	list_del(&ctx->link);
 	kfree(ctx);
 }
 
-static struct drm_i915_gem_object *
+struct drm_i915_gem_object *
 i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 {
 	struct drm_i915_gem_object *obj;
@@ -226,29 +178,9 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size)
 	return obj;
 }
 
-static struct i915_hw_ppgtt *
-create_vm_for_ctx(struct drm_device *dev, struct intel_context *ctx)
-{
-	struct i915_hw_ppgtt *ppgtt;
-	int ret;
-
-	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
-	if (!ppgtt)
-		return ERR_PTR(-ENOMEM);
-
-	ret = i915_gem_init_ppgtt(dev, ppgtt);
-	if (ret) {
-		kfree(ppgtt);
-		return ERR_PTR(ret);
-	}
-
-	ppgtt->ctx = ctx;
-	return ppgtt;
-}
-
 static struct intel_context *
 __create_hw_context(struct drm_device *dev,
-		  struct drm_i915_file_private *file_priv)
+		    struct drm_i915_file_private *file_priv)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_context *ctx;
@@ -301,11 +233,9 @@ err_out:
  */
 static struct intel_context *
 i915_gem_create_context(struct drm_device *dev,
-			struct drm_i915_file_private *file_priv,
-			bool create_vm)
+			struct drm_i915_file_private *file_priv)
 {
 	const bool is_global_default_ctx = file_priv == NULL;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_context *ctx;
 	int ret = 0;
 
@@ -331,34 +261,18 @@ i915_gem_create_context(struct drm_device *dev,
 		}
 	}
 
-	if (create_vm) {
-		struct i915_hw_ppgtt *ppgtt = create_vm_for_ctx(dev, ctx);
+	if (USES_FULL_PPGTT(dev)) {
+		struct i915_hw_ppgtt *ppgtt = i915_ppgtt_create(dev, file_priv);
 
 		if (IS_ERR_OR_NULL(ppgtt)) {
 			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
 					 PTR_ERR(ppgtt));
 			ret = PTR_ERR(ppgtt);
 			goto err_unpin;
-		} else
-			ctx->vm = &ppgtt->base;
-
-		/* This case is reserved for the global default context and
-		 * should only happen once. */
-		if (is_global_default_ctx) {
-			if (WARN_ON(dev_priv->mm.aliasing_ppgtt)) {
-				ret = -EEXIST;
-				goto err_unpin;
-			}
-
-			dev_priv->mm.aliasing_ppgtt = ppgtt;
 		}
-	} else if (USES_PPGTT(dev)) {
-		/* For platforms which only have aliasing PPGTT, we fake the
-		 * address space and refcounting. */
-		ctx->vm = &dev_priv->mm.aliasing_ppgtt->base;
-		kref_get(&dev_priv->mm.aliasing_ppgtt->ref);
-	} else
-		ctx->vm = &dev_priv->gtt.base;
+
+		ctx->ppgtt = ppgtt;
+	}
 
 	return ctx;
 
@@ -417,7 +331,11 @@ int i915_gem_context_init(struct drm_device *dev)
 	if (WARN_ON(dev_priv->ring[RCS].default_context))
 		return 0;
 
-	if (HAS_HW_CONTEXTS(dev)) {
+	if (i915.enable_execlists) {
+		/* NB: intentionally left blank. We will allocate our own
+		 * backing objects as we need them, thank you very much */
+		dev_priv->hw_context_size = 0;
+	} else if (HAS_HW_CONTEXTS(dev)) {
 		dev_priv->hw_context_size = round_up(get_context_size(dev), 4096);
 		if (dev_priv->hw_context_size > (1<<20)) {
 			DRM_DEBUG_DRIVER("Disabling HW Contexts; invalid size %d\n",
@@ -426,18 +344,23 @@ int i915_gem_context_init(struct drm_device *dev)
 		}
 	}
 
-	ctx = i915_gem_create_context(dev, NULL, USES_PPGTT(dev));
+	ctx = i915_gem_create_context(dev, NULL);
 	if (IS_ERR(ctx)) {
 		DRM_ERROR("Failed to create default global context (error %ld)\n",
 			  PTR_ERR(ctx));
 		return PTR_ERR(ctx);
 	}
 
-	/* NB: RCS will hold a ref for all rings */
-	for (i = 0; i < I915_NUM_RINGS; i++)
-		dev_priv->ring[i].default_context = ctx;
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct intel_engine_cs *ring = &dev_priv->ring[i];
 
-	DRM_DEBUG_DRIVER("%s context support initialized\n", dev_priv->hw_context_size ? "HW" : "fake");
+		/* NB: RCS will hold a ref for all rings */
+		ring->default_context = ctx;
+	}
+
+	DRM_DEBUG_DRIVER("%s context support initialized\n",
+			i915.enable_execlists ? "LR" :
+			dev_priv->hw_context_size ? "HW" : "fake");
 	return 0;
 }
 
@@ -489,13 +412,6 @@ int i915_gem_context_enable(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *ring;
 	int ret, i;
 
-	/* This is the only place the aliasing PPGTT gets enabled, which means
-	 * it has to happen before we bail on reset */
-	if (dev_priv->mm.aliasing_ppgtt) {
-		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
-		ppgtt->enable(ppgtt);
-	}
-
 	/* FIXME: We should make this work, even in reset */
 	if (i915_reset_in_progress(&dev_priv->gpu_error))
 		return 0;
@@ -527,7 +443,7 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file)
 	idr_init(&file_priv->context_idr);
 
 	mutex_lock(&dev->struct_mutex);
-	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
+	ctx = i915_gem_create_context(dev, file_priv);
 	mutex_unlock(&dev->struct_mutex);
 
 	if (IS_ERR(ctx)) {
@@ -614,7 +530,6 @@ static int do_switch(struct intel_engine_cs *ring,
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	struct intel_context *from = ring->last_context;
-	struct i915_hw_ppgtt *ppgtt = ctx_to_ppgtt(to);
 	u32 hw_flags = 0;
 	bool uninitialized = false;
 	int ret, i;
@@ -642,8 +557,8 @@ static int do_switch(struct intel_engine_cs *ring,
 	 */
 	from = ring->last_context;
 
-	if (USES_FULL_PPGTT(ring->dev)) {
-		ret = ppgtt->switch_mm(ppgtt, ring, false);
+	if (to->ppgtt) {
+		ret = to->ppgtt->switch_mm(to->ppgtt, ring, false);
 		if (ret)
 			goto unpin_out;
 	}
@@ -766,9 +681,9 @@ int i915_switch_context(struct intel_engine_cs *ring,
 	return do_switch(ring, to);
 }
 
-static bool hw_context_enabled(struct drm_device *dev)
+static bool contexts_enabled(struct drm_device *dev)
 {
-	return to_i915(dev)->hw_context_size;
+	return i915.enable_execlists || to_i915(dev)->hw_context_size;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
@@ -779,14 +694,14 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	struct intel_context *ctx;
 	int ret;
 
-	if (!hw_context_enabled(dev))
+	if (!contexts_enabled(dev))
 		return -ENODEV;
 
 	ret = i915_mutex_lock_interruptible(dev);
 	if (ret)
 		return ret;
 
-	ctx = i915_gem_create_context(dev, file_priv, USES_FULL_PPGTT(dev));
+	ctx = i915_gem_create_context(dev, file_priv);
 	mutex_unlock(&dev->struct_mutex);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..1a0611bb576b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
 #define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
 #define BATCH_OFFSET_BIAS (256*1024)
@@ -94,7 +95,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
 	       struct i915_address_space *vm,
 	       struct drm_file *file)
 {
-	struct drm_i915_private *dev_priv = vm->dev->dev_private;
 	struct drm_i915_gem_object *obj;
 	struct list_head objects;
 	int i, ret;
@@ -129,20 +129,6 @@ eb_lookup_vmas(struct eb_vmas *eb,
 	i = 0;
 	while (!list_empty(&objects)) {
 		struct i915_vma *vma;
-		struct i915_address_space *bind_vm = vm;
-
-		if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
-		    USES_FULL_PPGTT(vm->dev)) {
-			ret = -EINVAL;
-			goto err;
-		}
-
-		/* If we have secure dispatch, or the userspace assures us that
-		 * they know what they're doing, use the GGTT VM.
-		 */
-		if (((args->flags & I915_EXEC_SECURE) &&
-		    (i == (args->buffer_count - 1))))
-			bind_vm = &dev_priv->gtt.base;
 
 		obj = list_first_entry(&objects,
 				       struct drm_i915_gem_object,
@@ -156,7 +142,7 @@ eb_lookup_vmas(struct eb_vmas *eb,
 		 * from the (obj, vm) we don't run the risk of creating
 		 * duplicated vmas for the same vm.
 		 */
-		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
+		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
 		if (IS_ERR(vma)) {
 			DRM_DEBUG("Failed to lookup VMA\n");
 			ret = PTR_ERR(vma);
@@ -307,7 +293,7 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	uint64_t delta = reloc->delta + target_offset;
-	uint32_t __iomem *reloc_entry;
+	uint64_t offset;
 	void __iomem *reloc_page;
 	int ret;
 
@@ -320,25 +306,24 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj,
 		return ret;
 
 	/* Map the page containing the relocation we're going to perform.  */
-	reloc->offset += i915_gem_obj_ggtt_offset(obj);
+	offset = i915_gem_obj_ggtt_offset(obj);
+	offset += reloc->offset;
 	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
-			reloc->offset & PAGE_MASK);
-	reloc_entry = (uint32_t __iomem *)
-		(reloc_page + offset_in_page(reloc->offset));
-	iowrite32(lower_32_bits(delta), reloc_entry);
+					      offset & PAGE_MASK);
+	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
 	if (INTEL_INFO(dev)->gen >= 8) {
-		reloc_entry += 1;
+		offset += sizeof(uint32_t);
 
-		if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) {
+		if (offset_in_page(offset) == 0) {
 			io_mapping_unmap_atomic(reloc_page);
-			reloc_page = io_mapping_map_atomic_wc(
-					dev_priv->gtt.mappable,
-					reloc->offset + sizeof(uint32_t));
-			reloc_entry = reloc_page;
+			reloc_page =
+				io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
+							 offset);
 		}
 
-		iowrite32(upper_32_bits(delta), reloc_entry);
+		iowrite32(upper_32_bits(delta),
+			  reloc_page + offset_in_page(offset));
 	}
 
 	io_mapping_unmap_atomic(reloc_page);
@@ -535,34 +520,18 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
 }
 
 static int
-need_reloc_mappable(struct i915_vma *vma)
-{
-	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
-		i915_is_ggtt(vma->vm);
-}
-
-static int
 i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 				struct intel_engine_cs *ring,
 				bool *need_reloc)
 {
 	struct drm_i915_gem_object *obj = vma->obj;
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
-	bool need_fence;
 	uint64_t flags;
 	int ret;
 
 	flags = 0;
-
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	if (need_fence || need_reloc_mappable(vma))
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_MAPPABLE;
-
 	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
 		flags |= PIN_GLOBAL;
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -574,17 +543,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 
 	entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
-	if (has_fenced_gpu_access) {
-		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
-			ret = i915_gem_object_get_fence(obj);
-			if (ret)
-				return ret;
-
-			if (i915_gem_object_pin_fence(obj))
-				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+		ret = i915_gem_object_get_fence(obj);
+		if (ret)
+			return ret;
 
-			obj->pending_fenced_gpu_access = true;
-		}
+		if (i915_gem_object_pin_fence(obj))
+			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 	}
 
 	if (entry->offset != vma->node.start) {
@@ -601,26 +566,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
 }
 
 static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
 {
 	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
-	struct drm_i915_gem_object *obj = vma->obj;
-	bool need_fence, need_mappable;
 
-	need_fence =
-		has_fenced_gpu_access &&
-		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
-		obj->tiling_mode != I915_TILING_NONE;
-	need_mappable = need_fence || need_reloc_mappable(vma);
+	if (entry->relocation_count == 0)
+		return false;
+
+	if (!i915_is_ggtt(vma->vm))
+		return false;
+
+	/* See also use_cpu_reloc() */
+	if (HAS_LLC(vma->obj->base.dev))
+		return false;
 
-	WARN_ON((need_mappable || need_fence) &&
+	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+		return false;
+
+	return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+	struct drm_i915_gem_object *obj = vma->obj;
+
+	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
 	       !i915_is_ggtt(vma->vm));
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
 		return true;
 
-	if (need_mappable && !obj->map_and_fenceable)
+	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
 		return true;
 
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -642,9 +621,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
-	if (list_empty(vmas))
-		return 0;
-
 	i915_gem_retire_requests_ring(ring);
 
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -658,20 +634,21 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 		obj = vma->obj;
 		entry = vma->exec_entry;
 
+		if (!has_fenced_gpu_access)
+			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
-			has_fenced_gpu_access &&
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable)
+		if (need_mappable) {
+			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
-		else
+		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 		obj->base.pending_write_domain = 0;
-		obj->pending_fenced_gpu_access = false;
 	}
 	list_splice(&ordered_vmas, vmas);
 
@@ -696,7 +673,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
 			if (!drm_mm_node_allocated(&vma->node))
 				continue;
 
-			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+			if (eb_vma_misplaced(vma))
 				ret = i915_vma_unbind(vma);
 			else
 				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -744,9 +721,6 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
 	int i, total, ret;
 	unsigned count = args->buffer_count;
 
-	if (WARN_ON(list_empty(&eb->vmas)))
-		return 0;
-
 	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 
 	/* We may process another execbuffer during the unlock... */
@@ -890,18 +864,24 @@ i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 }
 
 static int
-validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
+validate_exec_list(struct drm_device *dev,
+		   struct drm_i915_gem_exec_object2 *exec,
 		   int count)
 {
-	int i;
 	unsigned relocs_total = 0;
 	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
+	unsigned invalid_flags;
+	int i;
+
+	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+	if (USES_FULL_PPGTT(dev))
+		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
 	for (i = 0; i < count; i++) {
 		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
 		int length; /* limited by fault_in_pages_readable() */
 
-		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
+		if (exec[i].flags & invalid_flags)
 			return -EINVAL;
 
 		/* First check for malicious input causing overflow in
@@ -951,16 +931,26 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 		return ERR_PTR(-EIO);
 	}
 
+	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
+		int ret = intel_lr_context_deferred_create(ctx, ring);
+		if (ret) {
+			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
+			return ERR_PTR(ret);
+		}
+	}
+
 	return ctx;
 }
 
-static void
+void
 i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 				   struct intel_engine_cs *ring)
 {
+	u32 seqno = intel_ring_get_seqno(ring);
 	struct i915_vma *vma;
 
 	list_for_each_entry(vma, vmas, exec_list) {
+		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 		struct drm_i915_gem_object *obj = vma->obj;
 		u32 old_read = obj->base.read_domains;
 		u32 old_write = obj->base.write_domain;
@@ -969,24 +959,31 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
 		if (obj->base.write_domain == 0)
 			obj->base.pending_read_domains |= obj->base.read_domains;
 		obj->base.read_domains = obj->base.pending_read_domains;
-		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
 		i915_vma_move_to_active(vma, ring);
 		if (obj->base.write_domain) {
 			obj->dirty = 1;
-			obj->last_write_seqno = intel_ring_get_seqno(ring);
+			obj->last_write_seqno = seqno;
 
 			intel_fb_obj_invalidate(obj, ring);
 
 			/* update for the implicit flush after a batch */
 			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
 		}
+		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+			obj->last_fenced_seqno = seqno;
+			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+				struct drm_i915_private *dev_priv = to_i915(ring->dev);
+				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+					       &dev_priv->mm.fence_list);
+			}
+		}
 
 		trace_i915_gem_object_change_domain(obj, old_read, old_write);
 	}
 }
 
-static void
+void
 i915_gem_execbuffer_retire_commands(struct drm_device *dev,
 				    struct drm_file *file,
 				    struct intel_engine_cs *ring,
@@ -1026,14 +1023,14 @@ i915_reset_gen7_sol_offsets(struct drm_device *dev,
 	return 0;
 }
 
-static int
-legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
-			     struct intel_engine_cs *ring,
-			     struct intel_context *ctx,
-			     struct drm_i915_gem_execbuffer2 *args,
-			     struct list_head *vmas,
-			     struct drm_i915_gem_object *batch_obj,
-			     u64 exec_start, u32 flags)
+int
+i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
+			       struct intel_engine_cs *ring,
+			       struct intel_context *ctx,
+			       struct drm_i915_gem_execbuffer2 *args,
+			       struct list_head *vmas,
+			       struct drm_i915_gem_object *batch_obj,
+			       u64 exec_start, u32 flags)
 {
 	struct drm_clip_rect *cliprects = NULL;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1254,7 +1251,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
 
-	ret = validate_exec_list(exec, args->buffer_count);
+	ret = validate_exec_list(dev, exec, args->buffer_count);
 	if (ret)
 		return ret;
 
@@ -1318,8 +1315,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 
 	i915_gem_context_reference(ctx);
 
-	vm = ctx->vm;
-	if (!USES_FULL_PPGTT(dev))
+	if (ctx->ppgtt)
+		vm = &ctx->ppgtt->base;
+	else
 		vm = &dev_priv->gtt.base;
 
 	eb = eb_create(args);
@@ -1386,25 +1384,36 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
 	 * batch" bit. Hence we need to pin secure batches into the global gtt.
 	 * hsw should have this fixed, but bdw mucks it up again. */
-	if (flags & I915_DISPATCH_SECURE &&
-	    !batch_obj->has_global_gtt_mapping) {
-		/* When we have multiple VMs, we'll need to make sure that we
-		 * allocate space first */
-		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
-		BUG_ON(!vma);
-		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
-	}
+	if (flags & I915_DISPATCH_SECURE) {
+		/*
+		 * So on first glance it looks freaky that we pin the batch here
+		 * outside of the reservation loop. But:
+		 * - The batch is already pinned into the relevant ppgtt, so we
+		 *   already have the backing storage fully allocated.
+		 * - No other BO uses the global gtt (well contexts, but meh),
+		 *   so we don't really have issues with mutliple objects not
+		 *   fitting due to fragmentation.
+		 * So this is actually safe.
+		 */
+		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+		if (ret)
+			goto err;
 
-	if (flags & I915_DISPATCH_SECURE)
 		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
-	else
+	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
-	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
-			args, &eb->vmas, batch_obj, exec_start, flags);
-	if (ret)
-		goto err;
+	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
+				      &eb->vmas, batch_obj, exec_start, flags);
 
+	/*
+	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
+	 * batch vma for correctness. For less ugly and less fragility this
+	 * needs to be adjusted to also track the ggtt batch vma properly as
+	 * active.
+	 */
+	if (flags & I915_DISPATCH_SECURE)
+		i915_gem_object_ggtt_unpin(batch_obj);
 err:
 	/* the request owns the ref now */
 	i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 1411613f2174..4db237065610 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -33,17 +33,6 @@
 static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv);
 static void chv_setup_private_ppat(struct drm_i915_private *dev_priv);
 
-bool intel_enable_ppgtt(struct drm_device *dev, bool full)
-{
-	if (i915.enable_ppgtt == 0)
-		return false;
-
-	if (i915.enable_ppgtt == 1 && full)
-		return false;
-
-	return true;
-}
-
 static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
 {
 	if (enable_ppgtt == 0 || !HAS_ALIASING_PPGTT(dev))
@@ -78,7 +67,6 @@ static void ppgtt_bind_vma(struct i915_vma *vma,
 			   enum i915_cache_level cache_level,
 			   u32 flags);
 static void ppgtt_unbind_vma(struct i915_vma *vma);
-static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt);
 
 static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
 					     enum i915_cache_level level,
@@ -403,9 +391,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 
-	list_del(&vm->global_link);
-	drm_mm_takedown(&vm->mm);
-
 	gen8_ppgtt_unmap_pages(ppgtt);
 	gen8_ppgtt_free(ppgtt);
 }
@@ -615,7 +600,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
 		kunmap_atomic(pd_vaddr);
 	}
 
-	ppgtt->enable = gen8_ppgtt_enable;
 	ppgtt->switch_mm = gen8_mm_switch;
 	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
@@ -836,39 +820,26 @@ static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return 0;
 }
 
-static int gen8_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static void gen8_ppgtt_enable(struct drm_device *dev)
 {
-	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
-	int j, ret;
+	int j;
+
+	/* In the case of execlists, PPGTT is enabled by the context descriptor
+	 * and the PDPs are contained within the context itself.  We don't
+	 * need to do anything here. */
+	if (i915.enable_execlists)
+		return;
 
 	for_each_ring(ring, dev_priv, j) {
 		I915_WRITE(RING_MODE_GEN7(ring),
 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-		/* We promise to do a switch later with FULL PPGTT. If this is
-		 * aliasing, this is the one and only switch we'll do */
-		if (USES_FULL_PPGTT(dev))
-			continue;
-
-		ret = ppgtt->switch_mm(ppgtt, ring, true);
-		if (ret)
-			goto err_out;
 	}
-
-	return 0;
-
-err_out:
-	for_each_ring(ring, dev_priv, j)
-		I915_WRITE(RING_MODE_GEN7(ring),
-			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
-	return ret;
 }
 
-static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static void gen7_ppgtt_enable(struct drm_device *dev)
 {
-	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
 	uint32_t ecochk, ecobits;
@@ -887,31 +858,16 @@ static int gen7_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 	I915_WRITE(GAM_ECOCHK, ecochk);
 
 	for_each_ring(ring, dev_priv, i) {
-		int ret;
 		/* GFX_MODE is per-ring on gen7+ */
 		I915_WRITE(RING_MODE_GEN7(ring),
 			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-		/* We promise to do a switch later with FULL PPGTT. If this is
-		 * aliasing, this is the one and only switch we'll do */
-		if (USES_FULL_PPGTT(dev))
-			continue;
-
-		ret = ppgtt->switch_mm(ppgtt, ring, true);
-		if (ret)
-			return ret;
 	}
-
-	return 0;
 }
 
-static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
+static void gen6_ppgtt_enable(struct drm_device *dev)
 {
-	struct drm_device *dev = ppgtt->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_engine_cs *ring;
 	uint32_t ecochk, gab_ctl, ecobits;
-	int i;
 
 	ecobits = I915_READ(GAC_ECO_BITS);
 	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
@@ -924,14 +880,6 @@ static int gen6_ppgtt_enable(struct i915_hw_ppgtt *ppgtt)
 	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
 
 	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
-
-	for_each_ring(ring, dev_priv, i) {
-		int ret = ppgtt->switch_mm(ppgtt, ring, true);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
 }
 
 /* PPGTT support for Sandybdrige/Gen6 and later */
@@ -1029,8 +977,6 @@ static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
 	struct i915_hw_ppgtt *ppgtt =
 		container_of(vm, struct i915_hw_ppgtt, base);
 
-	list_del(&vm->global_link);
-	drm_mm_takedown(&ppgtt->base.mm);
 	drm_mm_remove_node(&ppgtt->node);
 
 	gen6_ppgtt_unmap_pages(ppgtt);
@@ -1151,13 +1097,10 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 
 	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
 	if (IS_GEN6(dev)) {
-		ppgtt->enable = gen6_ppgtt_enable;
 		ppgtt->switch_mm = gen6_mm_switch;
 	} else if (IS_HASWELL(dev)) {
-		ppgtt->enable = gen7_ppgtt_enable;
 		ppgtt->switch_mm = hsw_mm_switch;
 	} else if (IS_GEN7(dev)) {
-		ppgtt->enable = gen7_ppgtt_enable;
 		ppgtt->switch_mm = gen7_mm_switch;
 	} else
 		BUG();
@@ -1188,39 +1131,108 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
 			 ppgtt->node.size >> 20,
 			 ppgtt->node.start / PAGE_SIZE);
 
+	gen6_write_pdes(ppgtt);
+	DRM_DEBUG("Adding PPGTT at offset %x\n",
+		  ppgtt->pd_offset << 10);
+
 	return 0;
 }
 
-int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
+static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	int ret = 0;
 
 	ppgtt->base.dev = dev;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
 
 	if (INTEL_INFO(dev)->gen < 8)
-		ret = gen6_ppgtt_init(ppgtt);
+		return gen6_ppgtt_init(ppgtt);
 	else if (IS_GEN8(dev))
-		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
+		return gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
 	else
 		BUG();
+}
+int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret = 0;
 
-	if (!ret) {
-		struct drm_i915_private *dev_priv = dev->dev_private;
+	ret = __hw_ppgtt_init(dev, ppgtt);
+	if (ret == 0) {
 		kref_init(&ppgtt->ref);
 		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
 			    ppgtt->base.total);
 		i915_init_vm(dev_priv, &ppgtt->base);
-		if (INTEL_INFO(dev)->gen < 8) {
-			gen6_write_pdes(ppgtt);
-			DRM_DEBUG("Adding PPGTT at offset %x\n",
-				  ppgtt->pd_offset << 10);
+	}
+
+	return ret;
+}
+
+int i915_ppgtt_init_hw(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	int i, ret = 0;
+
+	if (!USES_PPGTT(dev))
+		return 0;
+
+	if (IS_GEN6(dev))
+		gen6_ppgtt_enable(dev);
+	else if (IS_GEN7(dev))
+		gen7_ppgtt_enable(dev);
+	else if (INTEL_INFO(dev)->gen >= 8)
+		gen8_ppgtt_enable(dev);
+	else
+		WARN_ON(1);
+
+	if (ppgtt) {
+		for_each_ring(ring, dev_priv, i) {
+			ret = ppgtt->switch_mm(ppgtt, ring, true);
+			if (ret != 0)
+				return ret;
 		}
 	}
 
 	return ret;
 }
+struct i915_hw_ppgtt *
+i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
+{
+	struct i915_hw_ppgtt *ppgtt;
+	int ret;
+
+	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+	if (!ppgtt)
+		return ERR_PTR(-ENOMEM);
+
+	ret = i915_ppgtt_init(dev, ppgtt);
+	if (ret) {
+		kfree(ppgtt);
+		return ERR_PTR(ret);
+	}
+
+	ppgtt->file_priv = fpriv;
+
+	return ppgtt;
+}
+
+void  i915_ppgtt_release(struct kref *kref)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(kref, struct i915_hw_ppgtt, ref);
+
+	/* vmas should already be unbound */
+	WARN_ON(!list_empty(&ppgtt->base.active_list));
+	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
+
+	list_del(&ppgtt->base.global_link);
+	drm_mm_takedown(&ppgtt->base.mm);
+
+	ppgtt->base.cleanup(&ppgtt->base);
+	kfree(ppgtt);
+}
 
 static void
 ppgtt_bind_vma(struct i915_vma *vma,
@@ -1675,10 +1687,10 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node,
 	}
 }
 
-void i915_gem_setup_global_gtt(struct drm_device *dev,
-			       unsigned long start,
-			       unsigned long mappable_end,
-			       unsigned long end)
+int i915_gem_setup_global_gtt(struct drm_device *dev,
+			      unsigned long start,
+			      unsigned long mappable_end,
+			      unsigned long end)
 {
 	/* Let GEM Manage all of the aperture.
 	 *
@@ -1694,6 +1706,7 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	struct drm_mm_node *entry;
 	struct drm_i915_gem_object *obj;
 	unsigned long hole_start, hole_end;
+	int ret;
 
 	BUG_ON(mappable_end > end);
 
@@ -1705,14 +1718,16 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 	/* Mark any preallocated objects as occupied */
 	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
 		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
-		int ret;
+
 		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
 			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
 
 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
 		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
-		if (ret)
-			DRM_DEBUG_KMS("Reservation failed\n");
+		if (ret) {
+			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
+			return ret;
+		}
 		obj->has_global_gtt_mapping = 1;
 	}
 
@@ -1729,6 +1744,22 @@ void i915_gem_setup_global_gtt(struct drm_device *dev,
 
 	/* And finally clear the reserved guard page */
 	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
+
+	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
+		struct i915_hw_ppgtt *ppgtt;
+
+		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
+		if (!ppgtt)
+			return -ENOMEM;
+
+		ret = __hw_ppgtt_init(dev, ppgtt);
+		if (ret != 0)
+			return ret;
+
+		dev_priv->mm.aliasing_ppgtt = ppgtt;
+	}
+
+	return 0;
 }
 
 void i915_gem_init_global_gtt(struct drm_device *dev)
@@ -1742,6 +1773,25 @@ void i915_gem_init_global_gtt(struct drm_device *dev)
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
 }
 
+void i915_global_gtt_cleanup(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct i915_address_space *vm = &dev_priv->gtt.base;
+
+	if (dev_priv->mm.aliasing_ppgtt) {
+		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+
+		ppgtt->base.cleanup(&ppgtt->base);
+	}
+
+	if (drm_mm_initialized(&vm->mm)) {
+		drm_mm_takedown(&vm->mm);
+		list_del(&vm->global_link);
+	}
+
+	vm->cleanup(vm);
+}
+
 static int setup_scratch_page(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2010,10 +2060,6 @@ static void gen6_gmch_remove(struct i915_address_space *vm)
 
 	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
 
-	if (drm_mm_initialized(&vm->mm)) {
-		drm_mm_takedown(&vm->mm);
-		list_del(&vm->global_link);
-	}
 	iounmap(gtt->gsm);
 	teardown_scratch_page(vm->dev);
 }
@@ -2046,10 +2092,6 @@ static int i915_gmch_probe(struct drm_device *dev,
 
 static void i915_gmch_remove(struct i915_address_space *vm)
 {
-	if (drm_mm_initialized(&vm->mm)) {
-		drm_mm_takedown(&vm->mm);
-		list_del(&vm->global_link);
-	}
 	intel_gmch_remove();
 }
 
@@ -2164,5 +2206,8 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
 	if (!vma)
 		vma = __i915_gem_vma_create(obj, vm);
 
+	if (!i915_is_ggtt(vm))
+		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
+
 	return vma;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8d6f7c18c404..6280648d4805 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -34,6 +34,8 @@
 #ifndef __I915_GEM_GTT_H__
 #define __I915_GEM_GTT_H__
 
+struct drm_i915_file_private;
+
 typedef uint32_t gen6_gtt_pte_t;
 typedef uint64_t gen8_gtt_pte_t;
 typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
@@ -258,7 +260,7 @@ struct i915_hw_ppgtt {
 		dma_addr_t *gen8_pt_dma_addr[4];
 	};
 
-	struct intel_context *ctx;
+	struct drm_i915_file_private *file_priv;
 
 	int (*enable)(struct i915_hw_ppgtt *ppgtt);
 	int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
@@ -269,11 +271,26 @@ struct i915_hw_ppgtt {
 
 int i915_gem_gtt_init(struct drm_device *dev);
 void i915_gem_init_global_gtt(struct drm_device *dev);
-void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
-			       unsigned long mappable_end, unsigned long end);
-
-bool intel_enable_ppgtt(struct drm_device *dev, bool full);
-int i915_gem_init_ppgtt(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
+int i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start,
+			      unsigned long mappable_end, unsigned long end);
+void i915_global_gtt_cleanup(struct drm_device *dev);
+
+
+int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt);
+int i915_ppgtt_init_hw(struct drm_device *dev);
+void i915_ppgtt_release(struct kref *kref);
+struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_device *dev,
+					struct drm_i915_file_private *fpriv);
+static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt)
+		kref_get(&ppgtt->ref);
+}
+static inline void i915_ppgtt_put(struct i915_hw_ppgtt *ppgtt)
+{
+	if (ppgtt)
+		kref_put(&ppgtt->ref, i915_ppgtt_release);
+}
 
 void i915_check_and_clear_faults(struct drm_device *dev);
 void i915_gem_suspend_gtt_mappings(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index cb150e8b4336..7e623bf097a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
 
 		if (ret == 0) {
 			obj->fence_dirty =
-				obj->fenced_gpu_access ||
+				obj->last_fenced_seqno ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
 			obj->tiling_mode = args->tiling_mode;
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index eab41f9390f8..35e70d5d6282 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -192,10 +192,10 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m,
 				struct drm_i915_error_buffer *err,
 				int count)
 {
-	err_printf(m, "%s [%d]:\n", name, count);
+	err_printf(m, "  %s [%d]:\n", name, count);
 
 	while (count--) {
-		err_printf(m, "  %08x %8u %02x %02x %x %x",
+		err_printf(m, "    %08x %8u %02x %02x %x %x",
 			   err->gtt_offset,
 			   err->size,
 			   err->read_domains,
@@ -393,15 +393,17 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 		i915_ring_error_state(m, dev, &error->ring[i]);
 	}
 
-	if (error->active_bo)
+	for (i = 0; i < error->vm_count; i++) {
+		err_printf(m, "vm[%d]\n", i);
+
 		print_error_buffers(m, "Active",
-				    error->active_bo[0],
-				    error->active_bo_count[0]);
+				    error->active_bo[i],
+				    error->active_bo_count[i]);
 
-	if (error->pinned_bo)
 		print_error_buffers(m, "Pinned",
-				    error->pinned_bo[0],
-				    error->pinned_bo_count[0]);
+				    error->pinned_bo[i],
+				    error->pinned_bo_count[i]);
+	}
 
 	for (i = 0; i < ARRAY_SIZE(error->ring); i++) {
 		obj = error->ring[i].batchbuffer;
@@ -644,13 +646,15 @@ unwind:
 				       (src)->base.size>>PAGE_SHIFT)
 
 static void capture_bo(struct drm_i915_error_buffer *err,
-		       struct drm_i915_gem_object *obj)
+		       struct i915_vma *vma)
 {
+	struct drm_i915_gem_object *obj = vma->obj;
+
 	err->size = obj->base.size;
 	err->name = obj->base.name;
 	err->rseqno = obj->last_read_seqno;
 	err->wseqno = obj->last_write_seqno;
-	err->gtt_offset = i915_gem_obj_ggtt_offset(obj);
+	err->gtt_offset = vma->node.start;
 	err->read_domains = obj->base.read_domains;
 	err->write_domain = obj->base.write_domain;
 	err->fence_reg = obj->fence_reg;
@@ -674,7 +678,7 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 	int i = 0;
 
 	list_for_each_entry(vma, head, mm_list) {
-		capture_bo(err++, vma->obj);
+		capture_bo(err++, vma);
 		if (++i == count)
 			break;
 	}
@@ -683,21 +687,27 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err,
 }
 
 static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
-			     int count, struct list_head *head)
+			     int count, struct list_head *head,
+			     struct i915_address_space *vm)
 {
 	struct drm_i915_gem_object *obj;
-	int i = 0;
+	struct drm_i915_error_buffer * const first = err;
+	struct drm_i915_error_buffer * const last = err + count;
 
 	list_for_each_entry(obj, head, global_list) {
-		if (!i915_gem_obj_is_pinned(obj))
-			continue;
+		struct i915_vma *vma;
 
-		capture_bo(err++, obj);
-		if (++i == count)
+		if (err == last)
 			break;
+
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			if (vma->vm == vm && vma->pin_count > 0) {
+				capture_bo(err++, vma);
+				break;
+			}
 	}
 
-	return i;
+	return err - first;
 }
 
 /* Generate a semi-unique error code. The code is not meant to have meaning, The
@@ -967,6 +977,12 @@ static void i915_gem_record_rings(struct drm_device *dev,
 
 		request = i915_gem_find_active_request(ring);
 		if (request) {
+			struct i915_address_space *vm;
+
+			vm = request->ctx && request->ctx->ppgtt ?
+				&request->ctx->ppgtt->base :
+				&dev_priv->gtt.base;
+
 			/* We need to copy these to an anonymous buffer
 			 * as the simplest method to avoid being overwritten
 			 * by userspace.
@@ -974,9 +990,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
 			error->ring[i].batchbuffer =
 				i915_error_object_create(dev_priv,
 							 request->batch_obj,
-							 request->ctx ?
-							 request->ctx->vm :
-							 &dev_priv->gtt.base);
+							 vm);
 
 			if (HAS_BROKEN_CS_TLB(dev_priv->dev) &&
 			    ring->scratch.obj)
@@ -1049,9 +1063,14 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 	list_for_each_entry(vma, &vm->active_list, mm_list)
 		i++;
 	error->active_bo_count[ndx] = i;
-	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
-		if (i915_gem_obj_is_pinned(obj))
-			i++;
+
+	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
+		list_for_each_entry(vma, &obj->vma_list, vma_link)
+			if (vma->vm == vm && vma->pin_count > 0) {
+				i++;
+				break;
+			}
+	}
 	error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx];
 
 	if (i) {
@@ -1070,7 +1089,7 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
 		error->pinned_bo_count[ndx] =
 			capture_pinned_bo(pinned_bo,
 					  error->pinned_bo_count[ndx],
-					  &dev_priv->mm.bound_list);
+					  &dev_priv->mm.bound_list, vm);
 	error->active_bo[ndx] = active_bo;
 	error->pinned_bo[ndx] = pinned_bo;
 }
@@ -1091,8 +1110,25 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
 	error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count),
 					 GFP_ATOMIC);
 
-	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
-		i915_gem_capture_vm(dev_priv, error, vm, i++);
+	if (error->active_bo == NULL ||
+	    error->pinned_bo == NULL ||
+	    error->active_bo_count == NULL ||
+	    error->pinned_bo_count == NULL) {
+		kfree(error->active_bo);
+		kfree(error->active_bo_count);
+		kfree(error->pinned_bo);
+		kfree(error->pinned_bo_count);
+
+		error->active_bo = NULL;
+		error->active_bo_count = NULL;
+		error->pinned_bo = NULL;
+		error->pinned_bo_count = NULL;
+	} else {
+		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
+			i915_gem_capture_vm(dev_priv, error, vm, i++);
+
+		error->vm_count = cnt;
+	}
 }
 
 /* Capture all registers which don't fit into another category. */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0050ee9470f1..7391697c25e7 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -151,7 +151,7 @@ ironlake_disable_display_irq(struct drm_i915_private *dev_priv, u32 mask)
 {
 	assert_spin_locked(&dev_priv->irq_lock);
 
-	if (!intel_irqs_enabled(dev_priv))
+	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
 		return;
 
 	if ((dev_priv->irq_mask & mask) != mask) {
@@ -1020,7 +1020,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, int pipe,
 
 	/* In vblank? */
 	if (in_vbl)
-		ret |= DRM_SCANOUTPOS_INVBL;
+		ret |= DRM_SCANOUTPOS_IN_VBLANK;
 
 	return ret;
 }
@@ -1322,10 +1322,10 @@ static u32 vlv_c0_residency(struct drm_i915_private *dev_priv,
  * @dev_priv: DRM device private
  *
  */
-static u32 vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
+static int vlv_calc_delay_from_C0_counters(struct drm_i915_private *dev_priv)
 {
 	u32 residency_C0_up = 0, residency_C0_down = 0;
-	u8 new_delay, adj;
+	int new_delay, adj;
 
 	dev_priv->rps.ei_interrupt_count++;
 
@@ -1627,6 +1627,7 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 				       struct drm_i915_private *dev_priv,
 				       u32 master_ctl)
 {
+	struct intel_engine_cs *ring;
 	u32 rcs, bcs, vcs;
 	uint32_t tmp = 0;
 	irqreturn_t ret = IRQ_NONE;
@@ -1636,12 +1637,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 		if (tmp) {
 			I915_WRITE(GEN8_GT_IIR(0), tmp);
 			ret = IRQ_HANDLED;
+
 			rcs = tmp >> GEN8_RCS_IRQ_SHIFT;
-			bcs = tmp >> GEN8_BCS_IRQ_SHIFT;
+			ring = &dev_priv->ring[RCS];
 			if (rcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, &dev_priv->ring[RCS]);
+				notify_ring(dev, ring);
+			if (rcs & GT_CONTEXT_SWITCH_INTERRUPT)
+				intel_execlists_handle_ctx_events(ring);
+
+			bcs = tmp >> GEN8_BCS_IRQ_SHIFT;
+			ring = &dev_priv->ring[BCS];
 			if (bcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, &dev_priv->ring[BCS]);
+				notify_ring(dev, ring);
+			if (bcs & GT_CONTEXT_SWITCH_INTERRUPT)
+				intel_execlists_handle_ctx_events(ring);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT0)!\n");
 	}
@@ -1651,12 +1660,20 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 		if (tmp) {
 			I915_WRITE(GEN8_GT_IIR(1), tmp);
 			ret = IRQ_HANDLED;
+
 			vcs = tmp >> GEN8_VCS1_IRQ_SHIFT;
+			ring = &dev_priv->ring[VCS];
 			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, &dev_priv->ring[VCS]);
+				notify_ring(dev, ring);
+			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
+				intel_execlists_handle_ctx_events(ring);
+
 			vcs = tmp >> GEN8_VCS2_IRQ_SHIFT;
+			ring = &dev_priv->ring[VCS2];
 			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, &dev_priv->ring[VCS2]);
+				notify_ring(dev, ring);
+			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
+				intel_execlists_handle_ctx_events(ring);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT1)!\n");
 	}
@@ -1677,9 +1694,13 @@ static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev,
 		if (tmp) {
 			I915_WRITE(GEN8_GT_IIR(3), tmp);
 			ret = IRQ_HANDLED;
+
 			vcs = tmp >> GEN8_VECS_IRQ_SHIFT;
+			ring = &dev_priv->ring[VECS];
 			if (vcs & GT_RENDER_USER_INTERRUPT)
-				notify_ring(dev, &dev_priv->ring[VECS]);
+				notify_ring(dev, ring);
+			if (vcs & GT_CONTEXT_SWITCH_INTERRUPT)
+				intel_execlists_handle_ctx_events(ring);
 		} else
 			DRM_ERROR("The master control interrupt lied (GT3)!\n");
 	}
@@ -1772,7 +1793,9 @@ static inline void intel_hpd_irq_handler(struct drm_device *dev,
 				long_hpd = (dig_hotplug_reg >> dig_shift) & PORTB_HOTPLUG_LONG_DETECT;
 			}
 
-			DRM_DEBUG_DRIVER("digital hpd port %d %d\n", port, long_hpd);
+			DRM_DEBUG_DRIVER("digital hpd port %c - %s\n",
+					 port_name(port),
+					 long_hpd ? "long" : "short");
 			/* for long HPD pulses we want to have the digital queue happen,
 			   but we still want HPD storm detection to function. */
 			if (long_hpd) {
@@ -1984,14 +2007,9 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 
 static bool intel_pipe_handle_vblank(struct drm_device *dev, enum pipe pipe)
 {
-	struct intel_crtc *crtc;
-
 	if (!drm_handle_vblank(dev, pipe))
 		return false;
 
-	crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe));
-	wake_up(&crtc->vbl_wait);
-
 	return true;
 }
 
@@ -3522,18 +3540,17 @@ static void cherryview_irq_preinstall(struct drm_device *dev)
 static void ibx_hpd_irq_setup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *intel_encoder;
 	u32 hotplug_irqs, hotplug, enabled_irqs = 0;
 
 	if (HAS_PCH_IBX(dev)) {
 		hotplug_irqs = SDE_HOTPLUG_MASK;
-		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
+		for_each_intel_encoder(dev, intel_encoder)
 			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
 				enabled_irqs |= hpd_ibx[intel_encoder->hpd_pin];
 	} else {
 		hotplug_irqs = SDE_HOTPLUG_MASK_CPT;
-		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
+		for_each_intel_encoder(dev, intel_encoder)
 			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
 				enabled_irqs |= hpd_cpt[intel_encoder->hpd_pin];
 	}
@@ -3787,12 +3804,17 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	/* These are interrupts we'll toggle with the ring mask register */
 	uint32_t gt_interrupts[] = {
 		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
+			GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT |
 			GT_RENDER_L3_PARITY_ERROR_INTERRUPT |
-			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT,
+			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT |
+			GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT,
 		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
-			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT,
+			GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT |
+			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT |
+			GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT,
 		0,
-		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT
+		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT |
+			GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT
 		};
 
 	for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++)
@@ -4444,7 +4466,6 @@ static int i965_irq_postinstall(struct drm_device *dev)
 static void i915_hpd_irq_setup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *intel_encoder;
 	u32 hotplug_en;
 
@@ -4455,7 +4476,7 @@ static void i915_hpd_irq_setup(struct drm_device *dev)
 		hotplug_en &= ~HOTPLUG_INT_EN_MASK;
 		/* Note HDMI and DP share hotplug bits */
 		/* enable bits are the same for all generations */
-		list_for_each_entry(intel_encoder, &mode_config->encoder_list, base.head)
+		for_each_intel_encoder(dev, intel_encoder)
 			if (dev_priv->hpd_stats[intel_encoder->hpd_pin].hpd_mark == HPD_ENABLED)
 				hotplug_en |= hpd_mask_i915[intel_encoder->hpd_pin];
 		/* Programming the CRT detection parameters tends
@@ -4680,6 +4701,14 @@ void intel_irq_init(struct drm_device *dev)
 		dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
 	}
 
+	/*
+	 * Opt out of the vblank disable timer on everything except gen2.
+	 * Gen2 doesn't have a hardware frame counter and so depends on
+	 * vblank interrupts to produce sane vblank seuquence numbers.
+	 */
+	if (!IS_GEN2(dev))
+		dev->vblank_disable_immediate = true;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
 		dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp;
 		dev->driver->get_scanout_position = i915_get_crtc_scanoutpos;
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 7f84dd263ee8..139f490d464d 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -35,6 +35,7 @@ struct i915_params i915 __read_mostly = {
 	.vbt_sdvo_panel_type = -1,
 	.enable_rc6 = -1,
 	.enable_fbc = -1,
+	.enable_execlists = 0,
 	.enable_hangcheck = true,
 	.enable_ppgtt = -1,
 	.enable_psr = 0,
@@ -118,6 +119,11 @@ MODULE_PARM_DESC(enable_ppgtt,
 	"Override PPGTT usage. "
 	"(-1=auto [default], 0=disabled, 1=aliasing, 2=full)");
 
+module_param_named(enable_execlists, i915.enable_execlists, int, 0400);
+MODULE_PARM_DESC(enable_execlists,
+	"Override execlists usage. "
+	"(-1=auto, 0=disabled [default], 1=enabled)");
+
 module_param_named(enable_psr, i915.enable_psr, int, 0600);
 MODULE_PARM_DESC(enable_psr, "Enable PSR (default: false)");
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f29b44c86a2f..1e4fb846d9a5 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -272,6 +272,7 @@
 #define   MI_SEMAPHORE_POLL		(1<<15)
 #define   MI_SEMAPHORE_SAD_GTE_SDD	(1<<12)
 #define MI_STORE_DWORD_IMM	MI_INSTR(0x20, 1)
+#define MI_STORE_DWORD_IMM_GEN8	MI_INSTR(0x20, 2)
 #define   MI_MEM_VIRTUAL	(1 << 22) /* 965+ only */
 #define MI_STORE_DWORD_INDEX	MI_INSTR(0x21, 1)
 #define   MI_STORE_DWORD_INDEX_SHIFT 2
@@ -282,6 +283,7 @@
  *   address/value pairs. Don't overdue it, though, x <= 2^4 must hold!
  */
 #define MI_LOAD_REGISTER_IMM(x)	MI_INSTR(0x22, 2*(x)-1)
+#define   MI_LRI_FORCE_POSTED		(1<<12)
 #define MI_STORE_REGISTER_MEM(x) MI_INSTR(0x24, 2*(x)-1)
 #define MI_STORE_REGISTER_MEM_GEN8(x) MI_INSTR(0x24, 3*(x)-1)
 #define   MI_SRM_LRM_GLOBAL_GTT		(1<<22)
@@ -501,10 +503,26 @@
 #define BUNIT_REG_BISOC				0x11
 
 #define PUNIT_REG_DSPFREQ			0x36
+#define   DSPFREQSTAT_SHIFT_CHV			24
+#define   DSPFREQSTAT_MASK_CHV			(0x1f << DSPFREQSTAT_SHIFT_CHV)
+#define   DSPFREQGUAR_SHIFT_CHV			8
+#define   DSPFREQGUAR_MASK_CHV			(0x1f << DSPFREQGUAR_SHIFT_CHV)
 #define   DSPFREQSTAT_SHIFT			30
 #define   DSPFREQSTAT_MASK			(0x3 << DSPFREQSTAT_SHIFT)
 #define   DSPFREQGUAR_SHIFT			14
 #define   DSPFREQGUAR_MASK			(0x3 << DSPFREQGUAR_SHIFT)
+#define   _DP_SSC(val, pipe)			((val) << (2 * (pipe)))
+#define   DP_SSC_MASK(pipe)			_DP_SSC(0x3, (pipe))
+#define   DP_SSC_PWR_ON(pipe)			_DP_SSC(0x0, (pipe))
+#define   DP_SSC_CLK_GATE(pipe)			_DP_SSC(0x1, (pipe))
+#define   DP_SSC_RESET(pipe)			_DP_SSC(0x2, (pipe))
+#define   DP_SSC_PWR_GATE(pipe)			_DP_SSC(0x3, (pipe))
+#define   _DP_SSS(val, pipe)			((val) << (2 * (pipe) + 16))
+#define   DP_SSS_MASK(pipe)			_DP_SSS(0x3, (pipe))
+#define   DP_SSS_PWR_ON(pipe)			_DP_SSS(0x0, (pipe))
+#define   DP_SSS_CLK_GATE(pipe)			_DP_SSS(0x1, (pipe))
+#define   DP_SSS_RESET(pipe)			_DP_SSS(0x2, (pipe))
+#define   DP_SSS_PWR_GATE(pipe)			_DP_SSS(0x3, (pipe))
 
 /* See the PUNIT HAS v0.8 for the below bits */
 enum punit_power_well {
@@ -518,6 +536,11 @@ enum punit_power_well {
 	PUNIT_POWER_WELL_DPIO_TX_C_LANES_23	= 9,
 	PUNIT_POWER_WELL_DPIO_RX0		= 10,
 	PUNIT_POWER_WELL_DPIO_RX1		= 11,
+	PUNIT_POWER_WELL_DPIO_CMN_D		= 12,
+	/* FIXME: guesswork below */
+	PUNIT_POWER_WELL_DPIO_TX_D_LANES_01	= 13,
+	PUNIT_POWER_WELL_DPIO_TX_D_LANES_23	= 14,
+	PUNIT_POWER_WELL_DPIO_RX2		= 15,
 
 	PUNIT_POWER_WELL_NUM,
 };
@@ -838,8 +861,8 @@ enum punit_power_well {
 
 #define _VLV_TX_DW2_CH0			0x8288
 #define _VLV_TX_DW2_CH1			0x8488
-#define   DPIO_SWING_MARGIN_SHIFT	16
-#define   DPIO_SWING_MARGIN_MASK	(0xff << DPIO_SWING_MARGIN_SHIFT)
+#define   DPIO_SWING_MARGIN000_SHIFT	16
+#define   DPIO_SWING_MARGIN000_MASK	(0xff << DPIO_SWING_MARGIN000_SHIFT)
 #define   DPIO_UNIQ_TRANS_SCALE_SHIFT	8
 #define VLV_TX_DW2(ch) _PORT(ch, _VLV_TX_DW2_CH0, _VLV_TX_DW2_CH1)
 
@@ -847,12 +870,16 @@ enum punit_power_well {
 #define _VLV_TX_DW3_CH1			0x848c
 /* The following bit for CHV phy */
 #define   DPIO_TX_UNIQ_TRANS_SCALE_EN	(1<<27)
+#define   DPIO_SWING_MARGIN101_SHIFT	16
+#define   DPIO_SWING_MARGIN101_MASK	(0xff << DPIO_SWING_MARGIN101_SHIFT)
 #define VLV_TX_DW3(ch) _PORT(ch, _VLV_TX_DW3_CH0, _VLV_TX_DW3_CH1)
 
 #define _VLV_TX_DW4_CH0			0x8290
 #define _VLV_TX_DW4_CH1			0x8490
 #define   DPIO_SWING_DEEMPH9P5_SHIFT	24
 #define   DPIO_SWING_DEEMPH9P5_MASK	(0xff << DPIO_SWING_DEEMPH9P5_SHIFT)
+#define   DPIO_SWING_DEEMPH6P0_SHIFT	16
+#define   DPIO_SWING_DEEMPH6P0_MASK	(0xff << DPIO_SWING_DEEMPH6P0_SHIFT)
 #define VLV_TX_DW4(ch) _PORT(ch, _VLV_TX_DW4_CH0, _VLV_TX_DW4_CH1)
 
 #define _VLV_TX3_DW4_CH0		0x690
@@ -1064,6 +1091,7 @@ enum punit_power_well {
 #define RING_ACTHD_UDW(base)	((base)+0x5c)
 #define RING_NOPID(base)	((base)+0x94)
 #define RING_IMR(base)		((base)+0xa8)
+#define RING_HWSTAM(base)	((base)+0x98)
 #define RING_TIMESTAMP(base)	((base)+0x358)
 #define   TAIL_ADDR		0x001FFFF8
 #define   HEAD_WRAP_COUNT	0xFFE00000
@@ -1380,6 +1408,7 @@ enum punit_power_well {
 #define GT_BSD_CS_ERROR_INTERRUPT		(1 << 15)
 #define GT_BSD_USER_INTERRUPT			(1 << 12)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT_S1	(1 << 11) /* hsw+; rsvd on snb, ivb, vlv */
+#define GT_CONTEXT_SWITCH_INTERRUPT		(1 <<  8)
 #define GT_RENDER_L3_PARITY_ERROR_INTERRUPT	(1 <<  5) /* !snb */
 #define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT	(1 <<  4)
 #define GT_RENDER_CS_MASTER_ERROR_INTERRUPT	(1 <<  3)
@@ -1519,6 +1548,7 @@ enum punit_power_well {
 /* Framebuffer compression for Ironlake */
 #define ILK_DPFC_CB_BASE	0x43200
 #define ILK_DPFC_CONTROL	0x43208
+#define   FBC_CTL_FALSE_COLOR	(1<<10)
 /* The bit 28-8 is reserved */
 #define   DPFC_RESERVED		(0x1FFFFF00)
 #define ILK_DPFC_RECOMP_CTL	0x4320c
@@ -1675,12 +1705,9 @@ enum punit_power_well {
 #define DPIO_PHY_STATUS			(VLV_DISPLAY_BASE + 0x6240)
 #define   DPLL_PORTD_READY_MASK		(0xf)
 #define DISPLAY_PHY_CONTROL (VLV_DISPLAY_BASE + 0x60100)
-#define   PHY_COM_LANE_RESET_DEASSERT(phy, val) \
-				((phy == DPIO_PHY0) ? (val | 1) : (val | 2))
-#define   PHY_COM_LANE_RESET_ASSERT(phy, val) \
-				((phy == DPIO_PHY0) ? (val & ~1) : (val & ~2))
+#define   PHY_COM_LANE_RESET_DEASSERT(phy) (1 << (phy))
 #define DISPLAY_PHY_STATUS (VLV_DISPLAY_BASE + 0x60104)
-#define   PHY_POWERGOOD(phy)	((phy == DPIO_PHY0) ? (1<<31) : (1<<30))
+#define   PHY_POWERGOOD(phy)	(((phy) == DPIO_PHY0) ? (1<<31) : (1<<30))
 
 /*
  * The i830 generation, in LVDS mode, defines P1 as the bit number set within
@@ -3476,6 +3503,8 @@ enum punit_power_well {
 #define   DP_LINK_TRAIN_OFF		(3 << 28)
 #define   DP_LINK_TRAIN_MASK		(3 << 28)
 #define   DP_LINK_TRAIN_SHIFT		28
+#define   DP_LINK_TRAIN_PAT_3_CHV	(1 << 14)
+#define   DP_LINK_TRAIN_MASK_CHV	((3 << 28)|(1<<14))
 
 /* CPT Link training mode */
 #define   DP_LINK_TRAIN_PAT_1_CPT	(0 << 8)
@@ -3732,7 +3761,6 @@ enum punit_power_well {
 #define   PIPE_VSYNC_INTERRUPT_STATUS		(1UL<<9)
 #define   PIPE_DISPLAY_LINE_COMPARE_STATUS	(1UL<<8)
 #define   PIPE_DPST_EVENT_STATUS		(1UL<<7)
-#define   PIPE_LEGACY_BLC_EVENT_STATUS		(1UL<<6)
 #define   PIPE_A_PSR_STATUS_VLV			(1UL<<6)
 #define   PIPE_LEGACY_BLC_EVENT_STATUS		(1UL<<6)
 #define   PIPE_ODD_FIELD_INTERRUPT_STATUS	(1UL<<5)
@@ -3842,73 +3870,151 @@ enum punit_power_well {
 #define   DSPARB_BEND_SHIFT	9 /* on 855 */
 #define   DSPARB_AEND_SHIFT	0
 
+/* pnv/gen4/g4x/vlv/chv */
 #define DSPFW1			(dev_priv->info.display_mmio_offset + 0x70034)
-#define   DSPFW_SR_SHIFT	23
-#define   DSPFW_SR_MASK		(0x1ff<<23)
-#define   DSPFW_CURSORB_SHIFT	16
-#define   DSPFW_CURSORB_MASK	(0x3f<<16)
-#define   DSPFW_PLANEB_SHIFT	8
-#define   DSPFW_PLANEB_MASK	(0x7f<<8)
-#define   DSPFW_PLANEA_MASK	(0x7f)
+#define   DSPFW_SR_SHIFT		23
+#define   DSPFW_SR_MASK			(0x1ff<<23)
+#define   DSPFW_CURSORB_SHIFT		16
+#define   DSPFW_CURSORB_MASK		(0x3f<<16)
+#define   DSPFW_PLANEB_SHIFT		8
+#define   DSPFW_PLANEB_MASK		(0x7f<<8)
+#define   DSPFW_PLANEB_MASK_VLV		(0xff<<8) /* vlv/chv */
+#define   DSPFW_PLANEA_SHIFT		0
+#define   DSPFW_PLANEA_MASK		(0x7f<<0)
+#define   DSPFW_PLANEA_MASK_VLV		(0xff<<0) /* vlv/chv */
 #define DSPFW2			(dev_priv->info.display_mmio_offset + 0x70038)
-#define   DSPFW_CURSORA_MASK	0x00003f00
-#define   DSPFW_CURSORA_SHIFT	8
-#define   DSPFW_PLANEC_MASK	(0x7f)
+#define   DSPFW_FBC_SR_EN		(1<<31)	  /* g4x */
+#define   DSPFW_FBC_SR_SHIFT		28
+#define   DSPFW_FBC_SR_MASK		(0x7<<28) /* g4x */
+#define   DSPFW_FBC_HPLL_SR_SHIFT	24
+#define   DSPFW_FBC_HPLL_SR_MASK	(0xf<<24) /* g4x */
+#define   DSPFW_SPRITEB_SHIFT		(16)
+#define   DSPFW_SPRITEB_MASK		(0x7f<<16) /* g4x */
+#define   DSPFW_SPRITEB_MASK_VLV	(0xff<<16) /* vlv/chv */
+#define   DSPFW_CURSORA_SHIFT		8
+#define   DSPFW_CURSORA_MASK		(0x3f<<8)
+#define   DSPFW_PLANEC_SHIFT_OLD	0
+#define   DSPFW_PLANEC_MASK_OLD		(0x7f<<0) /* pre-gen4 sprite C */
+#define   DSPFW_SPRITEA_SHIFT		0
+#define   DSPFW_SPRITEA_MASK		(0x7f<<0) /* g4x */
+#define   DSPFW_SPRITEA_MASK_VLV	(0xff<<0) /* vlv/chv */
 #define DSPFW3			(dev_priv->info.display_mmio_offset + 0x7003c)
-#define   DSPFW_HPLL_SR_EN	(1<<31)
-#define   DSPFW_CURSOR_SR_SHIFT	24
+#define   DSPFW_HPLL_SR_EN		(1<<31)
 #define   PINEVIEW_SELF_REFRESH_EN	(1<<30)
+#define   DSPFW_CURSOR_SR_SHIFT		24
 #define   DSPFW_CURSOR_SR_MASK		(0x3f<<24)
 #define   DSPFW_HPLL_CURSOR_SHIFT	16
 #define   DSPFW_HPLL_CURSOR_MASK	(0x3f<<16)
-#define   DSPFW_HPLL_SR_MASK		(0x1ff)
-#define DSPFW4			(dev_priv->info.display_mmio_offset + 0x70070)
-#define DSPFW7			(dev_priv->info.display_mmio_offset + 0x7007c)
+#define   DSPFW_HPLL_SR_SHIFT		0
+#define   DSPFW_HPLL_SR_MASK		(0x1ff<<0)
+
+/* vlv/chv */
+#define DSPFW4			(VLV_DISPLAY_BASE + 0x70070)
+#define   DSPFW_SPRITEB_WM1_SHIFT	16
+#define   DSPFW_SPRITEB_WM1_MASK	(0xff<<16)
+#define   DSPFW_CURSORA_WM1_SHIFT	8
+#define   DSPFW_CURSORA_WM1_MASK	(0x3f<<8)
+#define   DSPFW_SPRITEA_WM1_SHIFT	0
+#define   DSPFW_SPRITEA_WM1_MASK	(0xff<<0)
+#define DSPFW5			(VLV_DISPLAY_BASE + 0x70074)
+#define   DSPFW_PLANEB_WM1_SHIFT	24
+#define   DSPFW_PLANEB_WM1_MASK		(0xff<<24)
+#define   DSPFW_PLANEA_WM1_SHIFT	16
+#define   DSPFW_PLANEA_WM1_MASK		(0xff<<16)
+#define   DSPFW_CURSORB_WM1_SHIFT	8
+#define   DSPFW_CURSORB_WM1_MASK	(0x3f<<8)
+#define   DSPFW_CURSOR_SR_WM1_SHIFT	0
+#define   DSPFW_CURSOR_SR_WM1_MASK	(0x3f<<0)
+#define DSPFW6			(VLV_DISPLAY_BASE + 0x70078)
+#define   DSPFW_SR_WM1_SHIFT		0
+#define   DSPFW_SR_WM1_MASK		(0x1ff<<0)
+#define DSPFW7			(VLV_DISPLAY_BASE + 0x7007c)
+#define DSPFW7_CHV		(VLV_DISPLAY_BASE + 0x700b4) /* wtf #1? */
+#define   DSPFW_SPRITED_WM1_SHIFT	24
+#define   DSPFW_SPRITED_WM1_MASK	(0xff<<24)
+#define   DSPFW_SPRITED_SHIFT		16
+#define   DSPFW_SPRITED_MASK		(0xff<<16)
+#define   DSPFW_SPRITEC_WM1_SHIFT	8
+#define   DSPFW_SPRITEC_WM1_MASK	(0xff<<8)
+#define   DSPFW_SPRITEC_SHIFT		0
+#define   DSPFW_SPRITEC_MASK		(0xff<<0)
+#define DSPFW8_CHV		(VLV_DISPLAY_BASE + 0x700b8)
+#define   DSPFW_SPRITEF_WM1_SHIFT	24
+#define   DSPFW_SPRITEF_WM1_MASK	(0xff<<24)
+#define   DSPFW_SPRITEF_SHIFT		16
+#define   DSPFW_SPRITEF_MASK		(0xff<<16)
+#define   DSPFW_SPRITEE_WM1_SHIFT	8
+#define   DSPFW_SPRITEE_WM1_MASK	(0xff<<8)
+#define   DSPFW_SPRITEE_SHIFT		0
+#define   DSPFW_SPRITEE_MASK		(0xff<<0)
+#define DSPFW9_CHV		(VLV_DISPLAY_BASE + 0x7007c) /* wtf #2? */
+#define   DSPFW_PLANEC_WM1_SHIFT	24
+#define   DSPFW_PLANEC_WM1_MASK		(0xff<<24)
+#define   DSPFW_PLANEC_SHIFT		16
+#define   DSPFW_PLANEC_MASK		(0xff<<16)
+#define   DSPFW_CURSORC_WM1_SHIFT	8
+#define   DSPFW_CURSORC_WM1_MASK	(0x3f<<16)
+#define   DSPFW_CURSORC_SHIFT		0
+#define   DSPFW_CURSORC_MASK		(0x3f<<0)
+
+/* vlv/chv high order bits */
+#define DSPHOWM			(VLV_DISPLAY_BASE + 0x70064)
+#define   DSPFW_SR_HI_SHIFT		24
+#define   DSPFW_SR_HI_MASK		(1<<24)
+#define   DSPFW_SPRITEF_HI_SHIFT	23
+#define   DSPFW_SPRITEF_HI_MASK		(1<<23)
+#define   DSPFW_SPRITEE_HI_SHIFT	22
+#define   DSPFW_SPRITEE_HI_MASK		(1<<22)
+#define   DSPFW_PLANEC_HI_SHIFT		21
+#define   DSPFW_PLANEC_HI_MASK		(1<<21)
+#define   DSPFW_SPRITED_HI_SHIFT	20
+#define   DSPFW_SPRITED_HI_MASK		(1<<20)
+#define   DSPFW_SPRITEC_HI_SHIFT	16
+#define   DSPFW_SPRITEC_HI_MASK		(1<<16)
+#define   DSPFW_PLANEB_HI_SHIFT		12
+#define   DSPFW_PLANEB_HI_MASK		(1<<12)
+#define   DSPFW_SPRITEB_HI_SHIFT	8
+#define   DSPFW_SPRITEB_HI_MASK		(1<<8)
+#define   DSPFW_SPRITEA_HI_SHIFT	4
+#define   DSPFW_SPRITEA_HI_MASK		(1<<4)
+#define   DSPFW_PLANEA_HI_SHIFT		0
+#define   DSPFW_PLANEA_HI_MASK		(1<<0)
+#define DSPHOWM1		(VLV_DISPLAY_BASE + 0x70068)
+#define   DSPFW_SR_WM1_HI_SHIFT		24
+#define   DSPFW_SR_WM1_HI_MASK		(1<<24)
+#define   DSPFW_SPRITEF_WM1_HI_SHIFT	23
+#define   DSPFW_SPRITEF_WM1_HI_MASK	(1<<23)
+#define   DSPFW_SPRITEE_WM1_HI_SHIFT	22
+#define   DSPFW_SPRITEE_WM1_HI_MASK	(1<<22)
+#define   DSPFW_PLANEC_WM1_HI_SHIFT	21
+#define   DSPFW_PLANEC_WM1_HI_MASK	(1<<21)
+#define   DSPFW_SPRITED_WM1_HI_SHIFT	20
+#define   DSPFW_SPRITED_WM1_HI_MASK	(1<<20)
+#define   DSPFW_SPRITEC_WM1_HI_SHIFT	16
+#define   DSPFW_SPRITEC_WM1_HI_MASK	(1<<16)
+#define   DSPFW_PLANEB_WM1_HI_SHIFT	12
+#define   DSPFW_PLANEB_WM1_HI_MASK	(1<<12)
+#define   DSPFW_SPRITEB_WM1_HI_SHIFT	8
+#define   DSPFW_SPRITEB_WM1_HI_MASK	(1<<8)
+#define   DSPFW_SPRITEA_WM1_HI_SHIFT	4
+#define   DSPFW_SPRITEA_WM1_HI_MASK	(1<<4)
+#define   DSPFW_PLANEA_WM1_HI_SHIFT	0
+#define   DSPFW_PLANEA_WM1_HI_MASK	(1<<0)
 
 /* drain latency register values*/
 #define DRAIN_LATENCY_PRECISION_32	32
 #define DRAIN_LATENCY_PRECISION_64	64
-#define VLV_DDL1			(VLV_DISPLAY_BASE + 0x70050)
-#define DDL_CURSORA_PRECISION_64	(1<<31)
-#define DDL_CURSORA_PRECISION_32	(0<<31)
-#define DDL_CURSORA_SHIFT		24
-#define DDL_SPRITEB_PRECISION_64	(1<<23)
-#define DDL_SPRITEB_PRECISION_32	(0<<23)
-#define DDL_SPRITEB_SHIFT		16
-#define DDL_SPRITEA_PRECISION_64	(1<<15)
-#define DDL_SPRITEA_PRECISION_32	(0<<15)
-#define DDL_SPRITEA_SHIFT		8
-#define DDL_PLANEA_PRECISION_64		(1<<7)
-#define DDL_PLANEA_PRECISION_32		(0<<7)
-#define DDL_PLANEA_SHIFT		0
-
-#define VLV_DDL2			(VLV_DISPLAY_BASE + 0x70054)
-#define DDL_CURSORB_PRECISION_64	(1<<31)
-#define DDL_CURSORB_PRECISION_32	(0<<31)
-#define DDL_CURSORB_SHIFT		24
-#define DDL_SPRITED_PRECISION_64	(1<<23)
-#define DDL_SPRITED_PRECISION_32	(0<<23)
-#define DDL_SPRITED_SHIFT		16
-#define DDL_SPRITEC_PRECISION_64	(1<<15)
-#define DDL_SPRITEC_PRECISION_32	(0<<15)
-#define DDL_SPRITEC_SHIFT		8
-#define DDL_PLANEB_PRECISION_64		(1<<7)
-#define DDL_PLANEB_PRECISION_32		(0<<7)
-#define DDL_PLANEB_SHIFT		0
-
-#define VLV_DDL3			(VLV_DISPLAY_BASE + 0x70058)
-#define DDL_CURSORC_PRECISION_64	(1<<31)
-#define DDL_CURSORC_PRECISION_32	(0<<31)
-#define DDL_CURSORC_SHIFT		24
-#define DDL_SPRITEF_PRECISION_64	(1<<23)
-#define DDL_SPRITEF_PRECISION_32	(0<<23)
-#define DDL_SPRITEF_SHIFT		16
-#define DDL_SPRITEE_PRECISION_64	(1<<15)
-#define DDL_SPRITEE_PRECISION_32	(0<<15)
-#define DDL_SPRITEE_SHIFT		8
-#define DDL_PLANEC_PRECISION_64		(1<<7)
-#define DDL_PLANEC_PRECISION_32		(0<<7)
-#define DDL_PLANEC_SHIFT		0
+#define VLV_DDL(pipe)			(VLV_DISPLAY_BASE + 0x70050 + 4 * (pipe))
+#define DDL_CURSOR_PRECISION_64		(1<<31)
+#define DDL_CURSOR_PRECISION_32		(0<<31)
+#define DDL_CURSOR_SHIFT		24
+#define DDL_SPRITE_PRECISION_64(sprite)	(1<<(15+8*(sprite)))
+#define DDL_SPRITE_PRECISION_32(sprite)	(0<<(15+8*(sprite)))
+#define DDL_SPRITE_SHIFT(sprite)	(8+8*(sprite))
+#define DDL_PLANE_PRECISION_64		(1<<7)
+#define DDL_PLANE_PRECISION_32		(0<<7)
+#define DDL_PLANE_SHIFT			0
+#define DRAIN_LATENCY_MASK		0x7f
 
 /* FIFO watermark sizes etc */
 #define G4X_FIFO_LINE_SIZE	64
@@ -4026,7 +4132,8 @@ enum punit_power_well {
 /* Old style CUR*CNTR flags (desktop 8xx) */
 #define   CURSOR_ENABLE		0x80000000
 #define   CURSOR_GAMMA_ENABLE	0x40000000
-#define   CURSOR_STRIDE_MASK	0x30000000
+#define   CURSOR_STRIDE_SHIFT	28
+#define   CURSOR_STRIDE(x)	((ffs(x)-9) << CURSOR_STRIDE_SHIFT) /* 256,512,1k,2k */
 #define   CURSOR_PIPE_CSC_ENABLE (1<<24)
 #define   CURSOR_FORMAT_SHIFT	24
 #define   CURSOR_FORMAT_MASK	(0x07 << CURSOR_FORMAT_SHIFT)
@@ -4195,6 +4302,7 @@ enum punit_power_well {
 #define   DVS_YUV_ORDER_UYVY	(1<<16)
 #define   DVS_YUV_ORDER_YVYU	(2<<16)
 #define   DVS_YUV_ORDER_VYUY	(3<<16)
+#define   DVS_ROTATE_180	(1<<15)
 #define   DVS_DEST_KEY		(1<<2)
 #define   DVS_TRICKLE_FEED_DISABLE (1<<14)
 #define   DVS_TILED		(1<<10)
@@ -4265,6 +4373,7 @@ enum punit_power_well {
 #define   SPRITE_YUV_ORDER_UYVY		(1<<16)
 #define   SPRITE_YUV_ORDER_YVYU		(2<<16)
 #define   SPRITE_YUV_ORDER_VYUY		(3<<16)
+#define   SPRITE_ROTATE_180		(1<<15)
 #define   SPRITE_TRICKLE_FEED_DISABLE	(1<<14)
 #define   SPRITE_INT_GAMMA_ENABLE	(1<<13)
 #define   SPRITE_TILED			(1<<10)
@@ -4338,6 +4447,7 @@ enum punit_power_well {
 #define   SP_YUV_ORDER_UYVY		(1<<16)
 #define   SP_YUV_ORDER_YVYU		(2<<16)
 #define   SP_YUV_ORDER_VYUY		(3<<16)
+#define   SP_ROTATE_180			(1<<15)
 #define   SP_TILED			(1<<10)
 #define _SPALINOFF		(VLV_DISPLAY_BASE + 0x72184)
 #define _SPASTRIDE		(VLV_DISPLAY_BASE + 0x72188)
@@ -5407,7 +5517,6 @@ enum punit_power_well {
 #define   VLV_GTLC_ALLOWWAKEERR			(1 << 1)
 #define   VLV_GTLC_PW_MEDIA_STATUS_MASK		(1 << 5)
 #define   VLV_GTLC_PW_RENDER_STATUS_MASK	(1 << 7)
-#define VLV_GTLC_SURVIVABILITY_REG              0x130098
 #define  FORCEWAKE_MT				0xa188 /* multi-threaded */
 #define   FORCEWAKE_KERNEL			0x1
 #define   FORCEWAKE_USER			0x2
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index eee79e1c3222..32d22339c130 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -976,12 +976,10 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port,
 	if (bdb->version >= 158) {
 		/* The VBT HDMI level shift values match the table we have. */
 		hdmi_level_shift = child->raw[7] & 0xF;
-		if (hdmi_level_shift < 0xC) {
-			DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n",
-				      port_name(port),
-				      hdmi_level_shift);
-			info->hdmi_level_shift = hdmi_level_shift;
-		}
+		DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n",
+			      port_name(port),
+			      hdmi_level_shift);
+		info->hdmi_level_shift = hdmi_level_shift;
 	}
 }
 
@@ -1114,8 +1112,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv)
 		struct ddi_vbt_port_info *info =
 			&dev_priv->vbt.ddi_port_info[port];
 
-		/* Recommended BSpec default: 800mV 0dB. */
-		info->hdmi_level_shift = 6;
+		info->hdmi_level_shift = HDMI_LEVEL_SHIFT_UNKNOWN;
 
 		info->supports_dvi = (port != PORT_A && port != PORT_E);
 		info->supports_hdmi = info->supports_dvi;
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index b98667796337..905999bee2ac 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -802,7 +802,8 @@ struct mipi_config {
 
 	u16 rsvd4;
 
-	u8 rsvd5[5];
+	u8 rsvd5;
+	u32 target_burst_mode_freq;
 	u32 dsi_ddr_clk;
 	u32 bridge_ref_clk;
 
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 5db0b5552e39..02d55843c78d 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -33,7 +33,7 @@
  * automatically adapt to HDMI connections as well
  */
 static const u32 hsw_ddi_translations_dp[] = {
-	0x00FFFFFF, 0x0006000E,		/* DP parameters */
+	0x00FFFFFF, 0x0006000E,
 	0x00D75FFF, 0x0005000A,
 	0x00C30FFF, 0x00040006,
 	0x80AAAFFF, 0x000B0000,
@@ -45,7 +45,7 @@ static const u32 hsw_ddi_translations_dp[] = {
 };
 
 static const u32 hsw_ddi_translations_fdi[] = {
-	0x00FFFFFF, 0x0007000E,		/* FDI parameters */
+	0x00FFFFFF, 0x0007000E,
 	0x00D75FFF, 0x000F000A,
 	0x00C30FFF, 0x00060006,
 	0x00AAAFFF, 0x001E0000,
@@ -73,7 +73,7 @@ static const u32 hsw_ddi_translations_hdmi[] = {
 };
 
 static const u32 bdw_ddi_translations_edp[] = {
-	0x00FFFFFF, 0x00000012,		/* eDP parameters */
+	0x00FFFFFF, 0x00000012,
 	0x00EBAFFF, 0x00020011,
 	0x00C71FFF, 0x0006000F,
 	0x00AAAFFF, 0x000E000A,
@@ -82,11 +82,10 @@ static const u32 bdw_ddi_translations_edp[] = {
 	0x00BEEFFF, 0x000A000C,
 	0x00FFFFFF, 0x0005000F,
 	0x00DB6FFF, 0x000A000C,
-	0x00FFFFFF, 0x00140006		/* HDMI parameters 800mV 0dB*/
 };
 
 static const u32 bdw_ddi_translations_dp[] = {
-	0x00FFFFFF, 0x0007000E,		/* DP parameters */
+	0x00FFFFFF, 0x0007000E,
 	0x00D75FFF, 0x000E000A,
 	0x00BEFFFF, 0x00140006,
 	0x80B2CFFF, 0x001B0002,
@@ -95,11 +94,10 @@ static const u32 bdw_ddi_translations_dp[] = {
 	0x80CB2FFF, 0x001B0002,
 	0x00F7DFFF, 0x00180004,
 	0x80D75FFF, 0x001B0002,
-	0x00FFFFFF, 0x00140006		/* HDMI parameters 800mV 0dB*/
 };
 
 static const u32 bdw_ddi_translations_fdi[] = {
-	0x00FFFFFF, 0x0001000E,		/* FDI parameters */
+	0x00FFFFFF, 0x0001000E,
 	0x00D75FFF, 0x0004000A,
 	0x00C30FFF, 0x00070006,
 	0x00AAAFFF, 0x000C0000,
@@ -108,7 +106,20 @@ static const u32 bdw_ddi_translations_fdi[] = {
 	0x00C30FFF, 0x000C0000,
 	0x00FFFFFF, 0x00070006,
 	0x00D75FFF, 0x000C0000,
-	0x00FFFFFF, 0x00140006		/* HDMI parameters 800mV 0dB*/
+};
+
+static const u32 bdw_ddi_translations_hdmi[] = {
+				/* Idx	NT mV diff	T mV diff	db  */
+	0x00FFFFFF, 0x0007000E, /* 0:	400		400		0   */
+	0x00D75FFF, 0x000E000A, /* 1:	400		600		3.5 */
+	0x00BEFFFF, 0x00140006, /* 2:	400		800		6   */
+	0x00FFFFFF, 0x0009000D, /* 3:	450		450		0   */
+	0x00FFFFFF, 0x000E000A, /* 4:	600		600		0   */
+	0x00D7FFFF, 0x00140006, /* 5:	600		800		2.5 */
+	0x80CB2FFF, 0x001B0002, /* 6:	600		1000		4.5 */
+	0x00FFFFFF, 0x00140006, /* 7:	800		800		0   */
+	0x80E79FFF, 0x001B0002, /* 8:	800		1000		2   */
+	0x80FFFFFF, 0x001B0002, /* 9:	1000		1000		0   */
 };
 
 enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder)
@@ -145,26 +156,36 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 reg;
-	int i;
+	int i, n_hdmi_entries, hdmi_800mV_0dB;
 	int hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift;
 	const u32 *ddi_translations_fdi;
 	const u32 *ddi_translations_dp;
 	const u32 *ddi_translations_edp;
+	const u32 *ddi_translations_hdmi;
 	const u32 *ddi_translations;
 
 	if (IS_BROADWELL(dev)) {
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
 		ddi_translations_edp = bdw_ddi_translations_edp;
+		ddi_translations_hdmi = bdw_ddi_translations_hdmi;
+		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2;
+		hdmi_800mV_0dB = 7;
 	} else if (IS_HASWELL(dev)) {
 		ddi_translations_fdi = hsw_ddi_translations_fdi;
 		ddi_translations_dp = hsw_ddi_translations_dp;
 		ddi_translations_edp = hsw_ddi_translations_dp;
+		ddi_translations_hdmi = hsw_ddi_translations_hdmi;
+		n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi) / 2;
+		hdmi_800mV_0dB = 6;
 	} else {
 		WARN(1, "ddi translation table missing\n");
 		ddi_translations_edp = bdw_ddi_translations_dp;
 		ddi_translations_fdi = bdw_ddi_translations_fdi;
 		ddi_translations_dp = bdw_ddi_translations_dp;
+		ddi_translations_hdmi = bdw_ddi_translations_hdmi;
+		n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi) / 2;
+		hdmi_800mV_0dB = 7;
 	}
 
 	switch (port) {
@@ -193,9 +214,15 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port)
 		I915_WRITE(reg, ddi_translations[i]);
 		reg += 4;
 	}
+
+	/* Choose a good default if VBT is badly populated */
+	if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN ||
+	    hdmi_level >= n_hdmi_entries)
+		hdmi_level = hdmi_800mV_0dB;
+
 	/* Entry 9 is for HDMI: */
 	for (i = 0; i < 2; i++) {
-		I915_WRITE(reg, hsw_ddi_translations_hdmi[hdmi_level * 2 + i]);
+		I915_WRITE(reg, ddi_translations_hdmi[hdmi_level * 2 + i]);
 		reg += 4;
 	}
 }
@@ -587,8 +614,8 @@ static int intel_ddi_calc_wrpll_link(struct drm_i915_private *dev_priv,
 	return (refclk * n * 100) / (p * r);
 }
 
-void intel_ddi_clock_get(struct intel_encoder *encoder,
-			 struct intel_crtc_config *pipe_config)
+static void hsw_ddi_clock_get(struct intel_encoder *encoder,
+			      struct intel_crtc_config *pipe_config)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
 	int link_clock = 0;
@@ -643,9 +670,15 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
 		pipe_config->adjusted_mode.crtc_clock = pipe_config->port_clock;
 }
 
+void intel_ddi_clock_get(struct intel_encoder *encoder,
+			 struct intel_crtc_config *pipe_config)
+{
+	hsw_ddi_clock_get(encoder, pipe_config);
+}
+
 static void
-intel_ddi_calculate_wrpll(int clock /* in Hz */,
-			  unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
+hsw_ddi_calculate_wrpll(int clock /* in Hz */,
+			unsigned *r2_out, unsigned *n2_out, unsigned *p_out)
 {
 	uint64_t freq2k;
 	unsigned p, n2, r2;
@@ -708,27 +741,17 @@ intel_ddi_calculate_wrpll(int clock /* in Hz */,
 	*r2_out = best.r2;
 }
 
-/*
- * Tries to find a PLL for the CRTC. If it finds, it increases the refcount and
- * stores it in intel_crtc->ddi_pll_sel, so other mode sets won't be able to
- * steal the selected PLL. You need to call intel_ddi_pll_enable to actually
- * enable the PLL.
- */
-bool intel_ddi_pll_select(struct intel_crtc *intel_crtc)
+static bool
+hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
+		   struct intel_encoder *intel_encoder,
+		   int clock)
 {
-	struct drm_crtc *crtc = &intel_crtc->base;
-	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
-	int type = intel_encoder->type;
-	int clock = intel_crtc->config.port_clock;
-
-	intel_put_shared_dpll(intel_crtc);
-
-	if (type == INTEL_OUTPUT_HDMI) {
+	if (intel_encoder->type == INTEL_OUTPUT_HDMI) {
 		struct intel_shared_dpll *pll;
 		uint32_t val;
 		unsigned p, n2, r2;
 
-		intel_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p);
+		hsw_ddi_calculate_wrpll(clock * 1000, &r2, &n2, &p);
 
 		val = WRPLL_PLL_ENABLE | WRPLL_PLL_LCPLL |
 		      WRPLL_DIVIDER_REFERENCE(r2) | WRPLL_DIVIDER_FEEDBACK(n2) |
@@ -749,6 +772,25 @@ bool intel_ddi_pll_select(struct intel_crtc *intel_crtc)
 	return true;
 }
 
+
+/*
+ * Tries to find a *shared* PLL for the CRTC and store it in
+ * intel_crtc->ddi_pll_sel.
+ *
+ * For private DPLLs, compute_config() should do the selection for us. This
+ * function should be folded into compute_config() eventually.
+ */
+bool intel_ddi_pll_select(struct intel_crtc *intel_crtc)
+{
+	struct drm_crtc *crtc = &intel_crtc->base;
+	struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc);
+	int clock = intel_crtc->config.port_clock;
+
+	intel_put_shared_dpll(intel_crtc);
+
+	return hsw_ddi_pll_select(intel_crtc, intel_encoder, clock);
+}
+
 void intel_ddi_set_pipe_settings(struct drm_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
@@ -1183,31 +1225,52 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder)
 	}
 }
 
-int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
+static int bdw_get_cdclk_freq(struct drm_i915_private *dev_priv)
+{
+	uint32_t lcpll = I915_READ(LCPLL_CTL);
+	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
+
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
+		return 800000;
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_450)
+		return 450000;
+	else if (freq == LCPLL_CLK_FREQ_54O_BDW)
+		return 540000;
+	else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
+		return 337500;
+	else
+		return 675000;
+}
+
+static int hsw_get_cdclk_freq(struct drm_i915_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 	uint32_t lcpll = I915_READ(LCPLL_CTL);
 	uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK;
 
-	if (lcpll & LCPLL_CD_SOURCE_FCLK) {
+	if (lcpll & LCPLL_CD_SOURCE_FCLK)
 		return 800000;
-	} else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT) {
+	else if (I915_READ(FUSE_STRAP) & HSW_CDCLK_LIMIT)
 		return 450000;
-	} else if (freq == LCPLL_CLK_FREQ_450) {
+	else if (freq == LCPLL_CLK_FREQ_450)
 		return 450000;
-	} else if (IS_HASWELL(dev)) {
-		if (IS_ULT(dev))
-			return 337500;
-		else
-			return 540000;
-	} else {
-		if (freq == LCPLL_CLK_FREQ_54O_BDW)
-			return 540000;
-		else if (freq == LCPLL_CLK_FREQ_337_5_BDW)
-			return 337500;
-		else
-			return 675000;
-	}
+	else if (IS_ULT(dev))
+		return 337500;
+	else
+		return 540000;
+}
+
+int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	if (IS_BROADWELL(dev))
+		return bdw_get_cdclk_freq(dev_priv);
+
+	/* Haswell */
+	return hsw_get_cdclk_freq(dev_priv);
 }
 
 static void hsw_ddi_pll_enable(struct drm_i915_private *dev_priv,
@@ -1248,10 +1311,8 @@ static const char * const hsw_ddi_pll_names[] = {
 	"WRPLL 2",
 };
 
-void intel_ddi_pll_init(struct drm_device *dev)
+static void hsw_shared_dplls_init(struct drm_i915_private *dev_priv)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t val = I915_READ(LCPLL_CTL);
 	int i;
 
 	dev_priv->num_shared_dpll = 2;
@@ -1264,6 +1325,14 @@ void intel_ddi_pll_init(struct drm_device *dev)
 		dev_priv->shared_dplls[i].get_hw_state =
 			hsw_ddi_pll_get_hw_state;
 	}
+}
+
+void intel_ddi_pll_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t val = I915_READ(LCPLL_CTL);
+
+	hsw_shared_dplls_init(dev_priv);
 
 	/* The LCPLL register should be turned on by the BIOS. For now let's
 	 * just check its state and print errors in case something is wrong.
@@ -1444,7 +1513,7 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 		dev_priv->vbt.edp_bpp = pipe_config->pipe_bpp;
 	}
 
-	intel_ddi_clock_get(encoder, pipe_config);
+	hsw_ddi_clock_get(encoder, pipe_config);
 }
 
 static void intel_ddi_destroy(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index d8324c69fa86..32ede71fbe7f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -91,15 +91,16 @@ static int intel_framebuffer_init(struct drm_device *dev,
 				  struct intel_framebuffer *ifb,
 				  struct drm_mode_fb_cmd2 *mode_cmd,
 				  struct drm_i915_gem_object *obj);
-static void intel_dp_set_m_n(struct intel_crtc *crtc);
 static void i9xx_set_pipeconf(struct intel_crtc *intel_crtc);
 static void intel_set_pipe_timings(struct intel_crtc *intel_crtc);
 static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc,
-					 struct intel_link_m_n *m_n);
+					 struct intel_link_m_n *m_n,
+					 struct intel_link_m_n *m2_n2);
 static void ironlake_set_pipeconf(struct drm_crtc *crtc);
 static void haswell_set_pipeconf(struct drm_crtc *crtc);
 static void intel_set_pipe_csc(struct drm_crtc *crtc);
 static void vlv_prepare_pll(struct intel_crtc *crtc);
+static void chv_prepare_pll(struct intel_crtc *crtc);
 
 static struct intel_encoder *intel_find_encoder(struct intel_connector *connector, int pipe)
 {
@@ -1341,6 +1342,12 @@ static void assert_sprites_disabled(struct drm_i915_private *dev_priv,
 	}
 }
 
+static void assert_vblank_disabled(struct drm_crtc *crtc)
+{
+	if (WARN_ON(drm_crtc_vblank_get(crtc) == 0))
+		drm_crtc_vblank_put(crtc);
+}
+
 static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv)
 {
 	u32 val;
@@ -1513,34 +1520,6 @@ static void intel_init_dpio(struct drm_device *dev)
 	}
 }
 
-static void intel_reset_dpio(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-
-	if (IS_CHERRYVIEW(dev)) {
-		enum dpio_phy phy;
-		u32 val;
-
-		for (phy = DPIO_PHY0; phy < I915_NUM_PHYS_VLV; phy++) {
-			/* Poll for phypwrgood signal */
-			if (wait_for(I915_READ(DISPLAY_PHY_STATUS) &
-						PHY_POWERGOOD(phy), 1))
-				DRM_ERROR("Display PHY %d is not power up\n", phy);
-
-			/*
-			 * Deassert common lane reset for PHY.
-			 *
-			 * This should only be done on init and resume from S3
-			 * with both PLLs disabled, or we risk losing DPIO and
-			 * PLL synchronization.
-			 */
-			val = I915_READ(DISPLAY_PHY_CONTROL);
-			I915_WRITE(DISPLAY_PHY_CONTROL,
-				PHY_COM_LANE_RESET_DEASSERT(phy, val));
-		}
-	}
-}
-
 static void vlv_enable_pll(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
@@ -1712,7 +1691,7 @@ static void chv_disable_pll(struct drm_i915_private *dev_priv, enum pipe pipe)
 	assert_pipe_disabled(dev_priv, pipe);
 
 	/* Set PLL en = 0 */
-	val = DPLL_SSC_REF_CLOCK_CHV;
+	val = DPLL_SSC_REF_CLOCK_CHV | DPLL_REFA_CLK_ENABLE_VLV;
 	if (pipe != PIPE_A)
 		val |= DPLL_INTEGRATED_CRI_CLK_VLV;
 	I915_WRITE(DPLL(pipe), val);
@@ -1806,7 +1785,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc)
 	if (WARN_ON(pll->refcount == 0))
 		return;
 
-	DRM_DEBUG_KMS("enable %s (active %d, on? %d)for crtc %d\n",
+	DRM_DEBUG_KMS("enable %s (active %d, on? %d) for crtc %d\n",
 		      pll->name, pll->active, pll->on,
 		      crtc->base.base.id);
 
@@ -1824,7 +1803,7 @@ static void intel_enable_shared_dpll(struct intel_crtc *crtc)
 	pll->on = true;
 }
 
-void intel_disable_shared_dpll(struct intel_crtc *crtc)
+static void intel_disable_shared_dpll(struct intel_crtc *crtc)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2109,35 +2088,28 @@ void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
 
 /**
  * intel_enable_primary_hw_plane - enable the primary plane on a given pipe
- * @dev_priv: i915 private structure
- * @plane: plane to enable
- * @pipe: pipe being fed
+ * @plane:  plane to be enabled
+ * @crtc: crtc for the plane
  *
- * Enable @plane on @pipe, making sure that @pipe is running first.
+ * Enable @plane on @crtc, making sure that the pipe is running first.
  */
-static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv,
-					  enum plane plane, enum pipe pipe)
+static void intel_enable_primary_hw_plane(struct drm_plane *plane,
+					  struct drm_crtc *crtc)
 {
-	struct drm_device *dev = dev_priv->dev;
-	struct intel_crtc *intel_crtc =
-		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
-	int reg;
-	u32 val;
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
 	/* If the pipe isn't enabled, we can't pump pixels and may hang */
-	assert_pipe_enabled(dev_priv, pipe);
+	assert_pipe_enabled(dev_priv, intel_crtc->pipe);
 
 	if (intel_crtc->primary_enabled)
 		return;
 
 	intel_crtc->primary_enabled = true;
 
-	reg = DSPCNTR(plane);
-	val = I915_READ(reg);
-	WARN_ON(val & DISPLAY_PLANE_ENABLE);
-
-	I915_WRITE(reg, val | DISPLAY_PLANE_ENABLE);
-	intel_flush_primary_plane(dev_priv, plane);
+	dev_priv->display.update_primary_plane(crtc, plane->fb,
+					       crtc->x, crtc->y);
 
 	/*
 	 * BDW signals flip done immediately if the plane
@@ -2150,31 +2122,27 @@ static void intel_enable_primary_hw_plane(struct drm_i915_private *dev_priv,
 
 /**
  * intel_disable_primary_hw_plane - disable the primary hardware plane
- * @dev_priv: i915 private structure
- * @plane: plane to disable
- * @pipe: pipe consuming the data
+ * @plane: plane to be disabled
+ * @crtc: crtc for the plane
  *
- * Disable @plane; should be an independent operation.
+ * Disable @plane on @crtc, making sure that the pipe is running first.
  */
-static void intel_disable_primary_hw_plane(struct drm_i915_private *dev_priv,
-					   enum plane plane, enum pipe pipe)
+static void intel_disable_primary_hw_plane(struct drm_plane *plane,
+					   struct drm_crtc *crtc)
 {
-	struct intel_crtc *intel_crtc =
-		to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]);
-	int reg;
-	u32 val;
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+
+	assert_pipe_enabled(dev_priv, intel_crtc->pipe);
 
 	if (!intel_crtc->primary_enabled)
 		return;
 
 	intel_crtc->primary_enabled = false;
 
-	reg = DSPCNTR(plane);
-	val = I915_READ(reg);
-	WARN_ON((val & DISPLAY_PLANE_ENABLE) == 0);
-
-	I915_WRITE(reg, val & ~DISPLAY_PLANE_ENABLE);
-	intel_flush_primary_plane(dev_priv, plane);
+	dev_priv->display.update_primary_plane(crtc, plane->fb,
+					       crtc->x, crtc->y);
 }
 
 static bool need_vtd_wa(struct drm_device *dev)
@@ -2426,12 +2394,35 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg;
+	u32 reg = DSPCNTR(plane);
+
+	if (!intel_crtc->primary_enabled) {
+		I915_WRITE(reg, 0);
+		if (INTEL_INFO(dev)->gen >= 4)
+			I915_WRITE(DSPSURF(plane), 0);
+		else
+			I915_WRITE(DSPADDR(plane), 0);
+		POSTING_READ(reg);
+		return;
+	}
+
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	dspcntr |= DISPLAY_PLANE_ENABLE;
+
+	if (INTEL_INFO(dev)->gen < 4) {
+		if (intel_crtc->pipe == PIPE_B)
+			dspcntr |= DISPPLANE_SEL_PIPE_B;
+
+		/* pipesrc and dspsize control the size that is scaled from,
+		 * which should always be the user's requested size.
+		 */
+		I915_WRITE(DSPSIZE(plane),
+			   ((intel_crtc->config.pipe_src_h - 1) << 16) |
+			   (intel_crtc->config.pipe_src_w - 1));
+		I915_WRITE(DSPPOS(plane), 0);
+	}
 
-	reg = DSPCNTR(plane);
-	dspcntr = I915_READ(reg);
-	/* Mask out pixel format bits in case we change it */
-	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_C8:
 		dspcntr |= DISPPLANE_8BPP;
@@ -2463,12 +2454,9 @@ static void i9xx_update_primary_plane(struct drm_crtc *crtc,
 		BUG();
 	}
 
-	if (INTEL_INFO(dev)->gen >= 4) {
-		if (obj->tiling_mode != I915_TILING_NONE)
-			dspcntr |= DISPPLANE_TILED;
-		else
-			dspcntr &= ~DISPPLANE_TILED;
-	}
+	if (INTEL_INFO(dev)->gen >= 4 &&
+	    obj->tiling_mode != I915_TILING_NONE)
+		dspcntr |= DISPPLANE_TILED;
 
 	if (IS_G4X(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
@@ -2512,12 +2500,22 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
 	int plane = intel_crtc->plane;
 	unsigned long linear_offset;
 	u32 dspcntr;
-	u32 reg;
+	u32 reg = DSPCNTR(plane);
+
+	if (!intel_crtc->primary_enabled) {
+		I915_WRITE(reg, 0);
+		I915_WRITE(DSPSURF(plane), 0);
+		POSTING_READ(reg);
+		return;
+	}
+
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	dspcntr |= DISPLAY_PLANE_ENABLE;
+
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
+		dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
 
-	reg = DSPCNTR(plane);
-	dspcntr = I915_READ(reg);
-	/* Mask out pixel format bits in case we change it */
-	dspcntr &= ~DISPPLANE_PIXFORMAT_MASK;
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_C8:
 		dspcntr |= DISPPLANE_8BPP;
@@ -2547,12 +2545,8 @@ static void ironlake_update_primary_plane(struct drm_crtc *crtc,
 
 	if (obj->tiling_mode != I915_TILING_NONE)
 		dspcntr |= DISPPLANE_TILED;
-	else
-		dspcntr &= ~DISPPLANE_TILED;
 
-	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
-		dspcntr &= ~DISPPLANE_TRICKLE_FEED_DISABLE;
-	else
+	if (!IS_HASWELL(dev) && !IS_BROADWELL(dev))
 		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
 
 	I915_WRITE(reg, dspcntr);
@@ -3911,14 +3905,14 @@ static void intel_crtc_dpms_overlay(struct intel_crtc *intel_crtc, bool enable)
 static void intel_crtc_enable_planes(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
+
+	assert_vblank_disabled(crtc);
 
 	drm_vblank_on(dev, pipe);
 
-	intel_enable_primary_hw_plane(dev_priv, plane, pipe);
+	intel_enable_primary_hw_plane(crtc->primary, crtc);
 	intel_enable_planes(crtc);
 	intel_crtc_update_cursor(crtc, true);
 	intel_crtc_dpms_overlay(intel_crtc, true);
@@ -3955,7 +3949,7 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc)
 	intel_crtc_dpms_overlay(intel_crtc, false);
 	intel_crtc_update_cursor(crtc, false);
 	intel_disable_planes(crtc);
-	intel_disable_primary_hw_plane(dev_priv, plane, pipe);
+	intel_disable_primary_hw_plane(crtc->primary, crtc);
 
 	/*
 	 * FIXME: Once we grow proper nuclear flip support out of this we need
@@ -3965,6 +3959,8 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc)
 	intel_frontbuffer_flip(dev, INTEL_FRONTBUFFER_ALL_MASK(pipe));
 
 	drm_vblank_off(dev, pipe);
+
+	assert_vblank_disabled(crtc);
 }
 
 static void ironlake_crtc_enable(struct drm_crtc *crtc)
@@ -3974,7 +3970,6 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	enum plane plane = intel_crtc->plane;
 
 	WARN_ON(!crtc->enabled);
 
@@ -3991,18 +3986,11 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 
 	if (intel_crtc->config.has_pch_encoder) {
 		intel_cpu_transcoder_set_m_n(intel_crtc,
-					     &intel_crtc->config.fdi_m_n);
+				     &intel_crtc->config.fdi_m_n, NULL);
 	}
 
 	ironlake_set_pipeconf(crtc);
 
-	/* Set up the display plane register */
-	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE);
-	POSTING_READ(DSPCNTR(plane));
-
-	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
-					       crtc->x, crtc->y);
-
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
@@ -4087,7 +4075,6 @@ static void haswell_crtc_enable(struct drm_crtc *crtc)
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	enum plane plane = intel_crtc->plane;
 
 	WARN_ON(!crtc->enabled);
 
@@ -4104,20 +4091,13 @@ static void haswell_crtc_enable(struct drm_crtc *crtc)
 
 	if (intel_crtc->config.has_pch_encoder) {
 		intel_cpu_transcoder_set_m_n(intel_crtc,
-					     &intel_crtc->config.fdi_m_n);
+				     &intel_crtc->config.fdi_m_n, NULL);
 	}
 
 	haswell_set_pipeconf(crtc);
 
 	intel_set_pipe_csc(crtc);
 
-	/* Set up the display plane register */
-	I915_WRITE(DSPCNTR(plane), DISPPLANE_GAMMA_ENABLE | DISPPLANE_PIPE_CSC_ENABLE);
-	POSTING_READ(DSPCNTR(plane));
-
-	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
-					       crtc->x, crtc->y);
-
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
@@ -4539,12 +4519,57 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk)
 	vlv_update_cdclk(dev);
 }
 
+static void cherryview_set_cdclk(struct drm_device *dev, int cdclk)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 val, cmd;
+
+	WARN_ON(dev_priv->display.get_display_clock_speed(dev) != dev_priv->vlv_cdclk_freq);
+
+	switch (cdclk) {
+	case 400000:
+		cmd = 3;
+		break;
+	case 333333:
+	case 320000:
+		cmd = 2;
+		break;
+	case 266667:
+		cmd = 1;
+		break;
+	case 200000:
+		cmd = 0;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+	val = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+	val &= ~DSPFREQGUAR_MASK_CHV;
+	val |= (cmd << DSPFREQGUAR_SHIFT_CHV);
+	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, val);
+	if (wait_for((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) &
+		      DSPFREQSTAT_MASK_CHV) == (cmd << DSPFREQSTAT_SHIFT_CHV),
+		     50)) {
+		DRM_ERROR("timed out waiting for CDclk change\n");
+	}
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	vlv_update_cdclk(dev);
+}
+
 static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv,
 				 int max_pixclk)
 {
 	int vco = valleyview_get_vco(dev_priv);
 	int freq_320 = (vco <<  1) % 320000 != 0 ? 333333 : 320000;
 
+	/* FIXME: Punit isn't quite ready yet */
+	if (IS_CHERRYVIEW(dev_priv->dev))
+		return 400000;
+
 	/*
 	 * Really only a few cases to deal with, as only 4 CDclks are supported:
 	 *   200MHz
@@ -4607,21 +4632,23 @@ static void valleyview_modeset_global_resources(struct drm_device *dev)
 	int max_pixclk = intel_mode_max_pixclk(dev_priv);
 	int req_cdclk = valleyview_calc_cdclk(dev_priv, max_pixclk);
 
-	if (req_cdclk != dev_priv->vlv_cdclk_freq)
-		valleyview_set_cdclk(dev, req_cdclk);
+	if (req_cdclk != dev_priv->vlv_cdclk_freq) {
+		if (IS_CHERRYVIEW(dev))
+			cherryview_set_cdclk(dev, req_cdclk);
+		else
+			valleyview_set_cdclk(dev, req_cdclk);
+	}
+
 	modeset_update_crtc_power_domains(dev);
 }
 
 static void valleyview_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
 	bool is_dsi;
-	u32 dspcntr;
 
 	WARN_ON(!crtc->enabled);
 
@@ -4630,33 +4657,20 @@ static void valleyview_crtc_enable(struct drm_crtc *crtc)
 
 	is_dsi = intel_pipe_has_type(crtc, INTEL_OUTPUT_DSI);
 
-	if (!is_dsi && !IS_CHERRYVIEW(dev))
-		vlv_prepare_pll(intel_crtc);
-
-	/* Set up the display plane register */
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
+	if (!is_dsi) {
+		if (IS_CHERRYVIEW(dev))
+			chv_prepare_pll(intel_crtc);
+		else
+			vlv_prepare_pll(intel_crtc);
+	}
 
 	if (intel_crtc->config.has_dp_encoder)
 		intel_dp_set_m_n(intel_crtc);
 
 	intel_set_pipe_timings(intel_crtc);
 
-	/* pipesrc and dspsize control the size that is scaled from,
-	 * which should always be the user's requested size.
-	 */
-	I915_WRITE(DSPSIZE(plane),
-		   ((intel_crtc->config.pipe_src_h - 1) << 16) |
-		   (intel_crtc->config.pipe_src_w - 1));
-	I915_WRITE(DSPPOS(plane), 0);
-
 	i9xx_set_pipeconf(intel_crtc);
 
-	I915_WRITE(DSPCNTR(plane), dspcntr);
-	POSTING_READ(DSPCNTR(plane));
-
-	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
-					       crtc->x, crtc->y);
-
 	intel_crtc->active = true;
 
 	intel_set_cpu_fifo_underrun_reporting(dev, pipe, true);
@@ -4704,12 +4718,9 @@ static void i9xx_set_pll_dividers(struct intel_crtc *crtc)
 static void i9xx_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	struct intel_encoder *encoder;
 	int pipe = intel_crtc->pipe;
-	int plane = intel_crtc->plane;
-	u32 dspcntr;
 
 	WARN_ON(!crtc->enabled);
 
@@ -4718,35 +4729,13 @@ static void i9xx_crtc_enable(struct drm_crtc *crtc)
 
 	i9xx_set_pll_dividers(intel_crtc);
 
-	/* Set up the display plane register */
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
-
-	if (pipe == 0)
-		dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
-	else
-		dspcntr |= DISPPLANE_SEL_PIPE_B;
-
 	if (intel_crtc->config.has_dp_encoder)
 		intel_dp_set_m_n(intel_crtc);
 
 	intel_set_pipe_timings(intel_crtc);
 
-	/* pipesrc and dspsize control the size that is scaled from,
-	 * which should always be the user's requested size.
-	 */
-	I915_WRITE(DSPSIZE(plane),
-		   ((intel_crtc->config.pipe_src_h - 1) << 16) |
-		   (intel_crtc->config.pipe_src_w - 1));
-	I915_WRITE(DSPPOS(plane), 0);
-
 	i9xx_set_pipeconf(intel_crtc);
 
-	I915_WRITE(DSPCNTR(plane), dspcntr);
-	POSTING_READ(DSPCNTR(plane));
-
-	dev_priv->display.update_primary_plane(crtc, crtc->primary->fb,
-					       crtc->x, crtc->y);
-
 	intel_crtc->active = true;
 
 	if (!IS_GEN2(dev))
@@ -5275,6 +5264,10 @@ static int valleyview_get_display_clock_speed(struct drm_device *dev)
 	u32 val;
 	int divider;
 
+	/* FIXME: Punit isn't quite ready yet */
+	if (IS_CHERRYVIEW(dev))
+		return 400000;
+
 	mutex_lock(&dev_priv->dpio_lock);
 	val = vlv_cck_read(dev_priv, CCK_DISPLAY_CLOCK_CONTROL);
 	mutex_unlock(&dev_priv->dpio_lock);
@@ -5519,7 +5512,8 @@ static void intel_pch_transcoder_set_m_n(struct intel_crtc *crtc,
 }
 
 static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc,
-					 struct intel_link_m_n *m_n)
+					 struct intel_link_m_n *m_n,
+					 struct intel_link_m_n *m2_n2)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -5531,6 +5525,18 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc,
 		I915_WRITE(PIPE_DATA_N1(transcoder), m_n->gmch_n);
 		I915_WRITE(PIPE_LINK_M1(transcoder), m_n->link_m);
 		I915_WRITE(PIPE_LINK_N1(transcoder), m_n->link_n);
+		/* M2_N2 registers to be set only for gen < 8 (M2_N2 available
+		 * for gen < 8) and if DRRS is supported (to make sure the
+		 * registers are not unnecessarily accessed).
+		 */
+		if (m2_n2 && INTEL_INFO(dev)->gen < 8 &&
+			crtc->config.has_drrs) {
+			I915_WRITE(PIPE_DATA_M2(transcoder),
+					TU_SIZE(m2_n2->tu) | m2_n2->gmch_m);
+			I915_WRITE(PIPE_DATA_N2(transcoder), m2_n2->gmch_n);
+			I915_WRITE(PIPE_LINK_M2(transcoder), m2_n2->link_m);
+			I915_WRITE(PIPE_LINK_N2(transcoder), m2_n2->link_n);
+		}
 	} else {
 		I915_WRITE(PIPE_DATA_M_G4X(pipe), TU_SIZE(m_n->tu) | m_n->gmch_m);
 		I915_WRITE(PIPE_DATA_N_G4X(pipe), m_n->gmch_n);
@@ -5539,12 +5545,13 @@ static void intel_cpu_transcoder_set_m_n(struct intel_crtc *crtc,
 	}
 }
 
-static void intel_dp_set_m_n(struct intel_crtc *crtc)
+void intel_dp_set_m_n(struct intel_crtc *crtc)
 {
 	if (crtc->config.has_pch_encoder)
 		intel_pch_transcoder_set_m_n(crtc, &crtc->config.dp_m_n);
 	else
-		intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n);
+		intel_cpu_transcoder_set_m_n(crtc, &crtc->config.dp_m_n,
+						   &crtc->config.dp_m2_n2);
 }
 
 static void vlv_update_pll(struct intel_crtc *crtc)
@@ -5662,6 +5669,18 @@ static void vlv_prepare_pll(struct intel_crtc *crtc)
 
 static void chv_update_pll(struct intel_crtc *crtc)
 {
+	crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV |
+		DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS |
+		DPLL_VCO_ENABLE;
+	if (crtc->pipe != PIPE_A)
+		crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
+
+	crtc->config.dpll_hw_state.dpll_md =
+		(crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
+}
+
+static void chv_prepare_pll(struct intel_crtc *crtc)
+{
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe = crtc->pipe;
@@ -5671,15 +5690,6 @@ static void chv_update_pll(struct intel_crtc *crtc)
 	u32 bestn, bestm1, bestm2, bestp1, bestp2, bestm2_frac;
 	int refclk;
 
-	crtc->config.dpll_hw_state.dpll = DPLL_SSC_REF_CLOCK_CHV |
-		DPLL_REFA_CLK_ENABLE_VLV | DPLL_VGA_MODE_DIS |
-		DPLL_VCO_ENABLE;
-	if (pipe != PIPE_A)
-		crtc->config.dpll_hw_state.dpll |= DPLL_INTEGRATED_CRI_CLK_VLV;
-
-	crtc->config.dpll_hw_state.dpll_md =
-		(crtc->config.pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
-
 	bestn = crtc->config.dpll.n;
 	bestm2_frac = crtc->config.dpll.m2 & 0x3fffff;
 	bestm1 = crtc->config.dpll.m1;
@@ -6235,7 +6245,7 @@ static void i9xx_get_plane_config(struct intel_crtc *crtc,
 	crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1;
 
 	val = I915_READ(DSPSTRIDE(pipe));
-	crtc->base.primary->fb->pitches[0] = val & 0xffffff80;
+	crtc->base.primary->fb->pitches[0] = val & 0xffffffc0;
 
 	aligned_height = intel_align_height(dev, crtc->base.primary->fb->height,
 					    plane_config->tiled);
@@ -6367,7 +6377,6 @@ static bool i9xx_get_pipe_config(struct intel_crtc *crtc,
 static void ironlake_init_pch_refclk(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *encoder;
 	u32 val, final;
 	bool has_lvds = false;
@@ -6377,8 +6386,7 @@ static void ironlake_init_pch_refclk(struct drm_device *dev)
 	bool can_ssc = false;
 
 	/* We need to take the global config into account */
-	list_for_each_entry(encoder, &mode_config->encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		switch (encoder->type) {
 		case INTEL_OUTPUT_LVDS:
 			has_panel = true;
@@ -6685,11 +6693,10 @@ static void lpt_disable_clkout_dp(struct drm_device *dev)
 
 static void lpt_init_pch_refclk(struct drm_device *dev)
 {
-	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *encoder;
 	bool has_vga = false;
 
-	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		switch (encoder->type) {
 		case INTEL_OUTPUT_ANALOG:
 			has_vga = true;
@@ -7145,7 +7152,8 @@ static void intel_pch_transcoder_get_m_n(struct intel_crtc *crtc,
 
 static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc,
 					 enum transcoder transcoder,
-					 struct intel_link_m_n *m_n)
+					 struct intel_link_m_n *m_n,
+					 struct intel_link_m_n *m2_n2)
 {
 	struct drm_device *dev = crtc->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -7159,6 +7167,20 @@ static void intel_cpu_transcoder_get_m_n(struct intel_crtc *crtc,
 		m_n->gmch_n = I915_READ(PIPE_DATA_N1(transcoder));
 		m_n->tu = ((I915_READ(PIPE_DATA_M1(transcoder))
 			    & TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1;
+		/* Read M2_N2 registers only for gen < 8 (M2_N2 available for
+		 * gen < 8) and if DRRS is supported (to make sure the
+		 * registers are not unnecessarily read).
+		 */
+		if (m2_n2 && INTEL_INFO(dev)->gen < 8 &&
+			crtc->config.has_drrs) {
+			m2_n2->link_m = I915_READ(PIPE_LINK_M2(transcoder));
+			m2_n2->link_n =	I915_READ(PIPE_LINK_N2(transcoder));
+			m2_n2->gmch_m =	I915_READ(PIPE_DATA_M2(transcoder))
+					& ~TU_SIZE_MASK;
+			m2_n2->gmch_n =	I915_READ(PIPE_DATA_N2(transcoder));
+			m2_n2->tu = ((I915_READ(PIPE_DATA_M2(transcoder))
+					& TU_SIZE_MASK) >> TU_SIZE_SHIFT) + 1;
+		}
 	} else {
 		m_n->link_m = I915_READ(PIPE_LINK_M_G4X(pipe));
 		m_n->link_n = I915_READ(PIPE_LINK_N_G4X(pipe));
@@ -7177,14 +7199,15 @@ void intel_dp_get_m_n(struct intel_crtc *crtc,
 		intel_pch_transcoder_get_m_n(crtc, &pipe_config->dp_m_n);
 	else
 		intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder,
-					     &pipe_config->dp_m_n);
+					     &pipe_config->dp_m_n,
+					     &pipe_config->dp_m2_n2);
 }
 
 static void ironlake_get_fdi_m_n_config(struct intel_crtc *crtc,
 					struct intel_crtc_config *pipe_config)
 {
 	intel_cpu_transcoder_get_m_n(crtc, pipe_config->cpu_transcoder,
-				     &pipe_config->fdi_m_n);
+				     &pipe_config->fdi_m_n, NULL);
 }
 
 static void ironlake_get_pfit_config(struct intel_crtc *crtc,
@@ -7255,7 +7278,7 @@ static void ironlake_get_plane_config(struct intel_crtc *crtc,
 	crtc->base.primary->fb->height = ((val >> 0) & 0xfff) + 1;
 
 	val = I915_READ(DSPSTRIDE(pipe));
-	crtc->base.primary->fb->pitches[0] = val & 0xffffff80;
+	crtc->base.primary->fb->pitches[0] = val & 0xffffffc0;
 
 	aligned_height = intel_align_height(dev, crtc->base.primary->fb->height,
 					    plane_config->tiled);
@@ -7615,6 +7638,22 @@ static int haswell_crtc_mode_set(struct drm_crtc *crtc,
 	return 0;
 }
 
+static void haswell_get_ddi_pll(struct drm_i915_private *dev_priv,
+				enum port port,
+				struct intel_crtc_config *pipe_config)
+{
+	pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
+
+	switch (pipe_config->ddi_pll_sel) {
+	case PORT_CLK_SEL_WRPLL1:
+		pipe_config->shared_dpll = DPLL_ID_WRPLL1;
+		break;
+	case PORT_CLK_SEL_WRPLL2:
+		pipe_config->shared_dpll = DPLL_ID_WRPLL2;
+		break;
+	}
+}
+
 static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
 				       struct intel_crtc_config *pipe_config)
 {
@@ -7628,16 +7667,7 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc,
 
 	port = (tmp & TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT;
 
-	pipe_config->ddi_pll_sel = I915_READ(PORT_CLK_SEL(port));
-
-	switch (pipe_config->ddi_pll_sel) {
-	case PORT_CLK_SEL_WRPLL1:
-		pipe_config->shared_dpll = DPLL_ID_WRPLL1;
-		break;
-	case PORT_CLK_SEL_WRPLL2:
-		pipe_config->shared_dpll = DPLL_ID_WRPLL2;
-		break;
-	}
+	haswell_get_ddi_pll(dev_priv, port, pipe_config);
 
 	if (pipe_config->shared_dpll >= 0) {
 		pll = &dev_priv->shared_dplls[pipe_config->shared_dpll];
@@ -8037,74 +8067,62 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base)
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t cntl;
+	uint32_t cntl = 0, size = 0;
 
-	if (base != intel_crtc->cursor_base) {
-		/* On these chipsets we can only modify the base whilst
-		 * the cursor is disabled.
-		 */
-		if (intel_crtc->cursor_cntl) {
-			I915_WRITE(_CURACNTR, 0);
-			POSTING_READ(_CURACNTR);
-			intel_crtc->cursor_cntl = 0;
+	if (base) {
+		unsigned int width = intel_crtc->cursor_width;
+		unsigned int height = intel_crtc->cursor_height;
+		unsigned int stride = roundup_pow_of_two(width) * 4;
+
+		switch (stride) {
+		default:
+			WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
+				  width, stride);
+			stride = 256;
+			/* fallthrough */
+		case 256:
+		case 512:
+		case 1024:
+		case 2048:
+			break;
 		}
 
-		I915_WRITE(_CURABASE, base);
-		POSTING_READ(_CURABASE);
+		cntl |= CURSOR_ENABLE |
+			CURSOR_GAMMA_ENABLE |
+			CURSOR_FORMAT_ARGB |
+			CURSOR_STRIDE(stride);
+
+		size = (height << 12) | width;
 	}
 
-	/* XXX width must be 64, stride 256 => 0x00 << 28 */
-	cntl = 0;
-	if (base)
-		cntl = (CURSOR_ENABLE |
-			CURSOR_GAMMA_ENABLE |
-			CURSOR_FORMAT_ARGB);
-	if (intel_crtc->cursor_cntl != cntl) {
-		I915_WRITE(_CURACNTR, cntl);
+	if (intel_crtc->cursor_cntl != 0 &&
+	    (intel_crtc->cursor_base != base ||
+	     intel_crtc->cursor_size != size ||
+	     intel_crtc->cursor_cntl != cntl)) {
+		/* On these chipsets we can only modify the base/size/stride
+		 * whilst the cursor is disabled.
+		 */
+		I915_WRITE(_CURACNTR, 0);
 		POSTING_READ(_CURACNTR);
-		intel_crtc->cursor_cntl = cntl;
+		intel_crtc->cursor_cntl = 0;
 	}
-}
 
-static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	int pipe = intel_crtc->pipe;
-	uint32_t cntl;
+	if (intel_crtc->cursor_base != base)
+		I915_WRITE(_CURABASE, base);
 
-	cntl = 0;
-	if (base) {
-		cntl = MCURSOR_GAMMA_ENABLE;
-		switch (intel_crtc->cursor_width) {
-			case 64:
-				cntl |= CURSOR_MODE_64_ARGB_AX;
-				break;
-			case 128:
-				cntl |= CURSOR_MODE_128_ARGB_AX;
-				break;
-			case 256:
-				cntl |= CURSOR_MODE_256_ARGB_AX;
-				break;
-			default:
-				WARN_ON(1);
-				return;
-		}
-		cntl |= pipe << 28; /* Connect to correct pipe */
+	if (intel_crtc->cursor_size != size) {
+		I915_WRITE(CURSIZE, size);
+		intel_crtc->cursor_size = size;
 	}
+
 	if (intel_crtc->cursor_cntl != cntl) {
-		I915_WRITE(CURCNTR(pipe), cntl);
-		POSTING_READ(CURCNTR(pipe));
+		I915_WRITE(_CURACNTR, cntl);
+		POSTING_READ(_CURACNTR);
 		intel_crtc->cursor_cntl = cntl;
 	}
-
-	/* and commit changes on next vblank */
-	I915_WRITE(CURBASE(pipe), base);
-	POSTING_READ(CURBASE(pipe));
 }
 
-static void ivb_update_cursor(struct drm_crtc *crtc, u32 base)
+static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -8129,6 +8147,7 @@ static void ivb_update_cursor(struct drm_crtc *crtc, u32 base)
 				WARN_ON(1);
 				return;
 		}
+		cntl |= pipe << 28; /* Connect to correct pipe */
 	}
 	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		cntl |= CURSOR_PIPE_CSC_ENABLE;
@@ -8188,15 +8207,50 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc,
 
 	I915_WRITE(CURPOS(pipe), pos);
 
-	if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev))
-		ivb_update_cursor(crtc, base);
-	else if (IS_845G(dev) || IS_I865G(dev))
+	if (IS_845G(dev) || IS_I865G(dev))
 		i845_update_cursor(crtc, base);
 	else
 		i9xx_update_cursor(crtc, base);
 	intel_crtc->cursor_base = base;
 }
 
+static bool cursor_size_ok(struct drm_device *dev,
+			   uint32_t width, uint32_t height)
+{
+	if (width == 0 || height == 0)
+		return false;
+
+	/*
+	 * 845g/865g are special in that they are only limited by
+	 * the width of their cursors, the height is arbitrary up to
+	 * the precision of the register. Everything else requires
+	 * square cursors, limited to a few power-of-two sizes.
+	 */
+	if (IS_845G(dev) || IS_I865G(dev)) {
+		if ((width & 63) != 0)
+			return false;
+
+		if (width > (IS_845G(dev) ? 64 : 512))
+			return false;
+
+		if (height > 1023)
+			return false;
+	} else {
+		switch (width | height) {
+		case 256:
+		case 128:
+			if (IS_GEN2(dev))
+				return false;
+		case 64:
+			break;
+		default:
+			return false;
+		}
+	}
+
+	return true;
+}
+
 /*
  * intel_crtc_cursor_set_obj - Set cursor to specified GEM object
  *
@@ -8212,7 +8266,7 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	enum pipe pipe = intel_crtc->pipe;
-	unsigned old_width;
+	unsigned old_width, stride;
 	uint32_t addr;
 	int ret;
 
@@ -8226,14 +8280,13 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 	}
 
 	/* Check for which cursor types we support */
-	if (!((width == 64 && height == 64) ||
-			(width == 128 && height == 128 && !IS_GEN2(dev)) ||
-			(width == 256 && height == 256 && !IS_GEN2(dev)))) {
+	if (!cursor_size_ok(dev, width, height)) {
 		DRM_DEBUG("Cursor dimension not supported\n");
 		return -EINVAL;
 	}
 
-	if (obj->base.size < width * height * 4) {
+	stride = roundup_pow_of_two(width) * 4;
+	if (obj->base.size < stride * height) {
 		DRM_DEBUG_KMS("buffer is too small\n");
 		ret = -ENOMEM;
 		goto fail;
@@ -8295,9 +8348,6 @@ static int intel_crtc_cursor_set_obj(struct drm_crtc *crtc,
 		addr = obj->phys_handle->busaddr;
 	}
 
-	if (IS_GEN2(dev))
-		I915_WRITE(CURSIZE, (height << 12) | width);
-
  finish:
 	if (intel_crtc->cursor_bo) {
 		if (!INTEL_INFO(dev)->cursor_needs_physical)
@@ -9532,6 +9582,8 @@ static bool use_mmio_flip(struct intel_engine_cs *ring,
 		return false;
 	else if (i915.use_mmio_flip > 0)
 		return true;
+	else if (i915.enable_execlists)
+		return true;
 	else
 		return ring != obj->ring;
 }
@@ -9847,8 +9899,7 @@ static void intel_modeset_update_staged_output_state(struct drm_device *dev)
 			to_intel_encoder(connector->base.encoder);
 	}
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		encoder->new_crtc =
 			to_intel_crtc(encoder->base.crtc);
 	}
@@ -9879,8 +9930,7 @@ static void intel_modeset_commit_output_state(struct drm_device *dev)
 		connector->base.encoder = &connector->new_encoder->base;
 	}
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		encoder->base.crtc = &encoder->new_crtc->base;
 	}
 
@@ -10007,6 +10057,15 @@ static void intel_dump_pipe_config(struct intel_crtc *crtc,
 		      pipe_config->dp_m_n.gmch_m, pipe_config->dp_m_n.gmch_n,
 		      pipe_config->dp_m_n.link_m, pipe_config->dp_m_n.link_n,
 		      pipe_config->dp_m_n.tu);
+
+	DRM_DEBUG_KMS("dp: %i, gmch_m2: %u, gmch_n2: %u, link_m2: %u, link_n2: %u, tu2: %u\n",
+		      pipe_config->has_dp_encoder,
+		      pipe_config->dp_m2_n2.gmch_m,
+		      pipe_config->dp_m2_n2.gmch_n,
+		      pipe_config->dp_m2_n2.link_m,
+		      pipe_config->dp_m2_n2.link_n,
+		      pipe_config->dp_m2_n2.tu);
+
 	DRM_DEBUG_KMS("requested mode:\n");
 	drm_mode_debug_printmodeline(&pipe_config->requested_mode);
 	DRM_DEBUG_KMS("adjusted mode:\n");
@@ -10041,8 +10100,7 @@ static bool check_single_encoder_cloning(struct intel_crtc *crtc,
 	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *source_encoder;
 
-	list_for_each_entry(source_encoder,
-			    &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, source_encoder) {
 		if (source_encoder->new_crtc != crtc)
 			continue;
 
@@ -10058,8 +10116,7 @@ static bool check_encoder_cloning(struct intel_crtc *crtc)
 	struct drm_device *dev = crtc->base.dev;
 	struct intel_encoder *encoder;
 
-	list_for_each_entry(encoder,
-			    &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (encoder->new_crtc != crtc)
 			continue;
 
@@ -10143,8 +10200,7 @@ encoder_retry:
 	 * adjust it according to limitations or connector properties, and also
 	 * a chance to reject the mode entirely.
 	 */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 
 		if (&encoder->new_crtc->base != crtc)
 			continue;
@@ -10222,8 +10278,7 @@ intel_modeset_affected_pipes(struct drm_crtc *crtc, unsigned *modeset_pipes,
 				1 << connector->new_encoder->new_crtc->pipe;
 	}
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (encoder->base.crtc == &encoder->new_crtc->base)
 			continue;
 
@@ -10297,8 +10352,7 @@ intel_modeset_update_state(struct drm_device *dev, unsigned prepare_pipes)
 	struct intel_crtc *intel_crtc;
 	struct drm_connector *connector;
 
-	list_for_each_entry(intel_encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, intel_encoder) {
 		if (!intel_encoder->base.crtc)
 			continue;
 
@@ -10387,6 +10441,22 @@ intel_pipe_config_compare(struct drm_device *dev,
 		return false; \
 	}
 
+/* This is required for BDW+ where there is only one set of registers for
+ * switching between high and low RR.
+ * This macro can be used whenever a comparison has to be made between one
+ * hw state and multiple sw state variables.
+ */
+#define PIPE_CONF_CHECK_I_ALT(name, alt_name) \
+	if ((current_config->name != pipe_config->name) && \
+		(current_config->alt_name != pipe_config->name)) { \
+			DRM_ERROR("mismatch in " #name " " \
+				  "(expected %i or %i, found %i)\n", \
+				  current_config->name, \
+				  current_config->alt_name, \
+				  pipe_config->name); \
+			return false; \
+	}
+
 #define PIPE_CONF_CHECK_FLAGS(name, mask)	\
 	if ((current_config->name ^ pipe_config->name) & (mask)) { \
 		DRM_ERROR("mismatch in " #name "(" #mask ") "	   \
@@ -10419,11 +10489,28 @@ intel_pipe_config_compare(struct drm_device *dev,
 	PIPE_CONF_CHECK_I(fdi_m_n.tu);
 
 	PIPE_CONF_CHECK_I(has_dp_encoder);
-	PIPE_CONF_CHECK_I(dp_m_n.gmch_m);
-	PIPE_CONF_CHECK_I(dp_m_n.gmch_n);
-	PIPE_CONF_CHECK_I(dp_m_n.link_m);
-	PIPE_CONF_CHECK_I(dp_m_n.link_n);
-	PIPE_CONF_CHECK_I(dp_m_n.tu);
+
+	if (INTEL_INFO(dev)->gen < 8) {
+		PIPE_CONF_CHECK_I(dp_m_n.gmch_m);
+		PIPE_CONF_CHECK_I(dp_m_n.gmch_n);
+		PIPE_CONF_CHECK_I(dp_m_n.link_m);
+		PIPE_CONF_CHECK_I(dp_m_n.link_n);
+		PIPE_CONF_CHECK_I(dp_m_n.tu);
+
+		if (current_config->has_drrs) {
+			PIPE_CONF_CHECK_I(dp_m2_n2.gmch_m);
+			PIPE_CONF_CHECK_I(dp_m2_n2.gmch_n);
+			PIPE_CONF_CHECK_I(dp_m2_n2.link_m);
+			PIPE_CONF_CHECK_I(dp_m2_n2.link_n);
+			PIPE_CONF_CHECK_I(dp_m2_n2.tu);
+		}
+	} else {
+		PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_m, dp_m2_n2.gmch_m);
+		PIPE_CONF_CHECK_I_ALT(dp_m_n.gmch_n, dp_m2_n2.gmch_n);
+		PIPE_CONF_CHECK_I_ALT(dp_m_n.link_m, dp_m2_n2.link_m);
+		PIPE_CONF_CHECK_I_ALT(dp_m_n.link_n, dp_m2_n2.link_n);
+		PIPE_CONF_CHECK_I_ALT(dp_m_n.tu, dp_m2_n2.tu);
+	}
 
 	PIPE_CONF_CHECK_I(adjusted_mode.crtc_hdisplay);
 	PIPE_CONF_CHECK_I(adjusted_mode.crtc_htotal);
@@ -10509,6 +10596,7 @@ intel_pipe_config_compare(struct drm_device *dev,
 
 #undef PIPE_CONF_CHECK_X
 #undef PIPE_CONF_CHECK_I
+#undef PIPE_CONF_CHECK_I_ALT
 #undef PIPE_CONF_CHECK_FLAGS
 #undef PIPE_CONF_CHECK_CLOCK_FUZZY
 #undef PIPE_CONF_QUIRK
@@ -10538,8 +10626,7 @@ check_encoder_state(struct drm_device *dev)
 	struct intel_encoder *encoder;
 	struct intel_connector *connector;
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		bool enabled = false;
 		bool active = false;
 		enum pipe pipe, tracked_pipe;
@@ -10618,8 +10705,7 @@ check_crtc_state(struct drm_device *dev)
 		WARN(crtc->active && !crtc->base.enabled,
 		     "active crtc, but not enabled in sw tracking\n");
 
-		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-				    base.head) {
+		for_each_intel_encoder(dev, encoder) {
 			if (encoder->base.crtc != &crtc->base)
 				continue;
 			enabled = true;
@@ -10641,8 +10727,7 @@ check_crtc_state(struct drm_device *dev)
 		if (crtc->pipe == PIPE_A && dev_priv->quirks & QUIRK_PIPEA_FORCE)
 			active = crtc->active;
 
-		list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-				    base.head) {
+		for_each_intel_encoder(dev, encoder) {
 			enum pipe pipe;
 			if (encoder->base.crtc != &crtc->base)
 				continue;
@@ -11010,7 +11095,7 @@ static void intel_set_config_restore_state(struct drm_device *dev,
 	}
 
 	count = 0;
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		encoder->new_crtc =
 			to_intel_crtc(config->save_encoder_crtcs[count++]);
 	}
@@ -11169,8 +11254,7 @@ intel_modeset_stage_output_state(struct drm_device *dev,
 	}
 
 	/* Check for any encoders that needs to be disabled. */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		int num_connectors = 0;
 		list_for_each_entry(connector,
 				    &dev->mode_config.connector_list,
@@ -11203,9 +11287,7 @@ intel_modeset_stage_output_state(struct drm_device *dev,
 	for_each_intel_crtc(dev, crtc) {
 		crtc->new_enabled = false;
 
-		list_for_each_entry(encoder,
-				    &dev->mode_config.encoder_list,
-				    base.head) {
+		for_each_intel_encoder(dev, encoder) {
 			if (encoder->new_crtc == crtc) {
 				crtc->new_enabled = true;
 				break;
@@ -11242,7 +11324,7 @@ static void disable_crtc_nofb(struct intel_crtc *crtc)
 			connector->new_encoder = NULL;
 	}
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (encoder->new_crtc == crtc)
 			encoder->new_crtc = NULL;
 	}
@@ -11305,7 +11387,6 @@ static int intel_crtc_set_config(struct drm_mode_set *set)
 		ret = intel_set_mode(set->crtc, set->mode,
 				     set->x, set->y, set->fb);
 	} else if (config->fb_changed) {
-		struct drm_i915_private *dev_priv = dev->dev_private;
 		struct intel_crtc *intel_crtc = to_intel_crtc(set->crtc);
 
 		intel_crtc_wait_for_pending_flips(set->crtc);
@@ -11319,8 +11400,7 @@ static int intel_crtc_set_config(struct drm_mode_set *set)
 		 */
 		if (!intel_crtc->primary_enabled && ret == 0) {
 			WARN_ON(!intel_crtc->active);
-			intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane,
-						      intel_crtc->pipe);
+			intel_enable_primary_hw_plane(set->crtc->primary, set->crtc);
 		}
 
 		/*
@@ -11473,8 +11553,6 @@ static int
 intel_primary_plane_disable(struct drm_plane *plane)
 {
 	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct intel_crtc *intel_crtc;
 
 	if (!plane->fb)
@@ -11497,8 +11575,8 @@ intel_primary_plane_disable(struct drm_plane *plane)
 		goto disable_unpin;
 
 	intel_crtc_wait_for_pending_flips(plane->crtc);
-	intel_disable_primary_hw_plane(dev_priv, intel_plane->plane,
-				       intel_plane->pipe);
+	intel_disable_primary_hw_plane(plane, plane->crtc);
+
 disable_unpin:
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_track_fb(intel_fb_obj(plane->fb), NULL,
@@ -11518,9 +11596,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc,
 			     uint32_t src_w, uint32_t src_h)
 {
 	struct drm_device *dev = crtc->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->fb);
 	struct drm_rect dest = {
@@ -11607,9 +11683,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc,
 				  INTEL_FRONTBUFFER_PRIMARY(intel_crtc->pipe));
 
 		if (intel_crtc->primary_enabled)
-			intel_disable_primary_hw_plane(dev_priv,
-						       intel_plane->plane,
-						       intel_plane->pipe);
+			intel_disable_primary_hw_plane(plane, crtc);
 
 
 		if (plane->fb != fb)
@@ -11626,8 +11700,7 @@ intel_primary_plane_setplane(struct drm_plane *plane, struct drm_crtc *crtc,
 		return ret;
 
 	if (!intel_crtc->primary_enabled)
-		intel_enable_primary_hw_plane(dev_priv, intel_crtc->plane,
-					      intel_crtc->pipe);
+		intel_enable_primary_hw_plane(plane, crtc);
 
 	return 0;
 }
@@ -11736,6 +11809,10 @@ intel_cursor_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
 		return intel_crtc_cursor_set_obj(crtc, obj, crtc_w, crtc_h);
 	} else {
 		intel_crtc_update_cursor(crtc, visible);
+
+		intel_frontbuffer_flip(crtc->dev,
+				       INTEL_FRONTBUFFER_CURSOR(intel_crtc->pipe));
+
 		return 0;
 	}
 }
@@ -11812,8 +11889,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 
 	intel_crtc->cursor_base = ~0;
 	intel_crtc->cursor_cntl = ~0;
-
-	init_waitqueue_head(&intel_crtc->vbl_wait);
+	intel_crtc->cursor_size = ~0;
 
 	BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) ||
 	       dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL);
@@ -11876,8 +11952,7 @@ static int intel_encoder_clones(struct intel_encoder *encoder)
 	int index_mask = 0;
 	int entry = 0;
 
-	list_for_each_entry(source_encoder,
-			    &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, source_encoder) {
 		if (encoders_cloneable(encoder, source_encoder))
 			index_mask |= (1 << entry);
 
@@ -12066,7 +12141,7 @@ static void intel_setup_outputs(struct drm_device *dev)
 
 	intel_edp_psr_init(dev);
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		encoder->base.possible_crtcs = encoder->crtc_mask;
 		encoder->base.possible_clones =
 			intel_encoder_clones(encoder);
@@ -12332,29 +12407,27 @@ static void intel_init_display(struct drm_device *dev)
 		dev_priv->display.get_display_clock_speed =
 			i830_get_display_clock_speed;
 
-	if (HAS_PCH_SPLIT(dev)) {
-		if (IS_GEN5(dev)) {
-			dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
-			dev_priv->display.write_eld = ironlake_write_eld;
-		} else if (IS_GEN6(dev)) {
-			dev_priv->display.fdi_link_train = gen6_fdi_link_train;
-			dev_priv->display.write_eld = ironlake_write_eld;
-			dev_priv->display.modeset_global_resources =
-				snb_modeset_global_resources;
-		} else if (IS_IVYBRIDGE(dev)) {
-			/* FIXME: detect B0+ stepping and use auto training */
-			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
-			dev_priv->display.write_eld = ironlake_write_eld;
-			dev_priv->display.modeset_global_resources =
-				ivb_modeset_global_resources;
-		} else if (IS_HASWELL(dev) || IS_GEN8(dev)) {
-			dev_priv->display.fdi_link_train = hsw_fdi_link_train;
-			dev_priv->display.write_eld = haswell_write_eld;
-			dev_priv->display.modeset_global_resources =
-				haswell_modeset_global_resources;
-		}
-	} else if (IS_G4X(dev)) {
+	if (IS_G4X(dev)) {
 		dev_priv->display.write_eld = g4x_write_eld;
+	} else if (IS_GEN5(dev)) {
+		dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
+		dev_priv->display.write_eld = ironlake_write_eld;
+	} else if (IS_GEN6(dev)) {
+		dev_priv->display.fdi_link_train = gen6_fdi_link_train;
+		dev_priv->display.write_eld = ironlake_write_eld;
+		dev_priv->display.modeset_global_resources =
+			snb_modeset_global_resources;
+	} else if (IS_IVYBRIDGE(dev)) {
+		/* FIXME: detect B0+ stepping and use auto training */
+		dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
+		dev_priv->display.write_eld = ironlake_write_eld;
+		dev_priv->display.modeset_global_resources =
+			ivb_modeset_global_resources;
+	} else if (IS_HASWELL(dev) || IS_GEN8(dev)) {
+		dev_priv->display.fdi_link_train = hsw_fdi_link_train;
+		dev_priv->display.write_eld = haswell_write_eld;
+		dev_priv->display.modeset_global_resources =
+			haswell_modeset_global_resources;
 	} else if (IS_VALLEYVIEW(dev)) {
 		dev_priv->display.modeset_global_resources =
 			valleyview_modeset_global_resources;
@@ -12563,8 +12636,6 @@ void intel_modeset_init_hw(struct drm_device *dev)
 
 	intel_init_clock_gating(dev);
 
-	intel_reset_dpio(dev);
-
 	intel_enable_gt_powersave(dev);
 }
 
@@ -12610,7 +12681,10 @@ void intel_modeset_init(struct drm_device *dev)
 		dev->mode_config.max_height = 8192;
 	}
 
-	if (IS_GEN2(dev)) {
+	if (IS_845G(dev) || IS_I865G(dev)) {
+		dev->mode_config.cursor_width = IS_845G(dev) ? 64 : 512;
+		dev->mode_config.cursor_height = 1023;
+	} else if (IS_GEN2(dev)) {
 		dev->mode_config.cursor_width = GEN2_CURSOR_WIDTH;
 		dev->mode_config.cursor_height = GEN2_CURSOR_HEIGHT;
 	} else {
@@ -12635,7 +12709,6 @@ void intel_modeset_init(struct drm_device *dev)
 	}
 
 	intel_init_dpio(dev);
-	intel_reset_dpio(dev);
 
 	intel_shared_dpll_init(dev);
 
@@ -12730,9 +12803,10 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 	I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
 
 	/* restore vblank interrupts to correct state */
-	if (crtc->active)
+	if (crtc->active) {
+		update_scanline_offset(crtc);
 		drm_vblank_on(dev, crtc->pipe);
-	else
+	} else
 		drm_vblank_off(dev, crtc->pipe);
 
 	/* We need to sanitize the plane -> pipe mapping first because this will
@@ -12831,8 +12905,6 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc)
 		 */
 		crtc->cpu_fifo_underrun_disabled = true;
 		crtc->pch_fifo_underrun_disabled = true;
-
-		update_scanline_offset(crtc);
 	}
 }
 
@@ -12964,8 +13036,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 			intel_display_power_get(dev_priv, POWER_DOMAIN_PLLS);
 	}
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		pipe = 0;
 
 		if (encoder->get_hw_state(encoder, &pipe)) {
@@ -13029,8 +13100,7 @@ void intel_modeset_setup_hw_state(struct drm_device *dev,
 	}
 
 	/* HW state is read out, now we need to sanitize this mess. */
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		intel_sanitize_encoder(encoder);
 	}
 
@@ -13397,3 +13467,25 @@ intel_display_print_error_state(struct drm_i915_error_state_buf *m,
 		err_printf(m, "  VSYNC: %08x\n", error->transcoder[i].vsync);
 	}
 }
+
+void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct intel_crtc *crtc;
+
+	for_each_intel_crtc(dev, crtc) {
+		struct intel_unpin_work *work;
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&dev->event_lock, irqflags);
+
+		work = crtc->unpin_work;
+
+		if (work && work->event &&
+		    work->event->base.file_priv == file) {
+			kfree(work->event);
+			work->event = NULL;
+		}
+
+		spin_unlock_irqrestore(&dev->event_lock, irqflags);
+	}
+}
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 81d7681faa63..a12a4d3363fd 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -828,20 +828,6 @@ intel_dp_set_clock(struct intel_encoder *encoder,
 	}
 }
 
-static void
-intel_dp_set_m2_n2(struct intel_crtc *crtc, struct intel_link_m_n *m_n)
-{
-	struct drm_device *dev = crtc->base.dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	enum transcoder transcoder = crtc->config.cpu_transcoder;
-
-	I915_WRITE(PIPE_DATA_M2(transcoder),
-		TU_SIZE(m_n->tu) | m_n->gmch_m);
-	I915_WRITE(PIPE_DATA_N2(transcoder), m_n->gmch_n);
-	I915_WRITE(PIPE_LINK_M2(transcoder), m_n->link_m);
-	I915_WRITE(PIPE_LINK_N2(transcoder), m_n->link_n);
-}
-
 bool
 intel_dp_compute_config(struct intel_encoder *encoder,
 			struct intel_crtc_config *pipe_config)
@@ -867,6 +853,7 @@ intel_dp_compute_config(struct intel_encoder *encoder,
 		pipe_config->has_pch_encoder = true;
 
 	pipe_config->has_dp_encoder = true;
+	pipe_config->has_drrs = false;
 	pipe_config->has_audio = intel_dp->has_audio;
 
 	if (is_edp(intel_dp) && intel_connector->panel.fixed_mode) {
@@ -970,13 +957,14 @@ found:
 
 	if (intel_connector->panel.downclock_mode != NULL &&
 		intel_dp->drrs_state.type == SEAMLESS_DRRS_SUPPORT) {
+			pipe_config->has_drrs = true;
 			intel_link_compute_m_n(bpp, lane_count,
 				intel_connector->panel.downclock_mode->clock,
 				pipe_config->port_clock,
 				&pipe_config->dp_m2_n2);
 	}
 
-	if (HAS_DDI(dev))
+	if (IS_HASWELL(dev) || IS_BROADWELL(dev))
 		hsw_dp_set_ddi_pll_sel(pipe_config, intel_dp->link_bw);
 	else
 		intel_dp_set_clock(encoder, pipe_config, intel_dp->link_bw);
@@ -2293,6 +2281,8 @@ static void chv_dp_pre_pll_enable(struct intel_encoder *encoder)
 	enum pipe pipe = intel_crtc->pipe;
 	u32 val;
 
+	intel_dp_prepare(encoder);
+
 	mutex_lock(&dev_priv->dpio_lock);
 
 	/* program left/right clock distribution */
@@ -2659,8 +2649,8 @@ static uint32_t intel_chv_signal_levels(struct intel_dp *intel_dp)
 	/* Program swing margin */
 	for (i = 0; i < 4; i++) {
 		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
-		val &= ~DPIO_SWING_MARGIN_MASK;
-		val |= margin_reg_value << DPIO_SWING_MARGIN_SHIFT;
+		val &= ~DPIO_SWING_MARGIN000_MASK;
+		val |= margin_reg_value << DPIO_SWING_MARGIN000_SHIFT;
 		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
 	}
 
@@ -2971,7 +2961,10 @@ intel_dp_set_link_train(struct intel_dp *intel_dp,
 		}
 
 	} else {
-		*DP &= ~DP_LINK_TRAIN_MASK;
+		if (IS_CHERRYVIEW(dev))
+			*DP &= ~DP_LINK_TRAIN_MASK_CHV;
+		else
+			*DP &= ~DP_LINK_TRAIN_MASK;
 
 		switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) {
 		case DP_TRAINING_PATTERN_DISABLE:
@@ -2984,8 +2977,12 @@ intel_dp_set_link_train(struct intel_dp *intel_dp,
 			*DP |= DP_LINK_TRAIN_PAT_2;
 			break;
 		case DP_TRAINING_PATTERN_3:
-			DRM_ERROR("DP training pattern 3 not supported\n");
-			*DP |= DP_LINK_TRAIN_PAT_2;
+			if (IS_CHERRYVIEW(dev)) {
+				*DP |= DP_LINK_TRAIN_PAT_3_CHV;
+			} else {
+				DRM_ERROR("DP training pattern 3 not supported\n");
+				*DP |= DP_LINK_TRAIN_PAT_2;
+			}
 			break;
 		}
 	}
@@ -3272,7 +3269,10 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 		DP &= ~DP_LINK_TRAIN_MASK_CPT;
 		I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
 	} else {
-		DP &= ~DP_LINK_TRAIN_MASK;
+		if (IS_CHERRYVIEW(dev))
+			DP &= ~DP_LINK_TRAIN_MASK_CHV;
+		else
+			DP &= ~DP_LINK_TRAIN_MASK;
 		I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE);
 	}
 	POSTING_READ(intel_dp->output_reg);
@@ -4072,7 +4072,8 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	if (intel_dig_port->base.type != INTEL_OUTPUT_EDP)
 		intel_dig_port->base.type = INTEL_OUTPUT_DISPLAYPORT;
 
-	DRM_DEBUG_KMS("got hpd irq on port %d - %s\n", intel_dig_port->port,
+	DRM_DEBUG_KMS("got hpd irq on port %c - %s\n",
+		      port_name(intel_dig_port->port),
 		      long_hpd ? "long" : "short");
 
 	power_domain = intel_display_port_power_domain(intel_encoder);
@@ -4434,7 +4435,7 @@ void intel_dp_set_drrs_state(struct drm_device *dev, int refresh_rate)
 		val = I915_READ(reg);
 		if (index > DRRS_HIGH_RR) {
 			val |= PIPECONF_EDP_RR_MODE_SWITCH;
-			intel_dp_set_m2_n2(intel_crtc, &config->dp_m2_n2);
+			intel_dp_set_m_n(intel_crtc);
 		} else {
 			val &= ~PIPECONF_EDP_RR_MODE_SWITCH;
 		}
@@ -4474,7 +4475,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port,
 	}
 
 	if (dev_priv->vbt.drrs_type != SEAMLESS_DRRS_SUPPORT) {
-		DRM_INFO("VBT doesn't support DRRS\n");
+		DRM_DEBUG_KMS("VBT doesn't support DRRS\n");
 		return NULL;
 	}
 
@@ -4482,7 +4483,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port,
 					(dev, fixed_mode, connector);
 
 	if (!downclock_mode) {
-		DRM_INFO("DRRS not supported\n");
+		DRM_DEBUG_KMS("DRRS not supported\n");
 		return NULL;
 	}
 
@@ -4493,7 +4494,7 @@ intel_dp_drrs_init(struct intel_digital_port *intel_dig_port,
 	intel_dp->drrs_state.type = dev_priv->vbt.drrs_type;
 
 	intel_dp->drrs_state.refresh_rate_type = DRRS_HIGH_RR;
-	DRM_INFO("seamless DRRS supported for eDP panel.\n");
+	DRM_DEBUG_KMS("seamless DRRS supported for eDP panel.\n");
 	return downclock_mode;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index b8c8bbd8e5f9..d683a2090249 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -330,6 +330,7 @@ struct intel_crtc_config {
 
 	/* m2_n2 for eDP downclock */
 	struct intel_link_m_n dp_m2_n2;
+	bool has_drrs;
 
 	/*
 	 * Frequence the dpll for the port should run at. Differs from the
@@ -410,6 +411,7 @@ struct intel_crtc {
 	uint32_t cursor_addr;
 	int16_t cursor_width, cursor_height;
 	uint32_t cursor_cntl;
+	uint32_t cursor_size;
 	uint32_t cursor_base;
 
 	struct intel_plane_config plane_config;
@@ -430,8 +432,6 @@ struct intel_crtc {
 		struct intel_pipe_wm active;
 	} wm;
 
-	wait_queue_head_t vbl_wait;
-
 	int scanline_offset;
 	struct intel_mmio_flip mmio_flip;
 };
@@ -455,6 +455,7 @@ struct intel_plane {
 	unsigned int crtc_w, crtc_h;
 	uint32_t src_x, src_y;
 	uint32_t src_w, src_h;
+	unsigned int rotation;
 
 	/* Since we need to change the watermarks before/after
 	 * enabling/disabling the planes, we need to store the parameters here
@@ -882,6 +883,7 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv);
 void hsw_disable_pc8(struct drm_i915_private *dev_priv);
 void intel_dp_get_m_n(struct intel_crtc *crtc,
 		      struct intel_crtc_config *pipe_config);
+void intel_dp_set_m_n(struct intel_crtc *crtc);
 int intel_dotclock_calculate(int link_freq, const struct intel_link_m_n *m_n);
 void
 ironlake_check_encoder_dotclock(const struct intel_crtc_config *pipe_config,
@@ -896,7 +898,7 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode,
 				 struct intel_crtc_config *pipe_config);
 int intel_format_to_fourcc(int format);
 void intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc);
-
+void intel_modeset_preclose(struct drm_device *dev, struct drm_file *file);
 
 /* intel_dp.c */
 void intel_dp_init(struct drm_device *dev, int output_reg, enum port port);
@@ -951,7 +953,7 @@ void intel_dvo_init(struct drm_device *dev);
 extern int intel_fbdev_init(struct drm_device *dev);
 extern void intel_fbdev_initial_config(struct drm_device *dev);
 extern void intel_fbdev_fini(struct drm_device *dev);
-extern void intel_fbdev_set_suspend(struct drm_device *dev, int state);
+extern void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous);
 extern void intel_fbdev_output_poll_changed(struct drm_device *dev);
 extern void intel_fbdev_restore_mode(struct drm_device *dev);
 #else
@@ -968,7 +970,7 @@ static inline void intel_fbdev_fini(struct drm_device *dev)
 {
 }
 
-static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state)
+static inline void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous)
 {
 }
 
@@ -1091,7 +1093,7 @@ bool intel_sdvo_init(struct drm_device *dev, uint32_t sdvo_reg, bool is_sdvob);
 int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane);
 void intel_flush_primary_plane(struct drm_i915_private *dev_priv,
 			       enum plane plane);
-void intel_plane_restore(struct drm_plane *plane);
+int intel_plane_restore(struct drm_plane *plane);
 void intel_plane_disable(struct drm_plane *plane);
 int intel_sprite_set_colorkey(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/i915/intel_dsi.c b/drivers/gpu/drm/i915/intel_dsi.c
index 670c29a7b5dd..5bd9e09ad3c5 100644
--- a/drivers/gpu/drm/i915/intel_dsi.c
+++ b/drivers/gpu/drm/i915/intel_dsi.c
@@ -184,7 +184,7 @@ static void intel_dsi_pre_enable(struct intel_encoder *encoder)
 
 	/* update the hw state for DPLL */
 	intel_crtc->config.dpll_hw_state.dpll = DPLL_INTEGRATED_CLOCK_VLV |
-						DPLL_REFA_CLK_ENABLE_VLV;
+		DPLL_REFA_CLK_ENABLE_VLV;
 
 	tmp = I915_READ(DSPCLK_GATE_D);
 	tmp |= DPOUNIT_CLOCK_GATE_DISABLE;
@@ -259,8 +259,8 @@ static void intel_dsi_disable(struct intel_encoder *encoder)
 	temp = I915_READ(MIPI_CTRL(pipe));
 	temp &= ~ESCAPE_CLOCK_DIVIDER_MASK;
 	I915_WRITE(MIPI_CTRL(pipe), temp |
-			intel_dsi->escape_clk_div <<
-			ESCAPE_CLOCK_DIVIDER_SHIFT);
+		   intel_dsi->escape_clk_div <<
+		   ESCAPE_CLOCK_DIVIDER_SHIFT);
 
 	I915_WRITE(MIPI_EOT_DISABLE(pipe), CLOCKSTOP);
 
@@ -297,7 +297,7 @@ static void intel_dsi_clear_device_ready(struct intel_encoder *encoder)
 	usleep_range(2000, 2500);
 
 	if (wait_for(((I915_READ(MIPI_PORT_CTRL(pipe)) & AFE_LATCHOUT)
-					== 0x00000), 30))
+		      == 0x00000), 30))
 		DRM_ERROR("DSI LP not going Low\n");
 
 	val = I915_READ(MIPI_PORT_CTRL(pipe));
@@ -423,9 +423,11 @@ static u16 txclkesc(u32 divider, unsigned int us)
 }
 
 /* return pixels in terms of txbyteclkhs */
-static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count)
+static u16 txbyteclkhs(u16 pixels, int bpp, int lane_count,
+		       u16 burst_mode_ratio)
 {
-	return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp, 8), lane_count);
+	return DIV_ROUND_UP(DIV_ROUND_UP(pixels * bpp * burst_mode_ratio,
+					 8 * 100), lane_count);
 }
 
 static void set_dsi_timings(struct drm_encoder *encoder,
@@ -451,10 +453,12 @@ static void set_dsi_timings(struct drm_encoder *encoder,
 	vbp = mode->vtotal - mode->vsync_end;
 
 	/* horizontal values are in terms of high speed byte clock */
-	hactive = txbyteclkhs(hactive, bpp, lane_count);
-	hfp = txbyteclkhs(hfp, bpp, lane_count);
-	hsync = txbyteclkhs(hsync, bpp, lane_count);
-	hbp = txbyteclkhs(hbp, bpp, lane_count);
+	hactive = txbyteclkhs(hactive, bpp, lane_count,
+			      intel_dsi->burst_mode_ratio);
+	hfp = txbyteclkhs(hfp, bpp, lane_count, intel_dsi->burst_mode_ratio);
+	hsync = txbyteclkhs(hsync, bpp, lane_count,
+			    intel_dsi->burst_mode_ratio);
+	hbp = txbyteclkhs(hbp, bpp, lane_count, intel_dsi->burst_mode_ratio);
 
 	I915_WRITE(MIPI_HACTIVE_AREA_COUNT(pipe), hactive);
 	I915_WRITE(MIPI_HFP_COUNT(pipe), hfp);
@@ -541,12 +545,14 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder)
 	    intel_dsi->video_mode_format == VIDEO_MODE_BURST) {
 		I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe),
 			   txbyteclkhs(adjusted_mode->htotal, bpp,
-				       intel_dsi->lane_count) + 1);
+				       intel_dsi->lane_count,
+				       intel_dsi->burst_mode_ratio) + 1);
 	} else {
 		I915_WRITE(MIPI_HS_TX_TIMEOUT(pipe),
 			   txbyteclkhs(adjusted_mode->vtotal *
 				       adjusted_mode->htotal,
-				       bpp, intel_dsi->lane_count) + 1);
+				       bpp, intel_dsi->lane_count,
+				       intel_dsi->burst_mode_ratio) + 1);
 	}
 	I915_WRITE(MIPI_LP_RX_TIMEOUT(pipe), intel_dsi->lp_rx_timeout);
 	I915_WRITE(MIPI_TURN_AROUND_TIMEOUT(pipe), intel_dsi->turn_arnd_val);
@@ -576,7 +582,7 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder)
 	 * XXX: write MIPI_STOP_STATE_STALL?
 	 */
 	I915_WRITE(MIPI_HIGH_LOW_SWITCH_COUNT(pipe),
-						intel_dsi->hs_to_lp_count);
+		   intel_dsi->hs_to_lp_count);
 
 	/* XXX: low power clock equivalence in terms of byte clock. the number
 	 * of byte clocks occupied in one low power clock. based on txbyteclkhs
@@ -601,10 +607,10 @@ static void intel_dsi_prepare(struct intel_encoder *intel_encoder)
 		 * 64 like 1366 x 768. Enable RANDOM resolution support for such
 		 * panels by default */
 		I915_WRITE(MIPI_VIDEO_MODE_FORMAT(pipe),
-				intel_dsi->video_frmt_cfg_bits |
-				intel_dsi->video_mode_format |
-				IP_TG_CONFIG |
-				RANDOM_DPI_DISPLAY_RESOLUTION);
+			   intel_dsi->video_frmt_cfg_bits |
+			   intel_dsi->video_mode_format |
+			   IP_TG_CONFIG |
+			   RANDOM_DPI_DISPLAY_RESOLUTION);
 }
 
 static void intel_dsi_pre_pll_enable(struct intel_encoder *encoder)
diff --git a/drivers/gpu/drm/i915/intel_dsi.h b/drivers/gpu/drm/i915/intel_dsi.h
index fd51867fd0d3..657eb5c1b9d8 100644
--- a/drivers/gpu/drm/i915/intel_dsi.h
+++ b/drivers/gpu/drm/i915/intel_dsi.h
@@ -116,6 +116,8 @@ struct intel_dsi {
 	u16 clk_hs_to_lp_count;
 
 	u16 init_count;
+	u32 pclk;
+	u16 burst_mode_ratio;
 
 	/* all delays in ms */
 	u16 backlight_off_delay;
diff --git a/drivers/gpu/drm/i915/intel_dsi_cmd.c b/drivers/gpu/drm/i915/intel_dsi_cmd.c
index 7f1430ac8543..f4767fd2ebeb 100644
--- a/drivers/gpu/drm/i915/intel_dsi_cmd.c
+++ b/drivers/gpu/drm/i915/intel_dsi_cmd.c
@@ -430,7 +430,7 @@ void wait_for_dsi_fifo_empty(struct intel_dsi *intel_dsi)
 	u32 mask;
 
 	mask = LP_CTRL_FIFO_EMPTY | HS_CTRL_FIFO_EMPTY |
-					LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY;
+		LP_DATA_FIFO_EMPTY | HS_DATA_FIFO_EMPTY;
 
 	if (wait_for((I915_READ(MIPI_GEN_FIFO_STAT(pipe)) & mask) == mask, 100))
 		DRM_ERROR("DPI FIFOs are not empty\n");
diff --git a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
index 47c7584a4aa0..f6bdd44069ce 100644
--- a/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
+++ b/drivers/gpu/drm/i915/intel_dsi_panel_vbt.c
@@ -271,6 +271,8 @@ static bool generic_init(struct intel_dsi_device *dsi)
 	u32 ths_prepare_ns, tclk_trail_ns;
 	u32 tclk_prepare_clkzero, ths_prepare_hszero;
 	u32 lp_to_hs_switch, hs_to_lp_switch;
+	u32 pclk, computed_ddr;
+	u16 burst_mode_ratio;
 
 	DRM_DEBUG_KMS("\n");
 
@@ -284,8 +286,6 @@ static bool generic_init(struct intel_dsi_device *dsi)
 	else if (intel_dsi->pixel_format == VID_MODE_FORMAT_RGB565)
 		bits_per_pixel = 16;
 
-	bitrate = (mode->clock * bits_per_pixel) / intel_dsi->lane_count;
-
 	intel_dsi->operation_mode = mipi_config->is_cmd_mode;
 	intel_dsi->video_mode_format = mipi_config->video_transfer_mode;
 	intel_dsi->escape_clk_div = mipi_config->byte_clk_sel;
@@ -297,6 +297,40 @@ static bool generic_init(struct intel_dsi_device *dsi)
 	intel_dsi->video_frmt_cfg_bits =
 		mipi_config->bta_enabled ? DISABLE_VIDEO_BTA : 0;
 
+	pclk = mode->clock;
+
+	/* Burst Mode Ratio
+	 * Target ddr frequency from VBT / non burst ddr freq
+	 * multiply by 100 to preserve remainder
+	 */
+	if (intel_dsi->video_mode_format == VIDEO_MODE_BURST) {
+		if (mipi_config->target_burst_mode_freq) {
+			computed_ddr =
+				(pclk * bits_per_pixel) / intel_dsi->lane_count;
+
+			if (mipi_config->target_burst_mode_freq <
+								computed_ddr) {
+				DRM_ERROR("Burst mode freq is less than computed\n");
+				return false;
+			}
+
+			burst_mode_ratio = DIV_ROUND_UP(
+				mipi_config->target_burst_mode_freq * 100,
+				computed_ddr);
+
+			pclk = DIV_ROUND_UP(pclk * burst_mode_ratio, 100);
+		} else {
+			DRM_ERROR("Burst mode target is not set\n");
+			return false;
+		}
+	} else
+		burst_mode_ratio = 100;
+
+	intel_dsi->burst_mode_ratio = burst_mode_ratio;
+	intel_dsi->pclk = pclk;
+
+	bitrate = (pclk * bits_per_pixel) / intel_dsi->lane_count;
+
 	switch (intel_dsi->escape_clk_div) {
 	case 0:
 		tlpx_ns = 50;
diff --git a/drivers/gpu/drm/i915/intel_dsi_pll.c b/drivers/gpu/drm/i915/intel_dsi_pll.c
index d8bb1ea2f0da..fa7a6ca34cd6 100644
--- a/drivers/gpu/drm/i915/intel_dsi_pll.c
+++ b/drivers/gpu/drm/i915/intel_dsi_pll.c
@@ -134,8 +134,7 @@ static u32 dsi_rr_formula(const struct drm_display_mode *mode,
 #else
 
 /* Get DSI clock from pixel clock */
-static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode,
-			  int pixel_format, int lane_count)
+static u32 dsi_clk_from_pclk(u32 pclk, int pixel_format, int lane_count)
 {
 	u32 dsi_clk_khz;
 	u32 bpp;
@@ -156,7 +155,7 @@ static u32 dsi_clk_from_pclk(const struct drm_display_mode *mode,
 
 	/* DSI data rate = pixel clock * bits per pixel / lane count
 	   pixel clock is converted from KHz to Hz */
-	dsi_clk_khz = DIV_ROUND_CLOSEST(mode->clock * bpp, lane_count);
+	dsi_clk_khz = DIV_ROUND_CLOSEST(pclk * bpp, lane_count);
 
 	return dsi_clk_khz;
 }
@@ -191,7 +190,7 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp)
 	for (m = 62; m <= 92; m++) {
 		for (p = 2; p <= 6; p++) {
 			/* Find the optimal m and p divisors
-			with minimal error +/- the required clock */
+			   with minimal error +/- the required clock */
 			calc_dsi_clk = (m * ref_clk) / p;
 			if (calc_dsi_clk == target_dsi_clk) {
 				calc_m = m;
@@ -228,15 +227,13 @@ static int dsi_calc_mnp(u32 dsi_clk, struct dsi_mnp *dsi_mnp)
 static void vlv_configure_dsi_pll(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = encoder->base.dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc);
-	const struct drm_display_mode *mode = &intel_crtc->config.adjusted_mode;
 	struct intel_dsi *intel_dsi = enc_to_intel_dsi(&encoder->base);
 	int ret;
 	struct dsi_mnp dsi_mnp;
 	u32 dsi_clk;
 
-	dsi_clk = dsi_clk_from_pclk(mode, intel_dsi->pixel_format,
-						intel_dsi->lane_count);
+	dsi_clk = dsi_clk_from_pclk(intel_dsi->pclk, intel_dsi->pixel_format,
+				    intel_dsi->lane_count);
 
 	ret = dsi_calc_mnp(dsi_clk, &dsi_mnp);
 	if (ret) {
@@ -318,8 +315,8 @@ static void assert_bpp_mismatch(int pixel_format, int pipe_bpp)
 	}
 
 	WARN(bpp != pipe_bpp,
-		"bpp match assertion failure (expected %d, current %d)\n",
-		bpp, pipe_bpp);
+	     "bpp match assertion failure (expected %d, current %d)\n",
+	     bpp, pipe_bpp);
 }
 
 u32 vlv_get_dsi_pclk(struct intel_encoder *encoder, int pipe_bpp)
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index f475414671d8..cf052a39558d 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -26,6 +26,7 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/console.h>
 #include <linux/errno.h>
 #include <linux/string.h>
 #include <linux/mm.h>
@@ -636,6 +637,15 @@ out:
 	return false;
 }
 
+static void intel_fbdev_suspend_worker(struct work_struct *work)
+{
+	intel_fbdev_set_suspend(container_of(work,
+					     struct drm_i915_private,
+					     fbdev_suspend_work)->dev,
+				FBINFO_STATE_RUNNING,
+				true);
+}
+
 int intel_fbdev_init(struct drm_device *dev)
 {
 	struct intel_fbdev *ifbdev;
@@ -662,6 +672,8 @@ int intel_fbdev_init(struct drm_device *dev)
 	}
 
 	dev_priv->fbdev = ifbdev;
+	INIT_WORK(&dev_priv->fbdev_suspend_work, intel_fbdev_suspend_worker);
+
 	drm_fb_helper_single_add_all_connectors(&ifbdev->helper);
 
 	return 0;
@@ -682,12 +694,14 @@ void intel_fbdev_fini(struct drm_device *dev)
 	if (!dev_priv->fbdev)
 		return;
 
+	flush_work(&dev_priv->fbdev_suspend_work);
+
 	intel_fbdev_destroy(dev, dev_priv->fbdev);
 	kfree(dev_priv->fbdev);
 	dev_priv->fbdev = NULL;
 }
 
-void intel_fbdev_set_suspend(struct drm_device *dev, int state)
+void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_fbdev *ifbdev = dev_priv->fbdev;
@@ -698,6 +712,33 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state)
 
 	info = ifbdev->helper.fbdev;
 
+	if (synchronous) {
+		/* Flush any pending work to turn the console on, and then
+		 * wait to turn it off. It must be synchronous as we are
+		 * about to suspend or unload the driver.
+		 *
+		 * Note that from within the work-handler, we cannot flush
+		 * ourselves, so only flush outstanding work upon suspend!
+		 */
+		if (state != FBINFO_STATE_RUNNING)
+			flush_work(&dev_priv->fbdev_suspend_work);
+		console_lock();
+	} else {
+		/*
+		 * The console lock can be pretty contented on resume due
+		 * to all the printk activity.  Try to keep it out of the hot
+		 * path of resume if possible.
+		 */
+		WARN_ON(state != FBINFO_STATE_RUNNING);
+		if (!console_trylock()) {
+			/* Don't block our own workqueue as this can
+			 * be run in parallel with other i915.ko tasks.
+			 */
+			schedule_work(&dev_priv->fbdev_suspend_work);
+			return;
+		}
+	}
+
 	/* On resume from hibernation: If the object is shmemfs backed, it has
 	 * been restored from swap. If the object is stolen however, it will be
 	 * full of whatever garbage was left in there.
@@ -706,6 +747,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state)
 		memset_io(info->screen_base, 0, info->screen_size);
 
 	fb_set_suspend(info, state);
+	console_unlock();
 }
 
 void intel_fbdev_output_poll_changed(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index f9151f6641d9..96957683032e 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -864,10 +864,15 @@ static enum drm_mode_status
 intel_hdmi_mode_valid(struct drm_connector *connector,
 		      struct drm_display_mode *mode)
 {
-	if (mode->clock > hdmi_portclock_limit(intel_attached_hdmi(connector),
-					       true))
+	int clock = mode->clock;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		clock *= 2;
+
+	if (clock > hdmi_portclock_limit(intel_attached_hdmi(connector),
+					 true))
 		return MODE_CLOCK_HIGH;
-	if (mode->clock < 20000)
+	if (clock < 20000)
 		return MODE_CLOCK_LOW;
 
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
@@ -885,7 +890,7 @@ static bool hdmi_12bpc_possible(struct intel_crtc *crtc)
 	if (HAS_GMCH_DISPLAY(dev))
 		return false;
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (encoder->new_crtc != crtc)
 			continue;
 
@@ -921,6 +926,10 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 			intel_hdmi->color_range = 0;
 	}
 
+	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) {
+		pipe_config->pixel_multiplier = 2;
+	}
+
 	if (intel_hdmi->color_range)
 		pipe_config->limited_color_range = true;
 
@@ -1260,6 +1269,8 @@ static void chv_hdmi_pre_pll_enable(struct intel_encoder *encoder)
 	enum pipe pipe = intel_crtc->pipe;
 	u32 val;
 
+	intel_hdmi_prepare(encoder);
+
 	mutex_lock(&dev_priv->dpio_lock);
 
 	/* program left/right clock distribution */
@@ -1429,8 +1440,8 @@ static void chv_hdmi_pre_enable(struct intel_encoder *encoder)
 
 	for (i = 0; i < 4; i++) {
 		val = vlv_dpio_read(dev_priv, pipe, CHV_TX_DW2(ch, i));
-		val &= ~DPIO_SWING_MARGIN_MASK;
-		val |= 102 << DPIO_SWING_MARGIN_SHIFT;
+		val &= ~DPIO_SWING_MARGIN000_MASK;
+		val |= 102 << DPIO_SWING_MARGIN000_SHIFT;
 		vlv_dpio_write(dev_priv, pipe, CHV_TX_DW2(ch, i), val);
 	}
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
new file mode 100644
index 000000000000..c096b9b7f22a
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -0,0 +1,1697 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Ben Widawsky <ben@bwidawsk.net>
+ *    Michel Thierry <michel.thierry@intel.com>
+ *    Thomas Daniel <thomas.daniel@intel.com>
+ *    Oscar Mateo <oscar.mateo@intel.com>
+ *
+ */
+
+/**
+ * DOC: Logical Rings, Logical Ring Contexts and Execlists
+ *
+ * Motivation:
+ * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
+ * These expanded contexts enable a number of new abilities, especially
+ * "Execlists" (also implemented in this file).
+ *
+ * One of the main differences with the legacy HW contexts is that logical
+ * ring contexts incorporate many more things to the context's state, like
+ * PDPs or ringbuffer control registers:
+ *
+ * The reason why PDPs are included in the context is straightforward: as
+ * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
+ * contained there mean you don't need to do a ppgtt->switch_mm yourself,
+ * instead, the GPU will do it for you on the context switch.
+ *
+ * But, what about the ringbuffer control registers (head, tail, etc..)?
+ * shouldn't we just need a set of those per engine command streamer? This is
+ * where the name "Logical Rings" starts to make sense: by virtualizing the
+ * rings, the engine cs shifts to a new "ring buffer" with every context
+ * switch. When you want to submit a workload to the GPU you: A) choose your
+ * context, B) find its appropriate virtualized ring, C) write commands to it
+ * and then, finally, D) tell the GPU to switch to that context.
+ *
+ * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
+ * to a contexts is via a context execution list, ergo "Execlists".
+ *
+ * LRC implementation:
+ * Regarding the creation of contexts, we have:
+ *
+ * - One global default context.
+ * - One local default context for each opened fd.
+ * - One local extra context for each context create ioctl call.
+ *
+ * Now that ringbuffers belong per-context (and not per-engine, like before)
+ * and that contexts are uniquely tied to a given engine (and not reusable,
+ * like before) we need:
+ *
+ * - One ringbuffer per-engine inside each context.
+ * - One backing object per-engine inside each context.
+ *
+ * The global default context starts its life with these new objects fully
+ * allocated and populated. The local default context for each opened fd is
+ * more complex, because we don't know at creation time which engine is going
+ * to use them. To handle this, we have implemented a deferred creation of LR
+ * contexts:
+ *
+ * The local context starts its life as a hollow or blank holder, that only
+ * gets populated for a given engine once we receive an execbuffer. If later
+ * on we receive another execbuffer ioctl for the same context but a different
+ * engine, we allocate/populate a new ringbuffer and context backing object and
+ * so on.
+ *
+ * Finally, regarding local contexts created using the ioctl call: as they are
+ * only allowed with the render ring, we can allocate & populate them right
+ * away (no need to defer anything, at least for now).
+ *
+ * Execlists implementation:
+ * Execlists are the new method by which, on gen8+ hardware, workloads are
+ * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
+ * This method works as follows:
+ *
+ * When a request is committed, its commands (the BB start and any leading or
+ * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
+ * for the appropriate context. The tail pointer in the hardware context is not
+ * updated at this time, but instead, kept by the driver in the ringbuffer
+ * structure. A structure representing this request is added to a request queue
+ * for the appropriate engine: this structure contains a copy of the context's
+ * tail after the request was written to the ring buffer and a pointer to the
+ * context itself.
+ *
+ * If the engine's request queue was empty before the request was added, the
+ * queue is processed immediately. Otherwise the queue will be processed during
+ * a context switch interrupt. In any case, elements on the queue will get sent
+ * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
+ * globally unique 20-bits submission ID.
+ *
+ * When execution of a request completes, the GPU updates the context status
+ * buffer with a context complete event and generates a context switch interrupt.
+ * During the interrupt handling, the driver examines the events in the buffer:
+ * for each context complete event, if the announced ID matches that on the head
+ * of the request queue, then that request is retired and removed from the queue.
+ *
+ * After processing, if any requests were retired and the queue is not empty
+ * then a new execution list can be submitted. The two requests at the front of
+ * the queue are next to be submitted but since a context may not occur twice in
+ * an execution list, if subsequent requests have the same ID as the first then
+ * the two requests must be combined. This is done simply by discarding requests
+ * at the head of the queue until either only one requests is left (in which case
+ * we use a NULL second context) or the first two requests have unique IDs.
+ *
+ * By always executing the first two requests in the queue the driver ensures
+ * that the GPU is kept as busy as possible. In the case where a single context
+ * completes but a second context is still executing, the request for this second
+ * context will be at the head of the queue when we remove the first one. This
+ * request will then be resubmitted along with a new request for a different context,
+ * which will cause the hardware to continue executing the second request and queue
+ * the new request (the GPU detects the condition of a context getting preempted
+ * with the same context and optimizes the context switch flow by not doing
+ * preemption, but just sampling the new tail pointer).
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+#define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
+#define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
+
+#define GEN8_LR_CONTEXT_ALIGN 4096
+
+#define RING_EXECLIST_QFULL		(1 << 0x2)
+#define RING_EXECLIST1_VALID		(1 << 0x3)
+#define RING_EXECLIST0_VALID		(1 << 0x4)
+#define RING_EXECLIST_ACTIVE_STATUS	(3 << 0xE)
+#define RING_EXECLIST1_ACTIVE		(1 << 0x11)
+#define RING_EXECLIST0_ACTIVE		(1 << 0x12)
+
+#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
+#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
+#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
+#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
+#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
+#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
+
+#define CTX_LRI_HEADER_0		0x01
+#define CTX_CONTEXT_CONTROL		0x02
+#define CTX_RING_HEAD			0x04
+#define CTX_RING_TAIL			0x06
+#define CTX_RING_BUFFER_START		0x08
+#define CTX_RING_BUFFER_CONTROL		0x0a
+#define CTX_BB_HEAD_U			0x0c
+#define CTX_BB_HEAD_L			0x0e
+#define CTX_BB_STATE			0x10
+#define CTX_SECOND_BB_HEAD_U		0x12
+#define CTX_SECOND_BB_HEAD_L		0x14
+#define CTX_SECOND_BB_STATE		0x16
+#define CTX_BB_PER_CTX_PTR		0x18
+#define CTX_RCS_INDIRECT_CTX		0x1a
+#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
+#define CTX_LRI_HEADER_1		0x21
+#define CTX_CTX_TIMESTAMP		0x22
+#define CTX_PDP3_UDW			0x24
+#define CTX_PDP3_LDW			0x26
+#define CTX_PDP2_UDW			0x28
+#define CTX_PDP2_LDW			0x2a
+#define CTX_PDP1_UDW			0x2c
+#define CTX_PDP1_LDW			0x2e
+#define CTX_PDP0_UDW			0x30
+#define CTX_PDP0_LDW			0x32
+#define CTX_LRI_HEADER_2		0x41
+#define CTX_R_PWR_CLK_STATE		0x42
+#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
+
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+enum {
+	ADVANCED_CONTEXT = 0,
+	LEGACY_CONTEXT,
+	ADVANCED_AD_CONTEXT,
+	LEGACY_64B_CONTEXT
+};
+#define GEN8_CTX_MODE_SHIFT 3
+enum {
+	FAULT_AND_HANG = 0,
+	FAULT_AND_HALT, /* Debug only */
+	FAULT_AND_STREAM,
+	FAULT_AND_CONTINUE /* Unsupported */
+};
+#define GEN8_CTX_ID_SHIFT 32
+
+/**
+ * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
+ * @dev: DRM device.
+ * @enable_execlists: value of i915.enable_execlists module parameter.
+ *
+ * Only certain platforms support Execlists (the prerequisites being
+ * support for Logical Ring Contexts and Aliasing PPGTT or better),
+ * and only when enabled via module parameter.
+ *
+ * Return: 1 if Execlists is supported and has to be enabled.
+ */
+int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
+{
+	WARN_ON(i915.enable_ppgtt == -1);
+
+	if (enable_execlists == 0)
+		return 0;
+
+	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
+	    i915.use_mmio_flip >= 0)
+		return 1;
+
+	return 0;
+}
+
+/**
+ * intel_execlists_ctx_id() - get the Execlists Context ID
+ * @ctx_obj: Logical Ring Context backing object.
+ *
+ * Do not confuse with ctx->id! Unfortunately we have a name overload
+ * here: the old context ID we pass to userspace as a handler so that
+ * they can refer to a context, and the new context ID we pass to the
+ * ELSP so that the GPU can inform us of the context status via
+ * interrupts.
+ *
+ * Return: 20-bits globally unique context ID.
+ */
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
+{
+	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+
+	/* LRCA is required to be 4K aligned so the more significant 20 bits
+	 * are globally unique */
+	return lrca >> 12;
+}
+
+static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
+{
+	uint64_t desc;
+	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
+
+	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
+
+	desc = GEN8_CTX_VALID;
+	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
+	desc |= GEN8_CTX_L3LLC_COHERENT;
+	desc |= GEN8_CTX_PRIVILEGE;
+	desc |= lrca;
+	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
+
+	/* TODO: WaDisableLiteRestore when we start using semaphore
+	 * signalling between Command Streamers */
+	/* desc |= GEN8_CTX_FORCE_RESTORE; */
+
+	return desc;
+}
+
+static void execlists_elsp_write(struct intel_engine_cs *ring,
+				 struct drm_i915_gem_object *ctx_obj0,
+				 struct drm_i915_gem_object *ctx_obj1)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	uint64_t temp = 0;
+	uint32_t desc[4];
+	unsigned long flags;
+
+	/* XXX: You must always write both descriptors in the order below. */
+	if (ctx_obj1)
+		temp = execlists_ctx_descriptor(ctx_obj1);
+	else
+		temp = 0;
+	desc[1] = (u32)(temp >> 32);
+	desc[0] = (u32)temp;
+
+	temp = execlists_ctx_descriptor(ctx_obj0);
+	desc[3] = (u32)(temp >> 32);
+	desc[2] = (u32)temp;
+
+	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
+	 * are in progress.
+	 *
+	 * The other problem is that we can't just call gen6_gt_force_wake_get()
+	 * because that function calls intel_runtime_pm_get(), which might sleep.
+	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
+	 */
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+	if (dev_priv->uncore.forcewake_count++ == 0)
+		dev_priv->uncore.funcs.force_wake_get(dev_priv, FORCEWAKE_ALL);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
+
+	I915_WRITE(RING_ELSP(ring), desc[1]);
+	I915_WRITE(RING_ELSP(ring), desc[0]);
+	I915_WRITE(RING_ELSP(ring), desc[3]);
+	/* The context is automatically loaded after the following */
+	I915_WRITE(RING_ELSP(ring), desc[2]);
+
+	/* ELSP is a wo register, so use another nearby reg for posting instead */
+	POSTING_READ(RING_EXECLIST_STATUS(ring));
+
+	/* Release Force Wakeup (see the big comment above). */
+	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
+	if (--dev_priv->uncore.forcewake_count == 0)
+		dev_priv->uncore.funcs.force_wake_put(dev_priv, FORCEWAKE_ALL);
+	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
+}
+
+static int execlists_ctx_write_tail(struct drm_i915_gem_object *ctx_obj, u32 tail)
+{
+	struct page *page;
+	uint32_t *reg_state;
+
+	page = i915_gem_object_get_page(ctx_obj, 1);
+	reg_state = kmap_atomic(page);
+
+	reg_state[CTX_RING_TAIL+1] = tail;
+
+	kunmap_atomic(reg_state);
+
+	return 0;
+}
+
+static int execlists_submit_context(struct intel_engine_cs *ring,
+				    struct intel_context *to0, u32 tail0,
+				    struct intel_context *to1, u32 tail1)
+{
+	struct drm_i915_gem_object *ctx_obj0;
+	struct drm_i915_gem_object *ctx_obj1 = NULL;
+
+	ctx_obj0 = to0->engine[ring->id].state;
+	BUG_ON(!ctx_obj0);
+	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
+
+	execlists_ctx_write_tail(ctx_obj0, tail0);
+
+	if (to1) {
+		ctx_obj1 = to1->engine[ring->id].state;
+		BUG_ON(!ctx_obj1);
+		WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
+
+		execlists_ctx_write_tail(ctx_obj1, tail1);
+	}
+
+	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
+
+	return 0;
+}
+
+static void execlists_context_unqueue(struct intel_engine_cs *ring)
+{
+	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
+	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	assert_spin_locked(&ring->execlist_lock);
+
+	if (list_empty(&ring->execlist_queue))
+		return;
+
+	/* Try to read in pairs */
+	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
+				 execlist_link) {
+		if (!req0) {
+			req0 = cursor;
+		} else if (req0->ctx == cursor->ctx) {
+			/* Same ctx: ignore first request, as second request
+			 * will update tail past first request's workload */
+			cursor->elsp_submitted = req0->elsp_submitted;
+			list_del(&req0->execlist_link);
+			queue_work(dev_priv->wq, &req0->work);
+			req0 = cursor;
+		} else {
+			req1 = cursor;
+			break;
+		}
+	}
+
+	WARN_ON(req1 && req1->elsp_submitted);
+
+	WARN_ON(execlists_submit_context(ring, req0->ctx, req0->tail,
+					 req1 ? req1->ctx : NULL,
+					 req1 ? req1->tail : 0));
+
+	req0->elsp_submitted++;
+	if (req1)
+		req1->elsp_submitted++;
+}
+
+static bool execlists_check_remove_request(struct intel_engine_cs *ring,
+					   u32 request_id)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	struct intel_ctx_submit_request *head_req;
+
+	assert_spin_locked(&ring->execlist_lock);
+
+	head_req = list_first_entry_or_null(&ring->execlist_queue,
+					    struct intel_ctx_submit_request,
+					    execlist_link);
+
+	if (head_req != NULL) {
+		struct drm_i915_gem_object *ctx_obj =
+				head_req->ctx->engine[ring->id].state;
+		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
+			WARN(head_req->elsp_submitted == 0,
+			     "Never submitted head request\n");
+
+			if (--head_req->elsp_submitted <= 0) {
+				list_del(&head_req->execlist_link);
+				queue_work(dev_priv->wq, &head_req->work);
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+/**
+ * intel_execlists_handle_ctx_events() - handle Context Switch interrupts
+ * @ring: Engine Command Streamer to handle.
+ *
+ * Check the unread Context Status Buffers and manage the submission of new
+ * contexts to the ELSP accordingly.
+ */
+void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	u32 status_pointer;
+	u8 read_pointer;
+	u8 write_pointer;
+	u32 status;
+	u32 status_id;
+	u32 submit_contexts = 0;
+
+	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
+
+	read_pointer = ring->next_context_status_buffer;
+	write_pointer = status_pointer & 0x07;
+	if (read_pointer > write_pointer)
+		write_pointer += 6;
+
+	spin_lock(&ring->execlist_lock);
+
+	while (read_pointer < write_pointer) {
+		read_pointer++;
+		status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
+				(read_pointer % 6) * 8);
+		status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
+				(read_pointer % 6) * 8 + 4);
+
+		if (status & GEN8_CTX_STATUS_PREEMPTED) {
+			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
+				if (execlists_check_remove_request(ring, status_id))
+					WARN(1, "Lite Restored request removed from queue\n");
+			} else
+				WARN(1, "Preemption without Lite Restore\n");
+		}
+
+		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
+		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
+			if (execlists_check_remove_request(ring, status_id))
+				submit_contexts++;
+		}
+	}
+
+	if (submit_contexts != 0)
+		execlists_context_unqueue(ring);
+
+	spin_unlock(&ring->execlist_lock);
+
+	WARN(submit_contexts > 2, "More than two context complete events?\n");
+	ring->next_context_status_buffer = write_pointer % 6;
+
+	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
+		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
+}
+
+static void execlists_free_request_task(struct work_struct *work)
+{
+	struct intel_ctx_submit_request *req =
+		container_of(work, struct intel_ctx_submit_request, work);
+	struct drm_device *dev = req->ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	intel_runtime_pm_put(dev_priv);
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_context_unreference(req->ctx);
+	mutex_unlock(&dev->struct_mutex);
+
+	kfree(req);
+}
+
+static int execlists_context_queue(struct intel_engine_cs *ring,
+				   struct intel_context *to,
+				   u32 tail)
+{
+	struct intel_ctx_submit_request *req = NULL, *cursor;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	unsigned long flags;
+	int num_elements = 0;
+
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (req == NULL)
+		return -ENOMEM;
+	req->ctx = to;
+	i915_gem_context_reference(req->ctx);
+	req->ring = ring;
+	req->tail = tail;
+	INIT_WORK(&req->work, execlists_free_request_task);
+
+	intel_runtime_pm_get(dev_priv);
+
+	spin_lock_irqsave(&ring->execlist_lock, flags);
+
+	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
+		if (++num_elements > 2)
+			break;
+
+	if (num_elements > 2) {
+		struct intel_ctx_submit_request *tail_req;
+
+		tail_req = list_last_entry(&ring->execlist_queue,
+					   struct intel_ctx_submit_request,
+					   execlist_link);
+
+		if (to == tail_req->ctx) {
+			WARN(tail_req->elsp_submitted != 0,
+			     "More than 2 already-submitted reqs queued\n");
+			list_del(&tail_req->execlist_link);
+			queue_work(dev_priv->wq, &tail_req->work);
+		}
+	}
+
+	list_add_tail(&req->execlist_link, &ring->execlist_queue);
+	if (num_elements == 0)
+		execlists_context_unqueue(ring);
+
+	spin_unlock_irqrestore(&ring->execlist_lock, flags);
+
+	return 0;
+}
+
+static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	uint32_t flush_domains;
+	int ret;
+
+	flush_domains = 0;
+	if (ring->gpu_caches_dirty)
+		flush_domains = I915_GEM_GPU_DOMAINS;
+
+	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
+	if (ret)
+		return ret;
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
+				 struct list_head *vmas)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct i915_vma *vma;
+	uint32_t flush_domains = 0;
+	bool flush_chipset = false;
+	int ret;
+
+	list_for_each_entry(vma, vmas, exec_list) {
+		struct drm_i915_gem_object *obj = vma->obj;
+
+		ret = i915_gem_object_sync(obj, ring);
+		if (ret)
+			return ret;
+
+		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+			flush_chipset |= i915_gem_clflush_object(obj, false);
+
+		flush_domains |= obj->base.write_domain;
+	}
+
+	if (flush_domains & I915_GEM_DOMAIN_GTT)
+		wmb();
+
+	/* Unconditionally invalidate gpu caches and ensure that we do flush
+	 * any residual writes from the previous batch.
+	 */
+	return logical_ring_invalidate_all_caches(ringbuf);
+}
+
+/**
+ * execlists_submission() - submit a batchbuffer for execution, Execlists style
+ * @dev: DRM device.
+ * @file: DRM file.
+ * @ring: Engine Command Streamer to submit to.
+ * @ctx: Context to employ for this submission.
+ * @args: execbuffer call arguments.
+ * @vmas: list of vmas.
+ * @batch_obj: the batchbuffer to submit.
+ * @exec_start: batchbuffer start virtual address pointer.
+ * @flags: translated execbuffer call flags.
+ *
+ * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
+ * away the submission details of the execbuffer ioctl call.
+ *
+ * Return: non-zero if the submission fails.
+ */
+int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
+			       struct intel_engine_cs *ring,
+			       struct intel_context *ctx,
+			       struct drm_i915_gem_execbuffer2 *args,
+			       struct list_head *vmas,
+			       struct drm_i915_gem_object *batch_obj,
+			       u64 exec_start, u32 flags)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
+	int instp_mode;
+	u32 instp_mask;
+	int ret;
+
+	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
+	instp_mask = I915_EXEC_CONSTANTS_MASK;
+	switch (instp_mode) {
+	case I915_EXEC_CONSTANTS_REL_GENERAL:
+	case I915_EXEC_CONSTANTS_ABSOLUTE:
+	case I915_EXEC_CONSTANTS_REL_SURFACE:
+		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
+			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
+			return -EINVAL;
+		}
+
+		if (instp_mode != dev_priv->relative_constants_mode) {
+			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
+				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
+				return -EINVAL;
+			}
+
+			/* The HW changed the meaning on this bit on gen6 */
+			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
+		}
+		break;
+	default:
+		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
+		return -EINVAL;
+	}
+
+	if (args->num_cliprects != 0) {
+		DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
+		return -EINVAL;
+	} else {
+		if (args->DR4 == 0xffffffff) {
+			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
+			args->DR4 = 0;
+		}
+
+		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
+			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
+			return -EINVAL;
+		}
+	}
+
+	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
+		DRM_DEBUG("sol reset is gen7 only\n");
+		return -EINVAL;
+	}
+
+	ret = execlists_move_to_gpu(ringbuf, vmas);
+	if (ret)
+		return ret;
+
+	if (ring == &dev_priv->ring[RCS] &&
+	    instp_mode != dev_priv->relative_constants_mode) {
+		ret = intel_logical_ring_begin(ringbuf, 4);
+		if (ret)
+			return ret;
+
+		intel_logical_ring_emit(ringbuf, MI_NOOP);
+		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
+		intel_logical_ring_emit(ringbuf, INSTPM);
+		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
+		intel_logical_ring_advance(ringbuf);
+
+		dev_priv->relative_constants_mode = instp_mode;
+	}
+
+	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
+	if (ret)
+		return ret;
+
+	i915_gem_execbuffer_move_to_active(vmas, ring);
+	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
+
+	return 0;
+}
+
+void intel_logical_ring_stop(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	int ret;
+
+	if (!intel_ring_initialized(ring))
+		return;
+
+	ret = intel_ring_idle(ring);
+	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
+		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
+			  ring->name, ret);
+
+	/* TODO: Is this correct with Execlists enabled? */
+	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
+	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
+		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
+		return;
+	}
+	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
+}
+
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	int ret;
+
+	if (!ring->gpu_caches_dirty)
+		return 0;
+
+	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
+	if (ret)
+		return ret;
+
+	ring->gpu_caches_dirty = false;
+	return 0;
+}
+
+/**
+ * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
+ * @ringbuf: Logical Ringbuffer to advance.
+ *
+ * The tail is updated in our logical ringbuffer struct, not in the actual context. What
+ * really happens during submission is that the context and current tail will be placed
+ * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
+ * point, the tail *inside* the context is updated and the ELSP written to.
+ */
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
+
+	intel_logical_ring_advance(ringbuf);
+
+	if (intel_ring_stopped(ring))
+		return;
+
+	execlists_context_queue(ring, ctx, ringbuf->tail);
+}
+
+static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
+				    struct intel_context *ctx)
+{
+	if (ring->outstanding_lazy_seqno)
+		return 0;
+
+	if (ring->preallocated_lazy_request == NULL) {
+		struct drm_i915_gem_request *request;
+
+		request = kmalloc(sizeof(*request), GFP_KERNEL);
+		if (request == NULL)
+			return -ENOMEM;
+
+		/* Hold a reference to the context this request belongs to
+		 * (we will need it when the time comes to emit/retire the
+		 * request).
+		 */
+		request->ctx = ctx;
+		i915_gem_context_reference(request->ctx);
+
+		ring->preallocated_lazy_request = request;
+	}
+
+	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
+}
+
+static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
+				     int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_i915_gem_request *request;
+	u32 seqno = 0;
+	int ret;
+
+	if (ringbuf->last_retired_head != -1) {
+		ringbuf->head = ringbuf->last_retired_head;
+		ringbuf->last_retired_head = -1;
+
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes)
+			return 0;
+	}
+
+	list_for_each_entry(request, &ring->request_list, list) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= bytes) {
+			seqno = request->seqno;
+			break;
+		}
+	}
+
+	if (seqno == 0)
+		return -ENOSPC;
+
+	ret = i915_wait_seqno(ring, seqno);
+	if (ret)
+		return ret;
+
+	i915_gem_retire_requests_ring(ring);
+	ringbuf->head = ringbuf->last_retired_head;
+	ringbuf->last_retired_head = -1;
+
+	ringbuf->space = intel_ring_space(ringbuf);
+	return 0;
+}
+
+static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
+				       int bytes)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long end;
+	int ret;
+
+	ret = logical_ring_wait_request(ringbuf, bytes);
+	if (ret != -ENOSPC)
+		return ret;
+
+	/* Force the context submission in case we have been skipping it */
+	intel_logical_ring_advance_and_submit(ringbuf);
+
+	/* With GEM the hangcheck timer should kick us out of the loop,
+	 * leaving it early runs the risk of corrupting GEM state (due
+	 * to running on almost untested codepaths). But on resume
+	 * timers don't work yet, so prevent a complete hang in that
+	 * case by choosing an insanely large timeout. */
+	end = jiffies + 60 * HZ;
+
+	do {
+		ringbuf->head = I915_READ_HEAD(ring);
+		ringbuf->space = intel_ring_space(ringbuf);
+		if (ringbuf->space >= bytes) {
+			ret = 0;
+			break;
+		}
+
+		msleep(1);
+
+		if (dev_priv->mm.interruptible && signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+					   dev_priv->mm.interruptible);
+		if (ret)
+			break;
+
+		if (time_after(jiffies, end)) {
+			ret = -EBUSY;
+			break;
+		}
+	} while (1);
+
+	return ret;
+}
+
+static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
+{
+	uint32_t __iomem *virt;
+	int rem = ringbuf->size - ringbuf->tail;
+
+	if (ringbuf->space < rem) {
+		int ret = logical_ring_wait_for_space(ringbuf, rem);
+
+		if (ret)
+			return ret;
+	}
+
+	virt = ringbuf->virtual_start + ringbuf->tail;
+	rem /= 4;
+	while (rem--)
+		iowrite32(MI_NOOP, virt++);
+
+	ringbuf->tail = 0;
+	ringbuf->space = intel_ring_space(ringbuf);
+
+	return 0;
+}
+
+static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
+{
+	int ret;
+
+	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
+		ret = logical_ring_wrap_buffer(ringbuf);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	if (unlikely(ringbuf->space < bytes)) {
+		ret = logical_ring_wait_for_space(ringbuf, bytes);
+		if (unlikely(ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
+ *
+ * @ringbuf: Logical ringbuffer.
+ * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
+ *
+ * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
+ * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
+ * and also preallocates a request (every workload submission is still mediated through
+ * requests, same as it did with legacy ringbuffer submission).
+ *
+ * Return: non-zero if the ringbuffer is not ready to be written to.
+ */
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
+				   dev_priv->mm.interruptible);
+	if (ret)
+		return ret;
+
+	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
+	if (ret)
+		return ret;
+
+	/* Preallocate the olr before touching the ring */
+	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
+	if (ret)
+		return ret;
+
+	ringbuf->space -= num_dwords * sizeof(uint32_t);
+	return 0;
+}
+
+static int gen8_init_common_ring(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
+	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
+
+	I915_WRITE(RING_MODE_GEN7(ring),
+		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
+		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
+	POSTING_READ(RING_MODE_GEN7(ring));
+	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
+
+	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
+
+	return 0;
+}
+
+static int gen8_init_render_ring(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = gen8_init_common_ring(ring);
+	if (ret)
+		return ret;
+
+	/* We need to disable the AsyncFlip performance optimisations in order
+	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
+	 * programmed to '1' on all products.
+	 *
+	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
+	 */
+	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
+
+	ret = intel_init_pipe_control(ring);
+	if (ret)
+		return ret;
+
+	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
+
+	return ret;
+}
+
+static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
+			      u64 offset, unsigned flags)
+{
+	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
+	int ret;
+
+	ret = intel_logical_ring_begin(ringbuf, 4);
+	if (ret)
+		return ret;
+
+	/* FIXME(BDW): Address space and security selectors. */
+	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
+	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_advance(ringbuf);
+
+	return 0;
+}
+
+static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long flags;
+
+	if (!dev->irq_enabled)
+		return false;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+	if (ring->irq_refcount++ == 0) {
+		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
+		POSTING_READ(RING_IMR(ring->mmio_base));
+	}
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+
+	return true;
+}
+
+static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, flags);
+	if (--ring->irq_refcount == 0) {
+		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
+		POSTING_READ(RING_IMR(ring->mmio_base));
+	}
+	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
+}
+
+static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
+			   u32 invalidate_domains,
+			   u32 unused)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t cmd;
+	int ret;
+
+	ret = intel_logical_ring_begin(ringbuf, 4);
+	if (ret)
+		return ret;
+
+	cmd = MI_FLUSH_DW + 1;
+
+	if (ring == &dev_priv->ring[VCS]) {
+		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
+			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
+				MI_FLUSH_DW_STORE_INDEX |
+				MI_FLUSH_DW_OP_STOREDW;
+	} else {
+		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
+			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
+				MI_FLUSH_DW_OP_STOREDW;
+	}
+
+	intel_logical_ring_emit(ringbuf, cmd);
+	intel_logical_ring_emit(ringbuf,
+				I915_GEM_HWS_SCRATCH_ADDR |
+				MI_FLUSH_DW_USE_GTT);
+	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
+	intel_logical_ring_emit(ringbuf, 0); /* value */
+	intel_logical_ring_advance(ringbuf);
+
+	return 0;
+}
+
+static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
+				  u32 invalidate_domains,
+				  u32 flush_domains)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 flags = 0;
+	int ret;
+
+	flags |= PIPE_CONTROL_CS_STALL;
+
+	if (flush_domains) {
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	}
+
+	if (invalidate_domains) {
+		flags |= PIPE_CONTROL_TLB_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_QW_WRITE;
+		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+	}
+
+	ret = intel_logical_ring_begin(ringbuf, 6);
+	if (ret)
+		return ret;
+
+	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
+	intel_logical_ring_emit(ringbuf, flags);
+	intel_logical_ring_emit(ringbuf, scratch_addr);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_advance(ringbuf);
+
+	return 0;
+}
+
+static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
+{
+	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
+}
+
+static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
+{
+	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
+}
+
+static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
+{
+	struct intel_engine_cs *ring = ringbuf->ring;
+	u32 cmd;
+	int ret;
+
+	ret = intel_logical_ring_begin(ringbuf, 6);
+	if (ret)
+		return ret;
+
+	cmd = MI_STORE_DWORD_IMM_GEN8;
+	cmd |= MI_GLOBAL_GTT;
+
+	intel_logical_ring_emit(ringbuf, cmd);
+	intel_logical_ring_emit(ringbuf,
+				(ring->status_page.gfx_addr +
+				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
+	intel_logical_ring_emit(ringbuf, 0);
+	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
+	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
+	intel_logical_ring_emit(ringbuf, MI_NOOP);
+	intel_logical_ring_advance_and_submit(ringbuf);
+
+	return 0;
+}
+
+/**
+ * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
+ *
+ * @ring: Engine Command Streamer.
+ *
+ */
+void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
+{
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+
+	if (!intel_ring_initialized(ring))
+		return;
+
+	intel_logical_ring_stop(ring);
+	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
+	ring->preallocated_lazy_request = NULL;
+	ring->outstanding_lazy_seqno = 0;
+
+	if (ring->cleanup)
+		ring->cleanup(ring);
+
+	i915_cmd_parser_fini_ring(ring);
+
+	if (ring->status_page.obj) {
+		kunmap(sg_page(ring->status_page.obj->pages->sgl));
+		ring->status_page.obj = NULL;
+	}
+}
+
+static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
+{
+	int ret;
+	struct intel_context *dctx = ring->default_context;
+	struct drm_i915_gem_object *dctx_obj;
+
+	/* Intentionally left blank. */
+	ring->buffer = NULL;
+
+	ring->dev = dev;
+	INIT_LIST_HEAD(&ring->active_list);
+	INIT_LIST_HEAD(&ring->request_list);
+	init_waitqueue_head(&ring->irq_queue);
+
+	INIT_LIST_HEAD(&ring->execlist_queue);
+	spin_lock_init(&ring->execlist_lock);
+	ring->next_context_status_buffer = 0;
+
+	ret = intel_lr_context_deferred_create(dctx, ring);
+	if (ret)
+		return ret;
+
+	/* The status page is offset 0 from the context object in LRCs. */
+	dctx_obj = dctx->engine[ring->id].state;
+	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj);
+	ring->status_page.page_addr = kmap(sg_page(dctx_obj->pages->sgl));
+	if (ring->status_page.page_addr == NULL)
+		return -ENOMEM;
+	ring->status_page.obj = dctx_obj;
+
+	ret = i915_cmd_parser_init_ring(ring);
+	if (ret)
+		return ret;
+
+	if (ring->init) {
+		ret = ring->init(ring);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int logical_render_ring_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
+
+	ring->name = "render ring";
+	ring->id = RCS;
+	ring->mmio_base = RENDER_RING_BASE;
+	ring->irq_enable_mask =
+		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
+	ring->irq_keep_mask =
+		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
+	if (HAS_L3_DPF(dev))
+		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
+
+	ring->init = gen8_init_render_ring;
+	ring->cleanup = intel_fini_pipe_control;
+	ring->get_seqno = gen8_get_seqno;
+	ring->set_seqno = gen8_set_seqno;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush_render;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+
+	return logical_ring_init(dev, ring);
+}
+
+static int logical_bsd_ring_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
+
+	ring->name = "bsd ring";
+	ring->id = VCS;
+	ring->mmio_base = GEN6_BSD_RING_BASE;
+	ring->irq_enable_mask =
+		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
+	ring->irq_keep_mask =
+		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
+
+	ring->init = gen8_init_common_ring;
+	ring->get_seqno = gen8_get_seqno;
+	ring->set_seqno = gen8_set_seqno;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+
+	return logical_ring_init(dev, ring);
+}
+
+static int logical_bsd2_ring_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
+
+	ring->name = "bds2 ring";
+	ring->id = VCS2;
+	ring->mmio_base = GEN8_BSD2_RING_BASE;
+	ring->irq_enable_mask =
+		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
+	ring->irq_keep_mask =
+		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
+
+	ring->init = gen8_init_common_ring;
+	ring->get_seqno = gen8_get_seqno;
+	ring->set_seqno = gen8_set_seqno;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+
+	return logical_ring_init(dev, ring);
+}
+
+static int logical_blt_ring_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
+
+	ring->name = "blitter ring";
+	ring->id = BCS;
+	ring->mmio_base = BLT_RING_BASE;
+	ring->irq_enable_mask =
+		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
+	ring->irq_keep_mask =
+		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
+
+	ring->init = gen8_init_common_ring;
+	ring->get_seqno = gen8_get_seqno;
+	ring->set_seqno = gen8_set_seqno;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+
+	return logical_ring_init(dev, ring);
+}
+
+static int logical_vebox_ring_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
+
+	ring->name = "video enhancement ring";
+	ring->id = VECS;
+	ring->mmio_base = VEBOX_RING_BASE;
+	ring->irq_enable_mask =
+		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
+	ring->irq_keep_mask =
+		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
+
+	ring->init = gen8_init_common_ring;
+	ring->get_seqno = gen8_get_seqno;
+	ring->set_seqno = gen8_set_seqno;
+	ring->emit_request = gen8_emit_request;
+	ring->emit_flush = gen8_emit_flush;
+	ring->irq_get = gen8_logical_ring_get_irq;
+	ring->irq_put = gen8_logical_ring_put_irq;
+	ring->emit_bb_start = gen8_emit_bb_start;
+
+	return logical_ring_init(dev, ring);
+}
+
+/**
+ * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
+ * @dev: DRM device.
+ *
+ * This function inits the engines for an Execlists submission style (the equivalent in the
+ * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
+ * those engines that are present in the hardware.
+ *
+ * Return: non-zero if the initialization failed.
+ */
+int intel_logical_rings_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = logical_render_ring_init(dev);
+	if (ret)
+		return ret;
+
+	if (HAS_BSD(dev)) {
+		ret = logical_bsd_ring_init(dev);
+		if (ret)
+			goto cleanup_render_ring;
+	}
+
+	if (HAS_BLT(dev)) {
+		ret = logical_blt_ring_init(dev);
+		if (ret)
+			goto cleanup_bsd_ring;
+	}
+
+	if (HAS_VEBOX(dev)) {
+		ret = logical_vebox_ring_init(dev);
+		if (ret)
+			goto cleanup_blt_ring;
+	}
+
+	if (HAS_BSD2(dev)) {
+		ret = logical_bsd2_ring_init(dev);
+		if (ret)
+			goto cleanup_vebox_ring;
+	}
+
+	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
+	if (ret)
+		goto cleanup_bsd2_ring;
+
+	return 0;
+
+cleanup_bsd2_ring:
+	intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
+cleanup_vebox_ring:
+	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
+cleanup_blt_ring:
+	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
+cleanup_bsd_ring:
+	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
+cleanup_render_ring:
+	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
+
+	return ret;
+}
+
+static int
+populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
+		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
+{
+	struct drm_i915_gem_object *ring_obj = ringbuf->obj;
+	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
+	struct page *page;
+	uint32_t *reg_state;
+	int ret;
+
+	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
+		return ret;
+	}
+
+	ret = i915_gem_object_get_pages(ctx_obj);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Could not get object pages\n");
+		return ret;
+	}
+
+	i915_gem_object_pin_pages(ctx_obj);
+
+	/* The second page of the context object contains some fields which must
+	 * be set up prior to the first execution. */
+	page = i915_gem_object_get_page(ctx_obj, 1);
+	reg_state = kmap_atomic(page);
+
+	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
+	 * commands followed by (reg, value) pairs. The values we are setting here are
+	 * only for the first context restore: on a subsequent save, the GPU will
+	 * recreate this batchbuffer with new values (including all the missing
+	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
+	if (ring->id == RCS)
+		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
+	else
+		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
+	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
+	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
+	reg_state[CTX_CONTEXT_CONTROL+1] =
+			_MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
+	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
+	reg_state[CTX_RING_HEAD+1] = 0;
+	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
+	reg_state[CTX_RING_TAIL+1] = 0;
+	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
+	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
+	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
+	reg_state[CTX_RING_BUFFER_CONTROL+1] =
+			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
+	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
+	reg_state[CTX_BB_HEAD_U+1] = 0;
+	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
+	reg_state[CTX_BB_HEAD_L+1] = 0;
+	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
+	reg_state[CTX_BB_STATE+1] = (1<<5);
+	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
+	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
+	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
+	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
+	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
+	reg_state[CTX_SECOND_BB_STATE+1] = 0;
+	if (ring->id == RCS) {
+		/* TODO: according to BSpec, the register state context
+		 * for CHV does not have these. OTOH, these registers do
+		 * exist in CHV. I'm waiting for a clarification */
+		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
+		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
+		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
+		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
+		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
+		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
+	}
+	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
+	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
+	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
+	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
+	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
+	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
+	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
+	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
+	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
+	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
+	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
+	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
+	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
+	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
+	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
+	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
+	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
+	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
+	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
+	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
+	if (ring->id == RCS) {
+		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
+		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
+		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
+	}
+
+	kunmap_atomic(reg_state);
+
+	ctx_obj->dirty = 1;
+	set_page_dirty(page);
+	i915_gem_object_unpin_pages(ctx_obj);
+
+	return 0;
+}
+
+/**
+ * intel_lr_context_free() - free the LRC specific bits of a context
+ * @ctx: the LR context to free.
+ *
+ * The real context freeing is done in i915_gem_context_free: this only
+ * takes care of the bits that are LRC related: the per-engine backing
+ * objects and the logical ringbuffer.
+ */
+void intel_lr_context_free(struct intel_context *ctx)
+{
+	int i;
+
+	for (i = 0; i < I915_NUM_RINGS; i++) {
+		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
+		struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
+
+		if (ctx_obj) {
+			intel_destroy_ringbuffer_obj(ringbuf);
+			kfree(ringbuf);
+			i915_gem_object_ggtt_unpin(ctx_obj);
+			drm_gem_object_unreference(&ctx_obj->base);
+		}
+	}
+}
+
+static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
+{
+	int ret = 0;
+
+	WARN_ON(INTEL_INFO(ring->dev)->gen != 8);
+
+	switch (ring->id) {
+	case RCS:
+		ret = GEN8_LR_CONTEXT_RENDER_SIZE;
+		break;
+	case VCS:
+	case BCS:
+	case VECS:
+	case VCS2:
+		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * intel_lr_context_deferred_create() - create the LRC specific bits of a context
+ * @ctx: LR context to create.
+ * @ring: engine to be used with the context.
+ *
+ * This function can be called more than once, with different engines, if we plan
+ * to use the context with them. The context backing objects and the ringbuffers
+ * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
+ * the creation is a deferred call: it's better to make sure first that we need to use
+ * a given ring with the context.
+ *
+ * Return: non-zero on eror.
+ */
+int intel_lr_context_deferred_create(struct intel_context *ctx,
+				     struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+	struct drm_i915_gem_object *ctx_obj;
+	uint32_t context_size;
+	struct intel_ringbuffer *ringbuf;
+	int ret;
+
+	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
+	if (ctx->engine[ring->id].state)
+		return 0;
+
+	context_size = round_up(get_lr_context_size(ring), 4096);
+
+	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
+	if (IS_ERR(ctx_obj)) {
+		ret = PTR_ERR(ctx_obj);
+		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n", ret);
+		drm_gem_object_unreference(&ctx_obj->base);
+		return ret;
+	}
+
+	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
+	if (!ringbuf) {
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
+				ring->name);
+		i915_gem_object_ggtt_unpin(ctx_obj);
+		drm_gem_object_unreference(&ctx_obj->base);
+		ret = -ENOMEM;
+		return ret;
+	}
+
+	ringbuf->ring = ring;
+	ringbuf->FIXME_lrc_ctx = ctx;
+
+	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->effective_size = ringbuf->size;
+	ringbuf->head = 0;
+	ringbuf->tail = 0;
+	ringbuf->space = ringbuf->size;
+	ringbuf->last_retired_head = -1;
+
+	/* TODO: For now we put this in the mappable region so that we can reuse
+	 * the existing ringbuffer code which ioremaps it. When we start
+	 * creating many contexts, this will no longer work and we must switch
+	 * to a kmapish interface.
+	 */
+	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer obj %s: %d\n",
+				ring->name, ret);
+		goto error;
+	}
+
+	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
+	if (ret) {
+		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
+		intel_destroy_ringbuffer_obj(ringbuf);
+		goto error;
+	}
+
+	ctx->engine[ring->id].ringbuf = ringbuf;
+	ctx->engine[ring->id].state = ctx_obj;
+
+	return 0;
+
+error:
+	kfree(ringbuf);
+	i915_gem_object_ggtt_unpin(ctx_obj);
+	drm_gem_object_unreference(&ctx_obj->base);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
new file mode 100644
index 000000000000..991d4499fb03
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _INTEL_LRC_H_
+#define _INTEL_LRC_H_
+
+/* Execlists regs */
+#define RING_ELSP(ring)			((ring)->mmio_base+0x230)
+#define RING_EXECLIST_STATUS(ring)	((ring)->mmio_base+0x234)
+#define RING_CONTEXT_CONTROL(ring)	((ring)->mmio_base+0x244)
+#define RING_CONTEXT_STATUS_BUF(ring)	((ring)->mmio_base+0x370)
+#define RING_CONTEXT_STATUS_PTR(ring)	((ring)->mmio_base+0x3a0)
+
+/* Logical Rings */
+void intel_logical_ring_stop(struct intel_engine_cs *ring);
+void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
+int intel_logical_rings_init(struct drm_device *dev);
+
+int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf);
+void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf);
+/**
+ * intel_logical_ring_advance() - advance the ringbuffer tail
+ * @ringbuf: Ringbuffer to advance.
+ *
+ * The tail is only updated in our logical ringbuffer struct.
+ */
+static inline void intel_logical_ring_advance(struct intel_ringbuffer *ringbuf)
+{
+	ringbuf->tail &= ringbuf->size - 1;
+}
+/**
+ * intel_logical_ring_emit() - write a DWORD to the ringbuffer.
+ * @ringbuf: Ringbuffer to write to.
+ * @data: DWORD to write.
+ */
+static inline void intel_logical_ring_emit(struct intel_ringbuffer *ringbuf,
+					   u32 data)
+{
+	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
+	ringbuf->tail += 4;
+}
+int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords);
+
+/* Logical Ring Contexts */
+void intel_lr_context_free(struct intel_context *ctx);
+int intel_lr_context_deferred_create(struct intel_context *ctx,
+				     struct intel_engine_cs *ring);
+
+/* Execlists */
+int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists);
+int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
+			       struct intel_engine_cs *ring,
+			       struct intel_context *ctx,
+			       struct drm_i915_gem_execbuffer2 *args,
+			       struct list_head *vmas,
+			       struct drm_i915_gem_object *batch_obj,
+			       u64 exec_start, u32 flags);
+u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj);
+
+/**
+ * struct intel_ctx_submit_request - queued context submission request
+ * @ctx: Context to submit to the ELSP.
+ * @ring: Engine to submit it to.
+ * @tail: how far in the context's ringbuffer this request goes to.
+ * @execlist_link: link in the submission queue.
+ * @work: workqueue for processing this request in a bottom half.
+ * @elsp_submitted: no. of times this request has been sent to the ELSP.
+ *
+ * The ELSP only accepts two elements at a time, so we queue context/tail
+ * pairs on a given queue (ring->execlist_queue) until the hardware is
+ * available. The queue serves a double purpose: we also use it to keep track
+ * of the up to 2 contexts currently in the hardware (usually one in execution
+ * and the other queued up by the GPU): We only remove elements from the head
+ * of the queue when the hardware informs us that an element has been
+ * completed.
+ *
+ * All accesses to the queue are mediated by a spinlock (ring->execlist_lock).
+ */
+struct intel_ctx_submit_request {
+	struct intel_context *ctx;
+	struct intel_engine_cs *ring;
+	u32 tail;
+
+	struct list_head execlist_link;
+	struct work_struct work;
+
+	int elsp_submitted;
+};
+
+void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring);
+
+#endif /* _INTEL_LRC_H_ */
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index fdf40267249c..a6bd1422e38f 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -823,8 +823,7 @@ bool intel_is_dual_link_lvds(struct drm_device *dev)
 	struct intel_encoder *encoder;
 	struct intel_lvds_encoder *lvds_encoder;
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list,
-			    base.head) {
+	for_each_intel_encoder(dev, encoder) {
 		if (encoder->type == INTEL_OUTPUT_LVDS) {
 			lvds_encoder = to_lvds_encoder(&encoder->base);
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 40c12295c0bd..c8f744c418f0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -309,6 +309,9 @@ static void gen7_enable_fbc(struct drm_crtc *crtc)
 
 	dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN;
 
+	if (dev_priv->fbc.false_color)
+		dpfc_ctl |= FBC_CTL_FALSE_COLOR;
+
 	I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN);
 
 	if (IS_IVYBRIDGE(dev)) {
@@ -1268,33 +1271,27 @@ static bool g4x_compute_srwm(struct drm_device *dev,
 			      display, cursor);
 }
 
-static bool vlv_compute_drain_latency(struct drm_device *dev,
-				     int plane,
-				     int *plane_prec_mult,
-				     int *plane_dl,
-				     int *cursor_prec_mult,
-				     int *cursor_dl)
+static bool vlv_compute_drain_latency(struct drm_crtc *crtc,
+				      int pixel_size,
+				      int *prec_mult,
+				      int *drain_latency)
 {
-	struct drm_crtc *crtc;
-	int clock, pixel_size;
 	int entries;
+	int clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
 
-	crtc = intel_get_crtc_for_plane(dev, plane);
-	if (!intel_crtc_active(crtc))
+	if (WARN(clock == 0, "Pixel clock is zero!\n"))
 		return false;
 
-	clock = to_intel_crtc(crtc)->config.adjusted_mode.crtc_clock;
-	pixel_size = crtc->primary->fb->bits_per_pixel / 8;	/* BPP */
+	if (WARN(pixel_size == 0, "Pixel size is zero!\n"))
+		return false;
 
-	entries = (clock / 1000) * pixel_size;
-	*plane_prec_mult = (entries > 128) ?
-		DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32;
-	*plane_dl = (64 * (*plane_prec_mult) * 4) / entries;
+	entries = DIV_ROUND_UP(clock, 1000) * pixel_size;
+	*prec_mult = (entries > 128) ? DRAIN_LATENCY_PRECISION_64 :
+				       DRAIN_LATENCY_PRECISION_32;
+	*drain_latency = (64 * (*prec_mult) * 4) / entries;
 
-	entries = (clock / 1000) * 4;	/* BPP is always 4 for cursor */
-	*cursor_prec_mult = (entries > 128) ?
-		DRAIN_LATENCY_PRECISION_64 : DRAIN_LATENCY_PRECISION_32;
-	*cursor_dl = (64 * (*cursor_prec_mult) * 4) / entries;
+	if (*drain_latency > DRAIN_LATENCY_MASK)
+		*drain_latency = DRAIN_LATENCY_MASK;
 
 	return true;
 }
@@ -1307,39 +1304,48 @@ static bool vlv_compute_drain_latency(struct drm_device *dev,
  * latency value.
  */
 
-static void vlv_update_drain_latency(struct drm_device *dev)
+static void vlv_update_drain_latency(struct drm_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int planea_prec, planea_dl, planeb_prec, planeb_dl;
-	int cursora_prec, cursora_dl, cursorb_prec, cursorb_dl;
-	int plane_prec_mult, cursor_prec_mult; /* Precision multiplier is
-							either 16 or 32 */
+	struct drm_i915_private *dev_priv = crtc->dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pixel_size;
+	int drain_latency;
+	enum pipe pipe = intel_crtc->pipe;
+	int plane_prec, prec_mult, plane_dl;
 
-	/* For plane A, Cursor A */
-	if (vlv_compute_drain_latency(dev, 0, &plane_prec_mult, &planea_dl,
-				      &cursor_prec_mult, &cursora_dl)) {
-		cursora_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-			DDL_CURSORA_PRECISION_32 : DDL_CURSORA_PRECISION_64;
-		planea_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-			DDL_PLANEA_PRECISION_32 : DDL_PLANEA_PRECISION_64;
+	plane_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_PLANE_PRECISION_64 |
+		   DRAIN_LATENCY_MASK | DDL_CURSOR_PRECISION_64 |
+		   (DRAIN_LATENCY_MASK << DDL_CURSOR_SHIFT));
+
+	if (!intel_crtc_active(crtc)) {
+		I915_WRITE(VLV_DDL(pipe), plane_dl);
+		return;
+	}
 
-		I915_WRITE(VLV_DDL1, cursora_prec |
-				(cursora_dl << DDL_CURSORA_SHIFT) |
-				planea_prec | planea_dl);
+	/* Primary plane Drain Latency */
+	pixel_size = crtc->primary->fb->bits_per_pixel / 8;	/* BPP */
+	if (vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
+		plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+					   DDL_PLANE_PRECISION_64 :
+					   DDL_PLANE_PRECISION_32;
+		plane_dl |= plane_prec | drain_latency;
 	}
 
-	/* For plane B, Cursor B */
-	if (vlv_compute_drain_latency(dev, 1, &plane_prec_mult, &planeb_dl,
-				      &cursor_prec_mult, &cursorb_dl)) {
-		cursorb_prec = (cursor_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-			DDL_CURSORB_PRECISION_32 : DDL_CURSORB_PRECISION_64;
-		planeb_prec = (plane_prec_mult == DRAIN_LATENCY_PRECISION_32) ?
-			DDL_PLANEB_PRECISION_32 : DDL_PLANEB_PRECISION_64;
+	/* Cursor Drain Latency
+	 * BPP is always 4 for cursor
+	 */
+	pixel_size = 4;
 
-		I915_WRITE(VLV_DDL2, cursorb_prec |
-				(cursorb_dl << DDL_CURSORB_SHIFT) |
-				planeb_prec | planeb_dl);
+	/* Program cursor DL only if it is enabled */
+	if (intel_crtc->cursor_base &&
+	    vlv_compute_drain_latency(crtc, pixel_size, &prec_mult, &drain_latency)) {
+		plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+					   DDL_CURSOR_PRECISION_64 :
+					   DDL_CURSOR_PRECISION_32;
+		plane_dl |= plane_prec | (drain_latency << DDL_CURSOR_SHIFT);
 	}
+
+	I915_WRITE(VLV_DDL(pipe), plane_dl);
 }
 
 #define single_plane_enabled(mask) is_power_of_2(mask)
@@ -1355,7 +1361,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
 	unsigned int enabled = 0;
 	bool cxsr_enabled;
 
-	vlv_update_drain_latency(dev);
+	vlv_update_drain_latency(crtc);
 
 	if (g4x_compute_wm0(dev, PIPE_A,
 			    &valleyview_wm_info, latency_ns,
@@ -1387,7 +1393,8 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
 		plane_sr = cursor_sr = 0;
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
+	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
 		      planea_wm, cursora_wm,
 		      planeb_wm, cursorb_wm,
 		      plane_sr, cursor_sr);
@@ -1396,7 +1403,7 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
 		   (plane_sr << DSPFW_SR_SHIFT) |
 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
-		   planea_wm);
+		   (planea_wm << DSPFW_PLANEA_SHIFT));
 	I915_WRITE(DSPFW2,
 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
@@ -1408,6 +1415,116 @@ static void valleyview_update_wm(struct drm_crtc *crtc)
 		intel_set_memory_cxsr(dev_priv, true);
 }
 
+static void cherryview_update_wm(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	static const int sr_latency_ns = 12000;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int planea_wm, planeb_wm, planec_wm;
+	int cursora_wm, cursorb_wm, cursorc_wm;
+	int plane_sr, cursor_sr;
+	int ignore_plane_sr, ignore_cursor_sr;
+	unsigned int enabled = 0;
+	bool cxsr_enabled;
+
+	vlv_update_drain_latency(crtc);
+
+	if (g4x_compute_wm0(dev, PIPE_A,
+			    &valleyview_wm_info, latency_ns,
+			    &valleyview_cursor_wm_info, latency_ns,
+			    &planea_wm, &cursora_wm))
+		enabled |= 1 << PIPE_A;
+
+	if (g4x_compute_wm0(dev, PIPE_B,
+			    &valleyview_wm_info, latency_ns,
+			    &valleyview_cursor_wm_info, latency_ns,
+			    &planeb_wm, &cursorb_wm))
+		enabled |= 1 << PIPE_B;
+
+	if (g4x_compute_wm0(dev, PIPE_C,
+			    &valleyview_wm_info, latency_ns,
+			    &valleyview_cursor_wm_info, latency_ns,
+			    &planec_wm, &cursorc_wm))
+		enabled |= 1 << PIPE_C;
+
+	if (single_plane_enabled(enabled) &&
+	    g4x_compute_srwm(dev, ffs(enabled) - 1,
+			     sr_latency_ns,
+			     &valleyview_wm_info,
+			     &valleyview_cursor_wm_info,
+			     &plane_sr, &ignore_cursor_sr) &&
+	    g4x_compute_srwm(dev, ffs(enabled) - 1,
+			     2*sr_latency_ns,
+			     &valleyview_wm_info,
+			     &valleyview_cursor_wm_info,
+			     &ignore_plane_sr, &cursor_sr)) {
+		cxsr_enabled = true;
+	} else {
+		cxsr_enabled = false;
+		intel_set_memory_cxsr(dev_priv, false);
+		plane_sr = cursor_sr = 0;
+	}
+
+	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+		      "B: plane=%d, cursor=%d, C: plane=%d, cursor=%d, "
+		      "SR: plane=%d, cursor=%d\n",
+		      planea_wm, cursora_wm,
+		      planeb_wm, cursorb_wm,
+		      planec_wm, cursorc_wm,
+		      plane_sr, cursor_sr);
+
+	I915_WRITE(DSPFW1,
+		   (plane_sr << DSPFW_SR_SHIFT) |
+		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
+		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
+		   (planea_wm << DSPFW_PLANEA_SHIFT));
+	I915_WRITE(DSPFW2,
+		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
+		   (cursora_wm << DSPFW_CURSORA_SHIFT));
+	I915_WRITE(DSPFW3,
+		   (I915_READ(DSPFW3) & ~DSPFW_CURSOR_SR_MASK) |
+		   (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
+	I915_WRITE(DSPFW9_CHV,
+		   (I915_READ(DSPFW9_CHV) & ~(DSPFW_PLANEC_MASK |
+					      DSPFW_CURSORC_MASK)) |
+		   (planec_wm << DSPFW_PLANEC_SHIFT) |
+		   (cursorc_wm << DSPFW_CURSORC_SHIFT));
+
+	if (cxsr_enabled)
+		intel_set_memory_cxsr(dev_priv, true);
+}
+
+static void valleyview_update_sprite_wm(struct drm_plane *plane,
+					struct drm_crtc *crtc,
+					uint32_t sprite_width,
+					uint32_t sprite_height,
+					int pixel_size,
+					bool enabled, bool scaled)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe = to_intel_plane(plane)->pipe;
+	int sprite = to_intel_plane(plane)->plane;
+	int drain_latency;
+	int plane_prec;
+	int sprite_dl;
+	int prec_mult;
+
+	sprite_dl = I915_READ(VLV_DDL(pipe)) & ~(DDL_SPRITE_PRECISION_64(sprite) |
+		    (DRAIN_LATENCY_MASK << DDL_SPRITE_SHIFT(sprite)));
+
+	if (enabled && vlv_compute_drain_latency(crtc, pixel_size, &prec_mult,
+						 &drain_latency)) {
+		plane_prec = (prec_mult == DRAIN_LATENCY_PRECISION_64) ?
+					   DDL_SPRITE_PRECISION_64(sprite) :
+					   DDL_SPRITE_PRECISION_32(sprite);
+		sprite_dl |= plane_prec |
+			     (drain_latency << DDL_SPRITE_SHIFT(sprite));
+	}
+
+	I915_WRITE(VLV_DDL(pipe), sprite_dl);
+}
+
 static void g4x_update_wm(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -1443,7 +1560,8 @@ static void g4x_update_wm(struct drm_crtc *crtc)
 		plane_sr = cursor_sr = 0;
 	}
 
-	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
+	DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, "
+		      "B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n",
 		      planea_wm, cursora_wm,
 		      planeb_wm, cursorb_wm,
 		      plane_sr, cursor_sr);
@@ -1452,7 +1570,7 @@ static void g4x_update_wm(struct drm_crtc *crtc)
 		   (plane_sr << DSPFW_SR_SHIFT) |
 		   (cursorb_wm << DSPFW_CURSORB_SHIFT) |
 		   (planeb_wm << DSPFW_PLANEB_SHIFT) |
-		   planea_wm);
+		   (planea_wm << DSPFW_PLANEA_SHIFT));
 	I915_WRITE(DSPFW2,
 		   (I915_READ(DSPFW2) & ~DSPFW_CURSORA_MASK) |
 		   (cursora_wm << DSPFW_CURSORA_SHIFT));
@@ -1526,8 +1644,11 @@ static void i965_update_wm(struct drm_crtc *unused_crtc)
 
 	/* 965 has limitations... */
 	I915_WRITE(DSPFW1, (srwm << DSPFW_SR_SHIFT) |
-		   (8 << 16) | (8 << 8) | (8 << 0));
-	I915_WRITE(DSPFW2, (8 << 8) | (8 << 0));
+		   (8 << DSPFW_CURSORB_SHIFT) |
+		   (8 << DSPFW_PLANEB_SHIFT) |
+		   (8 << DSPFW_PLANEA_SHIFT));
+	I915_WRITE(DSPFW2, (8 << DSPFW_CURSORA_SHIFT) |
+		   (8 << DSPFW_PLANEC_SHIFT_OLD));
 	/* update cursor SR watermark */
 	I915_WRITE(DSPFW3, (cursor_sr << DSPFW_CURSOR_SR_SHIFT));
 
@@ -3598,7 +3719,6 @@ static void gen6_enable_rps(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_engine_cs *ring;
 	u32 rp_state_cap;
-	u32 gt_perf_status;
 	u32 rc6vids, pcu_mbox = 0, rc6_mask = 0;
 	u32 gtfifodbg;
 	int rc6_mode;
@@ -3623,7 +3743,6 @@ static void gen6_enable_rps(struct drm_device *dev)
 	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
 
 	rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
-	gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
 
 	parse_rp_state_cap(dev_priv, rp_state_cap);
 
@@ -6252,6 +6371,153 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
 	vlv_set_power_well(dev_priv, power_well, false);
 }
 
+static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv,
+					   struct i915_power_well *power_well)
+{
+	enum dpio_phy phy;
+
+	WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
+		     power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
+
+	/*
+	 * Enable the CRI clock source so we can get at the
+	 * display and the reference clock for VGA
+	 * hotplug / manual detection.
+	 */
+	if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
+		phy = DPIO_PHY0;
+		I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
+			   DPLL_REFA_CLK_ENABLE_VLV);
+		I915_WRITE(DPLL(PIPE_B), I915_READ(DPLL(PIPE_B)) |
+			   DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
+	} else {
+		phy = DPIO_PHY1;
+		I915_WRITE(DPLL(PIPE_C), I915_READ(DPLL(PIPE_C)) |
+			   DPLL_REFA_CLK_ENABLE_VLV | DPLL_INTEGRATED_CRI_CLK_VLV);
+	}
+	udelay(1); /* >10ns for cmnreset, >0ns for sidereset */
+	vlv_set_power_well(dev_priv, power_well, true);
+
+	/* Poll for phypwrgood signal */
+	if (wait_for(I915_READ(DISPLAY_PHY_STATUS) & PHY_POWERGOOD(phy), 1))
+		DRM_ERROR("Display PHY %d is not power up\n", phy);
+
+	I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) |
+		   PHY_COM_LANE_RESET_DEASSERT(phy));
+}
+
+static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv,
+					    struct i915_power_well *power_well)
+{
+	enum dpio_phy phy;
+
+	WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC &&
+		     power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D);
+
+	if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) {
+		phy = DPIO_PHY0;
+		assert_pll_disabled(dev_priv, PIPE_A);
+		assert_pll_disabled(dev_priv, PIPE_B);
+	} else {
+		phy = DPIO_PHY1;
+		assert_pll_disabled(dev_priv, PIPE_C);
+	}
+
+	I915_WRITE(DISPLAY_PHY_CONTROL, I915_READ(DISPLAY_PHY_CONTROL) &
+		   ~PHY_COM_LANE_RESET_DEASSERT(phy));
+
+	vlv_set_power_well(dev_priv, power_well, false);
+}
+
+static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv,
+					struct i915_power_well *power_well)
+{
+	enum pipe pipe = power_well->data;
+	bool enabled;
+	u32 state, ctrl;
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+	state = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe);
+	/*
+	 * We only ever set the power-on and power-gate states, anything
+	 * else is unexpected.
+	 */
+	WARN_ON(state != DP_SSS_PWR_ON(pipe) && state != DP_SSS_PWR_GATE(pipe));
+	enabled = state == DP_SSS_PWR_ON(pipe);
+
+	/*
+	 * A transient state at this point would mean some unexpected party
+	 * is poking at the power controls too.
+	 */
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSC_MASK(pipe);
+	WARN_ON(ctrl << 16 != state);
+
+	mutex_unlock(&dev_priv->rps.hw_lock);
+
+	return enabled;
+}
+
+static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv,
+				    struct i915_power_well *power_well,
+				    bool enable)
+{
+	enum pipe pipe = power_well->data;
+	u32 state;
+	u32 ctrl;
+
+	state = enable ? DP_SSS_PWR_ON(pipe) : DP_SSS_PWR_GATE(pipe);
+
+	mutex_lock(&dev_priv->rps.hw_lock);
+
+#define COND \
+	((vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ) & DP_SSS_MASK(pipe)) == state)
+
+	if (COND)
+		goto out;
+
+	ctrl = vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ);
+	ctrl &= ~DP_SSC_MASK(pipe);
+	ctrl |= enable ? DP_SSC_PWR_ON(pipe) : DP_SSC_PWR_GATE(pipe);
+	vlv_punit_write(dev_priv, PUNIT_REG_DSPFREQ, ctrl);
+
+	if (wait_for(COND, 100))
+		DRM_ERROR("timout setting power well state %08x (%08x)\n",
+			  state,
+			  vlv_punit_read(dev_priv, PUNIT_REG_DSPFREQ));
+
+#undef COND
+
+out:
+	mutex_unlock(&dev_priv->rps.hw_lock);
+}
+
+static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv,
+					struct i915_power_well *power_well)
+{
+	chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0);
+}
+
+static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv,
+				       struct i915_power_well *power_well)
+{
+	WARN_ON_ONCE(power_well->data != PIPE_A &&
+		     power_well->data != PIPE_B &&
+		     power_well->data != PIPE_C);
+
+	chv_set_pipe_power_well(dev_priv, power_well, true);
+}
+
+static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv,
+					struct i915_power_well *power_well)
+{
+	WARN_ON_ONCE(power_well->data != PIPE_A &&
+		     power_well->data != PIPE_B &&
+		     power_well->data != PIPE_C);
+
+	chv_set_pipe_power_well(dev_priv, power_well, false);
+}
+
 static void check_power_well_state(struct drm_i915_private *dev_priv,
 				   struct i915_power_well *power_well)
 {
@@ -6443,6 +6709,39 @@ EXPORT_SYMBOL_GPL(i915_get_cdclk_freq);
 	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
 	BIT(POWER_DOMAIN_INIT))
 
+#define CHV_PIPE_A_POWER_DOMAINS (	\
+	BIT(POWER_DOMAIN_PIPE_A) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_PIPE_B_POWER_DOMAINS (	\
+	BIT(POWER_DOMAIN_PIPE_B) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_PIPE_C_POWER_DOMAINS (	\
+	BIT(POWER_DOMAIN_PIPE_C) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_BC_POWER_DOMAINS (		\
+	BIT(POWER_DOMAIN_PORT_DDI_B_2_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_B_4_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_2_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_C_4_LANES) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_CMN_D_POWER_DOMAINS (		\
+	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS (	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_2_LANES) |	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |	\
+	BIT(POWER_DOMAIN_INIT))
+
+#define CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS (	\
+	BIT(POWER_DOMAIN_PORT_DDI_D_4_LANES) |	\
+	BIT(POWER_DOMAIN_INIT))
+
 static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
 	.sync_hw = i9xx_always_on_power_well_noop,
 	.enable = i9xx_always_on_power_well_noop,
@@ -6450,6 +6749,20 @@ static const struct i915_power_well_ops i9xx_always_on_power_well_ops = {
 	.is_enabled = i9xx_always_on_power_well_enabled,
 };
 
+static const struct i915_power_well_ops chv_pipe_power_well_ops = {
+	.sync_hw = chv_pipe_power_well_sync_hw,
+	.enable = chv_pipe_power_well_enable,
+	.disable = chv_pipe_power_well_disable,
+	.is_enabled = chv_pipe_power_well_enabled,
+};
+
+static const struct i915_power_well_ops chv_dpio_cmn_power_well_ops = {
+	.sync_hw = vlv_power_well_sync_hw,
+	.enable = chv_dpio_cmn_power_well_enable,
+	.disable = chv_dpio_cmn_power_well_disable,
+	.is_enabled = vlv_power_well_enabled,
+};
+
 static struct i915_power_well i9xx_always_on_power_well[] = {
 	{
 		.name = "always-on",
@@ -6572,6 +6885,107 @@ static struct i915_power_well vlv_power_wells[] = {
 	},
 };
 
+static struct i915_power_well chv_power_wells[] = {
+	{
+		.name = "always-on",
+		.always_on = 1,
+		.domains = VLV_ALWAYS_ON_POWER_DOMAINS,
+		.ops = &i9xx_always_on_power_well_ops,
+	},
+#if 0
+	{
+		.name = "display",
+		.domains = VLV_DISPLAY_POWER_DOMAINS,
+		.data = PUNIT_POWER_WELL_DISP2D,
+		.ops = &vlv_display_power_well_ops,
+	},
+	{
+		.name = "pipe-a",
+		.domains = CHV_PIPE_A_POWER_DOMAINS,
+		.data = PIPE_A,
+		.ops = &chv_pipe_power_well_ops,
+	},
+	{
+		.name = "pipe-b",
+		.domains = CHV_PIPE_B_POWER_DOMAINS,
+		.data = PIPE_B,
+		.ops = &chv_pipe_power_well_ops,
+	},
+	{
+		.name = "pipe-c",
+		.domains = CHV_PIPE_C_POWER_DOMAINS,
+		.data = PIPE_C,
+		.ops = &chv_pipe_power_well_ops,
+	},
+#endif
+	{
+		.name = "dpio-common-bc",
+		/*
+		 * XXX: cmnreset for one PHY seems to disturb the other.
+		 * As a workaround keep both powered on at the same
+		 * time for now.
+		 */
+		.domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
+		.data = PUNIT_POWER_WELL_DPIO_CMN_BC,
+		.ops = &chv_dpio_cmn_power_well_ops,
+	},
+	{
+		.name = "dpio-common-d",
+		/*
+		 * XXX: cmnreset for one PHY seems to disturb the other.
+		 * As a workaround keep both powered on at the same
+		 * time for now.
+		 */
+		.domains = CHV_DPIO_CMN_BC_POWER_DOMAINS | CHV_DPIO_CMN_D_POWER_DOMAINS,
+		.data = PUNIT_POWER_WELL_DPIO_CMN_D,
+		.ops = &chv_dpio_cmn_power_well_ops,
+	},
+#if 0
+	{
+		.name = "dpio-tx-b-01",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01,
+	},
+	{
+		.name = "dpio-tx-b-23",
+		.domains = VLV_DPIO_TX_B_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_B_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23,
+	},
+	{
+		.name = "dpio-tx-c-01",
+		.domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01,
+	},
+	{
+		.name = "dpio-tx-c-23",
+		.domains = VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS |
+			   VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23,
+	},
+	{
+		.name = "dpio-tx-d-01",
+		.domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
+			   CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_01,
+	},
+	{
+		.name = "dpio-tx-d-23",
+		.domains = CHV_DPIO_TX_D_LANES_01_POWER_DOMAINS |
+			   CHV_DPIO_TX_D_LANES_23_POWER_DOMAINS,
+		.ops = &vlv_dpio_power_well_ops,
+		.data = PUNIT_POWER_WELL_DPIO_TX_D_LANES_23,
+	},
+#endif
+};
+
 static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_priv,
 						 enum punit_power_well power_well_id)
 {
@@ -6608,6 +7022,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
 	} else if (IS_BROADWELL(dev_priv->dev)) {
 		set_power_wells(power_domains, bdw_power_wells);
 		hsw_pwr = power_domains;
+	} else if (IS_CHERRYVIEW(dev_priv->dev)) {
+		set_power_wells(power_domains, chv_power_wells);
 	} else if (IS_VALLEYVIEW(dev_priv->dev)) {
 		set_power_wells(power_domains, vlv_power_wells);
 	} else {
@@ -6835,11 +7251,13 @@ void intel_init_pm(struct drm_device *dev)
 		else if (INTEL_INFO(dev)->gen == 8)
 			dev_priv->display.init_clock_gating = gen8_init_clock_gating;
 	} else if (IS_CHERRYVIEW(dev)) {
-		dev_priv->display.update_wm = valleyview_update_wm;
+		dev_priv->display.update_wm = cherryview_update_wm;
+		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
 		dev_priv->display.init_clock_gating =
 			cherryview_init_clock_gating;
 	} else if (IS_VALLEYVIEW(dev)) {
 		dev_priv->display.update_wm = valleyview_update_wm;
+		dev_priv->display.update_sprite_wm = valleyview_update_sprite_wm;
 		dev_priv->display.init_clock_gating =
 			valleyview_init_clock_gating;
 	} else if (IS_PINEVIEW(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 2d068edd1adc..3c7283d8f160 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -33,14 +33,24 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
-/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
- * but keeps the logic simple. Indeed, the whole purpose of this macro is just
- * to give some inclination as to some of the magic values used in the various
- * workarounds!
- */
-#define CACHELINE_BYTES 64
+bool
+intel_ring_initialized(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	if (!dev)
+		return false;
 
-static inline int __ring_space(int head, int tail, int size)
+	if (i915.enable_execlists) {
+		struct intel_context *dctx = ring->default_context;
+		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
+
+		return ringbuf->obj;
+	} else
+		return ring->buffer && ring->buffer->obj;
+}
+
+int __intel_ring_space(int head, int tail, int size)
 {
 	int space = head - (tail + I915_RING_FREE_SPACE);
 	if (space < 0)
@@ -48,12 +58,13 @@ static inline int __ring_space(int head, int tail, int size)
 	return space;
 }
 
-static inline int ring_space(struct intel_ringbuffer *ringbuf)
+int intel_ring_space(struct intel_ringbuffer *ringbuf)
 {
-	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size);
+	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
+				  ringbuf->tail, ringbuf->size);
 }
 
-static bool intel_ring_stopped(struct intel_engine_cs *ring)
+bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
@@ -476,9 +487,14 @@ static bool stop_ring(struct intel_engine_cs *ring)
 
 	if (!IS_GEN2(ring->dev)) {
 		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
-		if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
-			DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
-			return false;
+		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
+			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
+			/* Sometimes we observe that the idle flag is not
+			 * set even though the ring is empty. So double
+			 * check before giving up.
+			 */
+			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
+				return false;
 		}
 	}
 
@@ -563,7 +579,7 @@ static int init_ring_common(struct intel_engine_cs *ring)
 	else {
 		ringbuf->head = I915_READ_HEAD(ring);
 		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		ringbuf->last_retired_head = -1;
 	}
 
@@ -575,8 +591,25 @@ out:
 	return ret;
 }
 
-static int
-init_pipe_control(struct intel_engine_cs *ring)
+void
+intel_fini_pipe_control(struct intel_engine_cs *ring)
+{
+	struct drm_device *dev = ring->dev;
+
+	if (ring->scratch.obj == NULL)
+		return;
+
+	if (INTEL_INFO(dev)->gen >= 5) {
+		kunmap(sg_page(ring->scratch.obj->pages->sgl));
+		i915_gem_object_ggtt_unpin(ring->scratch.obj);
+	}
+
+	drm_gem_object_unreference(&ring->scratch.obj->base);
+	ring->scratch.obj = NULL;
+}
+
+int
+intel_init_pipe_control(struct intel_engine_cs *ring)
 {
 	int ret;
 
@@ -651,7 +684,7 @@ static int init_render_ring(struct intel_engine_cs *ring)
 			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
 	if (INTEL_INFO(dev)->gen >= 5) {
-		ret = init_pipe_control(ring);
+		ret = intel_init_pipe_control(ring);
 		if (ret)
 			return ret;
 	}
@@ -686,16 +719,7 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)
 		dev_priv->semaphore_obj = NULL;
 	}
 
-	if (ring->scratch.obj == NULL)
-		return;
-
-	if (INTEL_INFO(dev)->gen >= 5) {
-		kunmap(sg_page(ring->scratch.obj->pages->sgl));
-		i915_gem_object_ggtt_unpin(ring->scratch.obj);
-	}
-
-	drm_gem_object_unreference(&ring->scratch.obj->base);
-	ring->scratch.obj = NULL;
+	intel_fini_pipe_control(ring);
 }
 
 static int gen8_rcs_signal(struct intel_engine_cs *signaller,
@@ -1526,7 +1550,7 @@ static int init_phys_status_page(struct intel_engine_cs *ring)
 	return 0;
 }
 
-static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 {
 	if (!ringbuf->obj)
 		return;
@@ -1537,8 +1561,8 @@ static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
 	ringbuf->obj = NULL;
 }
 
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-				      struct intel_ringbuffer *ringbuf)
+int intel_alloc_ringbuffer_obj(struct drm_device *dev,
+			       struct intel_ringbuffer *ringbuf)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	struct drm_i915_gem_object *obj;
@@ -1600,7 +1624,9 @@ static int intel_init_ring_buffer(struct drm_device *dev,
 	ring->dev = dev;
 	INIT_LIST_HEAD(&ring->active_list);
 	INIT_LIST_HEAD(&ring->request_list);
+	INIT_LIST_HEAD(&ring->execlist_queue);
 	ringbuf->size = 32 * PAGE_SIZE;
+	ringbuf->ring = ring;
 	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
 	init_waitqueue_head(&ring->irq_queue);
@@ -1683,13 +1709,14 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 		ringbuf->head = ringbuf->last_retired_head;
 		ringbuf->last_retired_head = -1;
 
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n)
 			return 0;
 	}
 
 	list_for_each_entry(request, &ring->request_list, list) {
-		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) {
+		if (__intel_ring_space(request->tail, ringbuf->tail,
+				       ringbuf->size) >= n) {
 			seqno = request->seqno;
 			break;
 		}
@@ -1706,7 +1733,7 @@ static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
 	ringbuf->head = ringbuf->last_retired_head;
 	ringbuf->last_retired_head = -1;
 
-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);
 	return 0;
 }
 
@@ -1735,7 +1762,7 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
 	trace_i915_ring_wait_begin(ring);
 	do {
 		ringbuf->head = I915_READ_HEAD(ring);
-		ringbuf->space = ring_space(ringbuf);
+		ringbuf->space = intel_ring_space(ringbuf);
 		if (ringbuf->space >= n) {
 			ret = 0;
 			break;
@@ -1787,7 +1814,7 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
 		iowrite32(MI_NOOP, virt++);
 
 	ringbuf->tail = 0;
-	ringbuf->space = ring_space(ringbuf);
+	ringbuf->space = intel_ring_space(ringbuf);
 
 	return 0;
 }
@@ -1992,9 +2019,7 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
 			      u64 offset, u32 len,
 			      unsigned flags)
 {
-	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-	bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL &&
-		!(flags & I915_DISPATCH_SECURE);
+	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
 	int ret;
 
 	ret = intel_ring_begin(ring, 4);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 70525d0c2c74..9cbf7b0ebc99 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -5,6 +5,13 @@
 
 #define I915_CMD_HASH_ORDER 9
 
+/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
+ * but keeps the logic simple. Indeed, the whole purpose of this macro is just
+ * to give some inclination as to some of the magic values used in the various
+ * workarounds!
+ */
+#define CACHELINE_BYTES 64
+
 /*
  * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
  * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
@@ -90,6 +97,15 @@ struct intel_ringbuffer {
 	struct drm_i915_gem_object *obj;
 	void __iomem *virtual_start;
 
+	struct intel_engine_cs *ring;
+
+	/*
+	 * FIXME: This backpointer is an artifact of the history of how the
+	 * execlist patches came into being. It will get removed once the basic
+	 * code has landed.
+	 */
+	struct intel_context *FIXME_lrc_ctx;
+
 	u32 head;
 	u32 tail;
 	int space;
@@ -214,6 +230,18 @@ struct  intel_engine_cs {
 				  unsigned int num_dwords);
 	} semaphore;
 
+	/* Execlists */
+	spinlock_t execlist_lock;
+	struct list_head execlist_queue;
+	u8 next_context_status_buffer;
+	u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
+	int		(*emit_request)(struct intel_ringbuffer *ringbuf);
+	int		(*emit_flush)(struct intel_ringbuffer *ringbuf,
+				      u32 invalidate_domains,
+				      u32 flush_domains);
+	int		(*emit_bb_start)(struct intel_ringbuffer *ringbuf,
+					 u64 offset, unsigned flags);
+
 	/**
 	 * List of objects currently involved in rendering from the
 	 * ringbuffer.
@@ -287,11 +315,7 @@ struct  intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
-static inline bool
-intel_ring_initialized(struct intel_engine_cs *ring)
-{
-	return ring->buffer && ring->buffer->obj;
-}
+bool intel_ring_initialized(struct intel_engine_cs *ring);
 
 static inline unsigned
 intel_ring_flag(struct intel_engine_cs *ring)
@@ -355,6 +379,10 @@ intel_write_status_page(struct intel_engine_cs *ring,
 #define I915_GEM_HWS_SCRATCH_INDEX	0x30
 #define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
+void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
+int intel_alloc_ringbuffer_obj(struct drm_device *dev,
+			       struct intel_ringbuffer *ringbuf);
+
 void intel_stop_ring_buffer(struct intel_engine_cs *ring);
 void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);
 
@@ -372,6 +400,9 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	ringbuf->tail &= ringbuf->size - 1;
 }
+int __intel_ring_space(int head, int tail, int size);
+int intel_ring_space(struct intel_ringbuffer *ringbuf);
+bool intel_ring_stopped(struct intel_engine_cs *ring);
 void __intel_ring_advance(struct intel_engine_cs *ring);
 
 int __must_check intel_ring_idle(struct intel_engine_cs *ring);
@@ -379,6 +410,9 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
 int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
 int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);
 
+void intel_fini_pipe_control(struct intel_engine_cs *ring);
+int intel_init_pipe_control(struct intel_engine_cs *ring);
+
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
 int intel_init_bsd2_ring_buffer(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 168c6652cda1..0bdb00b7c59c 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -53,6 +53,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl
 	enum pipe pipe = crtc->pipe;
 	long timeout = msecs_to_jiffies_timeout(1);
 	int scanline, min, max, vblank_start;
+	wait_queue_head_t *wq = drm_crtc_vblank_waitqueue(&crtc->base);
 	DEFINE_WAIT(wait);
 
 	WARN_ON(!drm_modeset_is_locked(&crtc->base.mutex));
@@ -81,7 +82,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl
 		 * other CPUs can see the task state update by the time we
 		 * read the scanline.
 		 */
-		prepare_to_wait(&crtc->vbl_wait, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
 
 		scanline = intel_get_crtc_scanline(crtc);
 		if (scanline < min || scanline > max)
@@ -100,7 +101,7 @@ static bool intel_pipe_update_start(struct intel_crtc *crtc, uint32_t *start_vbl
 		local_irq_disable();
 	}
 
-	finish_wait(&crtc->vbl_wait, &wait);
+	finish_wait(wq, &wait);
 
 	drm_vblank_put(dev, pipe);
 
@@ -163,6 +164,7 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
 	sprctl &= ~SP_PIXFORMAT_MASK;
 	sprctl &= ~SP_YUV_BYTE_ORDER_MASK;
 	sprctl &= ~SP_TILED;
+	sprctl &= ~SP_ROTATE_180;
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_YUYV:
@@ -235,6 +237,14 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_crtc *crtc,
 							fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
+	if (intel_plane->rotation == BIT(DRM_ROTATE_180)) {
+		sprctl |= SP_ROTATE_180;
+
+		x += src_w;
+		y += src_h;
+		linear_offset += src_h * fb->pitches[0] + src_w * pixel_size;
+	}
+
 	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
 
 	intel_update_primary_plane(intel_crtc);
@@ -364,6 +374,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	sprctl &= ~SPRITE_RGB_ORDER_RGBX;
 	sprctl &= ~SPRITE_YUV_BYTE_ORDER_MASK;
 	sprctl &= ~SPRITE_TILED;
+	sprctl &= ~SPRITE_ROTATE_180;
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XBGR8888:
@@ -426,6 +437,18 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 					       pixel_size, fb->pitches[0]);
 	linear_offset -= sprsurf_offset;
 
+	if (intel_plane->rotation == BIT(DRM_ROTATE_180)) {
+		sprctl |= SPRITE_ROTATE_180;
+
+		/* HSW and BDW does this automagically in hardware */
+		if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) {
+			x += src_w;
+			y += src_h;
+			linear_offset += src_h * fb->pitches[0] +
+				src_w * pixel_size;
+		}
+	}
+
 	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
 
 	intel_update_primary_plane(intel_crtc);
@@ -571,6 +594,7 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	dvscntr &= ~DVS_RGB_ORDER_XBGR;
 	dvscntr &= ~DVS_YUV_BYTE_ORDER_MASK;
 	dvscntr &= ~DVS_TILED;
+	dvscntr &= ~DVS_ROTATE_180;
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_XBGR8888:
@@ -628,6 +652,14 @@ ilk_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 					       pixel_size, fb->pitches[0]);
 	linear_offset -= dvssurf_offset;
 
+	if (intel_plane->rotation == BIT(DRM_ROTATE_180)) {
+		dvscntr |= DVS_ROTATE_180;
+
+		x += src_w;
+		y += src_h;
+		linear_offset += src_h * fb->pitches[0] + src_w * pixel_size;
+	}
+
 	atomic_update = intel_pipe_update_start(intel_crtc, &start_vbl_count);
 
 	intel_update_primary_plane(intel_crtc);
@@ -895,6 +927,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	max_scale = intel_plane->max_downscale << 16;
 	min_scale = intel_plane->can_scale ? 1 : (1 << 16);
 
+	drm_rect_rotate(&src, fb->width << 16, fb->height << 16,
+			intel_plane->rotation);
+
 	hscale = drm_rect_calc_hscale_relaxed(&src, &dst, min_scale, max_scale);
 	BUG_ON(hscale < 0);
 
@@ -933,6 +968,9 @@ intel_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 				     drm_rect_width(&dst) * hscale - drm_rect_width(&src),
 				     drm_rect_height(&dst) * vscale - drm_rect_height(&src));
 
+		drm_rect_rotate_inv(&src, fb->width << 16, fb->height << 16,
+				    intel_plane->rotation);
+
 		/* sanity check to make sure the src viewport wasn't enlarged */
 		WARN_ON(src.x1 < (int) src_x ||
 			src.y1 < (int) src_y ||
@@ -1180,18 +1218,42 @@ out_unlock:
 	return ret;
 }
 
-void intel_plane_restore(struct drm_plane *plane)
+static int intel_plane_set_property(struct drm_plane *plane,
+				    struct drm_property *prop,
+				    uint64_t val)
+{
+	struct drm_device *dev = plane->dev;
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	uint64_t old_val;
+	int ret = -ENOENT;
+
+	if (prop == dev->mode_config.rotation_property) {
+		/* exactly one rotation angle please */
+		if (hweight32(val & 0xf) != 1)
+			return -EINVAL;
+
+		old_val = intel_plane->rotation;
+		intel_plane->rotation = val;
+		ret = intel_plane_restore(plane);
+		if (ret)
+			intel_plane->rotation = old_val;
+	}
+
+	return ret;
+}
+
+int intel_plane_restore(struct drm_plane *plane)
 {
 	struct intel_plane *intel_plane = to_intel_plane(plane);
 
 	if (!plane->crtc || !plane->fb)
-		return;
+		return 0;
 
-	intel_update_plane(plane, plane->crtc, plane->fb,
-			   intel_plane->crtc_x, intel_plane->crtc_y,
-			   intel_plane->crtc_w, intel_plane->crtc_h,
-			   intel_plane->src_x, intel_plane->src_y,
-			   intel_plane->src_w, intel_plane->src_h);
+	return intel_update_plane(plane, plane->crtc, plane->fb,
+				  intel_plane->crtc_x, intel_plane->crtc_y,
+				  intel_plane->crtc_w, intel_plane->crtc_h,
+				  intel_plane->src_x, intel_plane->src_y,
+				  intel_plane->src_w, intel_plane->src_h);
 }
 
 void intel_plane_disable(struct drm_plane *plane)
@@ -1206,6 +1268,7 @@ static const struct drm_plane_funcs intel_plane_funcs = {
 	.update_plane = intel_update_plane,
 	.disable_plane = intel_disable_plane,
 	.destroy = intel_destroy_plane,
+	.set_property = intel_plane_set_property,
 };
 
 static uint32_t ilk_plane_formats[] = {
@@ -1310,13 +1373,28 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane)
 
 	intel_plane->pipe = pipe;
 	intel_plane->plane = plane;
+	intel_plane->rotation = BIT(DRM_ROTATE_0);
 	possible_crtcs = (1 << pipe);
 	ret = drm_plane_init(dev, &intel_plane->base, possible_crtcs,
 			     &intel_plane_funcs,
 			     plane_formats, num_plane_formats,
 			     false);
-	if (ret)
+	if (ret) {
 		kfree(intel_plane);
+		goto out;
+	}
+
+	if (!dev->mode_config.rotation_property)
+		dev->mode_config.rotation_property =
+			drm_mode_create_rotation_property(dev,
+							  BIT(DRM_ROTATE_0) |
+							  BIT(DRM_ROTATE_180));
+
+	if (dev->mode_config.rotation_property)
+		drm_object_attach_property(&intel_plane->base.base,
+					   dev->mode_config.rotation_property,
+					   intel_plane->rotation);
 
+ out:
 	return ret;
 }
diff --git a/drivers/gpu/drm/mga/mga_dma.c b/drivers/gpu/drm/mga/mga_dma.c
index c3bf059ba720..8cfa9cb74c86 100644
--- a/drivers/gpu/drm/mga/mga_dma.c
+++ b/drivers/gpu/drm/mga/mga_dma.c
@@ -502,31 +502,31 @@ static int mga_do_agp_dma_bootstrap(struct drm_device *dev,
 		return err;
 	}
 
-	/* Make drm_addbufs happy by not trying to create a mapping for less
-	 * than a page.
+	/* Make drm_legacy_addbufs happy by not trying to create a mapping for
+	 * less than a page.
 	 */
 	if (warp_size < PAGE_SIZE)
 		warp_size = PAGE_SIZE;
 
 	offset = 0;
-	err = drm_addmap(dev, offset, warp_size,
-			 _DRM_AGP, _DRM_READ_ONLY, &dev_priv->warp);
+	err = drm_legacy_addmap(dev, offset, warp_size,
+				_DRM_AGP, _DRM_READ_ONLY, &dev_priv->warp);
 	if (err) {
 		DRM_ERROR("Unable to map WARP microcode: %d\n", err);
 		return err;
 	}
 
 	offset += warp_size;
-	err = drm_addmap(dev, offset, dma_bs->primary_size,
-			 _DRM_AGP, _DRM_READ_ONLY, &dev_priv->primary);
+	err = drm_legacy_addmap(dev, offset, dma_bs->primary_size,
+				_DRM_AGP, _DRM_READ_ONLY, &dev_priv->primary);
 	if (err) {
 		DRM_ERROR("Unable to map primary DMA region: %d\n", err);
 		return err;
 	}
 
 	offset += dma_bs->primary_size;
-	err = drm_addmap(dev, offset, secondary_size,
-			 _DRM_AGP, 0, &dev->agp_buffer_map);
+	err = drm_legacy_addmap(dev, offset, secondary_size,
+				_DRM_AGP, 0, &dev->agp_buffer_map);
 	if (err) {
 		DRM_ERROR("Unable to map secondary DMA region: %d\n", err);
 		return err;
@@ -538,7 +538,7 @@ static int mga_do_agp_dma_bootstrap(struct drm_device *dev,
 	req.flags = _DRM_AGP_BUFFER;
 	req.agp_start = offset;
 
-	err = drm_addbufs_agp(dev, &req);
+	err = drm_legacy_addbufs_agp(dev, &req);
 	if (err) {
 		DRM_ERROR("Unable to add secondary DMA buffers: %d\n", err);
 		return err;
@@ -559,16 +559,16 @@ static int mga_do_agp_dma_bootstrap(struct drm_device *dev,
 	}
 
 	offset += secondary_size;
-	err = drm_addmap(dev, offset, agp_size - offset,
-			 _DRM_AGP, 0, &dev_priv->agp_textures);
+	err = drm_legacy_addmap(dev, offset, agp_size - offset,
+				_DRM_AGP, 0, &dev_priv->agp_textures);
 	if (err) {
 		DRM_ERROR("Unable to map AGP texture region %d\n", err);
 		return err;
 	}
 
-	drm_core_ioremap(dev_priv->warp, dev);
-	drm_core_ioremap(dev_priv->primary, dev);
-	drm_core_ioremap(dev->agp_buffer_map, dev);
+	drm_legacy_ioremap(dev_priv->warp, dev);
+	drm_legacy_ioremap(dev_priv->primary, dev);
+	drm_legacy_ioremap(dev->agp_buffer_map, dev);
 
 	if (!dev_priv->warp->handle ||
 	    !dev_priv->primary->handle || !dev->agp_buffer_map->handle) {
@@ -602,7 +602,7 @@ static int mga_do_agp_dma_bootstrap(struct drm_device *dev,
  *
  * \todo
  * Determine whether the maximum address passed to drm_pci_alloc is correct.
- * The same goes for drm_addbufs_pci.
+ * The same goes for drm_legacy_addbufs_pci.
  *
  * \sa mga_do_dma_bootstrap, mga_do_agp_dma_bootstrap
  */
@@ -622,15 +622,15 @@ static int mga_do_pci_dma_bootstrap(struct drm_device *dev,
 		return -EFAULT;
 	}
 
-	/* Make drm_addbufs happy by not trying to create a mapping for less
-	 * than a page.
+	/* Make drm_legacy_addbufs happy by not trying to create a mapping for
+	 * less than a page.
 	 */
 	if (warp_size < PAGE_SIZE)
 		warp_size = PAGE_SIZE;
 
 	/* The proper alignment is 0x100 for this mapping */
-	err = drm_addmap(dev, 0, warp_size, _DRM_CONSISTENT,
-			 _DRM_READ_ONLY, &dev_priv->warp);
+	err = drm_legacy_addmap(dev, 0, warp_size, _DRM_CONSISTENT,
+				_DRM_READ_ONLY, &dev_priv->warp);
 	if (err != 0) {
 		DRM_ERROR("Unable to create mapping for WARP microcode: %d\n",
 			  err);
@@ -645,8 +645,8 @@ static int mga_do_pci_dma_bootstrap(struct drm_device *dev,
 	for (primary_size = dma_bs->primary_size; primary_size != 0;
 	     primary_size >>= 1) {
 		/* The proper alignment for this mapping is 0x04 */
-		err = drm_addmap(dev, 0, primary_size, _DRM_CONSISTENT,
-				 _DRM_READ_ONLY, &dev_priv->primary);
+		err = drm_legacy_addmap(dev, 0, primary_size, _DRM_CONSISTENT,
+					_DRM_READ_ONLY, &dev_priv->primary);
 		if (!err)
 			break;
 	}
@@ -669,7 +669,7 @@ static int mga_do_pci_dma_bootstrap(struct drm_device *dev,
 		req.count = bin_count;
 		req.size = dma_bs->secondary_bin_size;
 
-		err = drm_addbufs_pci(dev, &req);
+		err = drm_legacy_addbufs_pci(dev, &req);
 		if (!err)
 			break;
 	}
@@ -708,15 +708,16 @@ static int mga_do_dma_bootstrap(struct drm_device *dev,
 	/* The first steps are the same for both PCI and AGP based DMA.  Map
 	 * the cards MMIO registers and map a status page.
 	 */
-	err = drm_addmap(dev, dev_priv->mmio_base, dev_priv->mmio_size,
-			 _DRM_REGISTERS, _DRM_READ_ONLY, &dev_priv->mmio);
+	err = drm_legacy_addmap(dev, dev_priv->mmio_base, dev_priv->mmio_size,
+				_DRM_REGISTERS, _DRM_READ_ONLY,
+				&dev_priv->mmio);
 	if (err) {
 		DRM_ERROR("Unable to map MMIO region: %d\n", err);
 		return err;
 	}
 
-	err = drm_addmap(dev, 0, SAREA_MAX, _DRM_SHM,
-			 _DRM_READ_ONLY | _DRM_LOCKED | _DRM_KERNEL,
+	err = drm_legacy_addmap(dev, 0, SAREA_MAX, _DRM_SHM,
+				_DRM_READ_ONLY | _DRM_LOCKED | _DRM_KERNEL,
 			 &dev_priv->status);
 	if (err) {
 		DRM_ERROR("Unable to map status region: %d\n", err);
@@ -809,7 +810,7 @@ static int mga_do_init_dma(struct drm_device *dev, drm_mga_init_t *init)
 	dev_priv->texture_offset = init->texture_offset[0];
 	dev_priv->texture_size = init->texture_size[0];
 
-	dev_priv->sarea = drm_getsarea(dev);
+	dev_priv->sarea = drm_legacy_getsarea(dev);
 	if (!dev_priv->sarea) {
 		DRM_ERROR("failed to find sarea!\n");
 		return -EINVAL;
@@ -820,37 +821,37 @@ static int mga_do_init_dma(struct drm_device *dev, drm_mga_init_t *init)
 		dev_priv->dma_access = MGA_PAGPXFER;
 		dev_priv->wagp_enable = MGA_WAGP_ENABLE;
 
-		dev_priv->status = drm_core_findmap(dev, init->status_offset);
+		dev_priv->status = drm_legacy_findmap(dev, init->status_offset);
 		if (!dev_priv->status) {
 			DRM_ERROR("failed to find status page!\n");
 			return -EINVAL;
 		}
-		dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset);
+		dev_priv->mmio = drm_legacy_findmap(dev, init->mmio_offset);
 		if (!dev_priv->mmio) {
 			DRM_ERROR("failed to find mmio region!\n");
 			return -EINVAL;
 		}
-		dev_priv->warp = drm_core_findmap(dev, init->warp_offset);
+		dev_priv->warp = drm_legacy_findmap(dev, init->warp_offset);
 		if (!dev_priv->warp) {
 			DRM_ERROR("failed to find warp microcode region!\n");
 			return -EINVAL;
 		}
-		dev_priv->primary = drm_core_findmap(dev, init->primary_offset);
+		dev_priv->primary = drm_legacy_findmap(dev, init->primary_offset);
 		if (!dev_priv->primary) {
 			DRM_ERROR("failed to find primary dma region!\n");
 			return -EINVAL;
 		}
 		dev->agp_buffer_token = init->buffers_offset;
 		dev->agp_buffer_map =
-		    drm_core_findmap(dev, init->buffers_offset);
+		    drm_legacy_findmap(dev, init->buffers_offset);
 		if (!dev->agp_buffer_map) {
 			DRM_ERROR("failed to find dma buffer region!\n");
 			return -EINVAL;
 		}
 
-		drm_core_ioremap(dev_priv->warp, dev);
-		drm_core_ioremap(dev_priv->primary, dev);
-		drm_core_ioremap(dev->agp_buffer_map, dev);
+		drm_legacy_ioremap(dev_priv->warp, dev);
+		drm_legacy_ioremap(dev_priv->primary, dev);
+		drm_legacy_ioremap(dev->agp_buffer_map, dev);
 	}
 
 	dev_priv->sarea_priv =
@@ -936,14 +937,14 @@ static int mga_do_cleanup_dma(struct drm_device *dev, int full_cleanup)
 
 		if ((dev_priv->warp != NULL)
 		    && (dev_priv->warp->type != _DRM_CONSISTENT))
-			drm_core_ioremapfree(dev_priv->warp, dev);
+			drm_legacy_ioremapfree(dev_priv->warp, dev);
 
 		if ((dev_priv->primary != NULL)
 		    && (dev_priv->primary->type != _DRM_CONSISTENT))
-			drm_core_ioremapfree(dev_priv->primary, dev);
+			drm_legacy_ioremapfree(dev_priv->primary, dev);
 
 		if (dev->agp_buffer_map != NULL)
-			drm_core_ioremapfree(dev->agp_buffer_map, dev);
+			drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
 
 		if (dev_priv->used_new_dma_init) {
 #if __OS_HAS_AGP
diff --git a/drivers/gpu/drm/mga/mga_drv.c b/drivers/gpu/drm/mga/mga_drv.c
index 6b1a87c8aac5..cb5c71f4b28e 100644
--- a/drivers/gpu/drm/mga/mga_drv.c
+++ b/drivers/gpu/drm/mga/mga_drv.c
@@ -64,6 +64,7 @@ static struct drm_driver driver = {
 	.load = mga_driver_load,
 	.unload = mga_driver_unload,
 	.lastclose = mga_driver_lastclose,
+	.set_busid = drm_pci_set_busid,
 	.dma_quiescent = mga_driver_dma_quiescent,
 	.device_is_agp = mga_driver_device_is_agp,
 	.get_vblank_counter = mga_get_vblank_counter,
diff --git a/drivers/gpu/drm/mga/mga_drv.h b/drivers/gpu/drm/mga/mga_drv.h
index fe453213600a..b4a2014917e5 100644
--- a/drivers/gpu/drm/mga/mga_drv.h
+++ b/drivers/gpu/drm/mga/mga_drv.h
@@ -31,6 +31,8 @@
 #ifndef __MGA_DRV_H__
 #define __MGA_DRV_H__
 
+#include <drm/drm_legacy.h>
+
 /* General customization:
  */
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index 2d75d6df0789..97745991544d 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -91,6 +91,7 @@ static struct drm_driver driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET,
 	.load = mgag200_driver_load,
 	.unload = mgag200_driver_unload,
+	.set_busid = drm_pci_set_busid,
 	.fops = &mgag200_driver_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index 80de23d9b9c9..c03e347f3ffd 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -190,8 +190,6 @@ struct mga_device {
 	resource_size_t			rmmio_size;
 	void __iomem			*rmmio;
 
-	drm_local_map_t			*framebuffer;
-
 	struct mga_mc			mc;
 	struct mga_mode_info		mode_info;
 
@@ -224,7 +222,7 @@ struct mgag200_bo {
 	struct ttm_placement placement;
 	struct ttm_bo_kmap_obj kmap;
 	struct drm_gem_object gem;
-	u32 placements[3];
+	struct ttm_place placements[3];
 	int pin_count;
 };
 #define gem_to_mga_bo(gobj) container_of((gobj), struct mgag200_bo, gem)
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index 45f04dea0ac2..83485ab81ce8 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1483,11 +1483,7 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
 {
 	struct drm_device *dev = connector->dev;
 	struct mga_device *mdev = (struct mga_device*)dev->dev_private;
-	struct mga_fbdev *mfbdev = mdev->mfbdev;
-	struct drm_fb_helper *fb_helper = &mfbdev->helper;
-	struct drm_fb_helper_connector *fb_helper_conn = NULL;
 	int bpp = 32;
-	int i = 0;
 
 	if (IS_G200_SE(mdev)) {
 		if (mdev->unique_rev_id == 0x01) {
@@ -1537,21 +1533,14 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
 	}
 
 	/* Validate the mode input by the user */
-	for (i = 0; i < fb_helper->connector_count; i++) {
-		if (fb_helper->connector_info[i]->connector == connector) {
-			/* Found the helper for this connector */
-			fb_helper_conn = fb_helper->connector_info[i];
-			if (fb_helper_conn->cmdline_mode.specified) {
-				if (fb_helper_conn->cmdline_mode.bpp_specified) {
-					bpp = fb_helper_conn->cmdline_mode.bpp;
-				}
-			}
-		}
+	if (connector->cmdline_mode.specified) {
+		if (connector->cmdline_mode.bpp_specified)
+			bpp = connector->cmdline_mode.bpp;
 	}
 
 	if ((mode->hdisplay * mode->vdisplay * (bpp/8)) > mdev->mc.vram_size) {
-		if (fb_helper_conn)
-			fb_helper_conn->cmdline_mode.specified = false;
+		if (connector->cmdline_mode.specified)
+			connector->cmdline_mode.specified = false;
 		return MODE_BAD;
 	}
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 5a00e90696de..be883ef5a1d3 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -293,18 +293,22 @@ void mgag200_mm_fini(struct mga_device *mdev)
 void mgag200_ttm_placement(struct mgag200_bo *bo, int domain)
 {
 	u32 c = 0;
-	bo->placement.fpfn = 0;
-	bo->placement.lpfn = 0;
+	unsigned i;
+
 	bo->placement.placement = bo->placements;
 	bo->placement.busy_placement = bo->placements;
 	if (domain & TTM_PL_FLAG_VRAM)
-		bo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
+		bo->placements[c++].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_VRAM;
 	if (domain & TTM_PL_FLAG_SYSTEM)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	if (!c)
-		bo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		bo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	bo->placement.num_placement = c;
 	bo->placement.num_busy_placement = c;
+	for (i = 0; i < c; ++i) {
+		bo->placements[i].fpfn = 0;
+		bo->placements[i].lpfn = 0;
+	}
 }
 
 int mgag200_bo_create(struct drm_device *dev, int size, int align,
@@ -361,7 +365,7 @@ int mgag200_bo_pin(struct mgag200_bo *bo, u32 pl_flag, u64 *gpu_addr)
 
 	mgag200_ttm_placement(bo, pl_flag);
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -384,7 +388,7 @@ int mgag200_bo_unpin(struct mgag200_bo *bo)
 		return 0;
 
 	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret)
 		return ret;
@@ -408,7 +412,7 @@ int mgag200_bo_push_sysram(struct mgag200_bo *bo)
 
 	mgag200_ttm_placement(bo, TTM_PL_FLAG_SYSTEM);
 	for (i = 0; i < bo->placement.num_placement ; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 
 	ret = ttm_bo_validate(&bo->bo, &bo->placement, false, false);
 	if (ret) {
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index c99c50de3226..9d907c526c94 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -4,6 +4,7 @@ config DRM_MSM
 	depends on DRM
 	depends on ARCH_QCOM || (ARM && COMPILE_TEST)
 	select DRM_KMS_HELPER
+	select DRM_PANEL
 	select SHMEM
 	select TMPFS
 	default y
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 93ca49c8df44..6283dcb96af5 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -4,6 +4,7 @@ ifeq (, $(findstring -W,$(EXTRA_CFLAGS)))
 endif
 
 msm-y := \
+	adreno/adreno_device.o \
 	adreno/adreno_gpu.o \
 	adreno/a3xx_gpu.o \
 	hdmi/hdmi.o \
@@ -18,6 +19,8 @@ msm-y := \
 	mdp/mdp_kms.o \
 	mdp/mdp4/mdp4_crtc.o \
 	mdp/mdp4/mdp4_dtv_encoder.o \
+	mdp/mdp4/mdp4_lcdc_encoder.o \
+	mdp/mdp4/mdp4_lvds_connector.o \
 	mdp/mdp4/mdp4_irq.o \
 	mdp/mdp4/mdp4_kms.o \
 	mdp/mdp4/mdp4_plane.o \
@@ -39,5 +42,6 @@ msm-y := \
 	msm_ringbuffer.o
 
 msm-$(CONFIG_DRM_MSM_FBDEV) += msm_fbdev.o
+msm-$(CONFIG_COMMON_CLK) += mdp/mdp4/mdp4_lvds_pll.o
 
 obj-$(CONFIG_DRM_MSM)	+= msm.o
diff --git a/drivers/gpu/drm/msm/adreno/a2xx.xml.h b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
index a8a144b38eaa..a3104598c27f 100644
--- a/drivers/gpu/drm/msm/adreno/a2xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a2xx.xml.h
@@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (   9859 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14477 bytes, from 2014-05-16 11:51:57)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-06-25 12:57:16)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  26602 bytes, from 2014-06-25 12:57:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14960 bytes, from 2014-07-27 17:22:13)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  41068 bytes, from 2014-08-01 12:22:48)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/adreno/a3xx.xml.h b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
index 303e8a9e91a5..82d015279b47 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx.xml.h
+++ b/drivers/gpu/drm/msm/adreno/a3xx.xml.h
@@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (   9859 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14477 bytes, from 2014-05-16 11:51:57)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-06-25 12:57:16)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  26602 bytes, from 2014-06-25 12:57:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14960 bytes, from 2014-07-27 17:22:13)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  41068 bytes, from 2014-08-01 12:22:48)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -654,7 +654,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT		0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 {
-	return ((((uint32_t)(val * 40.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+	return ((((uint32_t)(val * 28.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
 }
 
 #define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET			0x0000206d
@@ -662,7 +662,7 @@ static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
 #define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT			0
 static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
 {
-	return ((((uint32_t)(val * 44.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
+	return ((((uint32_t)(val * 28.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
 }
 
 #define REG_A3XX_GRAS_SU_MODE_CONTROL				0x00002070
@@ -1696,7 +1696,7 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
 {
 	return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
 }
-#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK		0x3f000000
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK		0x7f000000
 #define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT		24
 static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
 {
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 2773600c9488..218c5b060398 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -35,10 +35,8 @@
 	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
 	 A3XX_INT0_UCHE_OOB_ACCESS)
 
+extern bool hang_debug;
 
-static bool hang_debug = false;
-MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
-module_param_named(hang_debug, hang_debug, bool, 0600);
 static void a3xx_dump(struct msm_gpu *gpu);
 
 static void a3xx_me_init(struct msm_gpu *gpu)
@@ -387,58 +385,26 @@ static const unsigned int a3xx_registers[] = {
 	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
 	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
 	0x303c, 0x303c, 0x305e, 0x305f,
+	~0   /* sentinel */
 };
 
 #ifdef CONFIG_DEBUG_FS
 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
-	int i;
-
-	adreno_show(gpu, m);
-
 	gpu->funcs->pm_resume(gpu);
-
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
-
-	/* dump these out in a form that can be parsed by demsm: */
-	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
-	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
-		uint32_t start = a3xx_registers[i];
-		uint32_t end   = a3xx_registers[i+1];
-		uint32_t addr;
-
-		for (addr = start; addr <= end; addr++) {
-			uint32_t val = gpu_read(gpu, addr);
-			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
-		}
-	}
-
 	gpu->funcs->pm_suspend(gpu);
+	adreno_show(gpu, m);
 }
 #endif
 
 /* would be nice to not have to duplicate the _show() stuff with printk(): */
 static void a3xx_dump(struct msm_gpu *gpu)
 {
-	int i;
-
-	adreno_dump(gpu);
 	printk("status:   %08x\n",
 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
-
-	/* dump these out in a form that can be parsed by demsm: */
-	printk("IO:region %s 00000000 00020000\n", gpu->name);
-	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
-		uint32_t start = a3xx_registers[i];
-		uint32_t end   = a3xx_registers[i+1];
-		uint32_t addr;
-
-		for (addr = start; addr <= end; addr++) {
-			uint32_t val = gpu_read(gpu, addr);
-			printk("IO:R %08x %08x\n", addr<<2, val);
-		}
-	}
+	adreno_dump(gpu);
 }
 
 static const struct adreno_gpu_funcs funcs = {
@@ -474,7 +440,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 	struct msm_gpu *gpu;
 	struct msm_drm_private *priv = dev->dev_private;
 	struct platform_device *pdev = priv->gpu_pdev;
-	struct adreno_platform_config *config;
 	int ret;
 
 	if (!pdev) {
@@ -483,8 +448,6 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	config = pdev->dev.platform_data;
-
 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
 	if (!a3xx_gpu) {
 		ret = -ENOMEM;
@@ -496,20 +459,12 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
 
 	a3xx_gpu->pdev = pdev;
 
-	gpu->fast_rate = config->fast_rate;
-	gpu->slow_rate = config->slow_rate;
-	gpu->bus_freq  = config->bus_freq;
-#ifdef CONFIG_MSM_BUS_SCALING
-	gpu->bus_scale_table = config->bus_scale_table;
-#endif
-
-	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
-			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
-
 	gpu->perfcntrs = perfcntrs;
 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
 
-	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
+	adreno_gpu->registers = a3xx_registers;
+
+	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
 	if (ret)
 		goto fail;
 
@@ -549,158 +504,3 @@ fail:
 
 	return ERR_PTR(ret);
 }
-
-/*
- * The a3xx device:
- */
-
-#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
-#  include <mach/kgsl.h>
-#endif
-
-static void set_gpu_pdev(struct drm_device *dev,
-		struct platform_device *pdev)
-{
-	struct msm_drm_private *priv = dev->dev_private;
-	priv->gpu_pdev = pdev;
-}
-
-static int a3xx_bind(struct device *dev, struct device *master, void *data)
-{
-	static struct adreno_platform_config config = {};
-#ifdef CONFIG_OF
-	struct device_node *child, *node = dev->of_node;
-	u32 val;
-	int ret;
-
-	ret = of_property_read_u32(node, "qcom,chipid", &val);
-	if (ret) {
-		dev_err(dev, "could not find chipid: %d\n", ret);
-		return ret;
-	}
-
-	config.rev = ADRENO_REV((val >> 24) & 0xff,
-			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
-
-	/* find clock rates: */
-	config.fast_rate = 0;
-	config.slow_rate = ~0;
-	for_each_child_of_node(node, child) {
-		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
-			struct device_node *pwrlvl;
-			for_each_child_of_node(child, pwrlvl) {
-				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
-				if (ret) {
-					dev_err(dev, "could not find gpu-freq: %d\n", ret);
-					return ret;
-				}
-				config.fast_rate = max(config.fast_rate, val);
-				config.slow_rate = min(config.slow_rate, val);
-			}
-		}
-	}
-
-	if (!config.fast_rate) {
-		dev_err(dev, "could not find clk rates\n");
-		return -ENXIO;
-	}
-
-#else
-	struct kgsl_device_platform_data *pdata = dev->platform_data;
-	uint32_t version = socinfo_get_version();
-	if (cpu_is_apq8064ab()) {
-		config.fast_rate = 450000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 4;
-		config.rev = ADRENO_REV(3, 2, 1, 0);
-	} else if (cpu_is_apq8064()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 4;
-
-		if (SOCINFO_VERSION_MAJOR(version) == 2)
-			config.rev = ADRENO_REV(3, 2, 0, 2);
-		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
-				(SOCINFO_VERSION_MINOR(version) == 1))
-			config.rev = ADRENO_REV(3, 2, 0, 1);
-		else
-			config.rev = ADRENO_REV(3, 2, 0, 0);
-
-	} else if (cpu_is_msm8960ab()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 320000000;
-		config.bus_freq  = 4;
-
-		if (SOCINFO_VERSION_MINOR(version) == 0)
-			config.rev = ADRENO_REV(3, 2, 1, 0);
-		else
-			config.rev = ADRENO_REV(3, 2, 1, 1);
-
-	} else if (cpu_is_msm8930()) {
-		config.fast_rate = 400000000;
-		config.slow_rate = 27000000;
-		config.bus_freq  = 3;
-
-		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
-			(SOCINFO_VERSION_MINOR(version) == 2))
-			config.rev = ADRENO_REV(3, 0, 5, 2);
-		else
-			config.rev = ADRENO_REV(3, 0, 5, 0);
-
-	}
-#  ifdef CONFIG_MSM_BUS_SCALING
-	config.bus_scale_table = pdata->bus_scale_table;
-#  endif
-#endif
-	dev->platform_data = &config;
-	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
-	return 0;
-}
-
-static void a3xx_unbind(struct device *dev, struct device *master,
-		void *data)
-{
-	set_gpu_pdev(dev_get_drvdata(master), NULL);
-}
-
-static const struct component_ops a3xx_ops = {
-		.bind   = a3xx_bind,
-		.unbind = a3xx_unbind,
-};
-
-static int a3xx_probe(struct platform_device *pdev)
-{
-	return component_add(&pdev->dev, &a3xx_ops);
-}
-
-static int a3xx_remove(struct platform_device *pdev)
-{
-	component_del(&pdev->dev, &a3xx_ops);
-	return 0;
-}
-
-static const struct of_device_id dt_match[] = {
-	{ .compatible = "qcom,adreno-3xx" },
-	/* for backwards compat w/ downstream kgsl DT files: */
-	{ .compatible = "qcom,kgsl-3d0" },
-	{}
-};
-
-static struct platform_driver a3xx_driver = {
-	.probe = a3xx_probe,
-	.remove = a3xx_remove,
-	.driver = {
-		.name = "kgsl-3d0",
-		.of_match_table = dt_match,
-	},
-};
-
-void __init a3xx_register(void)
-{
-	platform_driver_register(&a3xx_driver);
-}
-
-void __exit a3xx_unregister(void)
-{
-	platform_driver_unregister(&a3xx_driver);
-}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
index 9de19ac2e86c..cc341bc62b51 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_common.xml.h
@@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (   9859 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14477 bytes, from 2014-05-16 11:51:57)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-06-25 12:57:16)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  26602 bytes, from 2014-06-25 12:57:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14960 bytes, from 2014-07-27 17:22:13)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  41068 bytes, from 2014-08-01 12:22:48)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
new file mode 100644
index 000000000000..7ab85af3a7db
--- /dev/null
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2013-2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "adreno_gpu.h"
+
+#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
+#  include <mach/kgsl.h>
+#endif
+
+#define ANY_ID 0xff
+
+bool hang_debug = false;
+MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
+module_param_named(hang_debug, hang_debug, bool, 0600);
+
+struct msm_gpu *a3xx_gpu_init(struct drm_device *dev);
+
+static const struct adreno_info gpulist[] = {
+	{
+		.rev   = ADRENO_REV(3, 0, 5, ANY_ID),
+		.revn  = 305,
+		.name  = "A305",
+		.pm4fw = "a300_pm4.fw",
+		.pfpfw = "a300_pfp.fw",
+		.gmem  = SZ_256K,
+		.init  = a3xx_gpu_init,
+	}, {
+		.rev   = ADRENO_REV(3, 2, ANY_ID, ANY_ID),
+		.revn  = 320,
+		.name  = "A320",
+		.pm4fw = "a300_pm4.fw",
+		.pfpfw = "a300_pfp.fw",
+		.gmem  = SZ_512K,
+		.init  = a3xx_gpu_init,
+	}, {
+		.rev   = ADRENO_REV(3, 3, 0, ANY_ID),
+		.revn  = 330,
+		.name  = "A330",
+		.pm4fw = "a330_pm4.fw",
+		.pfpfw = "a330_pfp.fw",
+		.gmem  = SZ_1M,
+		.init  = a3xx_gpu_init,
+	},
+};
+
+MODULE_FIRMWARE("a300_pm4.fw");
+MODULE_FIRMWARE("a300_pfp.fw");
+MODULE_FIRMWARE("a330_pm4.fw");
+MODULE_FIRMWARE("a330_pfp.fw");
+
+static inline bool _rev_match(uint8_t entry, uint8_t id)
+{
+	return (entry == ANY_ID) || (entry == id);
+}
+
+const struct adreno_info *adreno_info(struct adreno_rev rev)
+{
+	int i;
+
+	/* identify gpu: */
+	for (i = 0; i < ARRAY_SIZE(gpulist); i++) {
+		const struct adreno_info *info = &gpulist[i];
+		if (_rev_match(info->rev.core, rev.core) &&
+				_rev_match(info->rev.major, rev.major) &&
+				_rev_match(info->rev.minor, rev.minor) &&
+				_rev_match(info->rev.patchid, rev.patchid))
+			return info;
+	}
+
+	return NULL;
+}
+
+struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct platform_device *pdev = priv->gpu_pdev;
+	struct adreno_platform_config *config;
+	struct adreno_rev rev;
+	const struct adreno_info *info;
+	struct msm_gpu *gpu = NULL;
+
+	if (!pdev) {
+		dev_err(dev->dev, "no adreno device\n");
+		return NULL;
+	}
+
+	config = pdev->dev.platform_data;
+	rev = config->rev;
+	info = adreno_info(config->rev);
+
+	if (!info) {
+		dev_warn(dev->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
+				rev.core, rev.major, rev.minor, rev.patchid);
+		return NULL;
+	}
+
+	DBG("Found GPU: %u.%u.%u.%u",  rev.core, rev.major,
+			rev.minor, rev.patchid);
+
+	gpu = info->init(dev);
+	if (IS_ERR(gpu)) {
+		dev_warn(dev->dev, "failed to load adreno gpu\n");
+		gpu = NULL;
+		/* not fatal */
+	}
+
+	if (gpu) {
+		int ret;
+		mutex_lock(&dev->struct_mutex);
+		gpu->funcs->pm_resume(gpu);
+		mutex_unlock(&dev->struct_mutex);
+		ret = gpu->funcs->hw_init(gpu);
+		if (ret) {
+			dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
+			gpu->funcs->destroy(gpu);
+			gpu = NULL;
+		} else {
+			/* give inactive pm a chance to kick in: */
+			msm_gpu_retire(gpu);
+		}
+	}
+
+	return gpu;
+}
+
+static void set_gpu_pdev(struct drm_device *dev,
+		struct platform_device *pdev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	priv->gpu_pdev = pdev;
+}
+
+static int adreno_bind(struct device *dev, struct device *master, void *data)
+{
+	static struct adreno_platform_config config = {};
+#ifdef CONFIG_OF
+	struct device_node *child, *node = dev->of_node;
+	u32 val;
+	int ret;
+
+	ret = of_property_read_u32(node, "qcom,chipid", &val);
+	if (ret) {
+		dev_err(dev, "could not find chipid: %d\n", ret);
+		return ret;
+	}
+
+	config.rev = ADRENO_REV((val >> 24) & 0xff,
+			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+
+	/* find clock rates: */
+	config.fast_rate = 0;
+	config.slow_rate = ~0;
+	for_each_child_of_node(node, child) {
+		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
+			struct device_node *pwrlvl;
+			for_each_child_of_node(child, pwrlvl) {
+				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
+				if (ret) {
+					dev_err(dev, "could not find gpu-freq: %d\n", ret);
+					return ret;
+				}
+				config.fast_rate = max(config.fast_rate, val);
+				config.slow_rate = min(config.slow_rate, val);
+			}
+		}
+	}
+
+	if (!config.fast_rate) {
+		dev_err(dev, "could not find clk rates\n");
+		return -ENXIO;
+	}
+
+#else
+	struct kgsl_device_platform_data *pdata = dev->platform_data;
+	uint32_t version = socinfo_get_version();
+	if (cpu_is_apq8064ab()) {
+		config.fast_rate = 450000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 4;
+		config.rev = ADRENO_REV(3, 2, 1, 0);
+	} else if (cpu_is_apq8064()) {
+		config.fast_rate = 400000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 4;
+
+		if (SOCINFO_VERSION_MAJOR(version) == 2)
+			config.rev = ADRENO_REV(3, 2, 0, 2);
+		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
+				(SOCINFO_VERSION_MINOR(version) == 1))
+			config.rev = ADRENO_REV(3, 2, 0, 1);
+		else
+			config.rev = ADRENO_REV(3, 2, 0, 0);
+
+	} else if (cpu_is_msm8960ab()) {
+		config.fast_rate = 400000000;
+		config.slow_rate = 320000000;
+		config.bus_freq  = 4;
+
+		if (SOCINFO_VERSION_MINOR(version) == 0)
+			config.rev = ADRENO_REV(3, 2, 1, 0);
+		else
+			config.rev = ADRENO_REV(3, 2, 1, 1);
+
+	} else if (cpu_is_msm8930()) {
+		config.fast_rate = 400000000;
+		config.slow_rate = 27000000;
+		config.bus_freq  = 3;
+
+		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
+			(SOCINFO_VERSION_MINOR(version) == 2))
+			config.rev = ADRENO_REV(3, 0, 5, 2);
+		else
+			config.rev = ADRENO_REV(3, 0, 5, 0);
+
+	}
+#  ifdef CONFIG_MSM_BUS_SCALING
+	config.bus_scale_table = pdata->bus_scale_table;
+#  endif
+#endif
+	dev->platform_data = &config;
+	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
+	return 0;
+}
+
+static void adreno_unbind(struct device *dev, struct device *master,
+		void *data)
+{
+	set_gpu_pdev(dev_get_drvdata(master), NULL);
+}
+
+static const struct component_ops a3xx_ops = {
+		.bind   = adreno_bind,
+		.unbind = adreno_unbind,
+};
+
+static int adreno_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &a3xx_ops);
+}
+
+static int adreno_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &a3xx_ops);
+	return 0;
+}
+
+static const struct of_device_id dt_match[] = {
+	{ .compatible = "qcom,adreno-3xx" },
+	/* for backwards compat w/ downstream kgsl DT files: */
+	{ .compatible = "qcom,kgsl-3d0" },
+	{}
+};
+
+static struct platform_driver adreno_driver = {
+	.probe = adreno_probe,
+	.remove = adreno_remove,
+	.driver = {
+		.name = "adreno",
+		.of_match_table = dt_match,
+	},
+};
+
+void __init adreno_register(void)
+{
+	platform_driver_register(&adreno_driver);
+}
+
+void __exit adreno_unregister(void)
+{
+	platform_driver_unregister(&adreno_driver);
+}
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 655ce5b14ad0..6afa29167fee 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -19,46 +19,6 @@
 #include "msm_gem.h"
 #include "msm_mmu.h"
 
-struct adreno_info {
-	struct adreno_rev rev;
-	uint32_t revn;
-	const char *name;
-	const char *pm4fw, *pfpfw;
-	uint32_t gmem;
-};
-
-#define ANY_ID 0xff
-
-static const struct adreno_info gpulist[] = {
-	{
-		.rev   = ADRENO_REV(3, 0, 5, ANY_ID),
-		.revn  = 305,
-		.name  = "A305",
-		.pm4fw = "a300_pm4.fw",
-		.pfpfw = "a300_pfp.fw",
-		.gmem  = SZ_256K,
-	}, {
-		.rev   = ADRENO_REV(3, 2, ANY_ID, ANY_ID),
-		.revn  = 320,
-		.name  = "A320",
-		.pm4fw = "a300_pm4.fw",
-		.pfpfw = "a300_pfp.fw",
-		.gmem  = SZ_512K,
-	}, {
-		.rev   = ADRENO_REV(3, 3, 0, ANY_ID),
-		.revn  = 330,
-		.name  = "A330",
-		.pm4fw = "a330_pm4.fw",
-		.pfpfw = "a330_pfp.fw",
-		.gmem  = SZ_1M,
-	},
-};
-
-MODULE_FIRMWARE("a300_pm4.fw");
-MODULE_FIRMWARE("a300_pfp.fw");
-MODULE_FIRMWARE("a330_pm4.fw");
-MODULE_FIRMWARE("a330_pfp.fw");
-
 #define RB_SIZE    SZ_32K
 #define RB_BLKSIZE 16
 
@@ -252,6 +212,7 @@ void adreno_idle(struct msm_gpu *gpu)
 void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	int i;
 
 	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
 			adreno_gpu->info->revn, adreno_gpu->rev.core,
@@ -263,6 +224,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 	seq_printf(m, "rptr:     %d\n", adreno_gpu->memptrs->rptr);
 	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
 	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
+
+	gpu->funcs->pm_resume(gpu);
+
+	/* dump these out in a form that can be parsed by demsm: */
+	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
+	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+		uint32_t start = adreno_gpu->registers[i];
+		uint32_t end   = adreno_gpu->registers[i+1];
+		uint32_t addr;
+
+		for (addr = start; addr <= end; addr++) {
+			uint32_t val = gpu_read(gpu, addr);
+			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
+		}
+	}
+
+	gpu->funcs->pm_suspend(gpu);
 }
 #endif
 
@@ -270,6 +248,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 void adreno_dump(struct msm_gpu *gpu)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	int i;
 
 	printk("revision: %d (%d.%d.%d.%d)\n",
 			adreno_gpu->info->revn, adreno_gpu->rev.core,
@@ -282,6 +261,18 @@ void adreno_dump(struct msm_gpu *gpu)
 	printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
 	printk("rb wptr:  %d\n", get_wptr(gpu->rb));
 
+	/* dump these out in a form that can be parsed by demsm: */
+	printk("IO:region %s 00000000 00020000\n", gpu->name);
+	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
+		uint32_t start = adreno_gpu->registers[i];
+		uint32_t end   = adreno_gpu->registers[i+1];
+		uint32_t addr;
+
+		for (addr = start; addr <= end; addr++) {
+			uint32_t val = gpu_read(gpu, addr);
+			printk("IO:R %08x %08x\n", addr<<2, val);
+		}
+	}
 }
 
 static uint32_t ring_freewords(struct msm_gpu *gpu)
@@ -304,65 +295,51 @@ static const char *iommu_ports[] = {
 		"gfx3d1_user", "gfx3d1_priv",
 };
 
-static inline bool _rev_match(uint8_t entry, uint8_t id)
-{
-	return (entry == ANY_ID) || (entry == id);
-}
-
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
-		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
-		struct adreno_rev rev)
+		struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
 {
+	struct adreno_platform_config *config = pdev->dev.platform_data;
+	struct msm_gpu *gpu = &adreno_gpu->base;
 	struct msm_mmu *mmu;
-	int i, ret;
-
-	/* identify gpu: */
-	for (i = 0; i < ARRAY_SIZE(gpulist); i++) {
-		const struct adreno_info *info = &gpulist[i];
-		if (_rev_match(info->rev.core, rev.core) &&
-				_rev_match(info->rev.major, rev.major) &&
-				_rev_match(info->rev.minor, rev.minor) &&
-				_rev_match(info->rev.patchid, rev.patchid)) {
-			gpu->info = info;
-			gpu->revn = info->revn;
-			break;
-		}
-	}
-
-	if (i == ARRAY_SIZE(gpulist)) {
-		dev_err(drm->dev, "Unknown GPU revision: %u.%u.%u.%u\n",
-				rev.core, rev.major, rev.minor, rev.patchid);
-		return -ENXIO;
-	}
+	int ret;
 
-	DBG("Found GPU: %s (%u.%u.%u.%u)", gpu->info->name,
-			rev.core, rev.major, rev.minor, rev.patchid);
+	adreno_gpu->funcs = funcs;
+	adreno_gpu->info = adreno_info(config->rev);
+	adreno_gpu->gmem = adreno_gpu->info->gmem;
+	adreno_gpu->revn = adreno_gpu->info->revn;
+	adreno_gpu->rev = config->rev;
+
+	gpu->fast_rate = config->fast_rate;
+	gpu->slow_rate = config->slow_rate;
+	gpu->bus_freq  = config->bus_freq;
+#ifdef CONFIG_MSM_BUS_SCALING
+	gpu->bus_scale_table = config->bus_scale_table;
+#endif
 
-	gpu->funcs = funcs;
-	gpu->gmem = gpu->info->gmem;
-	gpu->rev = rev;
+	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
+			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
 
-	ret = request_firmware(&gpu->pm4, gpu->info->pm4fw, drm->dev);
+	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
 	if (ret) {
 		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
-				gpu->info->pm4fw, ret);
+				adreno_gpu->info->pm4fw, ret);
 		return ret;
 	}
 
-	ret = request_firmware(&gpu->pfp, gpu->info->pfpfw, drm->dev);
+	ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
 	if (ret) {
 		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
-				gpu->info->pfpfw, ret);
+				adreno_gpu->info->pfpfw, ret);
 		return ret;
 	}
 
-	ret = msm_gpu_init(drm, pdev, &gpu->base, &funcs->base,
-			gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
+	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
+			adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
 			RB_SIZE);
 	if (ret)
 		return ret;
 
-	mmu = gpu->base.mmu;
+	mmu = gpu->mmu;
 	if (mmu) {
 		ret = mmu->funcs->attach(mmu, iommu_ports,
 				ARRAY_SIZE(iommu_ports));
@@ -371,24 +348,24 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	}
 
 	mutex_lock(&drm->struct_mutex);
-	gpu->memptrs_bo = msm_gem_new(drm, sizeof(*gpu->memptrs),
+	adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs),
 			MSM_BO_UNCACHED);
 	mutex_unlock(&drm->struct_mutex);
-	if (IS_ERR(gpu->memptrs_bo)) {
-		ret = PTR_ERR(gpu->memptrs_bo);
-		gpu->memptrs_bo = NULL;
+	if (IS_ERR(adreno_gpu->memptrs_bo)) {
+		ret = PTR_ERR(adreno_gpu->memptrs_bo);
+		adreno_gpu->memptrs_bo = NULL;
 		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
 		return ret;
 	}
 
-	gpu->memptrs = msm_gem_vaddr(gpu->memptrs_bo);
-	if (!gpu->memptrs) {
+	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
+	if (!adreno_gpu->memptrs) {
 		dev_err(drm->dev, "could not vmap memptrs\n");
 		return -ENOMEM;
 	}
 
-	ret = msm_gem_get_iova(gpu->memptrs_bo, gpu->base.id,
-			&gpu->memptrs_iova);
+	ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->id,
+			&adreno_gpu->memptrs_iova);
 	if (ret) {
 		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 63c36ce33020..52f051579753 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -39,7 +39,16 @@ struct adreno_gpu_funcs {
 	struct msm_gpu_funcs base;
 };
 
-struct adreno_info;
+struct adreno_info {
+	struct adreno_rev rev;
+	uint32_t revn;
+	const char *name;
+	const char *pm4fw, *pfpfw;
+	uint32_t gmem;
+	struct msm_gpu *(*init)(struct drm_device *dev);
+};
+
+const struct adreno_info *adreno_info(struct adreno_rev rev);
 
 struct adreno_rbmemptrs {
 	volatile uint32_t rptr;
@@ -55,6 +64,9 @@ struct adreno_gpu {
 	uint32_t revn;  /* numeric revision name */
 	const struct adreno_gpu_funcs *funcs;
 
+	/* interesting register offsets to dump: */
+	const unsigned int *registers;
+
 	/* firmware: */
 	const struct firmware *pm4, *pfp;
 
@@ -131,8 +143,7 @@ void adreno_dump(struct msm_gpu *gpu);
 void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords);
 
 int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
-		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs,
-		struct adreno_rev rev);
+		struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs);
 void adreno_gpu_cleanup(struct adreno_gpu *gpu);
 
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
index 4eee0ec8f069..6ef43f66c30a 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_pm4.xml.h
@@ -12,9 +12,9 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml  (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (   9859 bytes, from 2014-06-02 15:21:30)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14477 bytes, from 2014-05-16 11:51:57)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-06-25 12:57:16)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  26602 bytes, from 2014-06-25 12:57:16)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14960 bytes, from 2014-07-27 17:22:13)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  58020 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  41068 bytes, from 2014-08-01 12:22:48)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -163,12 +163,16 @@ enum adreno_pm4_type3_packets {
 	CP_INDIRECT_BUFFER_PFE = 63,
 	CP_SET_BIN = 76,
 	CP_TEST_TWO_MEMS = 113,
+	CP_REG_WR_NO_CTXT = 120,
+	CP_RECORD_PFP_TIMESTAMP = 17,
 	CP_WAIT_FOR_ME = 19,
 	CP_SET_DRAW_STATE = 67,
 	CP_DRAW_INDX_OFFSET = 56,
 	CP_DRAW_INDIRECT = 40,
 	CP_DRAW_INDX_INDIRECT = 41,
 	CP_DRAW_AUTO = 36,
+	CP_UNKNOWN_1A = 26,
+	CP_WIDE_REG_WRITE = 116,
 	IN_IB_PREFETCH_END = 23,
 	IN_SUBBLK_PREFETCH = 31,
 	IN_INSTR_PREFETCH = 32,
diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
index 0f1f5b9459a5..e965898dfda6 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
@@ -10,14 +10,14 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
 Copyright (C) 2013 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
index d468f86f637c..f2bdda957205 100644
--- a/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
+++ b/drivers/gpu/drm/msm/dsi/mmss_cc.xml.h
@@ -10,16 +10,16 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
-Copyright (C) 2013 by the following authors:
+Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 
 Permission is hereby granted, free of charge, to any person obtaining
@@ -112,5 +112,11 @@ static inline uint32_t MMSS_CC_CLK_NS_VAL(uint32_t val)
 	return ((val) << MMSS_CC_CLK_NS_VAL__SHIFT) & MMSS_CC_CLK_NS_VAL__MASK;
 }
 
+#define REG_MMSS_CC_DSI2_PIXEL_CC				0x00000094
+
+#define REG_MMSS_CC_DSI2_PIXEL_NS				0x000000e4
+
+#define REG_MMSS_CC_DSI2_PIXEL_CC2				0x00000264
+
 
 #endif /* MMSS_CC_XML */
diff --git a/drivers/gpu/drm/msm/dsi/sfpb.xml.h b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
index da8740054cdf..e5b071ffd865 100644
--- a/drivers/gpu/drm/msm/dsi/sfpb.xml.h
+++ b/drivers/gpu/drm/msm/dsi/sfpb.xml.h
@@ -10,14 +10,14 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
 Copyright (C) 2013 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index c6c9b02e0ada..9d00dcba6959 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -123,7 +123,7 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
 	for (i = 0; i < config->hpd_reg_cnt; i++) {
 		struct regulator *reg;
 
-		reg = devm_regulator_get_exclusive(&pdev->dev,
+		reg = devm_regulator_get(&pdev->dev,
 				config->hpd_reg_names[i]);
 		if (IS_ERR(reg)) {
 			ret = PTR_ERR(reg);
@@ -139,7 +139,7 @@ struct hdmi *hdmi_init(struct drm_device *dev, struct drm_encoder *encoder)
 	for (i = 0; i < config->pwr_reg_cnt; i++) {
 		struct regulator *reg;
 
-		reg = devm_regulator_get_exclusive(&pdev->dev,
+		reg = devm_regulator_get(&pdev->dev,
 				config->pwr_reg_names[i]);
 		if (IS_ERR(reg)) {
 			ret = PTR_ERR(reg);
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
index e89fe053d375..76fd0cfc6558 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.xml.h
@@ -10,14 +10,14 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
index bd81db6a7829..d53c29327df9 100644
--- a/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
+++ b/drivers/gpu/drm/msm/hdmi/qfprom.xml.h
@@ -10,14 +10,14 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
 Copyright (C) 2013 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
index 122208e8a2ee..03c0bd9cd5b9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4.xml.h
@@ -10,16 +10,16 @@ git clone https://github.com/freedreno/envytools.git
 The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    647 bytes, from 2013-11-30 14:45:35)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  17996 bytes, from 2013-12-01 19:10:31)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2013-11-30 15:00:52)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-06-25 12:55:02)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20457 bytes, from 2014-08-01 12:22:48)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   1615 bytes, from 2014-07-17 15:34:33)
+- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  22517 bytes, from 2014-07-17 15:34:33)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  11712 bytes, from 2013-08-17 17:13:43)
 - /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    344 bytes, from 2013-08-11 19:26:32)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1544 bytes, from 2013-08-16 19:17:05)
+- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2014-08-01 12:23:53)
 - /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2013-07-05 19:21:12)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-06-25 12:53:44)
+- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  23613 bytes, from 2014-07-17 15:33:30)
 
-Copyright (C) 2013 by the following authors:
+Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 
 Permission is hereby granted, free of charge, to any person obtaining
@@ -871,6 +871,101 @@ static inline uint32_t MDP4_LCDC_UNDERFLOW_CLR_COLOR(uint32_t val)
 #define MDP4_LCDC_CTRL_POLARITY_VSYNC_LOW			0x00000002
 #define MDP4_LCDC_CTRL_POLARITY_DATA_EN_LOW			0x00000004
 
+#define REG_MDP4_LCDC_LVDS_INTF_CTL				0x000c2000
+#define MDP4_LCDC_LVDS_INTF_CTL_MODE_SEL			0x00000004
+#define MDP4_LCDC_LVDS_INTF_CTL_RGB_OUT				0x00000008
+#define MDP4_LCDC_LVDS_INTF_CTL_CH_SWAP				0x00000010
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_RES_BIT			0x00000020
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_RES_BIT			0x00000040
+#define MDP4_LCDC_LVDS_INTF_CTL_ENABLE				0x00000080
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE0_EN		0x00000100
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE1_EN		0x00000200
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE2_EN		0x00000400
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE3_EN		0x00000800
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE0_EN		0x00001000
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE1_EN		0x00002000
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE2_EN		0x00004000
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE3_EN		0x00008000
+#define MDP4_LCDC_LVDS_INTF_CTL_CH1_CLK_LANE_EN			0x00010000
+#define MDP4_LCDC_LVDS_INTF_CTL_CH2_CLK_LANE_EN			0x00020000
+
+static inline uint32_t REG_MDP4_LCDC_LVDS_MUX_CTL(uint32_t i0) { return 0x000c2014 + 0x8*i0; }
+
+static inline uint32_t REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(uint32_t i0) { return 0x000c2014 + 0x8*i0; }
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0__MASK		0x000000ff
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0__SHIFT		0
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0__MASK;
+}
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1__MASK		0x0000ff00
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1__SHIFT		8
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1__MASK;
+}
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2__MASK		0x00ff0000
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2__SHIFT		16
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2__MASK;
+}
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3__MASK		0xff000000
+#define MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3__SHIFT		24
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3__MASK;
+}
+
+static inline uint32_t REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(uint32_t i0) { return 0x000c2018 + 0x8*i0; }
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4__MASK		0x000000ff
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4__SHIFT		0
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4__MASK;
+}
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5__MASK		0x0000ff00
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5__SHIFT		8
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5__MASK;
+}
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6__MASK		0x00ff0000
+#define MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6__SHIFT		16
+static inline uint32_t MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(uint32_t val)
+{
+	return ((val) << MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6__SHIFT) & MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6__MASK;
+}
+
+#define REG_MDP4_LCDC_LVDS_PHY_RESET				0x000c2034
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_0				0x000c3000
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_1				0x000c3004
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_2				0x000c3008
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_3				0x000c300c
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_5				0x000c3014
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_6				0x000c3018
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_7				0x000c301c
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_8				0x000c3020
+
+#define REG_MDP4_LVDS_PHY_PLL_CTRL_9				0x000c3024
+
+#define REG_MDP4_LVDS_PHY_PLL_LOCKED				0x000c3080
+
+#define REG_MDP4_LVDS_PHY_CFG2					0x000c3108
+
+#define REG_MDP4_LVDS_PHY_CFG0					0x000c3100
+#define MDP4_LVDS_PHY_CFG0_SERIALIZATION_ENBLE			0x00000010
+#define MDP4_LVDS_PHY_CFG0_CHANNEL0				0x00000040
+#define MDP4_LVDS_PHY_CFG0_CHANNEL1				0x00000080
+
 #define REG_MDP4_DTV						0x000d0000
 
 #define REG_MDP4_DTV_ENABLE					0x000d0000
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index c6c80ea28c35..7d00f7fb5773 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -273,14 +273,17 @@ static void blend_setup(struct drm_crtc *crtc)
 	};
 	bool alpha[4]= { false, false, false, false };
 
+	/* Don't rely on value read back from hw, but instead use our
+	 * own shadowed value.  Possibly disable/reenable looses the
+	 * previous value and goes back to power-on default?
+	 */
+	mixer_cfg = mdp4_kms->mixer_cfg;
+
 	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_LOW0(ovlp), 0);
 	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_LOW1(ovlp), 0);
 	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_HIGH0(ovlp), 0);
 	mdp4_write(mdp4_kms, REG_MDP4_OVLP_TRANSP_HIGH1(ovlp), 0);
 
-	/* TODO single register for all CRTCs, so this won't work properly
-	 * when multiple CRTCs are active..
-	 */
 	for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) {
 		struct drm_plane *plane = mdp4_crtc->planes[i];
 		if (plane) {
@@ -291,7 +294,8 @@ static void blend_setup(struct drm_crtc *crtc)
 					to_mdp_format(msm_framebuffer_format(plane->fb));
 				alpha[idx-1] = format->alpha_enable;
 			}
-			mixer_cfg |= mixercfg(mdp4_crtc->mixer, pipe_id, stages[idx]);
+			mixer_cfg = mixercfg(mixer_cfg, mdp4_crtc->mixer,
+					pipe_id, stages[idx]);
 		}
 	}
 
@@ -320,6 +324,7 @@ static void blend_setup(struct drm_crtc *crtc)
 		mdp4_write(mdp4_kms, REG_MDP4_OVLP_STAGE_TRANSP_HIGH1(ovlp, i), 0);
 	}
 
+	mdp4_kms->mixer_cfg = mixer_cfg;
 	mdp4_write(mdp4_kms, REG_MDP4_LAYERMIXER_IN_CFG, mixer_cfg);
 }
 
@@ -672,7 +677,7 @@ void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config)
 }
 
 /* set interface for routing crtc->encoder: */
-void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf)
+void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf, int mixer)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 	struct mdp4_kms *mdp4_kms = get_kms(crtc);
@@ -698,15 +703,13 @@ void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf)
 	if (intf == INTF_DSI_VIDEO) {
 		intf_sel &= ~MDP4_DISP_INTF_SEL_DSI_CMD;
 		intf_sel |= MDP4_DISP_INTF_SEL_DSI_VIDEO;
-		mdp4_crtc->mixer = 0;
 	} else if (intf == INTF_DSI_CMD) {
 		intf_sel &= ~MDP4_DISP_INTF_SEL_DSI_VIDEO;
 		intf_sel |= MDP4_DISP_INTF_SEL_DSI_CMD;
-		mdp4_crtc->mixer = 0;
-	} else if (intf == INTF_LCDC_DTV){
-		mdp4_crtc->mixer = 1;
 	}
 
+	mdp4_crtc->mixer = mixer;
+
 	blend_setup(crtc);
 
 	DBG("%s: intf_sel=%08x", mdp4_crtc->name, intf_sel);
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
index 067ed03b35fe..c3878420180b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
@@ -233,7 +233,7 @@ static void mdp4_dtv_encoder_commit(struct drm_encoder *encoder)
 			MDP4_DMA_CONFIG_G_BPC(BPC8) |
 			MDP4_DMA_CONFIG_B_BPC(BPC8) |
 			MDP4_DMA_CONFIG_PACK(0x21));
-	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV);
+	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 1);
 	mdp4_dtv_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
 }
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index 733646c0d3f8..79d804e61cc4 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -106,6 +106,7 @@ static int mdp4_hw_init(struct msm_kms *kms)
 
 	if (mdp4_kms->rev >= 2)
 		mdp4_write(mdp4_kms, REG_MDP4_LAYERMIXER_IN_CFG_UPDATE_METHOD, 1);
+	mdp4_write(mdp4_kms, REG_MDP4_LAYERMIXER_IN_CFG, 0);
 
 	/* disable CSC matrix / YUV by default: */
 	mdp4_write(mdp4_kms, REG_MDP4_PIPE_OP_MODE(VG1), 0);
@@ -196,6 +197,28 @@ int mdp4_enable(struct mdp4_kms *mdp4_kms)
 	return 0;
 }
 
+#ifdef CONFIG_OF
+static struct drm_panel *detect_panel(struct drm_device *dev, const char *name)
+{
+	struct device_node *n;
+	struct drm_panel *panel = NULL;
+
+	n = of_parse_phandle(dev->dev->of_node, name, 0);
+	if (n) {
+		panel = of_drm_find_panel(n);
+		if (!panel)
+			panel = ERR_PTR(-EPROBE_DEFER);
+	}
+
+	return panel;
+}
+#else
+static struct drm_panel *detect_panel(struct drm_device *dev, const char *name)
+{
+	// ??? maybe use a module param to specify which panel is attached?
+}
+#endif
+
 static int modeset_init(struct mdp4_kms *mdp4_kms)
 {
 	struct drm_device *dev = mdp4_kms->dev;
@@ -203,14 +226,11 @@ static int modeset_init(struct mdp4_kms *mdp4_kms)
 	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	struct drm_panel *panel;
 	struct hdmi *hdmi;
 	int ret;
 
-	/*
-	 *  NOTE: this is a bit simplistic until we add support
-	 * for more than just RGB1->DMA_E->DTV->HDMI
-	 */
-
 	/* construct non-private planes: */
 	plane = mdp4_plane_init(dev, VG1, false);
 	if (IS_ERR(plane)) {
@@ -228,7 +248,57 @@ static int modeset_init(struct mdp4_kms *mdp4_kms)
 	}
 	priv->planes[priv->num_planes++] = plane;
 
-	/* the CRTCs get constructed with a private plane: */
+	/*
+	 * Setup the LCDC/LVDS path: RGB2 -> DMA_P -> LCDC -> LVDS:
+	 */
+
+	panel = detect_panel(dev, "qcom,lvds-panel");
+	if (IS_ERR(panel)) {
+		ret = PTR_ERR(panel);
+		dev_err(dev->dev, "failed to detect LVDS panel: %d\n", ret);
+		goto fail;
+	}
+
+	plane = mdp4_plane_init(dev, RGB2, true);
+	if (IS_ERR(plane)) {
+		dev_err(dev->dev, "failed to construct plane for RGB2\n");
+		ret = PTR_ERR(plane);
+		goto fail;
+	}
+
+	crtc  = mdp4_crtc_init(dev, plane, priv->num_crtcs, 0, DMA_P);
+	if (IS_ERR(crtc)) {
+		dev_err(dev->dev, "failed to construct crtc for DMA_P\n");
+		ret = PTR_ERR(crtc);
+		goto fail;
+	}
+
+	encoder = mdp4_lcdc_encoder_init(dev, panel);
+	if (IS_ERR(encoder)) {
+		dev_err(dev->dev, "failed to construct LCDC encoder\n");
+		ret = PTR_ERR(encoder);
+		goto fail;
+	}
+
+	/* LCDC can be hooked to DMA_P: */
+	encoder->possible_crtcs = 1 << priv->num_crtcs;
+
+	priv->crtcs[priv->num_crtcs++] = crtc;
+	priv->encoders[priv->num_encoders++] = encoder;
+
+	connector = mdp4_lvds_connector_init(dev, panel, encoder);
+	if (IS_ERR(connector)) {
+		ret = PTR_ERR(connector);
+		dev_err(dev->dev, "failed to initialize LVDS connector: %d\n", ret);
+		goto fail;
+	}
+
+	priv->connectors[priv->num_connectors++] = connector;
+
+	/*
+	 * Setup DTV/HDMI path: RGB1 -> DMA_E -> DTV -> HDMI:
+	 */
+
 	plane = mdp4_plane_init(dev, RGB1, true);
 	if (IS_ERR(plane)) {
 		dev_err(dev->dev, "failed to construct plane for RGB1\n");
@@ -242,7 +312,6 @@ static int modeset_init(struct mdp4_kms *mdp4_kms)
 		ret = PTR_ERR(crtc);
 		goto fail;
 	}
-	priv->crtcs[priv->num_crtcs++] = crtc;
 
 	encoder = mdp4_dtv_encoder_init(dev);
 	if (IS_ERR(encoder)) {
@@ -250,7 +319,11 @@ static int modeset_init(struct mdp4_kms *mdp4_kms)
 		ret = PTR_ERR(encoder);
 		goto fail;
 	}
-	encoder->possible_crtcs = 0x1;     /* DTV can be hooked to DMA_E */
+
+	/* DTV can be hooked to DMA_E: */
+	encoder->possible_crtcs = 1 << priv->num_crtcs;
+
+	priv->crtcs[priv->num_crtcs++] = crtc;
 	priv->encoders[priv->num_encoders++] = encoder;
 
 	hdmi = hdmi_init(dev, encoder);
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index 3225da804c61..9ff6e7ccfe90 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -23,6 +23,8 @@
 #include "mdp/mdp_kms.h"
 #include "mdp4.xml.h"
 
+#include "drm_panel.h"
+
 struct mdp4_kms {
 	struct mdp_kms base;
 
@@ -30,6 +32,13 @@ struct mdp4_kms {
 
 	int rev;
 
+	/* Shadow value for MDP4_LAYERMIXER_IN_CFG.. since setup for all
+	 * crtcs/encoders is in one shared register, we need to update it
+	 * via read/modify/write.  But to avoid getting confused by power-
+	 * on-default values after resume, use this shadow value instead:
+	 */
+	uint32_t mixer_cfg;
+
 	/* mapper-id used to request GEM buffer mapped for scanout: */
 	int id;
 
@@ -74,7 +83,7 @@ static inline uint32_t pipe2flush(enum mdp4_pipe pipe)
 	case VG1:      return MDP4_OVERLAY_FLUSH_VG1;
 	case VG2:      return MDP4_OVERLAY_FLUSH_VG2;
 	case RGB1:     return MDP4_OVERLAY_FLUSH_RGB1;
-	case RGB2:     return MDP4_OVERLAY_FLUSH_RGB1;
+	case RGB2:     return MDP4_OVERLAY_FLUSH_RGB2;
 	default:       return 0;
 	}
 }
@@ -108,38 +117,50 @@ static inline uint32_t dma2err(enum mdp4_dma dma)
 	}
 }
 
-static inline uint32_t mixercfg(int mixer, enum mdp4_pipe pipe,
-		enum mdp_mixer_stage_id stage)
+static inline uint32_t mixercfg(uint32_t mixer_cfg, int mixer,
+		enum mdp4_pipe pipe, enum mdp_mixer_stage_id stage)
 {
-	uint32_t mixer_cfg = 0;
-
 	switch (pipe) {
 	case VG1:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE0(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE0__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE0_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE0(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE0_MIXER1);
 		break;
 	case VG2:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE1(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE1__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE1_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE1(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE1_MIXER1);
 		break;
 	case RGB1:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE2(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE2__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE2_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE2(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE2_MIXER1);
 		break;
 	case RGB2:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE3(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE3__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE3_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE3(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE3_MIXER1);
 		break;
 	case RGB3:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE4(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE4__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE4_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE4(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE4_MIXER1);
 		break;
 	case VG3:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE5(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE5__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE5_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE5(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE5_MIXER1);
 		break;
 	case VG4:
-		mixer_cfg = MDP4_LAYERMIXER_IN_CFG_PIPE6(stage) |
+		mixer_cfg &= ~(MDP4_LAYERMIXER_IN_CFG_PIPE6__MASK |
+				MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1);
+		mixer_cfg |= MDP4_LAYERMIXER_IN_CFG_PIPE6(stage) |
 			COND(mixer == 1, MDP4_LAYERMIXER_IN_CFG_PIPE6_MIXER1);
 		break;
 	default:
@@ -188,7 +209,7 @@ struct drm_plane *mdp4_plane_init(struct drm_device *dev,
 uint32_t mdp4_crtc_vblank(struct drm_crtc *crtc);
 void mdp4_crtc_cancel_pending_flip(struct drm_crtc *crtc, struct drm_file *file);
 void mdp4_crtc_set_config(struct drm_crtc *crtc, uint32_t config);
-void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf);
+void mdp4_crtc_set_intf(struct drm_crtc *crtc, enum mdp4_intf intf, int mixer);
 void mdp4_crtc_attach(struct drm_crtc *crtc, struct drm_plane *plane);
 void mdp4_crtc_detach(struct drm_crtc *crtc, struct drm_plane *plane);
 struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
@@ -198,6 +219,22 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate);
 struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev);
 
+long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate);
+struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
+		struct drm_panel *panel);
+
+struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev,
+		struct drm_panel *panel, struct drm_encoder *encoder);
+
+#ifdef CONFIG_COMMON_CLK
+struct clk *mpd4_lvds_pll_init(struct drm_device *dev);
+#else
+static inline struct clk *mpd4_lvds_pll_init(struct drm_device *dev)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
 #ifdef CONFIG_MSM_BUS_SCALING
 static inline int match_dev_name(struct device *dev, void *data)
 {
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
new file mode 100644
index 000000000000..41f6436754fc
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lcdc_encoder.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ * Author: Vinay Simha <vinaysimha@inforcecomputing.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mdp4_kms.h"
+
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+struct mdp4_lcdc_encoder {
+	struct drm_encoder base;
+	struct drm_panel *panel;
+	struct clk *lcdc_clk;
+	unsigned long int pixclock;
+	struct regulator *regs[3];
+	bool enabled;
+	uint32_t bsc;
+};
+#define to_mdp4_lcdc_encoder(x) container_of(x, struct mdp4_lcdc_encoder, base)
+
+static struct mdp4_kms *get_kms(struct drm_encoder *encoder)
+{
+	struct msm_drm_private *priv = encoder->dev->dev_private;
+	return to_mdp4_kms(to_mdp_kms(priv->kms));
+}
+
+#ifdef CONFIG_MSM_BUS_SCALING
+#include <mach/board.h>
+static void bs_init(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder)
+{
+	struct drm_device *dev = mdp4_lcdc_encoder->base.dev;
+	struct lcdc_platform_data *lcdc_pdata = mdp4_find_pdata("lvds.0");
+
+	if (!lcdc_pdata) {
+		dev_err(dev->dev, "could not find lvds pdata\n");
+		return;
+	}
+
+	if (lcdc_pdata->bus_scale_table) {
+		mdp4_lcdc_encoder->bsc = msm_bus_scale_register_client(
+				lcdc_pdata->bus_scale_table);
+		DBG("lvds : bus scale client: %08x", mdp4_lcdc_encoder->bsc);
+	}
+}
+
+static void bs_fini(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder)
+{
+	if (mdp4_lcdc_encoder->bsc) {
+		msm_bus_scale_unregister_client(mdp4_lcdc_encoder->bsc);
+		mdp4_lcdc_encoder->bsc = 0;
+	}
+}
+
+static void bs_set(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder, int idx)
+{
+	if (mdp4_lcdc_encoder->bsc) {
+		DBG("set bus scaling: %d", idx);
+		msm_bus_scale_client_update_request(mdp4_lcdc_encoder->bsc, idx);
+	}
+}
+#else
+static void bs_init(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder) {}
+static void bs_fini(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder) {}
+static void bs_set(struct mdp4_lcdc_encoder *mdp4_lcdc_encoder, int idx) {}
+#endif
+
+static void mdp4_lcdc_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
+			to_mdp4_lcdc_encoder(encoder);
+	bs_fini(mdp4_lcdc_encoder);
+	drm_encoder_cleanup(encoder);
+	kfree(mdp4_lcdc_encoder);
+}
+
+static const struct drm_encoder_funcs mdp4_lcdc_encoder_funcs = {
+	.destroy = mdp4_lcdc_encoder_destroy,
+};
+
+/* this should probably be a helper: */
+struct drm_connector *get_connector(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder)
+			return connector;
+
+	return NULL;
+}
+
+static void setup_phy(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector = get_connector(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	uint32_t lvds_intf = 0, lvds_phy_cfg0 = 0;
+	int bpp, nchan, swap;
+
+	if (!connector)
+		return;
+
+	bpp = 3 * connector->display_info.bpc;
+
+	if (!bpp)
+		bpp = 18;
+
+	/* TODO, these should come from panel somehow: */
+	nchan = 1;
+	swap = 0;
+
+	switch (bpp) {
+	case 24:
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(0),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x08) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x05) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x04) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x03));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(0),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x02) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x01) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x00));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(1),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x11) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x10) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x0d) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x0c));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(1),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x0b) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x0a) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x09));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(2),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x1a) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x19) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x18) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x15));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(2),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x14) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x13) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x12));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(3),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x1b) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x17) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x16) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x0f));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(3),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x0e) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x07) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x06));
+		if (nchan == 2) {
+			lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE3_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE0_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE3_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE0_EN;
+		} else {
+			lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE3_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE0_EN;
+		}
+		break;
+
+	case 18:
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(0),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x0a) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x07) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x06) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x05));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(0),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x04) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x03) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x02));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(1),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x13) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x12) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x0f) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x0e));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(1),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x0d) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x0c) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x0b));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_3_TO_0(2),
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT0(0x1a) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT1(0x19) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT2(0x18) |
+				MDP4_LCDC_LVDS_MUX_CTL_3_TO_0_BIT3(0x17));
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_MUX_CTL_6_TO_4(2),
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT4(0x16) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT5(0x15) |
+				MDP4_LCDC_LVDS_MUX_CTL_6_TO_4_BIT6(0x14));
+		if (nchan == 2) {
+			lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH2_DATA_LANE0_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE0_EN;
+		} else {
+			lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE2_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE1_EN |
+					MDP4_LCDC_LVDS_INTF_CTL_CH1_DATA_LANE0_EN;
+		}
+		lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_RGB_OUT;
+		break;
+
+	default:
+		dev_err(dev->dev, "unknown bpp: %d\n", bpp);
+		return;
+	}
+
+	switch (nchan) {
+	case 1:
+		lvds_phy_cfg0 = MDP4_LVDS_PHY_CFG0_CHANNEL0;
+		lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH1_CLK_LANE_EN |
+				MDP4_LCDC_LVDS_INTF_CTL_MODE_SEL;
+		break;
+	case 2:
+		lvds_phy_cfg0 = MDP4_LVDS_PHY_CFG0_CHANNEL0 |
+				MDP4_LVDS_PHY_CFG0_CHANNEL1;
+		lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH2_CLK_LANE_EN |
+				MDP4_LCDC_LVDS_INTF_CTL_CH1_CLK_LANE_EN;
+		break;
+	default:
+		dev_err(dev->dev, "unknown # of channels: %d\n", nchan);
+		return;
+	}
+
+	if (swap)
+		lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_CH_SWAP;
+
+	lvds_intf |= MDP4_LCDC_LVDS_INTF_CTL_ENABLE;
+
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_CFG0, lvds_phy_cfg0);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_INTF_CTL, lvds_intf);
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_CFG2, 0x30);
+
+	mb();
+	udelay(1);
+	lvds_phy_cfg0 |= MDP4_LVDS_PHY_CFG0_SERIALIZATION_ENBLE;
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_CFG0, lvds_phy_cfg0);
+}
+
+static void mdp4_lcdc_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
+			to_mdp4_lcdc_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	struct drm_panel *panel = mdp4_lcdc_encoder->panel;
+	bool enabled = (mode == DRM_MODE_DPMS_ON);
+	int i, ret;
+
+	DBG("mode=%d", mode);
+
+	if (enabled == mdp4_lcdc_encoder->enabled)
+		return;
+
+	if (enabled) {
+		unsigned long pc = mdp4_lcdc_encoder->pixclock;
+		int ret;
+
+		bs_set(mdp4_lcdc_encoder, 1);
+
+		for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
+			ret = regulator_enable(mdp4_lcdc_encoder->regs[i]);
+			if (ret)
+				dev_err(dev->dev, "failed to enable regulator: %d\n", ret);
+		}
+
+		DBG("setting lcdc_clk=%lu", pc);
+		ret = clk_set_rate(mdp4_lcdc_encoder->lcdc_clk, pc);
+		if (ret)
+			dev_err(dev->dev, "failed to configure lcdc_clk: %d\n", ret);
+		ret = clk_prepare_enable(mdp4_lcdc_encoder->lcdc_clk);
+		if (ret)
+			dev_err(dev->dev, "failed to enable lcdc_clk: %d\n", ret);
+
+		if (panel)
+			drm_panel_enable(panel);
+
+		setup_phy(encoder);
+
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_ENABLE, 1);
+	} else {
+		mdp4_write(mdp4_kms, REG_MDP4_LCDC_ENABLE, 0);
+
+		if (panel)
+			drm_panel_disable(panel);
+
+		/*
+		 * Wait for a vsync so we know the ENABLE=0 latched before
+		 * the (connector) source of the vsync's gets disabled,
+		 * otherwise we end up in a funny state if we re-enable
+		 * before the disable latches, which results that some of
+		 * the settings changes for the new modeset (like new
+		 * scanout buffer) don't latch properly..
+		 */
+		mdp_irq_wait(&mdp4_kms->base, MDP4_IRQ_PRIMARY_VSYNC);
+
+		clk_disable_unprepare(mdp4_lcdc_encoder->lcdc_clk);
+
+		for (i = 0; i < ARRAY_SIZE(mdp4_lcdc_encoder->regs); i++) {
+			ret = regulator_disable(mdp4_lcdc_encoder->regs[i]);
+			if (ret)
+				dev_err(dev->dev, "failed to disable regulator: %d\n", ret);
+		}
+
+		bs_set(mdp4_lcdc_encoder, 0);
+	}
+
+	mdp4_lcdc_encoder->enabled = enabled;
+}
+
+static bool mdp4_lcdc_encoder_mode_fixup(struct drm_encoder *encoder,
+		const struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void mdp4_lcdc_encoder_mode_set(struct drm_encoder *encoder,
+		struct drm_display_mode *mode,
+		struct drm_display_mode *adjusted_mode)
+{
+	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
+			to_mdp4_lcdc_encoder(encoder);
+	struct mdp4_kms *mdp4_kms = get_kms(encoder);
+	uint32_t lcdc_hsync_skew, vsync_period, vsync_len, ctrl_pol;
+	uint32_t display_v_start, display_v_end;
+	uint32_t hsync_start_x, hsync_end_x;
+
+	mode = adjusted_mode;
+
+	DBG("set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x",
+			mode->base.id, mode->name,
+			mode->vrefresh, mode->clock,
+			mode->hdisplay, mode->hsync_start,
+			mode->hsync_end, mode->htotal,
+			mode->vdisplay, mode->vsync_start,
+			mode->vsync_end, mode->vtotal,
+			mode->type, mode->flags);
+
+	mdp4_lcdc_encoder->pixclock = mode->clock * 1000;
+
+	DBG("pixclock=%lu", mdp4_lcdc_encoder->pixclock);
+
+	ctrl_pol = 0;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		ctrl_pol |= MDP4_LCDC_CTRL_POLARITY_HSYNC_LOW;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		ctrl_pol |= MDP4_LCDC_CTRL_POLARITY_VSYNC_LOW;
+	/* probably need to get DATA_EN polarity from panel.. */
+
+	lcdc_hsync_skew = 0;  /* get this from panel? */
+
+	hsync_start_x = (mode->htotal - mode->hsync_start);
+	hsync_end_x = mode->htotal - (mode->hsync_start - mode->hdisplay) - 1;
+
+	vsync_period = mode->vtotal * mode->htotal;
+	vsync_len = (mode->vsync_end - mode->vsync_start) * mode->htotal;
+	display_v_start = (mode->vtotal - mode->vsync_start) * mode->htotal + lcdc_hsync_skew;
+	display_v_end = vsync_period - ((mode->vsync_start - mode->vdisplay) * mode->htotal) + lcdc_hsync_skew - 1;
+
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_HSYNC_CTRL,
+			MDP4_LCDC_HSYNC_CTRL_PULSEW(mode->hsync_end - mode->hsync_start) |
+			MDP4_LCDC_HSYNC_CTRL_PERIOD(mode->htotal));
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_VSYNC_PERIOD, vsync_period);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_VSYNC_LEN, vsync_len);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_DISPLAY_HCTRL,
+			MDP4_LCDC_DISPLAY_HCTRL_START(hsync_start_x) |
+			MDP4_LCDC_DISPLAY_HCTRL_END(hsync_end_x));
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_DISPLAY_VSTART, display_v_start);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_DISPLAY_VEND, display_v_end);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_BORDER_CLR, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_UNDERFLOW_CLR,
+			MDP4_LCDC_UNDERFLOW_CLR_ENABLE_RECOVERY |
+			MDP4_LCDC_UNDERFLOW_CLR_COLOR(0xff));
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_HSYNC_SKEW, lcdc_hsync_skew);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_CTRL_POLARITY, ctrl_pol);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_ACTIVE_HCTL,
+			MDP4_LCDC_ACTIVE_HCTL_START(0) |
+			MDP4_LCDC_ACTIVE_HCTL_END(0));
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_ACTIVE_VSTART, 0);
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_ACTIVE_VEND, 0);
+}
+
+static void mdp4_lcdc_encoder_prepare(struct drm_encoder *encoder)
+{
+	mdp4_lcdc_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
+}
+
+static void mdp4_lcdc_encoder_commit(struct drm_encoder *encoder)
+{
+	/* TODO: hard-coded for 18bpp: */
+	mdp4_crtc_set_config(encoder->crtc,
+			MDP4_DMA_CONFIG_R_BPC(BPC6) |
+			MDP4_DMA_CONFIG_G_BPC(BPC6) |
+			MDP4_DMA_CONFIG_B_BPC(BPC6) |
+			MDP4_DMA_CONFIG_PACK_ALIGN_MSB |
+			MDP4_DMA_CONFIG_PACK(0x21) |
+			MDP4_DMA_CONFIG_DEFLKR_EN |
+			MDP4_DMA_CONFIG_DITHER_EN);
+	mdp4_crtc_set_intf(encoder->crtc, INTF_LCDC_DTV, 0);
+	mdp4_lcdc_encoder_dpms(encoder, DRM_MODE_DPMS_ON);
+}
+
+static const struct drm_encoder_helper_funcs mdp4_lcdc_encoder_helper_funcs = {
+	.dpms = mdp4_lcdc_encoder_dpms,
+	.mode_fixup = mdp4_lcdc_encoder_mode_fixup,
+	.mode_set = mdp4_lcdc_encoder_mode_set,
+	.prepare = mdp4_lcdc_encoder_prepare,
+	.commit = mdp4_lcdc_encoder_commit,
+};
+
+long mdp4_lcdc_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
+{
+	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder =
+			to_mdp4_lcdc_encoder(encoder);
+	return clk_round_rate(mdp4_lcdc_encoder->lcdc_clk, rate);
+}
+
+/* initialize encoder */
+struct drm_encoder *mdp4_lcdc_encoder_init(struct drm_device *dev,
+		struct drm_panel *panel)
+{
+	struct drm_encoder *encoder = NULL;
+	struct mdp4_lcdc_encoder *mdp4_lcdc_encoder;
+	struct regulator *reg;
+	int ret;
+
+	mdp4_lcdc_encoder = kzalloc(sizeof(*mdp4_lcdc_encoder), GFP_KERNEL);
+	if (!mdp4_lcdc_encoder) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mdp4_lcdc_encoder->panel = panel;
+
+	encoder = &mdp4_lcdc_encoder->base;
+
+	drm_encoder_init(dev, encoder, &mdp4_lcdc_encoder_funcs,
+			 DRM_MODE_ENCODER_LVDS);
+	drm_encoder_helper_add(encoder, &mdp4_lcdc_encoder_helper_funcs);
+
+	/* TODO: do we need different pll in other cases? */
+	mdp4_lcdc_encoder->lcdc_clk = mpd4_lvds_pll_init(dev);
+	if (IS_ERR(mdp4_lcdc_encoder->lcdc_clk)) {
+		dev_err(dev->dev, "failed to get lvds_clk\n");
+		ret = PTR_ERR(mdp4_lcdc_encoder->lcdc_clk);
+		goto fail;
+	}
+
+	/* TODO: different regulators in other cases? */
+	reg = devm_regulator_get(dev->dev, "lvds-vccs-3p3v");
+	if (IS_ERR(reg)) {
+		ret = PTR_ERR(reg);
+		dev_err(dev->dev, "failed to get lvds-vccs-3p3v: %d\n", ret);
+		goto fail;
+	}
+	mdp4_lcdc_encoder->regs[0] = reg;
+
+	reg = devm_regulator_get(dev->dev, "lvds-pll-vdda");
+	if (IS_ERR(reg)) {
+		ret = PTR_ERR(reg);
+		dev_err(dev->dev, "failed to get lvds-pll-vdda: %d\n", ret);
+		goto fail;
+	}
+	mdp4_lcdc_encoder->regs[1] = reg;
+
+	reg = devm_regulator_get(dev->dev, "lvds-vdda");
+	if (IS_ERR(reg)) {
+		ret = PTR_ERR(reg);
+		dev_err(dev->dev, "failed to get lvds-vdda: %d\n", ret);
+		goto fail;
+	}
+	mdp4_lcdc_encoder->regs[2] = reg;
+
+	bs_init(mdp4_lcdc_encoder);
+
+	return encoder;
+
+fail:
+	if (encoder)
+		mdp4_lcdc_encoder_destroy(encoder);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c
new file mode 100644
index 000000000000..310034688c15
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_connector.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ * Author: Vinay Simha <vinaysimha@inforcecomputing.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/gpio.h>
+
+#include "mdp4_kms.h"
+
+struct mdp4_lvds_connector {
+	struct drm_connector base;
+	struct drm_encoder *encoder;
+	struct drm_panel *panel;
+};
+#define to_mdp4_lvds_connector(x) container_of(x, struct mdp4_lvds_connector, base)
+
+static enum drm_connector_status mdp4_lvds_connector_detect(
+		struct drm_connector *connector, bool force)
+{
+	struct mdp4_lvds_connector *mdp4_lvds_connector =
+			to_mdp4_lvds_connector(connector);
+
+	return mdp4_lvds_connector->panel ?
+			connector_status_connected :
+			connector_status_disconnected;
+}
+
+static void mdp4_lvds_connector_destroy(struct drm_connector *connector)
+{
+	struct mdp4_lvds_connector *mdp4_lvds_connector =
+			to_mdp4_lvds_connector(connector);
+	struct drm_panel *panel = mdp4_lvds_connector->panel;
+
+	if (panel)
+		drm_panel_detach(panel);
+
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+
+	kfree(mdp4_lvds_connector);
+}
+
+static int mdp4_lvds_connector_get_modes(struct drm_connector *connector)
+{
+	struct mdp4_lvds_connector *mdp4_lvds_connector =
+			to_mdp4_lvds_connector(connector);
+	struct drm_panel *panel = mdp4_lvds_connector->panel;
+	int ret = 0;
+
+	if (panel)
+		ret = panel->funcs->get_modes(panel);
+
+	return ret;
+}
+
+static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
+{
+	struct mdp4_lvds_connector *mdp4_lvds_connector =
+			to_mdp4_lvds_connector(connector);
+	struct drm_encoder *encoder = mdp4_lvds_connector->encoder;
+	long actual, requested;
+
+	requested = 1000 * mode->clock;
+	actual = mdp4_lcdc_round_pixclk(encoder, requested);
+
+	DBG("requested=%ld, actual=%ld", requested, actual);
+
+	if (actual != requested)
+		return MODE_CLOCK_RANGE;
+
+	return MODE_OK;
+}
+
+static struct drm_encoder *
+mdp4_lvds_connector_best_encoder(struct drm_connector *connector)
+{
+	struct mdp4_lvds_connector *mdp4_lvds_connector =
+			to_mdp4_lvds_connector(connector);
+	return mdp4_lvds_connector->encoder;
+}
+
+static const struct drm_connector_funcs mdp4_lvds_connector_funcs = {
+	.dpms = drm_helper_connector_dpms,
+	.detect = mdp4_lvds_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = mdp4_lvds_connector_destroy,
+};
+
+static const struct drm_connector_helper_funcs mdp4_lvds_connector_helper_funcs = {
+	.get_modes = mdp4_lvds_connector_get_modes,
+	.mode_valid = mdp4_lvds_connector_mode_valid,
+	.best_encoder = mdp4_lvds_connector_best_encoder,
+};
+
+/* initialize connector */
+struct drm_connector *mdp4_lvds_connector_init(struct drm_device *dev,
+		struct drm_panel *panel, struct drm_encoder *encoder)
+{
+	struct drm_connector *connector = NULL;
+	struct mdp4_lvds_connector *mdp4_lvds_connector;
+	int ret;
+
+	mdp4_lvds_connector = kzalloc(sizeof(*mdp4_lvds_connector), GFP_KERNEL);
+	if (!mdp4_lvds_connector) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mdp4_lvds_connector->encoder = encoder;
+	mdp4_lvds_connector->panel = panel;
+
+	connector = &mdp4_lvds_connector->base;
+
+	drm_connector_init(dev, connector, &mdp4_lvds_connector_funcs,
+			DRM_MODE_CONNECTOR_LVDS);
+	drm_connector_helper_add(connector, &mdp4_lvds_connector_helper_funcs);
+
+	connector->polled = 0;
+
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+
+	drm_connector_register(connector);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+
+	if (panel)
+		drm_panel_attach(panel, connector);
+
+	return connector;
+
+fail:
+	if (connector)
+		mdp4_lvds_connector_destroy(connector);
+
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_pll.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_pll.c
new file mode 100644
index 000000000000..ce4245971673
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_lvds_pll.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2014 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+
+#include "mdp4_kms.h"
+
+struct mdp4_lvds_pll {
+	struct clk_hw pll_hw;
+	struct drm_device *dev;
+	unsigned long pixclk;
+};
+#define to_mdp4_lvds_pll(x) container_of(x, struct mdp4_lvds_pll, pll_hw)
+
+static struct mdp4_kms *get_kms(struct mdp4_lvds_pll *lvds_pll)
+{
+	struct msm_drm_private *priv = lvds_pll->dev->dev_private;
+	return to_mdp4_kms(to_mdp_kms(priv->kms));
+}
+
+struct pll_rate {
+	unsigned long rate;
+	struct {
+		uint32_t val;
+		uint32_t reg;
+	} conf[32];
+};
+
+/* NOTE: keep sorted highest freq to lowest: */
+static const struct pll_rate freqtbl[] = {
+	{ 72000000, {
+		{ 0x8f, REG_MDP4_LVDS_PHY_PLL_CTRL_1 },
+		{ 0x30, REG_MDP4_LVDS_PHY_PLL_CTRL_2 },
+		{ 0xc6, REG_MDP4_LVDS_PHY_PLL_CTRL_3 },
+		{ 0x10, REG_MDP4_LVDS_PHY_PLL_CTRL_5 },
+		{ 0x07, REG_MDP4_LVDS_PHY_PLL_CTRL_6 },
+		{ 0x62, REG_MDP4_LVDS_PHY_PLL_CTRL_7 },
+		{ 0x41, REG_MDP4_LVDS_PHY_PLL_CTRL_8 },
+		{ 0x0d, REG_MDP4_LVDS_PHY_PLL_CTRL_9 },
+		{ 0, 0 } }
+	},
+};
+
+static const struct pll_rate *find_rate(unsigned long rate)
+{
+	int i;
+	for (i = 1; i < ARRAY_SIZE(freqtbl); i++)
+		if (rate > freqtbl[i].rate)
+			return &freqtbl[i-1];
+	return &freqtbl[i-1];
+}
+
+static int mpd4_lvds_pll_enable(struct clk_hw *hw)
+{
+	struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
+	struct mdp4_kms *mdp4_kms = get_kms(lvds_pll);
+	const struct pll_rate *pll_rate = find_rate(lvds_pll->pixclk);
+	int i;
+
+	DBG("pixclk=%lu (%lu)", lvds_pll->pixclk, pll_rate->rate);
+
+	if (WARN_ON(!pll_rate))
+		return -EINVAL;
+
+	mdp4_write(mdp4_kms, REG_MDP4_LCDC_LVDS_PHY_RESET, 0x33);
+
+	for (i = 0; pll_rate->conf[i].reg; i++)
+		mdp4_write(mdp4_kms, pll_rate->conf[i].reg, pll_rate->conf[i].val);
+
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x01);
+
+	/* Wait until LVDS PLL is locked and ready */
+	while (!mdp4_read(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_LOCKED))
+		cpu_relax();
+
+	return 0;
+}
+
+static void mpd4_lvds_pll_disable(struct clk_hw *hw)
+{
+	struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
+	struct mdp4_kms *mdp4_kms = get_kms(lvds_pll);
+
+	DBG("");
+
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_CFG0, 0x0);
+	mdp4_write(mdp4_kms, REG_MDP4_LVDS_PHY_PLL_CTRL_0, 0x0);
+}
+
+static unsigned long mpd4_lvds_pll_recalc_rate(struct clk_hw *hw,
+				unsigned long parent_rate)
+{
+	struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
+	return lvds_pll->pixclk;
+}
+
+static long mpd4_lvds_pll_round_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long *parent_rate)
+{
+	const struct pll_rate *pll_rate = find_rate(rate);
+	return pll_rate->rate;
+}
+
+static int mpd4_lvds_pll_set_rate(struct clk_hw *hw, unsigned long rate,
+		unsigned long parent_rate)
+{
+	struct mdp4_lvds_pll *lvds_pll = to_mdp4_lvds_pll(hw);
+	lvds_pll->pixclk = rate;
+	return 0;
+}
+
+
+static const struct clk_ops mpd4_lvds_pll_ops = {
+	.enable = mpd4_lvds_pll_enable,
+	.disable = mpd4_lvds_pll_disable,
+	.recalc_rate = mpd4_lvds_pll_recalc_rate,
+	.round_rate = mpd4_lvds_pll_round_rate,
+	.set_rate = mpd4_lvds_pll_set_rate,
+};
+
+static const char *mpd4_lvds_pll_parents[] = {
+	"pxo",
+};
+
+static struct clk_init_data pll_init = {
+	.name = "mpd4_lvds_pll",
+	.ops = &mpd4_lvds_pll_ops,
+	.parent_names = mpd4_lvds_pll_parents,
+	.num_parents = ARRAY_SIZE(mpd4_lvds_pll_parents),
+};
+
+struct clk *mpd4_lvds_pll_init(struct drm_device *dev)
+{
+	struct mdp4_lvds_pll *lvds_pll;
+	struct clk *clk;
+	int ret;
+
+	lvds_pll = devm_kzalloc(dev->dev, sizeof(*lvds_pll), GFP_KERNEL);
+	if (!lvds_pll) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	lvds_pll->dev = dev;
+
+	lvds_pll->pll_hw.init = &pll_init;
+	clk = devm_clk_register(dev->dev, &lvds_pll->pll_hw);
+	if (IS_ERR(clk)) {
+		ret = PTR_ERR(clk);
+		goto fail;
+	}
+
+	return clk;
+
+fail:
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index fcf95680413d..b67ef5985125 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -280,7 +280,7 @@ static int msm_load(struct drm_device *dev, unsigned long flags)
 	dev->mode_config.max_height = 2048;
 	dev->mode_config.funcs = &mode_config_funcs;
 
-	ret = drm_vblank_init(dev, 1);
+	ret = drm_vblank_init(dev, priv->num_crtcs);
 	if (ret < 0) {
 		dev_err(dev->dev, "failed to initialize vblank\n");
 		goto fail;
@@ -315,39 +315,12 @@ static void load_gpu(struct drm_device *dev)
 {
 	static DEFINE_MUTEX(init_lock);
 	struct msm_drm_private *priv = dev->dev_private;
-	struct msm_gpu *gpu;
 
 	mutex_lock(&init_lock);
 
-	if (priv->gpu)
-		goto out;
-
-	gpu = a3xx_gpu_init(dev);
-	if (IS_ERR(gpu)) {
-		dev_warn(dev->dev, "failed to load a3xx gpu\n");
-		gpu = NULL;
-		/* not fatal */
-	}
-
-	if (gpu) {
-		int ret;
-		mutex_lock(&dev->struct_mutex);
-		gpu->funcs->pm_resume(gpu);
-		mutex_unlock(&dev->struct_mutex);
-		ret = gpu->funcs->hw_init(gpu);
-		if (ret) {
-			dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
-			gpu->funcs->destroy(gpu);
-			gpu = NULL;
-		} else {
-			/* give inactive pm a chance to kick in: */
-			msm_gpu_retire(gpu);
-		}
-	}
-
-	priv->gpu = gpu;
+	if (!priv->gpu)
+		priv->gpu = adreno_load_gpu(dev);
 
-out:
 	mutex_unlock(&init_lock);
 }
 
@@ -836,6 +809,7 @@ static struct drm_driver msm_driver = {
 	.open               = msm_open,
 	.preclose           = msm_preclose,
 	.lastclose          = msm_lastclose,
+	.set_busid          = drm_platform_set_busid,
 	.irq_handler        = msm_irq,
 	.irq_preinstall     = msm_irq_preinstall,
 	.irq_postinstall    = msm_irq_postinstall,
@@ -1025,7 +999,7 @@ static int __init msm_drm_register(void)
 {
 	DBG("init");
 	hdmi_register();
-	a3xx_register();
+	adreno_register();
 	return platform_driver_register(&msm_platform_driver);
 }
 
@@ -1034,7 +1008,7 @@ static void __exit msm_drm_unregister(void)
 	DBG("fini");
 	platform_driver_unregister(&msm_platform_driver);
 	hdmi_unregister();
-	a3xx_unregister();
+	adreno_unregister();
 }
 
 module_init(msm_drm_register);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index 9b579b792840..fd1e4b4a6d40 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -166,8 +166,8 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		const char *name, const char *ioname, const char *irqname, int ringsz);
 void msm_gpu_cleanup(struct msm_gpu *gpu);
 
-struct msm_gpu *a3xx_gpu_init(struct drm_device *dev);
-void __init a3xx_register(void);
-void __exit a3xx_unregister(void);
+struct msm_gpu *adreno_load_gpu(struct drm_device *dev);
+void __init adreno_register(void);
+void __exit adreno_unregister(void);
 
 #endif /* __MSM_GPU_H__ */
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index f5d7f7ce4bc6..12c24c8abf7f 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -38,6 +38,7 @@ nouveau-y += core/subdev/bios/dcb.o
 nouveau-y += core/subdev/bios/disp.o
 nouveau-y += core/subdev/bios/dp.o
 nouveau-y += core/subdev/bios/extdev.o
+nouveau-y += core/subdev/bios/fan.o
 nouveau-y += core/subdev/bios/gpio.o
 nouveau-y += core/subdev/bios/i2c.o
 nouveau-y += core/subdev/bios/init.o
@@ -51,6 +52,8 @@ nouveau-y += core/subdev/bios/therm.o
 nouveau-y += core/subdev/bios/vmap.o
 nouveau-y += core/subdev/bios/volt.o
 nouveau-y += core/subdev/bios/xpio.o
+nouveau-y += core/subdev/bios/M0205.o
+nouveau-y += core/subdev/bios/M0209.o
 nouveau-y += core/subdev/bios/P0260.o
 nouveau-y += core/subdev/bus/hwsq.o
 nouveau-y += core/subdev/bus/nv04.o
@@ -124,12 +127,17 @@ nouveau-y += core/subdev/fb/ramnvc0.o
 nouveau-y += core/subdev/fb/ramnve0.o
 nouveau-y += core/subdev/fb/ramgk20a.o
 nouveau-y += core/subdev/fb/ramgm107.o
+nouveau-y += core/subdev/fb/sddr2.o
 nouveau-y += core/subdev/fb/sddr3.o
 nouveau-y += core/subdev/fb/gddr5.o
+nouveau-y += core/subdev/fuse/base.o
+nouveau-y += core/subdev/fuse/g80.o
+nouveau-y += core/subdev/fuse/gf100.o
+nouveau-y += core/subdev/fuse/gm107.o
 nouveau-y += core/subdev/gpio/base.o
 nouveau-y += core/subdev/gpio/nv10.o
 nouveau-y += core/subdev/gpio/nv50.o
-nouveau-y += core/subdev/gpio/nv92.o
+nouveau-y += core/subdev/gpio/nv94.o
 nouveau-y += core/subdev/gpio/nvd0.o
 nouveau-y += core/subdev/gpio/nve0.o
 nouveau-y += core/subdev/i2c/base.o
@@ -190,6 +198,7 @@ nouveau-y += core/subdev/therm/nv50.o
 nouveau-y += core/subdev/therm/nv84.o
 nouveau-y += core/subdev/therm/nva3.o
 nouveau-y += core/subdev/therm/nvd0.o
+nouveau-y += core/subdev/therm/gm107.o
 nouveau-y += core/subdev/timer/base.o
 nouveau-y += core/subdev/timer/nv04.o
 nouveau-y += core/subdev/timer/gk20a.o
@@ -252,6 +261,7 @@ nouveau-y += core/engine/disp/hdanvd0.o
 nouveau-y += core/engine/disp/hdminv84.o
 nouveau-y += core/engine/disp/hdminva3.o
 nouveau-y += core/engine/disp/hdminvd0.o
+nouveau-y += core/engine/disp/hdminve0.o
 nouveau-y += core/engine/disp/piornv50.o
 nouveau-y += core/engine/disp/sornv50.o
 nouveau-y += core/engine/disp/sornv94.o
diff --git a/drivers/gpu/drm/nouveau/core/core/client.c b/drivers/gpu/drm/nouveau/core/core/client.c
index 68bf06768123..e962433294c3 100644
--- a/drivers/gpu/drm/nouveau/core/core/client.c
+++ b/drivers/gpu/drm/nouveau/core/core/client.c
@@ -91,9 +91,10 @@ nvkm_client_notify_del(struct nouveau_client *client, int index)
 }
 
 int
-nvkm_client_notify_new(struct nouveau_client *client,
+nvkm_client_notify_new(struct nouveau_object *object,
 		       struct nvkm_event *event, void *data, u32 size)
 {
+	struct nouveau_client *client = nouveau_client(object);
 	struct nvkm_client_notify *notify;
 	union {
 		struct nvif_notify_req_v0 v0;
@@ -127,8 +128,8 @@ nvkm_client_notify_new(struct nouveau_client *client,
 	}
 
 	if (ret == 0) {
-		ret = nvkm_notify_init(event, nvkm_client_notify, false,
-				       data, size, reply, &notify->n);
+		ret = nvkm_notify_init(object, event, nvkm_client_notify,
+				       false, data, size, reply, &notify->n);
 		if (ret == 0) {
 			client->notify[index] = notify;
 			notify->client = client;
diff --git a/drivers/gpu/drm/nouveau/core/core/event.c b/drivers/gpu/drm/nouveau/core/core/event.c
index 0540a48c5678..ff2b434b3db4 100644
--- a/drivers/gpu/drm/nouveau/core/core/event.c
+++ b/drivers/gpu/drm/nouveau/core/core/event.c
@@ -20,7 +20,7 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#include <core/os.h>
+#include <core/object.h>
 #include <core/event.h>
 
 void
diff --git a/drivers/gpu/drm/nouveau/core/core/gpuobj.c b/drivers/gpu/drm/nouveau/core/core/gpuobj.c
index 560b2214cf1c..daee87702502 100644
--- a/drivers/gpu/drm/nouveau/core/core/gpuobj.c
+++ b/drivers/gpu/drm/nouveau/core/core/gpuobj.c
@@ -115,7 +115,7 @@ nouveau_gpuobj_create_(struct nouveau_object *parent,
 	gpuobj->size = size;
 
 	if (heap) {
-		ret = nouveau_mm_head(heap, 1, size, size,
+		ret = nouveau_mm_head(heap, 0, 1, size, size,
 				      max(align, (u32)1), &gpuobj->node);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/nouveau/core/core/ioctl.c b/drivers/gpu/drm/nouveau/core/core/ioctl.c
index f7e19bfb489c..692aa92dd850 100644
--- a/drivers/gpu/drm/nouveau/core/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/core/core/ioctl.c
@@ -349,7 +349,6 @@ nvkm_ioctl_unmap(struct nouveau_handle *handle, void *data, u32 size)
 static int
 nvkm_ioctl_ntfy_new(struct nouveau_handle *handle, void *data, u32 size)
 {
-	struct nouveau_client *client = nouveau_client(handle->object);
 	struct nouveau_object *object = handle->object;
 	struct nouveau_ofuncs *ofuncs = object->oclass->ofuncs;
 	union {
@@ -365,7 +364,7 @@ nvkm_ioctl_ntfy_new(struct nouveau_handle *handle, void *data, u32 size)
 		if (ret = -ENODEV, ofuncs->ntfy)
 			ret = ofuncs->ntfy(object, args->v0.event, &event);
 		if (ret == 0) {
-			ret = nvkm_client_notify_new(client, event, data, size);
+			ret = nvkm_client_notify_new(object, event, data, size);
 			if (ret >= 0) {
 				args->v0.index = ret;
 				ret = 0;
diff --git a/drivers/gpu/drm/nouveau/core/core/mm.c b/drivers/gpu/drm/nouveau/core/core/mm.c
index 7a4e0891c5f8..b4f5db66d5b5 100644
--- a/drivers/gpu/drm/nouveau/core/core/mm.c
+++ b/drivers/gpu/drm/nouveau/core/core/mm.c
@@ -28,6 +28,24 @@
 #define node(root, dir) ((root)->nl_entry.dir == &mm->nodes) ? NULL : \
 	list_entry((root)->nl_entry.dir, struct nouveau_mm_node, nl_entry)
 
+static void
+nouveau_mm_dump(struct nouveau_mm *mm, const char *header)
+{
+	struct nouveau_mm_node *node;
+
+	printk(KERN_ERR "nouveau: %s\n", header);
+	printk(KERN_ERR "nouveau: node list:\n");
+	list_for_each_entry(node, &mm->nodes, nl_entry) {
+		printk(KERN_ERR "nouveau: \t%08x %08x %d\n",
+		       node->offset, node->length, node->type);
+	}
+	printk(KERN_ERR "nouveau: free list:\n");
+	list_for_each_entry(node, &mm->free, fl_entry) {
+		printk(KERN_ERR "nouveau: \t%08x %08x %d\n",
+		       node->offset, node->length, node->type);
+	}
+}
+
 void
 nouveau_mm_free(struct nouveau_mm *mm, struct nouveau_mm_node **pthis)
 {
@@ -37,29 +55,29 @@ nouveau_mm_free(struct nouveau_mm *mm, struct nouveau_mm_node **pthis)
 		struct nouveau_mm_node *prev = node(this, prev);
 		struct nouveau_mm_node *next = node(this, next);
 
-		if (prev && prev->type == 0) {
+		if (prev && prev->type == NVKM_MM_TYPE_NONE) {
 			prev->length += this->length;
 			list_del(&this->nl_entry);
 			kfree(this); this = prev;
 		}
 
-		if (next && next->type == 0) {
+		if (next && next->type == NVKM_MM_TYPE_NONE) {
 			next->offset  = this->offset;
 			next->length += this->length;
-			if (this->type == 0)
+			if (this->type == NVKM_MM_TYPE_NONE)
 				list_del(&this->fl_entry);
 			list_del(&this->nl_entry);
 			kfree(this); this = NULL;
 		}
 
-		if (this && this->type != 0) {
+		if (this && this->type != NVKM_MM_TYPE_NONE) {
 			list_for_each_entry(prev, &mm->free, fl_entry) {
 				if (this->offset < prev->offset)
 					break;
 			}
 
 			list_add_tail(&this->fl_entry, &prev->fl_entry);
-			this->type = 0;
+			this->type = NVKM_MM_TYPE_NONE;
 		}
 	}
 
@@ -80,27 +98,32 @@ region_head(struct nouveau_mm *mm, struct nouveau_mm_node *a, u32 size)
 
 	b->offset = a->offset;
 	b->length = size;
+	b->heap   = a->heap;
 	b->type   = a->type;
 	a->offset += size;
 	a->length -= size;
 	list_add_tail(&b->nl_entry, &a->nl_entry);
-	if (b->type == 0)
+	if (b->type == NVKM_MM_TYPE_NONE)
 		list_add_tail(&b->fl_entry, &a->fl_entry);
 	return b;
 }
 
 int
-nouveau_mm_head(struct nouveau_mm *mm, u8 type, u32 size_max, u32 size_min,
-		u32 align, struct nouveau_mm_node **pnode)
+nouveau_mm_head(struct nouveau_mm *mm, u8 heap, u8 type, u32 size_max,
+		u32 size_min, u32 align, struct nouveau_mm_node **pnode)
 {
 	struct nouveau_mm_node *prev, *this, *next;
 	u32 mask = align - 1;
 	u32 splitoff;
 	u32 s, e;
 
-	BUG_ON(!type);
+	BUG_ON(type == NVKM_MM_TYPE_NONE || type == NVKM_MM_TYPE_HOLE);
 
 	list_for_each_entry(this, &mm->free, fl_entry) {
+		if (unlikely(heap != NVKM_MM_HEAP_ANY)) {
+			if (this->heap != heap)
+				continue;
+		}
 		e = this->offset + this->length;
 		s = this->offset;
 
@@ -149,27 +172,32 @@ region_tail(struct nouveau_mm *mm, struct nouveau_mm_node *a, u32 size)
 	a->length -= size;
 	b->offset  = a->offset + a->length;
 	b->length  = size;
+	b->heap    = a->heap;
 	b->type    = a->type;
 
 	list_add(&b->nl_entry, &a->nl_entry);
-	if (b->type == 0)
+	if (b->type == NVKM_MM_TYPE_NONE)
 		list_add(&b->fl_entry, &a->fl_entry);
 	return b;
 }
 
 int
-nouveau_mm_tail(struct nouveau_mm *mm, u8 type, u32 size_max, u32 size_min,
-		u32 align, struct nouveau_mm_node **pnode)
+nouveau_mm_tail(struct nouveau_mm *mm, u8 heap, u8 type, u32 size_max,
+		u32 size_min, u32 align, struct nouveau_mm_node **pnode)
 {
 	struct nouveau_mm_node *prev, *this, *next;
 	u32 mask = align - 1;
 
-	BUG_ON(!type);
+	BUG_ON(type == NVKM_MM_TYPE_NONE || type == NVKM_MM_TYPE_HOLE);
 
 	list_for_each_entry_reverse(this, &mm->free, fl_entry) {
 		u32 e = this->offset + this->length;
 		u32 s = this->offset;
 		u32 c = 0, a;
+		if (unlikely(heap != NVKM_MM_HEAP_ANY)) {
+			if (this->heap != heap)
+				continue;
+		}
 
 		prev = node(this, prev);
 		if (prev && prev->type != type)
@@ -209,9 +237,23 @@ nouveau_mm_tail(struct nouveau_mm *mm, u8 type, u32 size_max, u32 size_min,
 int
 nouveau_mm_init(struct nouveau_mm *mm, u32 offset, u32 length, u32 block)
 {
-	struct nouveau_mm_node *node;
+	struct nouveau_mm_node *node, *prev;
+	u32 next;
 
-	if (block) {
+	if (nouveau_mm_initialised(mm)) {
+		prev = list_last_entry(&mm->nodes, typeof(*node), nl_entry);
+		next = prev->offset + prev->length;
+		if (next != offset) {
+			BUG_ON(next > offset);
+			if (!(node = kzalloc(sizeof(*node), GFP_KERNEL)))
+				return -ENOMEM;
+			node->type   = NVKM_MM_TYPE_HOLE;
+			node->offset = next;
+			node->length = offset - next;
+			list_add_tail(&node->nl_entry, &mm->nodes);
+		}
+		BUG_ON(block != mm->block_size);
+	} else {
 		INIT_LIST_HEAD(&mm->nodes);
 		INIT_LIST_HEAD(&mm->free);
 		mm->block_size = block;
@@ -230,25 +272,32 @@ nouveau_mm_init(struct nouveau_mm *mm, u32 offset, u32 length, u32 block)
 
 	list_add_tail(&node->nl_entry, &mm->nodes);
 	list_add_tail(&node->fl_entry, &mm->free);
-	mm->heap_nodes++;
+	node->heap = ++mm->heap_nodes;
 	return 0;
 }
 
 int
 nouveau_mm_fini(struct nouveau_mm *mm)
 {
-	if (nouveau_mm_initialised(mm)) {
-		struct nouveau_mm_node *node, *heap =
-			list_first_entry(&mm->nodes, typeof(*heap), nl_entry);
-		int nodes = 0;
+	struct nouveau_mm_node *node, *temp;
+	int nodes = 0;
 
-		list_for_each_entry(node, &mm->nodes, nl_entry) {
-			if (WARN_ON(nodes++ == mm->heap_nodes))
+	if (!nouveau_mm_initialised(mm))
+		return 0;
+
+	list_for_each_entry(node, &mm->nodes, nl_entry) {
+		if (node->type != NVKM_MM_TYPE_HOLE) {
+			if (++nodes > mm->heap_nodes) {
+				nouveau_mm_dump(mm, "mm not clean!");
 				return -EBUSY;
+			}
 		}
-
-		kfree(heap);
 	}
 
+	list_for_each_entry_safe(node, temp, &mm->nodes, nl_entry) {
+		list_del(&node->nl_entry);
+		kfree(node);
+	}
+	mm->heap_nodes = 0;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/core/notify.c b/drivers/gpu/drm/nouveau/core/core/notify.c
index 76adb81bdea2..d1bcde55e9d7 100644
--- a/drivers/gpu/drm/nouveau/core/core/notify.c
+++ b/drivers/gpu/drm/nouveau/core/core/notify.c
@@ -134,14 +134,15 @@ nvkm_notify_fini(struct nvkm_notify *notify)
 }
 
 int
-nvkm_notify_init(struct nvkm_event *event, int (*func)(struct nvkm_notify *),
-		 bool work, void *data, u32 size, u32 reply,
+nvkm_notify_init(struct nouveau_object *object, struct nvkm_event *event,
+		 int (*func)(struct nvkm_notify *), bool work,
+		 void *data, u32 size, u32 reply,
 		 struct nvkm_notify *notify)
 {
 	unsigned long flags;
 	int ret = -ENODEV;
 	if ((notify->event = event), event->refs) {
-		ret = event->func->ctor(data, size, notify);
+		ret = event->func->ctor(object, data, size, notify);
 		if (ret == 0 && (ret = -EINVAL, notify->size == reply)) {
 			notify->flags = 0;
 			notify->block = 1;
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/base.c b/drivers/gpu/drm/nouveau/core/engine/device/base.c
index 8928f7981d4a..0ef5a5713182 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/base.c
@@ -505,7 +505,8 @@ nouveau_device_sclass[] = {
 };
 
 static int
-nouveau_device_event_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_device_event_ctor(struct nouveau_object *object, void *data, u32 size,
+			  struct nvkm_notify *notify)
 {
 	if (!WARN_ON(size != 0)) {
 		notify->size  = 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/gm100.c b/drivers/gpu/drm/nouveau/core/engine/device/gm100.c
index 377ec0b8851e..6295668e29a5 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/gm100.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/gm100.c
@@ -26,6 +26,7 @@
 #include <subdev/bus.h>
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
+#include <subdev/fuse.h>
 #include <subdev/clock.h>
 #include <subdev/therm.h>
 #include <subdev/mxm.h>
@@ -62,10 +63,9 @@ gm100_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gm107_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
-#if 0
-		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
-#endif
+		device->oclass[NVDEV_SUBDEV_THERM  ] = &gm107_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
 		device->oclass[NVDEV_SUBDEV_DEVINIT] =  gm107_devinit_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  gk20a_mc_oclass;
@@ -77,8 +77,9 @@ gm100_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_INSTMEM] =  nv50_instmem_oclass;
 		device->oclass[NVDEV_SUBDEV_VM     ] = &nvc0_vmmgr_oclass;
 		device->oclass[NVDEV_SUBDEV_BAR    ] = &nvc0_bar_oclass;
-#if 0
 		device->oclass[NVDEV_SUBDEV_PWR    ] =  nv108_pwr_oclass;
+
+#if 0
 		device->oclass[NVDEV_SUBDEV_VOLT   ] = &nv40_volt_oclass;
 #endif
 		device->oclass[NVDEV_ENGINE_DMAOBJ ] =  nvd0_dmaeng_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nv50.c b/drivers/gpu/drm/nouveau/core/engine/device/nv50.c
index 932f84fae459..96f568d1321b 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nv50.c
@@ -26,6 +26,7 @@
 #include <subdev/bus.h>
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
+#include <subdev/fuse.h>
 #include <subdev/clock.h>
 #include <subdev/therm.h>
 #include <subdev/mxm.h>
@@ -62,6 +63,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv50_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv50_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -87,6 +89,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -115,6 +118,7 @@ nv50_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -141,8 +145,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0x92:
 		device->cname = "G92";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv50_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -169,8 +174,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0x94:
 		device->cname = "G94";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -197,8 +203,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0x96:
 		device->cname = "G96";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -225,8 +232,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0x98:
 		device->cname = "G98";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -253,8 +261,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xa0:
 		device->cname = "G200";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv50_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nv84_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -281,8 +290,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xaa:
 		device->cname = "MCP77/MCP78";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nvaa_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -309,8 +319,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xac:
 		device->cname = "MCP79/MCP7A";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] =  nvaa_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nv84_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -337,8 +348,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xa3:
 		device->cname = "GT215";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -367,8 +379,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xa5:
 		device->cname = "GT216";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -396,8 +409,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xa8:
 		device->cname = "GT218";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -425,8 +439,9 @@ nv50_identify(struct nouveau_device *device)
 	case 0xaf:
 		device->cname = "MCP89";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] =  &g80_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nva3_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
index b4a2917ce555..cd05677ad4b7 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nvc0.c
@@ -26,6 +26,7 @@
 #include <subdev/bus.h>
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
+#include <subdev/fuse.h>
 #include <subdev/clock.h>
 #include <subdev/therm.h>
 #include <subdev/mxm.h>
@@ -60,8 +61,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xc0:
 		device->cname = "GF100";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -92,8 +94,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xc4:
 		device->cname = "GF104";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -124,8 +127,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xc3:
 		device->cname = "GF106";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -155,8 +159,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xce:
 		device->cname = "GF114";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -187,8 +192,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xcf:
 		device->cname = "GF116";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -219,8 +225,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xc1:
 		device->cname = "GF108";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -250,8 +257,9 @@ nvc0_identify(struct nouveau_device *device)
 	case 0xc8:
 		device->cname = "GF110";
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
-		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv92_gpio_oclass;
+		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nv94_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nv94_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nva3_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -284,6 +292,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nvd0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -315,6 +324,7 @@ nvc0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nvd0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  gf117_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nvc0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
index cdf9147f32a1..b1b2e484ecfa 100644
--- a/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/device/nve0.c
@@ -26,6 +26,7 @@
 #include <subdev/bus.h>
 #include <subdev/gpio.h>
 #include <subdev/i2c.h>
+#include <subdev/fuse.h>
 #include <subdev/clock.h>
 #include <subdev/therm.h>
 #include <subdev/mxm.h>
@@ -62,6 +63,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -95,6 +97,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -128,6 +131,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -161,6 +165,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &gk20a_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_MC     ] =  gk20a_mc_oclass;
 		device->oclass[NVDEV_SUBDEV_BUS    ] =  nvc0_bus_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_TIMER  ] = &gk20a_timer_oclass;
 		device->oclass[NVDEV_SUBDEV_FB     ] =  gk20a_fb_oclass;
 		device->oclass[NVDEV_SUBDEV_LTC    ] =  gk104_ltc_oclass;
@@ -180,6 +185,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -213,6 +219,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nvd0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
@@ -246,6 +253,7 @@ nve0_identify(struct nouveau_device *device)
 		device->oclass[NVDEV_SUBDEV_VBIOS  ] = &nouveau_bios_oclass;
 		device->oclass[NVDEV_SUBDEV_GPIO   ] =  nve0_gpio_oclass;
 		device->oclass[NVDEV_SUBDEV_I2C    ] =  nve0_i2c_oclass;
+		device->oclass[NVDEV_SUBDEV_FUSE   ] = &gf100_fuse_oclass;
 		device->oclass[NVDEV_SUBDEV_CLOCK  ] = &nve0_clock_oclass;
 		device->oclass[NVDEV_SUBDEV_THERM  ] = &nvd0_therm_oclass;
 		device->oclass[NVDEV_SUBDEV_MXM    ] = &nv50_mxm_oclass;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/base.c b/drivers/gpu/drm/nouveau/core/engine/disp/base.c
index 22d55f6cde50..64b84667f3a5 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/base.c
@@ -32,7 +32,8 @@
 #include "conn.h"
 
 int
-nouveau_disp_vblank_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_disp_vblank_ctor(struct nouveau_object *object, void *data, u32 size,
+			 struct nvkm_notify *notify)
 {
 	struct nouveau_disp *disp =
 		container_of(notify->event, typeof(*disp), vblank);
@@ -61,7 +62,8 @@ nouveau_disp_vblank(struct nouveau_disp *disp, int head)
 }
 
 static int
-nouveau_disp_hpd_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_disp_hpd_ctor(struct nouveau_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	struct nouveau_disp *disp =
 		container_of(notify->event, typeof(*disp), hpd);
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/conn.c b/drivers/gpu/drm/nouveau/core/engine/disp/conn.c
index 3d1070228977..1496b567dd4a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/conn.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/conn.c
@@ -126,8 +126,8 @@ nvkm_connector_create_(struct nouveau_object *parent,
 			return 0;
 		}
 
-		ret = nvkm_notify_init(&gpio->event, nvkm_connector_hpd, true,
-				       &(struct nvkm_gpio_ntfy_req) {
+		ret = nvkm_notify_init(NULL, &gpio->event, nvkm_connector_hpd,
+				       true, &(struct nvkm_gpio_ntfy_req) {
 					.mask = NVKM_GPIO_TOGGLED,
 					.line = func.line,
 				       },
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c
index d54da8b5f87e..b3df3fe2dc09 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/gm107.c
@@ -68,6 +68,10 @@ gm107_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nvd0_disp_chan_uevent, 1, 17, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = gm107_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nvd0_disp_intr;
@@ -80,7 +84,7 @@ gm107_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	priv->dac.sense = nv50_dac_sense;
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
-	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.hdmi = nve0_hdmi_ctrl;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
index 8b4e06abe533..fe9ef5894dd4 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanva3.c
@@ -26,6 +26,8 @@
 #include <nvif/unpack.h>
 #include <nvif/class.h>
 
+#include <subdev/timer.h>
+
 #include "nv50.h"
 
 int
@@ -46,16 +48,21 @@ nva3_hda_eld(NV50_DISP_MTHD_V1)
 		return ret;
 
 	if (size && args->v0.data[0]) {
+		if (outp->info.type == DCB_OUTPUT_DP) {
+			nv_mask(priv, 0x61c1e0 + soff, 0x8000000d, 0x80000001);
+			nv_wait(priv, 0x61c1e0 + soff, 0x80000000, 0x00000000);
+		}
 		for (i = 0; i < size; i++)
 			nv_wr32(priv, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
 		for (; i < 0x60; i++)
 			nv_wr32(priv, 0x61c440 + soff, (i << 8));
 		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000003);
-	} else
-	if (size) {
-		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000001);
 	} else {
-		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000000);
+		if (outp->info.type == DCB_OUTPUT_DP) {
+			nv_mask(priv, 0x61c1e0 + soff, 0x80000001, 0x80000000);
+			nv_wait(priv, 0x61c1e0 + soff, 0x80000000, 0x00000000);
+		}
+		nv_mask(priv, 0x61c448 + soff, 0x80000003, 0x80000000 | !!size);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
index baf558fc12fb..1d4e8432d857 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdanvd0.c
@@ -26,10 +26,7 @@
 #include <nvif/unpack.h>
 #include <nvif/class.h>
 
-#include <subdev/bios.h>
-#include <subdev/bios/dcb.h>
-#include <subdev/bios/dp.h>
-#include <subdev/bios/init.h>
+#include <subdev/timer.h>
 
 #include "nv50.h"
 
@@ -40,6 +37,7 @@ nvd0_hda_eld(NV50_DISP_MTHD_V1)
 		struct nv50_disp_sor_hda_eld_v0 v0;
 	} *args = data;
 	const u32 soff = outp->or * 0x030;
+	const u32 hoff = head * 0x800;
 	int ret, i;
 
 	nv_ioctl(object, "disp sor hda eld size %d\n", size);
@@ -51,16 +49,22 @@ nvd0_hda_eld(NV50_DISP_MTHD_V1)
 		return ret;
 
 	if (size && args->v0.data[0]) {
+		if (outp->info.type == DCB_OUTPUT_DP) {
+			nv_mask(priv, 0x616618 + hoff, 0x8000000c, 0x80000001);
+			nv_wait(priv, 0x616618 + hoff, 0x80000000, 0x00000000);
+		}
+		nv_mask(priv, 0x616548 + hoff, 0x00000070, 0x00000000);
 		for (i = 0; i < size; i++)
 			nv_wr32(priv, 0x10ec00 + soff, (i << 8) | args->v0.data[i]);
 		for (; i < 0x60; i++)
 			nv_wr32(priv, 0x10ec00 + soff, (i << 8));
 		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000003);
-	} else
-	if (size) {
-		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000001);
 	} else {
-		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000000);
+		if (outp->info.type == DCB_OUTPUT_DP) {
+			nv_mask(priv, 0x616618 + hoff, 0x80000001, 0x80000000);
+			nv_wait(priv, 0x616618 + hoff, 0x80000000, 0x00000000);
+		}
+		nv_mask(priv, 0x10ec10 + soff, 0x80000003, 0x80000000 | !!size);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c
index 3106d295b48d..bac4fc4570f0 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminvd0.c
@@ -75,8 +75,5 @@ nvd0_hdmi_ctrl(NV50_DISP_MTHD_V1)
 
 	/* HDMI_CTRL */
 	nv_mask(priv, 0x616798 + hoff, 0x401f007f, ctrl);
-
-	/* NFI, audio doesn't work without it though.. */
-	nv_mask(priv, 0x616548 + hoff, 0x00000070, 0x00000000);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/hdminve0.c b/drivers/gpu/drm/nouveau/core/engine/disp/hdminve0.c
new file mode 100644
index 000000000000..528d14ec2f7f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/hdminve0.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2014 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <core/client.h>
+#include <nvif/unpack.h>
+#include <nvif/class.h>
+
+#include "nv50.h"
+
+int
+nve0_hdmi_ctrl(NV50_DISP_MTHD_V1)
+{
+	const u32 hoff = (head * 0x800);
+	const u32 hdmi = (head * 0x400);
+	union {
+		struct nv50_disp_sor_hdmi_pwr_v0 v0;
+	} *args = data;
+	u32 ctrl;
+	int ret;
+
+	nv_ioctl(object, "disp sor hdmi ctrl size %d\n", size);
+	if (nvif_unpack(args->v0, 0, 0, false)) {
+		nv_ioctl(object, "disp sor hdmi ctrl vers %d state %d "
+				 "max_ac_packet %d rekey %d\n",
+			 args->v0.version, args->v0.state,
+			 args->v0.max_ac_packet, args->v0.rekey);
+		if (args->v0.max_ac_packet > 0x1f || args->v0.rekey > 0x7f)
+			return -EINVAL;
+		ctrl  = 0x40000000 * !!args->v0.state;
+		ctrl |= args->v0.max_ac_packet << 16;
+		ctrl |= args->v0.rekey;
+	} else
+		return ret;
+
+	if (!(ctrl & 0x40000000)) {
+		nv_mask(priv, 0x616798 + hoff, 0x40000000, 0x00000000);
+		nv_mask(priv, 0x6900c0 + hdmi, 0x00000001, 0x00000000);
+		nv_mask(priv, 0x690000 + hdmi, 0x00000001, 0x00000000);
+		return 0;
+	}
+
+	/* AVI InfoFrame */
+	nv_mask(priv, 0x690000 + hdmi, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x690008 + hdmi, 0x000d0282);
+	nv_wr32(priv, 0x69000c + hdmi, 0x0000006f);
+	nv_wr32(priv, 0x690010 + hdmi, 0x00000000);
+	nv_wr32(priv, 0x690014 + hdmi, 0x00000000);
+	nv_wr32(priv, 0x690018 + hdmi, 0x00000000);
+	nv_mask(priv, 0x690000 + hdmi, 0x00000001, 0x00000001);
+
+	/* ??? InfoFrame? */
+	nv_mask(priv, 0x6900c0 + hdmi, 0x00000001, 0x00000000);
+	nv_wr32(priv, 0x6900cc + hdmi, 0x00000010);
+	nv_mask(priv, 0x6900c0 + hdmi, 0x00000001, 0x00000001);
+
+	/* ??? */
+	nv_wr32(priv, 0x690080 + hdmi, 0x82000000);
+
+	/* HDMI_CTRL */
+	nv_mask(priv, 0x616798 + hoff, 0x401f007f, ctrl);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
index 4b5bb5d58a54..a7efbff4dc8f 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.c
@@ -29,6 +29,7 @@
 #include <core/enum.h>
 #include <nvif/unpack.h>
 #include <nvif/class.h>
+#include <nvif/event.h>
 
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
@@ -82,6 +83,71 @@ nv50_disp_chan_destroy(struct nv50_disp_chan *chan)
 	nouveau_namedb_destroy(&chan->base);
 }
 
+static void
+nv50_disp_chan_uevent_fini(struct nvkm_event *event, int type, int index)
+{
+	struct nv50_disp_priv *priv = container_of(event, typeof(*priv), uevent);
+	nv_mask(priv, 0x610028, 0x00000001 << index, 0x00000000 << index);
+}
+
+static void
+nv50_disp_chan_uevent_init(struct nvkm_event *event, int types, int index)
+{
+	struct nv50_disp_priv *priv = container_of(event, typeof(*priv), uevent);
+	nv_mask(priv, 0x610028, 0x00000001 << index, 0x00000001 << index);
+}
+
+void
+nv50_disp_chan_uevent_send(struct nv50_disp_priv *priv, int chid)
+{
+	struct nvif_notify_uevent_rep {
+	} rep;
+
+	nvkm_event_send(&priv->uevent, 1, chid, &rep, sizeof(rep));
+}
+
+int
+nv50_disp_chan_uevent_ctor(struct nouveau_object *object, void *data, u32 size,
+			   struct nvkm_notify *notify)
+{
+	struct nv50_disp_dmac *dmac = (void *)object;
+	union {
+		struct nvif_notify_uevent_req none;
+	} *args = data;
+	int ret;
+
+	if (nvif_unvers(args->none)) {
+		notify->size  = sizeof(struct nvif_notify_uevent_rep);
+		notify->types = 1;
+		notify->index = dmac->base.chid;
+		return 0;
+	}
+
+	return ret;
+}
+
+const struct nvkm_event_func
+nv50_disp_chan_uevent = {
+	.ctor = nv50_disp_chan_uevent_ctor,
+	.init = nv50_disp_chan_uevent_init,
+	.fini = nv50_disp_chan_uevent_fini,
+};
+
+int
+nv50_disp_chan_ntfy(struct nouveau_object *object, u32 type,
+		    struct nvkm_event **pevent)
+{
+	struct nv50_disp_priv *priv = (void *)object->engine;
+	switch (type) {
+	case NV50_DISP_CORE_CHANNEL_DMA_V0_NTFY_UEVENT:
+		*pevent = &priv->uevent;
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 int
 nv50_disp_chan_map(struct nouveau_object *object, u64 *addr, u32 *size)
 {
@@ -195,7 +261,7 @@ nv50_disp_dmac_init(struct nouveau_object *object)
 		return ret;
 
 	/* enable error reporting */
-	nv_mask(priv, 0x610028, 0x00010001 << chid, 0x00010001 << chid);
+	nv_mask(priv, 0x610028, 0x00010000 << chid, 0x00010000 << chid);
 
 	/* initialise channel for dma command submission */
 	nv_wr32(priv, 0x610204 + (chid * 0x0010), dmac->push);
@@ -232,7 +298,7 @@ nv50_disp_dmac_fini(struct nouveau_object *object, bool suspend)
 			return -EBUSY;
 	}
 
-	/* disable error reporting */
+	/* disable error reporting and completion notifications */
 	nv_mask(priv, 0x610028, 0x00010001 << chid, 0x00000000 << chid);
 
 	return nv50_disp_chan_fini(&dmac->base, suspend);
@@ -454,7 +520,7 @@ nv50_disp_mast_init(struct nouveau_object *object)
 		return ret;
 
 	/* enable error reporting */
-	nv_mask(priv, 0x610028, 0x00010001, 0x00010001);
+	nv_mask(priv, 0x610028, 0x00010000, 0x00010000);
 
 	/* attempt to unstick channel from some unknown state */
 	if ((nv_rd32(priv, 0x610200) & 0x009f0000) == 0x00020000)
@@ -494,7 +560,7 @@ nv50_disp_mast_fini(struct nouveau_object *object, bool suspend)
 			return -EBUSY;
 	}
 
-	/* disable error reporting */
+	/* disable error reporting and completion notifications */
 	nv_mask(priv, 0x610028, 0x00010001, 0x00000000);
 
 	return nv50_disp_chan_fini(&mast->base, suspend);
@@ -507,6 +573,7 @@ nv50_disp_mast_ofuncs = {
 	.base.init = nv50_disp_mast_init,
 	.base.fini = nv50_disp_mast_fini,
 	.base.map  = nv50_disp_chan_map,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
 	.chid = 0,
@@ -607,6 +674,7 @@ nv50_disp_sync_ofuncs = {
 	.base.dtor = nv50_disp_dmac_dtor,
 	.base.init = nv50_disp_dmac_init,
 	.base.fini = nv50_disp_dmac_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -696,6 +764,7 @@ nv50_disp_ovly_ofuncs = {
 	.base.dtor = nv50_disp_dmac_dtor,
 	.base.init = nv50_disp_dmac_init,
 	.base.fini = nv50_disp_dmac_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -813,6 +882,7 @@ nv50_disp_oimm_ofuncs = {
 	.base.dtor = nv50_disp_pioc_dtor,
 	.base.init = nv50_disp_pioc_init,
 	.base.fini = nv50_disp_pioc_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -860,6 +930,7 @@ nv50_disp_curs_ofuncs = {
 	.base.dtor = nv50_disp_pioc_dtor,
 	.base.init = nv50_disp_pioc_init,
 	.base.fini = nv50_disp_pioc_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -1559,7 +1630,7 @@ nv50_disp_intr_unk20_1(struct nv50_disp_priv *priv, int head)
 }
 
 static void
-nv50_disp_intr_unk20_2_dp(struct nv50_disp_priv *priv,
+nv50_disp_intr_unk20_2_dp(struct nv50_disp_priv *priv, int head,
 			  struct dcb_output *outp, u32 pclk)
 {
 	const int link = !(outp->sorconf.link & 1);
@@ -1568,24 +1639,36 @@ nv50_disp_intr_unk20_2_dp(struct nv50_disp_priv *priv,
 	const u32 loff = (link * 0x080) + soff;
 	const u32 ctrl = nv_rd32(priv, 0x610794 + (or * 8));
 	const u32 symbol = 100000;
-	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff) & 0x0000f0000;
+	const s32 vactive = nv_rd32(priv, 0x610af8 + (head * 0x540)) & 0xffff;
+	const s32 vblanke = nv_rd32(priv, 0x610ae8 + (head * 0x540)) & 0xffff;
+	const s32 vblanks = nv_rd32(priv, 0x610af0 + (head * 0x540)) & 0xffff;
+	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff);
 	u32 clksor = nv_rd32(priv, 0x614300 + soff);
 	int bestTU = 0, bestVTUi = 0, bestVTUf = 0, bestVTUa = 0;
 	int TU, VTUi, VTUf, VTUa;
 	u64 link_data_rate, link_ratio, unk;
 	u32 best_diff = 64 * symbol;
 	u32 link_nr, link_bw, bits;
-
-	/* calculate packed data rate for each lane */
-	if      (dpctrl > 0x00030000) link_nr = 4;
-	else if (dpctrl > 0x00010000) link_nr = 2;
-	else			      link_nr = 1;
-
-	if (clksor & 0x000c0000)
-		link_bw = 270000;
-	else
-		link_bw = 162000;
-
+	u64 value;
+
+	link_bw = (clksor & 0x000c0000) ? 270000 : 162000;
+	link_nr = hweight32(dpctrl & 0x000f0000);
+
+	/* symbols/hblank - algorithm taken from comments in tegra driver */
+	value = vblanke + vactive - vblanks - 7;
+	value = value * link_bw;
+	do_div(value, pclk);
+	value = value - (3 * !!(dpctrl & 0x00004000)) - (12 / link_nr);
+	nv_mask(priv, 0x61c1e8 + soff, 0x0000ffff, value);
+
+	/* symbols/vblank - algorithm taken from comments in tegra driver */
+	value = vblanks - vblanke - 25;
+	value = value * link_bw;
+	do_div(value, pclk);
+	value = value - ((36 / link_nr) + 3) - 1;
+	nv_mask(priv, 0x61c1ec + soff, 0x00ffffff, value);
+
+	/* watermark / activesym */
 	if      ((ctrl & 0xf0000) == 0x60000) bits = 30;
 	else if ((ctrl & 0xf0000) == 0x50000) bits = 24;
 	else                                  bits = 18;
@@ -1731,7 +1814,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp_priv *priv, int head)
 	} else
 	if (!outp->info.location) {
 		if (outp->info.type == DCB_OUTPUT_DP)
-			nv50_disp_intr_unk20_2_dp(priv, &outp->info, pclk);
+			nv50_disp_intr_unk20_2_dp(priv, head, &outp->info, pclk);
 		oreg = 0x614300 + (ffs(outp->info.or) - 1) * 0x800;
 		oval = (conf & 0x0100) ? 0x00000101 : 0x00000000;
 		hval = 0x00000000;
@@ -1846,6 +1929,12 @@ nv50_disp_intr(struct nouveau_subdev *subdev)
 		intr0 &= ~(0x00010000 << chid);
 	}
 
+	while (intr0 & 0x0000001f) {
+		u32 chid = __ffs(intr0 & 0x0000001f);
+		nv50_disp_chan_uevent_send(priv, chid);
+		intr0 &= ~(0x00000001 << chid);
+	}
+
 	if (intr1 & 0x00000004) {
 		nouveau_disp_vblank(&priv->base, 0);
 		nv_wr32(priv, 0x610024, 0x00000004);
@@ -1880,6 +1969,10 @@ nv50_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nv50_disp_chan_uevent, 1, 9, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nv50_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
index 8ab14461f70c..5279feefec06 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv50.h
@@ -26,6 +26,8 @@ struct nv50_disp_priv {
 	struct work_struct supervisor;
 	u32 super;
 
+	struct nvkm_event uevent;
+
 	struct {
 		int nr;
 	} head;
@@ -75,6 +77,7 @@ int nvd0_hda_eld(NV50_DISP_MTHD_V1);
 int nv84_hdmi_ctrl(NV50_DISP_MTHD_V1);
 int nva3_hdmi_ctrl(NV50_DISP_MTHD_V1);
 int nvd0_hdmi_ctrl(NV50_DISP_MTHD_V1);
+int nve0_hdmi_ctrl(NV50_DISP_MTHD_V1);
 
 int nv50_sor_power(NV50_DISP_MTHD_V1);
 
@@ -116,9 +119,16 @@ struct nv50_disp_chan {
 	int chid;
 };
 
+int  nv50_disp_chan_ntfy(struct nouveau_object *, u32, struct nvkm_event **);
 int  nv50_disp_chan_map(struct nouveau_object *, u64 *, u32 *);
 u32  nv50_disp_chan_rd32(struct nouveau_object *, u64);
 void nv50_disp_chan_wr32(struct nouveau_object *, u64, u32);
+extern const struct nvkm_event_func nv50_disp_chan_uevent;
+int  nv50_disp_chan_uevent_ctor(struct nouveau_object *, void *, u32,
+				struct nvkm_notify *);
+void nv50_disp_chan_uevent_send(struct nv50_disp_priv *, int);
+
+extern const struct nvkm_event_func nvd0_disp_chan_uevent;
 
 #define nv50_disp_chan_init(a)                                                 \
 	nouveau_namedb_init(&(a)->base)
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
index 788ced1b6182..d36284715b2a 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv84.c
@@ -236,6 +236,10 @@ nv84_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nv50_disp_chan_uevent, 1, 9, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nv84_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
index fa79de906eae..a117064002b1 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nv94.c
@@ -95,6 +95,10 @@ nv94_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nv50_disp_chan_uevent, 1, 9, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nv94_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
index 7af15f5d48dc..c67e68aadd45 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva0.c
@@ -112,6 +112,10 @@ nva0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nv50_disp_chan_uevent, 1, 9, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nva0_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
index 6bd39448f8da..22969f355aae 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nva3.c
@@ -67,6 +67,10 @@ nva3_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nv50_disp_chan_uevent, 1, 9, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nva3_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nv50_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
index a4bb3c774ee1..747e64bb9c06 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvd0.c
@@ -43,6 +43,31 @@
 #include "nv50.h"
 
 /*******************************************************************************
+ * EVO channel base class
+ ******************************************************************************/
+
+static void
+nvd0_disp_chan_uevent_fini(struct nvkm_event *event, int type, int index)
+{
+	struct nv50_disp_priv *priv = container_of(event, typeof(*priv), uevent);
+	nv_mask(priv, 0x610090, 0x00000001 << index, 0x00000000 << index);
+}
+
+static void
+nvd0_disp_chan_uevent_init(struct nvkm_event *event, int types, int index)
+{
+	struct nv50_disp_priv *priv = container_of(event, typeof(*priv), uevent);
+	nv_mask(priv, 0x610090, 0x00000001 << index, 0x00000001 << index);
+}
+
+const struct nvkm_event_func
+nvd0_disp_chan_uevent = {
+	.ctor = nv50_disp_chan_uevent_ctor,
+	.init = nvd0_disp_chan_uevent_init,
+	.fini = nvd0_disp_chan_uevent_fini,
+};
+
+/*******************************************************************************
  * EVO DMA channel base class
  ******************************************************************************/
 
@@ -77,7 +102,6 @@ nvd0_disp_dmac_init(struct nouveau_object *object)
 		return ret;
 
 	/* enable error reporting */
-	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000001 << chid);
 	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
 
 	/* initialise channel for dma command submission */
@@ -115,7 +139,7 @@ nvd0_disp_dmac_fini(struct nouveau_object *object, bool suspend)
 			return -EBUSY;
 	}
 
-	/* disable error reporting */
+	/* disable error reporting and completion notification */
 	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000000);
 	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000000);
 
@@ -278,7 +302,6 @@ nvd0_disp_mast_init(struct nouveau_object *object)
 		return ret;
 
 	/* enable error reporting */
-	nv_mask(priv, 0x610090, 0x00000001, 0x00000001);
 	nv_mask(priv, 0x6100a0, 0x00000001, 0x00000001);
 
 	/* initialise channel for dma command submission */
@@ -313,7 +336,7 @@ nvd0_disp_mast_fini(struct nouveau_object *object, bool suspend)
 			return -EBUSY;
 	}
 
-	/* disable error reporting */
+	/* disable error reporting and completion notification */
 	nv_mask(priv, 0x610090, 0x00000001, 0x00000000);
 	nv_mask(priv, 0x6100a0, 0x00000001, 0x00000000);
 
@@ -326,6 +349,7 @@ nvd0_disp_mast_ofuncs = {
 	.base.dtor = nv50_disp_dmac_dtor,
 	.base.init = nvd0_disp_mast_init,
 	.base.fini = nvd0_disp_mast_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -419,6 +443,7 @@ nvd0_disp_sync_ofuncs = {
 	.base.dtor = nv50_disp_dmac_dtor,
 	.base.init = nvd0_disp_dmac_init,
 	.base.fini = nvd0_disp_dmac_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -499,6 +524,7 @@ nvd0_disp_ovly_ofuncs = {
 	.base.dtor = nv50_disp_dmac_dtor,
 	.base.init = nvd0_disp_dmac_init,
 	.base.fini = nvd0_disp_dmac_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -524,7 +550,6 @@ nvd0_disp_pioc_init(struct nouveau_object *object)
 		return ret;
 
 	/* enable error reporting */
-	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000001 << chid);
 	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
 
 	/* activate channel */
@@ -553,7 +578,7 @@ nvd0_disp_pioc_fini(struct nouveau_object *object, bool suspend)
 			return -EBUSY;
 	}
 
-	/* disable error reporting */
+	/* disable error reporting and completion notification */
 	nv_mask(priv, 0x610090, 0x00000001 << chid, 0x00000000);
 	nv_mask(priv, 0x6100a0, 0x00000001 << chid, 0x00000000);
 
@@ -570,6 +595,7 @@ nvd0_disp_oimm_ofuncs = {
 	.base.dtor = nv50_disp_pioc_dtor,
 	.base.init = nvd0_disp_pioc_init,
 	.base.fini = nvd0_disp_pioc_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -586,6 +612,7 @@ nvd0_disp_curs_ofuncs = {
 	.base.dtor = nv50_disp_pioc_dtor,
 	.base.init = nvd0_disp_pioc_init,
 	.base.fini = nvd0_disp_pioc_fini,
+	.base.ntfy = nv50_disp_chan_ntfy,
 	.base.map  = nv50_disp_chan_map,
 	.base.rd32 = nv50_disp_chan_rd32,
 	.base.wr32 = nv50_disp_chan_wr32,
@@ -949,6 +976,9 @@ nvd0_disp_intr_unk2_2_tu(struct nv50_disp_priv *priv, int head,
 	const int or = ffs(outp->or) - 1;
 	const u32 ctrl = nv_rd32(priv, 0x660200 + (or   * 0x020));
 	const u32 conf = nv_rd32(priv, 0x660404 + (head * 0x300));
+	const s32 vactive = nv_rd32(priv, 0x660414 + (head * 0x300)) & 0xffff;
+	const s32 vblanke = nv_rd32(priv, 0x66041c + (head * 0x300)) & 0xffff;
+	const s32 vblanks = nv_rd32(priv, 0x660420 + (head * 0x300)) & 0xffff;
 	const u32 pclk = nv_rd32(priv, 0x660450 + (head * 0x300)) / 1000;
 	const u32 link = ((ctrl & 0xf00) == 0x800) ? 0 : 1;
 	const u32 hoff = (head * 0x800);
@@ -956,23 +986,35 @@ nvd0_disp_intr_unk2_2_tu(struct nv50_disp_priv *priv, int head,
 	const u32 loff = (link * 0x080) + soff;
 	const u32 symbol = 100000;
 	const u32 TU = 64;
-	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff) & 0x000f0000;
+	u32 dpctrl = nv_rd32(priv, 0x61c10c + loff);
 	u32 clksor = nv_rd32(priv, 0x612300 + soff);
 	u32 datarate, link_nr, link_bw, bits;
 	u64 ratio, value;
 
+	link_nr  = hweight32(dpctrl & 0x000f0000);
+	link_bw  = (clksor & 0x007c0000) >> 18;
+	link_bw *= 27000;
+
+	/* symbols/hblank - algorithm taken from comments in tegra driver */
+	value = vblanke + vactive - vblanks - 7;
+	value = value * link_bw;
+	do_div(value, pclk);
+	value = value - (3 * !!(dpctrl & 0x00004000)) - (12 / link_nr);
+	nv_mask(priv, 0x616620 + hoff, 0x0000ffff, value);
+
+	/* symbols/vblank - algorithm taken from comments in tegra driver */
+	value = vblanks - vblanke - 25;
+	value = value * link_bw;
+	do_div(value, pclk);
+	value = value - ((36 / link_nr) + 3) - 1;
+	nv_mask(priv, 0x616624 + hoff, 0x00ffffff, value);
+
+	/* watermark */
 	if      ((conf & 0x3c0) == 0x180) bits = 30;
 	else if ((conf & 0x3c0) == 0x140) bits = 24;
 	else                              bits = 18;
 	datarate = (pclk * bits) / 8;
 
-	if      (dpctrl > 0x00030000) link_nr = 4;
-	else if (dpctrl > 0x00010000) link_nr = 2;
-	else			      link_nr = 1;
-
-	link_bw  = (clksor & 0x007c0000) >> 18;
-	link_bw *= 27000;
-
 	ratio  = datarate;
 	ratio *= symbol;
 	do_div(ratio, link_nr * link_bw);
@@ -1153,7 +1195,11 @@ nvd0_disp_intr(struct nouveau_subdev *subdev)
 
 	if (intr & 0x00000001) {
 		u32 stat = nv_rd32(priv, 0x61008c);
-		nv_wr32(priv, 0x61008c, stat);
+		while (stat) {
+			int chid = __ffs(stat); stat &= ~(1 << chid);
+			nv50_disp_chan_uevent_send(priv, chid);
+			nv_wr32(priv, 0x61008c, 1 << chid);
+		}
 		intr &= ~0x00000001;
 	}
 
@@ -1209,6 +1255,10 @@ nvd0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nvd0_disp_chan_uevent, 1, 17, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nvd0_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nvd0_disp_intr;
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
index 47fef1e398c4..db144b2cf06b 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nve0.c
@@ -233,6 +233,10 @@ nve0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nvd0_disp_chan_uevent, 1, 17, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nve0_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nvd0_disp_intr;
@@ -245,7 +249,7 @@ nve0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	priv->dac.sense = nv50_dac_sense;
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
-	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.hdmi = nve0_hdmi_ctrl;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
index 04bda4ac4ed3..402d7d67d806 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/nvf0.c
@@ -68,6 +68,10 @@ nvf0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	ret = nvkm_event_init(&nvd0_disp_chan_uevent, 1, 17, &priv->uevent);
+	if (ret)
+		return ret;
+
 	nv_engine(priv)->sclass = nvf0_disp_base_oclass;
 	nv_engine(priv)->cclass = &nv50_disp_cclass;
 	nv_subdev(priv)->intr = nvd0_disp_intr;
@@ -80,7 +84,7 @@ nvf0_disp_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	priv->dac.sense = nv50_dac_sense;
 	priv->sor.power = nv50_sor_power;
 	priv->sor.hda_eld = nvd0_hda_eld;
-	priv->sor.hdmi = nvd0_hdmi_ctrl;
+	priv->sor.hdmi = nve0_hdmi_ctrl;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c
index 6f6e2a898270..667a9070e006 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/outpdp.c
@@ -254,7 +254,7 @@ nvkm_output_dp_create_(struct nouveau_object *parent,
 	atomic_set(&outp->lt.done, 0);
 
 	/* link maintenance */
-	ret = nvkm_notify_init(&i2c->event, nvkm_output_dp_irq, true,
+	ret = nvkm_notify_init(NULL, &i2c->event, nvkm_output_dp_irq, true,
 			       &(struct nvkm_i2c_ntfy_req) {
 				.mask = NVKM_I2C_IRQ,
 				.port = outp->base.edid->index,
@@ -268,7 +268,7 @@ nvkm_output_dp_create_(struct nouveau_object *parent,
 	}
 
 	/* hotplug detect, replaces gpio-based mechanism with aux events */
-	ret = nvkm_notify_init(&i2c->event, nvkm_output_dp_hpd, true,
+	ret = nvkm_notify_init(NULL, &i2c->event, nvkm_output_dp_hpd, true,
 			       &(struct nvkm_i2c_ntfy_req) {
 				.mask = NVKM_I2C_PLUG | NVKM_I2C_UNPLUG,
 				.port = outp->base.edid->index,
diff --git a/drivers/gpu/drm/nouveau/core/engine/disp/priv.h b/drivers/gpu/drm/nouveau/core/engine/disp/priv.h
index dbd43ae9df81..6a0511d54ce6 100644
--- a/drivers/gpu/drm/nouveau/core/engine/disp/priv.h
+++ b/drivers/gpu/drm/nouveau/core/engine/disp/priv.h
@@ -40,7 +40,8 @@ int  _nouveau_disp_fini(struct nouveau_object *, bool);
 extern struct nouveau_oclass *nvkm_output_oclass;
 extern struct nouveau_oclass *nvkm_connector_oclass;
 
-int  nouveau_disp_vblank_ctor(void *data, u32 size, struct nvkm_notify *);
+int  nouveau_disp_vblank_ctor(struct nouveau_object *, void *data, u32 size,
+			      struct nvkm_notify *);
 void nouveau_disp_vblank(struct nouveau_disp *, int head);
 int  nouveau_disp_ntfy(struct nouveau_object *, u32, struct nvkm_event **);
 
diff --git a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
index 0f999fc45ab9..ac8375cf4eef 100644
--- a/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/core/engine/fifo/base.c
@@ -34,7 +34,8 @@
 #include <engine/fifo.h>
 
 static int
-nouveau_fifo_event_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_fifo_event_ctor(struct nouveau_object *object, void *data, u32 size,
+			struct nvkm_notify *notify)
 {
 	if (size == 0) {
 		notify->size  = 0;
@@ -170,7 +171,8 @@ _nouveau_fifo_channel_wr32(struct nouveau_object *object, u64 addr, u32 data)
 }
 
 int
-nouveau_fifo_uevent_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_fifo_uevent_ctor(struct nouveau_object *object, void *data, u32 size,
+			 struct nvkm_notify *notify)
 {
 	union {
 		struct nvif_notify_uevent_req none;
diff --git a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
index 4d2994d8cc32..a0fec205f9db 100644
--- a/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
+++ b/drivers/gpu/drm/nouveau/core/engine/software/nv50.c
@@ -175,7 +175,8 @@ nv50_software_context_ctor(struct nouveau_object *parent,
 		return ret;
 
 	for (i = 0; pdisp && i < pdisp->vblank.index_nr; i++) {
-		ret = nvkm_notify_init(&pdisp->vblank, pclass->vblank, false,
+		ret = nvkm_notify_init(NULL, &pdisp->vblank, pclass->vblank,
+				       false,
 				       &(struct nvif_notify_head_req_v0) {
 					.head = i,
 				       },
diff --git a/drivers/gpu/drm/nouveau/core/include/core/client.h b/drivers/gpu/drm/nouveau/core/include/core/client.h
index 1794a05205d8..b0ce9f6680b5 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/client.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/client.h
@@ -48,7 +48,7 @@ int  nouveau_client_init(struct nouveau_client *);
 int  nouveau_client_fini(struct nouveau_client *, bool suspend);
 const char *nouveau_client_name(void *obj);
 
-int nvkm_client_notify_new(struct nouveau_client *, struct nvkm_event *,
+int nvkm_client_notify_new(struct nouveau_object *, struct nvkm_event *,
 			   void *data, u32 size);
 int nvkm_client_notify_del(struct nouveau_client *, int index);
 int nvkm_client_notify_get(struct nouveau_client *, int index);
diff --git a/drivers/gpu/drm/nouveau/core/include/core/device.h b/drivers/gpu/drm/nouveau/core/include/core/device.h
index 8743766454a5..1d9d893929bb 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/device.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/device.h
@@ -24,6 +24,7 @@ enum nv_subdev_type {
 	 * been created, and are allowed to assume any subdevs in the
 	 * list above them exist and have been initialised.
 	 */
+	NVDEV_SUBDEV_FUSE,
 	NVDEV_SUBDEV_MXM,
 	NVDEV_SUBDEV_MC,
 	NVDEV_SUBDEV_BUS,
diff --git a/drivers/gpu/drm/nouveau/core/include/core/event.h b/drivers/gpu/drm/nouveau/core/include/core/event.h
index 51e55d03330a..92876528972f 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/event.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/event.h
@@ -4,7 +4,8 @@
 #include <core/notify.h>
 
 struct nvkm_event_func {
-	int  (*ctor)(void *data, u32 size, struct nvkm_notify *);
+	int  (*ctor)(struct nouveau_object *, void *data, u32 size,
+		     struct nvkm_notify *);
 	void (*send)(void *data, u32 size, struct nvkm_notify *);
 	void (*init)(struct nvkm_event *, int type, int index);
 	void (*fini)(struct nvkm_event *, int type, int index);
diff --git a/drivers/gpu/drm/nouveau/core/include/core/mm.h b/drivers/gpu/drm/nouveau/core/include/core/mm.h
index 2bf7d0e32261..bfe6931544fe 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/mm.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/mm.h
@@ -6,6 +6,10 @@ struct nouveau_mm_node {
 	struct list_head fl_entry;
 	struct list_head rl_entry;
 
+#define NVKM_MM_HEAP_ANY 0x00
+	u8  heap;
+#define NVKM_MM_TYPE_NONE 0x00
+#define NVKM_MM_TYPE_HOLE 0xff
 	u8  type;
 	u32 offset;
 	u32 length;
@@ -27,10 +31,10 @@ nouveau_mm_initialised(struct nouveau_mm *mm)
 
 int  nouveau_mm_init(struct nouveau_mm *, u32 offset, u32 length, u32 block);
 int  nouveau_mm_fini(struct nouveau_mm *);
-int  nouveau_mm_head(struct nouveau_mm *, u8 type, u32 size_max, u32 size_min,
-		     u32 align, struct nouveau_mm_node **);
-int  nouveau_mm_tail(struct nouveau_mm *, u8 type, u32 size_max, u32 size_min,
-		     u32 align, struct nouveau_mm_node **);
+int  nouveau_mm_head(struct nouveau_mm *, u8 heap, u8 type, u32 size_max,
+		     u32 size_min, u32 align, struct nouveau_mm_node **);
+int  nouveau_mm_tail(struct nouveau_mm *, u8 heap, u8 type, u32 size_max,
+		     u32 size_min, u32 align, struct nouveau_mm_node **);
 void nouveau_mm_free(struct nouveau_mm *, struct nouveau_mm_node **);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/core/notify.h b/drivers/gpu/drm/nouveau/core/include/core/notify.h
index 1262d8f020f3..a7c3c5f578cc 100644
--- a/drivers/gpu/drm/nouveau/core/include/core/notify.h
+++ b/drivers/gpu/drm/nouveau/core/include/core/notify.h
@@ -25,8 +25,9 @@ struct nvkm_notify {
 	const void *data;
 };
 
-int  nvkm_notify_init(struct nvkm_event *, int (*func)(struct nvkm_notify *),
-		      bool work, void *data, u32 size, u32 reply,
+int  nvkm_notify_init(struct nouveau_object *, struct nvkm_event *,
+		      int (*func)(struct nvkm_notify *), bool work,
+		      void *data, u32 size, u32 reply,
 		      struct nvkm_notify *);
 void nvkm_notify_fini(struct nvkm_notify *);
 void nvkm_notify_get(struct nvkm_notify *);
diff --git a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
index e5e4d930b2c2..2007453f6fce 100644
--- a/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/core/include/engine/fifo.h
@@ -116,7 +116,8 @@ extern struct nouveau_oclass *nve0_fifo_oclass;
 extern struct nouveau_oclass *gk20a_fifo_oclass;
 extern struct nouveau_oclass *nv108_fifo_oclass;
 
-int  nouveau_fifo_uevent_ctor(void *, u32, struct nvkm_notify *);
+int  nouveau_fifo_uevent_ctor(struct nouveau_object *, void *, u32,
+			      struct nvkm_notify *);
 void nouveau_fifo_uevent(struct nouveau_fifo *);
 
 void nv04_fifo_intr(struct nouveau_subdev *);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bar.h b/drivers/gpu/drm/nouveau/core/include/subdev/bar.h
index be037fac534c..257ddf6d36d4 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bar.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bar.h
@@ -12,7 +12,6 @@ struct nouveau_bar {
 
 	int (*alloc)(struct nouveau_bar *, struct nouveau_object *,
 		     struct nouveau_mem *, struct nouveau_object **);
-	void __iomem *iomem;
 
 	int (*kmap)(struct nouveau_bar *, struct nouveau_mem *,
 		    u32 flags, struct nouveau_vma *);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0205.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0205.h
new file mode 100644
index 000000000000..e171120cec81
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0205.h
@@ -0,0 +1,32 @@
+#ifndef __NVBIOS_M0205_H__
+#define __NVBIOS_M0205_H__
+
+struct nvbios_M0205T {
+	u16 freq;
+};
+
+u32 nvbios_M0205Te(struct nouveau_bios *,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz);
+u32 nvbios_M0205Tp(struct nouveau_bios *,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz,
+		   struct nvbios_M0205T *);
+
+struct nvbios_M0205E {
+	u8 type;
+};
+
+u32 nvbios_M0205Ee(struct nouveau_bios *, int idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_M0205Ep(struct nouveau_bios *, int idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_M0205E *);
+
+struct nvbios_M0205S {
+	u8 data;
+};
+
+u32 nvbios_M0205Se(struct nouveau_bios *, int ent, int idx, u8 *ver, u8 *hdr);
+u32 nvbios_M0205Sp(struct nouveau_bios *, int ent, int idx, u8 *ver, u8 *hdr,
+		   struct nvbios_M0205S *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0209.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0209.h
new file mode 100644
index 000000000000..67dc50d837bc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/M0209.h
@@ -0,0 +1,30 @@
+#ifndef __NVBIOS_M0209_H__
+#define __NVBIOS_M0209_H__
+
+u32 nvbios_M0209Te(struct nouveau_bios *,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz);
+
+struct nvbios_M0209E {
+	u8 v00_40;
+	u8 bits;
+	u8 modulo;
+	u8 v02_40;
+	u8 v02_07;
+	u8 v03;
+};
+
+u32 nvbios_M0209Ee(struct nouveau_bios *, int idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_M0209Ep(struct nouveau_bios *, int idx,
+		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		   struct nvbios_M0209E *);
+
+struct nvbios_M0209S {
+	u32 data[0x200];
+};
+
+u32 nvbios_M0209Se(struct nouveau_bios *, int ent, int idx, u8 *ver, u8 *hdr);
+u32 nvbios_M0209Sp(struct nouveau_bios *, int ent, int idx, u8 *ver, u8 *hdr,
+		   struct nvbios_M0209S *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/fan.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/fan.h
new file mode 100644
index 000000000000..119d0874e041
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/fan.h
@@ -0,0 +1,8 @@
+#ifndef __NVBIOS_FAN_H__
+#define __NVBIOS_FAN_H__
+
+#include <subdev/bios/therm.h>
+
+u16 nvbios_fan_parse(struct nouveau_bios *bios, struct nvbios_therm_fan *fan);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/ramcfg.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/ramcfg.h
index c086ac6d677d..a685bbd04568 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/ramcfg.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/ramcfg.h
@@ -4,60 +4,118 @@
 struct nouveau_bios;
 
 struct nvbios_ramcfg {
-	unsigned rammap_11_08_01:1;
-	unsigned rammap_11_08_0c:2;
-	unsigned rammap_11_08_10:1;
-	unsigned rammap_11_11_0c:2;
+	unsigned rammap_ver;
+	unsigned rammap_hdr;
+	unsigned rammap_min;
+	unsigned rammap_max;
+	union {
+		struct {
+			unsigned rammap_10_04_02:1;
+			unsigned rammap_10_04_08:1;
+		};
+		struct {
+			unsigned rammap_11_08_01:1;
+			unsigned rammap_11_08_0c:2;
+			unsigned rammap_11_08_10:1;
+			unsigned rammap_11_09_01ff:9;
+			unsigned rammap_11_0a_03fe:9;
+			unsigned rammap_11_0a_0400:1;
+			unsigned rammap_11_0a_0800:1;
+			unsigned rammap_11_0b_01f0:5;
+			unsigned rammap_11_0b_0200:1;
+			unsigned rammap_11_0b_0400:1;
+			unsigned rammap_11_0b_0800:1;
+			unsigned rammap_11_0d:8;
+			unsigned rammap_11_0e:8;
+			unsigned rammap_11_0f:8;
+			unsigned rammap_11_11_0c:2;
+		};
+	};
 
-	unsigned ramcfg_11_01_01:1;
-	unsigned ramcfg_11_01_02:1;
-	unsigned ramcfg_11_01_04:1;
-	unsigned ramcfg_11_01_08:1;
-	unsigned ramcfg_11_01_10:1;
-	unsigned ramcfg_11_01_20:1;
-	unsigned ramcfg_11_01_40:1;
-	unsigned ramcfg_11_01_80:1;
-	unsigned ramcfg_11_02_03:2;
-	unsigned ramcfg_11_02_04:1;
-	unsigned ramcfg_11_02_08:1;
-	unsigned ramcfg_11_02_10:1;
-	unsigned ramcfg_11_02_40:1;
-	unsigned ramcfg_11_02_80:1;
-	unsigned ramcfg_11_03_0f:4;
-	unsigned ramcfg_11_03_30:2;
-	unsigned ramcfg_11_03_c0:2;
-	unsigned ramcfg_11_03_f0:4;
-	unsigned ramcfg_11_04:8;
-	unsigned ramcfg_11_06:8;
-	unsigned ramcfg_11_07_02:1;
-	unsigned ramcfg_11_07_04:1;
-	unsigned ramcfg_11_07_08:1;
-	unsigned ramcfg_11_07_10:1;
-	unsigned ramcfg_11_07_40:1;
-	unsigned ramcfg_11_07_80:1;
-	unsigned ramcfg_11_08_01:1;
-	unsigned ramcfg_11_08_02:1;
-	unsigned ramcfg_11_08_04:1;
-	unsigned ramcfg_11_08_08:1;
-	unsigned ramcfg_11_08_10:1;
-	unsigned ramcfg_11_08_20:1;
-	unsigned ramcfg_11_09:8;
+	unsigned ramcfg_ver;
+	unsigned ramcfg_hdr;
+	unsigned ramcfg_timing;
+	union {
+		struct {
+			unsigned ramcfg_10_02_01:1;
+			unsigned ramcfg_10_02_02:1;
+			unsigned ramcfg_10_02_04:1;
+			unsigned ramcfg_10_02_08:1;
+			unsigned ramcfg_10_02_10:1;
+			unsigned ramcfg_10_02_20:1;
+			unsigned ramcfg_10_02_40:1;
+			unsigned ramcfg_10_03_0f:4;
+			unsigned ramcfg_10_05:8;
+			unsigned ramcfg_10_06:8;
+			unsigned ramcfg_10_07:8;
+			unsigned ramcfg_10_08:8;
+			unsigned ramcfg_10_09_0f:4;
+			unsigned ramcfg_10_09_f0:4;
+		};
+		struct {
+			unsigned ramcfg_11_01_01:1;
+			unsigned ramcfg_11_01_02:1;
+			unsigned ramcfg_11_01_04:1;
+			unsigned ramcfg_11_01_08:1;
+			unsigned ramcfg_11_01_10:1;
+			unsigned ramcfg_11_01_20:1;
+			unsigned ramcfg_11_01_40:1;
+			unsigned ramcfg_11_01_80:1;
+			unsigned ramcfg_11_02_03:2;
+			unsigned ramcfg_11_02_04:1;
+			unsigned ramcfg_11_02_08:1;
+			unsigned ramcfg_11_02_10:1;
+			unsigned ramcfg_11_02_40:1;
+			unsigned ramcfg_11_02_80:1;
+			unsigned ramcfg_11_03_0f:4;
+			unsigned ramcfg_11_03_30:2;
+			unsigned ramcfg_11_03_c0:2;
+			unsigned ramcfg_11_03_f0:4;
+			unsigned ramcfg_11_04:8;
+			unsigned ramcfg_11_06:8;
+			unsigned ramcfg_11_07_02:1;
+			unsigned ramcfg_11_07_04:1;
+			unsigned ramcfg_11_07_08:1;
+			unsigned ramcfg_11_07_10:1;
+			unsigned ramcfg_11_07_40:1;
+			unsigned ramcfg_11_07_80:1;
+			unsigned ramcfg_11_08_01:1;
+			unsigned ramcfg_11_08_02:1;
+			unsigned ramcfg_11_08_04:1;
+			unsigned ramcfg_11_08_08:1;
+			unsigned ramcfg_11_08_10:1;
+			unsigned ramcfg_11_08_20:1;
+			unsigned ramcfg_11_09:8;
+		};
+	};
 
+	unsigned timing_ver;
+	unsigned timing_hdr;
 	unsigned timing[11];
-	unsigned timing_20_2e_03:2;
-	unsigned timing_20_2e_30:2;
-	unsigned timing_20_2e_c0:2;
-	unsigned timing_20_2f_03:2;
-	unsigned timing_20_2c_003f:6;
-	unsigned timing_20_2c_1fc0:7;
-	unsigned timing_20_30_f8:5;
-	unsigned timing_20_30_07:3;
-	unsigned timing_20_31_0007:3;
-	unsigned timing_20_31_0078:4;
-	unsigned timing_20_31_0780:4;
-	unsigned timing_20_31_0800:1;
-	unsigned timing_20_31_7000:3;
-	unsigned timing_20_31_8000:1;
+	union {
+		struct {
+			unsigned timing_10_WR:8;
+			unsigned timing_10_CL:8;
+			unsigned timing_10_ODT:3;
+			unsigned timing_10_CWL:8;
+		};
+		struct {
+			unsigned timing_20_2e_03:2;
+			unsigned timing_20_2e_30:2;
+			unsigned timing_20_2e_c0:2;
+			unsigned timing_20_2f_03:2;
+			unsigned timing_20_2c_003f:6;
+			unsigned timing_20_2c_1fc0:7;
+			unsigned timing_20_30_f8:5;
+			unsigned timing_20_30_07:3;
+			unsigned timing_20_31_0007:3;
+			unsigned timing_20_31_0078:4;
+			unsigned timing_20_31_0780:4;
+			unsigned timing_20_31_0800:1;
+			unsigned timing_20_31_7000:3;
+			unsigned timing_20_31_8000:1;
+		};
+	};
 };
 
 u8 nvbios_ramcfg_count(struct nouveau_bios *);
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/rammap.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/rammap.h
index 5bdf8e4db40a..47e021d3e20d 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/rammap.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/rammap.h
@@ -8,9 +8,10 @@ u32 nvbios_rammapTe(struct nouveau_bios *, u8 *ver, u8 *hdr,
 
 u32 nvbios_rammapEe(struct nouveau_bios *, int idx,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_rammapEp(struct nouveau_bios *, int idx,
+		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		    struct nvbios_ramcfg *);
 u32 nvbios_rammapEm(struct nouveau_bios *, u16 mhz,
-		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u32 nvbios_rammapEp(struct nouveau_bios *, u16 mhz,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		    struct nvbios_ramcfg *);
 
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/therm.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/therm.h
index 8dc5051df55d..295d093f3b30 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/therm.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/therm.h
@@ -23,6 +23,12 @@ struct nvbios_therm_sensor {
 	struct nvbios_therm_threshold thrs_shutdown;
 };
 
+enum nvbios_therm_fan_type {
+	NVBIOS_THERM_FAN_UNK = 0,
+	NVBIOS_THERM_FAN_TOGGLE = 1,
+	NVBIOS_THERM_FAN_PWM = 2,
+};
+
 /* no vbios have more than 6 */
 #define NOUVEAU_TEMP_FAN_TRIP_MAX 10
 struct nouveau_therm_trip_point {
@@ -38,7 +44,9 @@ enum nvbios_therm_fan_mode {
 };
 
 struct nvbios_therm_fan {
-	u16 pwm_freq;
+	enum nvbios_therm_fan_type type;
+
+	u32 pwm_freq;
 
 	u8 min_duty;
 	u8 max_duty;
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h
index a5ca00dd2f61..36ed035d4d42 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/clock.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/clock.h
@@ -29,6 +29,7 @@ enum nv_clk_src {
 	nv_clk_src_mdiv,
 
 	nv_clk_src_core,
+	nv_clk_src_core_intm,
 	nv_clk_src_shader,
 
 	nv_clk_src_mem,
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
index 871e73914b24..8d0032f15205 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb.h
@@ -111,6 +111,7 @@ extern struct nouveau_oclass *gm107_fb_oclass;
 #include <subdev/bios/ramcfg.h>
 
 struct nouveau_ram_data {
+	struct list_head head;
 	struct nvbios_ramcfg bios;
 	u32 freq;
 };
@@ -136,6 +137,7 @@ struct nouveau_ram {
 
 	int ranks;
 	int parts;
+	int part_mask;
 
 	int  (*get)(struct nouveau_fb *, u64 size, u32 align,
 		    u32 size_nc, u32 type, struct nouveau_mem **);
@@ -144,11 +146,6 @@ struct nouveau_ram {
 	int  (*calc)(struct nouveau_fb *, u32 freq);
 	int  (*prog)(struct nouveau_fb *);
 	void (*tidy)(struct nouveau_fb *);
-	struct {
-		u8  version;
-		u32 data;
-		u8  size;
-	} rammap, ramcfg, timing;
 	u32 freq;
 	u32 mr[16];
 	u32 mr1_nuts;
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fb/regsnv04.h b/drivers/gpu/drm/nouveau/core/include/subdev/fb/regsnv04.h
new file mode 100644
index 000000000000..0f7fc0c52ab2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fb/regsnv04.h
@@ -0,0 +1,21 @@
+#ifndef __NOUVEAU_FB_REGS_04_H__
+#define __NOUVEAU_FB_REGS_04_H__
+
+#define NV04_PFB_BOOT_0						0x00100000
+#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
+#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
+#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
+#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
+#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
+#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
+#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/fuse.h b/drivers/gpu/drm/nouveau/core/include/subdev/fuse.h
new file mode 100644
index 000000000000..2b1ddb2a9a7d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/fuse.h
@@ -0,0 +1,30 @@
+#ifndef __NOUVEAU_FUSE_H__
+#define __NOUVEAU_FUSE_H__
+
+#include <core/subdev.h>
+#include <core/device.h>
+
+struct nouveau_fuse {
+	struct nouveau_subdev base;
+};
+
+static inline struct nouveau_fuse *
+nouveau_fuse(void *obj)
+{
+	return (void *)nv_device(obj)->subdev[NVDEV_SUBDEV_FUSE];
+}
+
+#define nouveau_fuse_create(p, e, o, d)                                        \
+	nouveau_fuse_create_((p), (e), (o), sizeof(**d), (void **)d)
+
+int  nouveau_fuse_create_(struct nouveau_object *, struct nouveau_object *,
+			  struct nouveau_oclass *, int, void **);
+void _nouveau_fuse_dtor(struct nouveau_object *);
+int  _nouveau_fuse_init(struct nouveau_object *);
+#define _nouveau_fuse_fini _nouveau_subdev_fini
+
+extern struct nouveau_oclass g80_fuse_oclass;
+extern struct nouveau_oclass gf100_fuse_oclass;
+extern struct nouveau_oclass gm107_fuse_oclass;
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
index b73733d21cc7..f855140dbcb7 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h
@@ -40,7 +40,7 @@ nouveau_gpio(void *obj)
 
 extern struct nouveau_oclass *nv10_gpio_oclass;
 extern struct nouveau_oclass *nv50_gpio_oclass;
-extern struct nouveau_oclass *nv92_gpio_oclass;
+extern struct nouveau_oclass *nv94_gpio_oclass;
 extern struct nouveau_oclass *nvd0_gpio_oclass;
 extern struct nouveau_oclass *nve0_gpio_oclass;
 
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h
index f73feec151db..bf3d1f611333 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/pwr.h
@@ -47,5 +47,8 @@ void nouveau_memx_wr32(struct nouveau_memx *, u32 addr, u32 data);
 void nouveau_memx_wait(struct nouveau_memx *,
 		       u32 addr, u32 mask, u32 data, u32 nsec);
 void nouveau_memx_nsec(struct nouveau_memx *, u32 nsec);
+void nouveau_memx_wait_vblank(struct nouveau_memx *);
+void nouveau_memx_block(struct nouveau_memx *);
+void nouveau_memx_unblock(struct nouveau_memx *);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h
index d4a68179e586..a437597dcafc 100644
--- a/drivers/gpu/drm/nouveau/core/include/subdev/therm.h
+++ b/drivers/gpu/drm/nouveau/core/include/subdev/therm.h
@@ -78,5 +78,6 @@ extern struct nouveau_oclass nv50_therm_oclass;
 extern struct nouveau_oclass nv84_therm_oclass;
 extern struct nouveau_oclass nva3_therm_oclass;
 extern struct nouveau_oclass nvd0_therm_oclass;
+extern struct nouveau_oclass gm107_therm_oclass;
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
index 8bcbdf39cfb2..b1adc69efd88 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bar/base.c
@@ -38,10 +38,12 @@ struct nouveau_barobj {
 static int
 nouveau_barobj_ctor(struct nouveau_object *parent,
 		    struct nouveau_object *engine,
-		    struct nouveau_oclass *oclass, void *mem, u32 size,
+		    struct nouveau_oclass *oclass, void *data, u32 size,
 		    struct nouveau_object **pobject)
 {
+	struct nouveau_device *device = nv_device(parent);
 	struct nouveau_bar *bar = (void *)engine;
+	struct nouveau_mem *mem = data;
 	struct nouveau_barobj *barobj;
 	int ret;
 
@@ -54,7 +56,13 @@ nouveau_barobj_ctor(struct nouveau_object *parent,
 	if (ret)
 		return ret;
 
-	barobj->iomem = bar->iomem + (u32)barobj->vma.offset;
+	barobj->iomem = ioremap(nv_device_resource_start(device, 3) +
+				(u32)barobj->vma.offset, mem->size << 12);
+	if (!barobj->iomem) {
+		nv_warn(bar, "PRAMIN ioremap failed\n");
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -63,8 +71,11 @@ nouveau_barobj_dtor(struct nouveau_object *object)
 {
 	struct nouveau_bar *bar = (void *)object->engine;
 	struct nouveau_barobj *barobj = (void *)object;
-	if (barobj->vma.node)
+	if (barobj->vma.node) {
+		if (barobj->iomem)
+			iounmap(barobj->iomem);
 		bar->unmap(bar, &barobj->vma);
+	}
 	nouveau_object_destroy(&barobj->base);
 }
 
@@ -99,12 +110,11 @@ nouveau_bar_alloc(struct nouveau_bar *bar, struct nouveau_object *parent,
 		  struct nouveau_mem *mem, struct nouveau_object **pobject)
 {
 	struct nouveau_object *engine = nv_object(bar);
-	int ret = -ENOMEM;
-	if (bar->iomem) {
-		ret = nouveau_object_ctor(parent, engine,
-					  &nouveau_barobj_oclass,
-					  mem, 0, pobject);
-	}
+	struct nouveau_object *gpuobj;
+	int ret = nouveau_object_ctor(parent, engine, &nouveau_barobj_oclass,
+				      mem, 0, &gpuobj);
+	if (ret == 0)
+		*pobject = gpuobj;
 	return ret;
 }
 
@@ -113,7 +123,6 @@ nouveau_bar_create_(struct nouveau_object *parent,
 		    struct nouveau_object *engine,
 		    struct nouveau_oclass *oclass, int length, void **pobject)
 {
-	struct nouveau_device *device = nv_device(parent);
 	struct nouveau_bar *bar;
 	int ret;
 
@@ -123,21 +132,12 @@ nouveau_bar_create_(struct nouveau_object *parent,
 	if (ret)
 		return ret;
 
-	if (nv_device_resource_len(device, 3) != 0) {
-		bar->iomem = ioremap(nv_device_resource_start(device, 3),
-				     nv_device_resource_len(device, 3));
-		if (!bar->iomem)
-			nv_warn(bar, "PRAMIN ioremap failed\n");
-	}
-
 	return 0;
 }
 
 void
 nouveau_bar_destroy(struct nouveau_bar *bar)
 {
-	if (bar->iomem)
-		iounmap(bar->iomem);
 	nouveau_subdev_destroy(&bar->base);
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/M0205.c b/drivers/gpu/drm/nouveau/core/subdev/bios/M0205.c
new file mode 100644
index 000000000000..ac9617c5fc2a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/M0205.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/M0205.h>
+
+u32
+nvbios_M0205Te(struct nouveau_bios *bios,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
+{
+	struct bit_entry bit_M;
+	u32 data = 0x00000000;
+
+	if (!bit_entry(bios, 'M', &bit_M)) {
+		if (bit_M.version == 2 && bit_M.length > 0x08)
+			data = nv_ro32(bios, bit_M.offset + 0x05);
+		if (data) {
+			*ver = nv_ro08(bios, data + 0x00);
+			switch (*ver) {
+			case 0x10:
+				*hdr = nv_ro08(bios, data + 0x01);
+				*len = nv_ro08(bios, data + 0x02);
+				*ssz = nv_ro08(bios, data + 0x03);
+				*snr = nv_ro08(bios, data + 0x04);
+				*cnt = nv_ro08(bios, data + 0x05);
+				return data;
+			default:
+				break;
+			}
+		}
+	}
+
+	return 0x00000000;
+}
+
+u32
+nvbios_M0205Tp(struct nouveau_bios *bios,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz,
+	       struct nvbios_M0205T *info)
+{
+	u32 data = nvbios_M0205Te(bios, ver, hdr, cnt, len, snr, ssz);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x10:
+		info->freq = nv_ro16(bios, data + 0x06);
+		break;
+	default:
+		break;
+	}
+	return data;
+}
+
+u32
+nvbios_M0205Ee(struct nouveau_bios *bios, int idx,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	u8  snr, ssz;
+	u32 data = nvbios_M0205Te(bios, ver, hdr, cnt, len, &snr, &ssz);
+	if (data && idx < *cnt) {
+		data = data + *hdr + idx * (*len + (snr * ssz));
+		*hdr = *len;
+		*cnt = snr;
+		*len = ssz;
+		return data;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0205Ep(struct nouveau_bios *bios, int idx,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+	       struct nvbios_M0205E *info)
+{
+	u32 data = nvbios_M0205Ee(bios, idx, ver, hdr, cnt, len);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x10:
+		info->type = nv_ro08(bios, data + 0x00) & 0x0f;
+		return data;
+	default:
+		break;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0205Se(struct nouveau_bios *bios, int ent, int idx, u8 *ver, u8 *hdr)
+{
+
+	u8  cnt, len;
+	u32 data = nvbios_M0205Ee(bios, ent, ver, hdr, &cnt, &len);
+	if (data && idx < cnt) {
+		data = data + *hdr + idx * len;
+		*hdr = len;
+		return data;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0205Sp(struct nouveau_bios *bios, int ent, int idx, u8 *ver, u8 *hdr,
+	       struct nvbios_M0205S *info)
+{
+	u32 data = nvbios_M0205Se(bios, ent, idx, ver, hdr);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x10:
+		info->data = nv_ro08(bios, data + 0x00);
+		return data;
+	default:
+		break;
+	}
+	return 0x00000000;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/M0209.c b/drivers/gpu/drm/nouveau/core/subdev/bios/M0209.c
new file mode 100644
index 000000000000..b142a510e89f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/M0209.c
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/M0209.h>
+
+u32
+nvbios_M0209Te(struct nouveau_bios *bios,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
+{
+	struct bit_entry bit_M;
+	u32 data = 0x00000000;
+
+	if (!bit_entry(bios, 'M', &bit_M)) {
+		if (bit_M.version == 2 && bit_M.length > 0x0c)
+			data = nv_ro32(bios, bit_M.offset + 0x09);
+		if (data) {
+			*ver = nv_ro08(bios, data + 0x00);
+			switch (*ver) {
+			case 0x10:
+				*hdr = nv_ro08(bios, data + 0x01);
+				*len = nv_ro08(bios, data + 0x02);
+				*ssz = nv_ro08(bios, data + 0x03);
+				*snr = 1;
+				*cnt = nv_ro08(bios, data + 0x04);
+				return data;
+			default:
+				break;
+			}
+		}
+	}
+
+	return 0x00000000;
+}
+
+u32
+nvbios_M0209Ee(struct nouveau_bios *bios, int idx,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	u8  snr, ssz;
+	u32 data = nvbios_M0209Te(bios, ver, hdr, cnt, len, &snr, &ssz);
+	if (data && idx < *cnt) {
+		data = data + *hdr + idx * (*len + (snr * ssz));
+		*hdr = *len;
+		*cnt = snr;
+		*len = ssz;
+		return data;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0209Ep(struct nouveau_bios *bios, int idx,
+	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+	       struct nvbios_M0209E *info)
+{
+	u32 data = nvbios_M0209Ee(bios, idx, ver, hdr, cnt, len);
+	memset(info, 0x00, sizeof(*info));
+	switch (!!data * *ver) {
+	case 0x10:
+		info->v00_40 = (nv_ro08(bios, data + 0x00) & 0x40) >> 6;
+		info->bits   =  nv_ro08(bios, data + 0x00) & 0x3f;
+		info->modulo =  nv_ro08(bios, data + 0x01);
+		info->v02_40 = (nv_ro08(bios, data + 0x02) & 0x40) >> 6;
+		info->v02_07 =  nv_ro08(bios, data + 0x02) & 0x07;
+		info->v03    =  nv_ro08(bios, data + 0x03);
+		return data;
+	default:
+		break;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0209Se(struct nouveau_bios *bios, int ent, int idx, u8 *ver, u8 *hdr)
+{
+
+	u8  cnt, len;
+	u32 data = nvbios_M0209Ee(bios, ent, ver, hdr, &cnt, &len);
+	if (data && idx < cnt) {
+		data = data + *hdr + idx * len;
+		*hdr = len;
+		return data;
+	}
+	return 0x00000000;
+}
+
+u32
+nvbios_M0209Sp(struct nouveau_bios *bios, int ent, int idx, u8 *ver, u8 *hdr,
+	       struct nvbios_M0209S *info)
+{
+	struct nvbios_M0209E M0209E;
+	u8  cnt, len;
+	u32 data = nvbios_M0209Ep(bios, ent, ver, hdr, &cnt, &len, &M0209E);
+	if (data) {
+		u32 i, data = nvbios_M0209Se(bios, ent, idx, ver, hdr);
+		memset(info, 0x00, sizeof(*info));
+		switch (!!data * *ver) {
+		case 0x10:
+			for (i = 0; i < ARRAY_SIZE(info->data); i++) {
+				u32 bits = (i % M0209E.modulo) * M0209E.bits;
+				u32 mask = (1ULL << M0209E.bits) - 1;
+				u16  off = bits / 8;
+				u8   mod = bits % 8;
+				info->data[i] = nv_ro32(bios, data + off);
+				info->data[i] = info->data[i] >> mod;
+				info->data[i] = info->data[i] & mask;
+			}
+			return data;
+		default:
+			break;
+		}
+	}
+	return 0x00000000;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
index 88606bfaf847..bd8d348385b3 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/dcb.c
@@ -124,6 +124,7 @@ dcb_outp_parse(struct nouveau_bios *bios, u8 idx, u8 *ver, u8 *len,
 	       struct dcb_output *outp)
 {
 	u16 dcb = dcb_outp(bios, idx, ver, len);
+	memset(outp, 0x00, sizeof(*outp));
 	if (dcb) {
 		if (*ver >= 0x20) {
 			u32 conn = nv_ro32(bios, dcb + 0x00);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/fan.c b/drivers/gpu/drm/nouveau/core/subdev/bios/fan.c
new file mode 100644
index 000000000000..e419892240f5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/fan.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/fan.h>
+
+u16
+nvbios_fan_table(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
+{
+	struct bit_entry bit_P;
+	u16 fan = 0x0000;
+
+	if (!bit_entry(bios, 'P', &bit_P)) {
+		if (bit_P.version == 2 && bit_P.length >= 0x5a)
+			fan = nv_ro16(bios, bit_P.offset + 0x58);
+
+		if (fan) {
+			*ver = nv_ro08(bios, fan + 0);
+			switch (*ver) {
+			case 0x10:
+				*hdr = nv_ro08(bios, fan + 1);
+				*len = nv_ro08(bios, fan + 2);
+				*cnt = nv_ro08(bios, fan + 3);
+				return fan;
+			default:
+				break;
+			}
+		}
+	}
+
+	return 0x0000;
+}
+
+u16
+nvbios_fan_entry(struct nouveau_bios *bios, int idx, u8 *ver, u8 *hdr,
+		 u8 *cnt, u8 *len)
+{
+	u16 data = nvbios_fan_table(bios, ver, hdr, cnt, len);
+	if (data && idx < *cnt)
+		return data + *hdr + (idx * (*len));
+	return 0x0000;
+}
+
+u16
+nvbios_fan_parse(struct nouveau_bios *bios, struct nvbios_therm_fan *fan)
+{
+	u8 ver, hdr, cnt, len;
+
+	u16 data = nvbios_fan_entry(bios, 0, &ver, &hdr, &cnt, &len);
+	if (data) {
+		u8 type = nv_ro08(bios, data + 0x00);
+		switch (type) {
+		case 0:
+			fan->type = NVBIOS_THERM_FAN_TOGGLE;
+			break;
+		case 1:
+		case 2:
+			/* TODO: Understand the difference between the two! */
+			fan->type = NVBIOS_THERM_FAN_PWM;
+			break;
+		default:
+			fan->type = NVBIOS_THERM_FAN_UNK;
+		}
+
+		fan->min_duty = nv_ro08(bios, data + 0x02);
+		fan->max_duty = nv_ro08(bios, data + 0x03);
+
+		fan->pwm_freq = nv_ro32(bios, data + 0x0b) & 0xffffff;
+	}
+	return data;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/core/subdev/bios/rammap.c
index 1811b2cb0472..585e69331ccc 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/rammap.c
@@ -75,31 +75,39 @@ nvbios_rammapEe(struct nouveau_bios *bios, int idx,
 }
 
 u32
-nvbios_rammapEm(struct nouveau_bios *bios, u16 khz,
-		u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
-{
-	int idx = 0;
-	u32 data;
-	while ((data = nvbios_rammapEe(bios, idx++, ver, hdr, cnt, len))) {
-		if (khz >= nv_ro16(bios, data + 0x00) &&
-		    khz <= nv_ro16(bios, data + 0x02))
-			break;
-	}
-	return data;
-}
-
-u32
-nvbios_rammapEp(struct nouveau_bios *bios, u16 khz,
+nvbios_rammapEp(struct nouveau_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		struct nvbios_ramcfg *p)
 {
-	u32 data = nvbios_rammapEm(bios, khz, ver, hdr, cnt, len);
+	u32 data = nvbios_rammapEe(bios, idx, ver, hdr, cnt, len), temp;
 	memset(p, 0x00, sizeof(*p));
+	p->rammap_ver = *ver;
+	p->rammap_hdr = *hdr;
 	switch (!!data * *ver) {
+	case 0x10:
+		p->rammap_min      =  nv_ro16(bios, data + 0x00);
+		p->rammap_max      =  nv_ro16(bios, data + 0x02);
+		p->rammap_10_04_02 = (nv_ro08(bios, data + 0x04) & 0x02) >> 1;
+		p->rammap_10_04_08 = (nv_ro08(bios, data + 0x04) & 0x08) >> 3;
+		break;
 	case 0x11:
+		p->rammap_min      =  nv_ro16(bios, data + 0x00);
+		p->rammap_max      =  nv_ro16(bios, data + 0x02);
 		p->rammap_11_08_01 = (nv_ro08(bios, data + 0x08) & 0x01) >> 0;
 		p->rammap_11_08_0c = (nv_ro08(bios, data + 0x08) & 0x0c) >> 2;
 		p->rammap_11_08_10 = (nv_ro08(bios, data + 0x08) & 0x10) >> 4;
+		temp = nv_ro32(bios, data + 0x09);
+		p->rammap_11_09_01ff = (temp & 0x000001ff) >> 0;
+		p->rammap_11_0a_03fe = (temp & 0x0003fe00) >> 9;
+		p->rammap_11_0a_0400 = (temp & 0x00040000) >> 18;
+		p->rammap_11_0a_0800 = (temp & 0x00080000) >> 19;
+		p->rammap_11_0b_01f0 = (temp & 0x01f00000) >> 20;
+		p->rammap_11_0b_0200 = (temp & 0x02000000) >> 25;
+		p->rammap_11_0b_0400 = (temp & 0x04000000) >> 26;
+		p->rammap_11_0b_0800 = (temp & 0x08000000) >> 27;
+		p->rammap_11_0d    =  nv_ro08(bios, data + 0x0d);
+		p->rammap_11_0e    =  nv_ro08(bios, data + 0x0e);
+		p->rammap_11_0f    =  nv_ro08(bios, data + 0x0f);
 		p->rammap_11_11_0c = (nv_ro08(bios, data + 0x11) & 0x0c) >> 2;
 		break;
 	default:
@@ -110,6 +118,20 @@ nvbios_rammapEp(struct nouveau_bios *bios, u16 khz,
 }
 
 u32
+nvbios_rammapEm(struct nouveau_bios *bios, u16 mhz,
+		u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+		struct nvbios_ramcfg *info)
+{
+	int idx = 0;
+	u32 data;
+	while ((data = nvbios_rammapEp(bios, idx++, ver, hdr, cnt, len, info))) {
+		if (mhz >= info->rammap_min && mhz <= info->rammap_max)
+			break;
+	}
+	return data;
+}
+
+u32
 nvbios_rammapSe(struct nouveau_bios *bios, u32 data,
 		u8 ever, u8 ehdr, u8 ecnt, u8 elen, int idx,
 		u8 *ver, u8 *hdr)
@@ -129,8 +151,28 @@ nvbios_rammapSp(struct nouveau_bios *bios, u32 data,
 		u8 *ver, u8 *hdr, struct nvbios_ramcfg *p)
 {
 	data = nvbios_rammapSe(bios, data, ever, ehdr, ecnt, elen, idx, ver, hdr);
+	p->ramcfg_ver = *ver;
+	p->ramcfg_hdr = *hdr;
 	switch (!!data * *ver) {
+	case 0x10:
+		p->ramcfg_timing   =  nv_ro08(bios, data + 0x01);
+		p->ramcfg_10_02_01 = (nv_ro08(bios, data + 0x02) & 0x01) >> 0;
+		p->ramcfg_10_02_02 = (nv_ro08(bios, data + 0x02) & 0x02) >> 1;
+		p->ramcfg_10_02_04 = (nv_ro08(bios, data + 0x02) & 0x04) >> 2;
+		p->ramcfg_10_02_08 = (nv_ro08(bios, data + 0x02) & 0x08) >> 3;
+		p->ramcfg_10_02_10 = (nv_ro08(bios, data + 0x02) & 0x10) >> 4;
+		p->ramcfg_10_02_20 = (nv_ro08(bios, data + 0x02) & 0x20) >> 5;
+		p->ramcfg_10_02_40 = (nv_ro08(bios, data + 0x02) & 0x40) >> 6;
+		p->ramcfg_10_03_0f = (nv_ro08(bios, data + 0x03) & 0x0f) >> 0;
+		p->ramcfg_10_05    = (nv_ro08(bios, data + 0x05) & 0xff) >> 0;
+		p->ramcfg_10_06    = (nv_ro08(bios, data + 0x06) & 0xff) >> 0;
+		p->ramcfg_10_07    = (nv_ro08(bios, data + 0x07) & 0xff) >> 0;
+		p->ramcfg_10_08    = (nv_ro08(bios, data + 0x08) & 0xff) >> 0;
+		p->ramcfg_10_09_0f = (nv_ro08(bios, data + 0x09) & 0x0f) >> 0;
+		p->ramcfg_10_09_f0 = (nv_ro08(bios, data + 0x09) & 0xf0) >> 4;
+		break;
 	case 0x11:
+		p->ramcfg_timing   =  nv_ro08(bios, data + 0x00);
 		p->ramcfg_11_01_01 = (nv_ro08(bios, data + 0x01) & 0x01) >> 0;
 		p->ramcfg_11_01_02 = (nv_ro08(bios, data + 0x01) & 0x02) >> 1;
 		p->ramcfg_11_01_04 = (nv_ro08(bios, data + 0x01) & 0x04) >> 2;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/timing.c b/drivers/gpu/drm/nouveau/core/subdev/bios/timing.c
index 350d44ab2ba2..46d955eb51eb 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/bios/timing.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/bios/timing.c
@@ -89,7 +89,15 @@ nvbios_timingEp(struct nouveau_bios *bios, int idx,
 		struct nvbios_ramcfg *p)
 {
 	u16 data = nvbios_timingEe(bios, idx, ver, hdr, cnt, len), temp;
+	p->timing_ver = *ver;
+	p->timing_hdr = *hdr;
 	switch (!!data * *ver) {
+	case 0x10:
+		p->timing_10_WR = nv_ro08(bios, data + 0x00);
+		p->timing_10_CL = nv_ro08(bios, data + 0x02);
+		p->timing_10_ODT = nv_ro08(bios, data + 0x0e) & 0x07;
+		p->timing_10_CWL = nv_ro08(bios, data + 0x13);
+		break;
 	case 0x20:
 		p->timing[0] = nv_ro32(bios, data + 0x00);
 		p->timing[1] = nv_ro32(bios, data + 0x04);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/base.c b/drivers/gpu/drm/nouveau/core/subdev/clock/base.c
index a276a711294a..e51b72d47129 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/base.c
@@ -573,7 +573,7 @@ nouveau_clock_create_(struct nouveau_object *parent,
 
 	clk->allow_reclock = allow_reclock;
 
-	ret = nvkm_notify_init(&device->event, nouveau_clock_pwrsrc, true,
+	ret = nvkm_notify_init(NULL, &device->event, nouveau_clock_pwrsrc, true,
 			       NULL, 0, 0, &clk->pwrsrc_ntfy);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
index 087012b18956..094551d8ad9b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.c
@@ -20,8 +20,10 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: Ben Skeggs
+ *          Roy Spliet
  */
 
+#include <engine/fifo.h>
 #include <subdev/bios.h>
 #include <subdev/bios/pll.h>
 #include <subdev/timer.h>
@@ -42,9 +44,17 @@ static u32
 read_vco(struct nva3_clock_priv *priv, int clk)
 {
 	u32 sctl = nv_rd32(priv, 0x4120 + (clk * 4));
-	if ((sctl & 0x00000030) != 0x00000030)
+
+	switch (sctl & 0x00000030) {
+	case 0x00000000:
+		return nv_device(priv)->crystal;
+	case 0x00000020:
 		return read_pll(priv, 0x41, 0x00e820);
-	return read_pll(priv, 0x42, 0x00e8a0);
+	case 0x00000030:
+		return read_pll(priv, 0x42, 0x00e8a0);
+	default:
+		return 0;
+	}
 }
 
 static u32
@@ -66,14 +76,25 @@ read_clk(struct nva3_clock_priv *priv, int clk, bool ignore_en)
 	if (!ignore_en && !(sctl & 0x00000100))
 		return 0;
 
+	/* out_alt */
+	if (sctl & 0x00000400)
+		return 108000;
+
+	/* vco_out */
 	switch (sctl & 0x00003000) {
 	case 0x00000000:
-		return nv_device(priv)->crystal;
+		if (!(sctl & 0x00000200))
+			return nv_device(priv)->crystal;
+		return 0;
 	case 0x00002000:
 		if (sctl & 0x00000040)
 			return 108000;
 		return 100000;
 	case 0x00003000:
+		/* vco_enable */
+		if (!(sctl & 0x00000001))
+			return 0;
+
 		sclk = read_vco(priv, clk);
 		sdiv = ((sctl & 0x003f0000) >> 16) + 2;
 		return (sclk * 2) / sdiv;
@@ -95,7 +116,9 @@ read_pll(struct nva3_clock_priv *priv, int clk, u32 pll)
 			N = (coef & 0x0000ff00) >> 8;
 			P = (coef & 0x003f0000) >> 16;
 
-			/* no post-divider on these.. */
+			/* no post-divider on these..
+			 * XXX: it looks more like two post-"dividers" that
+			 * cross each other out in the default RPLL config */
 			if ((pll & 0x00ff00) == 0x00e800)
 				P = 1;
 
@@ -114,13 +137,13 @@ static int
 nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src)
 {
 	struct nva3_clock_priv *priv = (void *)clk;
+	u32 hsrc;
 
 	switch (src) {
 	case nv_clk_src_crystal:
 		return nv_device(priv)->crystal;
-	case nv_clk_src_href:
-		return 100000;
 	case nv_clk_src_core:
+	case nv_clk_src_core_intm:
 		return read_pll(priv, 0x00, 0x4200);
 	case nv_clk_src_shader:
 		return read_pll(priv, 0x01, 0x4220);
@@ -132,24 +155,33 @@ nva3_clock_read(struct nouveau_clock *clk, enum nv_clk_src src)
 		return read_clk(priv, 0x21, false);
 	case nv_clk_src_daemon:
 		return read_clk(priv, 0x25, false);
+	case nv_clk_src_host:
+		hsrc = (nv_rd32(priv, 0xc040) & 0x30000000) >> 28;
+		switch (hsrc) {
+		case 0:
+			return read_clk(priv, 0x1d, false);
+		case 2:
+		case 3:
+			return 277000;
+		default:
+			nv_error(clk, "unknown HOST clock source %d\n", hsrc);
+			return -EINVAL;
+		}
 	default:
 		nv_error(clk, "invalid clock source %d\n", src);
 		return -EINVAL;
 	}
+
+	return 0;
 }
 
 int
-nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz,
+nva3_clk_info(struct nouveau_clock *clock, int clk, u32 khz,
 		struct nva3_clock_info *info)
 {
-	struct nouveau_bios *bios = nouveau_bios(clock);
 	struct nva3_clock_priv *priv = (void *)clock;
-	struct nvbios_pll limits;
-	u32 oclk, sclk, sdiv;
-	int P, N, M, diff;
-	int ret;
+	u32 oclk, sclk, sdiv, diff;
 
-	info->pll = 0;
 	info->clk = 0;
 
 	switch (khz) {
@@ -164,43 +196,69 @@ nva3_clock_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz,
 		return khz;
 	default:
 		sclk = read_vco(priv, clk);
-		sdiv = min((sclk * 2) / (khz - 2999), (u32)65);
-		/* if the clock has a PLL attached, and we can get a within
-		 * [-2, 3) MHz of a divider, we'll disable the PLL and use
-		 * the divider instead.
-		 *
-		 * divider can go as low as 2, limited here because NVIDIA
+		sdiv = min((sclk * 2) / khz, (u32)65);
+		oclk = (sclk * 2) / sdiv;
+		diff = ((khz + 3000) - oclk);
+
+		/* When imprecise, play it safe and aim for a clock lower than
+		 * desired rather than higher */
+		if (diff < 0) {
+			sdiv++;
+			oclk = (sclk * 2) / sdiv;
+		}
+
+		/* divider can go as low as 2, limited here because NVIDIA
 		 * and the VBIOS on my NVA8 seem to prefer using the PLL
 		 * for 810MHz - is there a good reason?
-		 */
+		 * XXX: PLLs with refclk 810MHz?  */
 		if (sdiv > 4) {
-			oclk = (sclk * 2) / sdiv;
-			diff = khz - oclk;
-			if (!pll || (diff >= -2000 && diff < 3000)) {
-				info->clk = (((sdiv - 2) << 16) | 0x00003100);
-				return oclk;
-			}
+			info->clk = (((sdiv - 2) << 16) | 0x00003100);
+			return oclk;
 		}
 
-		if (!pll)
-			return -ERANGE;
 		break;
 	}
 
+	return -ERANGE;
+}
+
+int
+nva3_pll_info(struct nouveau_clock *clock, int clk, u32 pll, u32 khz,
+		struct nva3_clock_info *info)
+{
+	struct nouveau_bios *bios = nouveau_bios(clock);
+	struct nva3_clock_priv *priv = (void *)clock;
+	struct nvbios_pll limits;
+	int P, N, M, diff;
+	int ret;
+
+	info->pll = 0;
+
+	/* If we can get a within [-2, 3) MHz of a divider, we'll disable the
+	 * PLL and use the divider instead. */
+	ret = nva3_clk_info(clock, clk, khz, info);
+	diff = khz - ret;
+	if (!pll || (diff >= -2000 && diff < 3000)) {
+		goto out;
+	}
+
+	/* Try with PLL */
 	ret = nvbios_pll_parse(bios, pll, &limits);
 	if (ret)
 		return ret;
 
-	limits.refclk = read_clk(priv, clk - 0x10, true);
-	if (!limits.refclk)
+	ret = nva3_clk_info(clock, clk - 0x10, limits.refclk, info);
+	if (ret != limits.refclk)
 		return -EINVAL;
 
 	ret = nva3_pll_calc(nv_subdev(priv), &limits, khz, &N, NULL, &M, &P);
 	if (ret >= 0) {
-		info->clk = nv_rd32(priv, 0x4120 + (clk * 4));
 		info->pll = (P << 16) | (N << 8) | M;
 	}
 
+out:
+	info->fb_delay = max(((khz + 7566) / 15133), (u32) 18);
+
 	return ret ? ret : -ERANGE;
 }
 
@@ -208,13 +266,76 @@ static int
 calc_clk(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate,
 	 int clk, u32 pll, int idx)
 {
-	int ret = nva3_clock_info(&priv->base, clk, pll, cstate->domain[idx],
+	int ret = nva3_pll_info(&priv->base, clk, pll, cstate->domain[idx],
 				  &priv->eng[idx]);
 	if (ret >= 0)
 		return 0;
 	return ret;
 }
 
+static int
+calc_host(struct nva3_clock_priv *priv, struct nouveau_cstate *cstate)
+{
+	int ret = 0;
+	u32 kHz = cstate->domain[nv_clk_src_host];
+	struct nva3_clock_info *info = &priv->eng[nv_clk_src_host];
+
+	if (kHz == 277000) {
+		info->clk = 0;
+		info->host_out = NVA3_HOST_277;
+		return 0;
+	}
+
+	info->host_out = NVA3_HOST_CLK;
+
+	ret = nva3_clk_info(&priv->base, 0x1d, kHz, info);
+	if (ret >= 0)
+		return 0;
+	return ret;
+}
+
+int
+nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags)
+{
+	struct nouveau_fifo *pfifo = nouveau_fifo(clk);
+
+	/* halt and idle execution engines */
+	nv_mask(clk, 0x020060, 0x00070000, 0x00000000);
+	nv_mask(clk, 0x002504, 0x00000001, 0x00000001);
+	/* Wait until the interrupt handler is finished */
+	if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000))
+		return -EBUSY;
+
+	if (pfifo)
+		pfifo->pause(pfifo, flags);
+
+	if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010))
+		return -EIO;
+	if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f))
+		return -EIO;
+
+	return 0;
+}
+
+void
+nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags)
+{
+	struct nouveau_fifo *pfifo = nouveau_fifo(clk);
+
+	if (pfifo && flags)
+		pfifo->start(pfifo, flags);
+
+	nv_mask(clk, 0x002504, 0x00000001, 0x00000000);
+	nv_mask(clk, 0x020060, 0x00070000, 0x00040000);
+}
+
+static void
+disable_clk_src(struct nva3_clock_priv *priv, u32 src)
+{
+	nv_mask(priv, src, 0x00000100, 0x00000000);
+	nv_mask(priv, src, 0x00000001, 0x00000000);
+}
+
 static void
 prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx)
 {
@@ -223,24 +344,35 @@ prog_pll(struct nva3_clock_priv *priv, int clk, u32 pll, int idx)
 	const u32 src1 = 0x004160 + (clk * 4);
 	const u32 ctrl = pll + 0;
 	const u32 coef = pll + 4;
+	u32 bypass;
 
 	if (info->pll) {
-		nv_mask(priv, src0, 0x00000101, 0x00000101);
+		/* Always start from a non-PLL clock */
+		bypass = nv_rd32(priv, ctrl)  & 0x00000008;
+		if (!bypass) {
+			nv_mask(priv, src1, 0x00000101, 0x00000101);
+			nv_mask(priv, ctrl, 0x00000008, 0x00000008);
+			udelay(20);
+		}
+
+		nv_mask(priv, src0, 0x003f3141, 0x00000101 | info->clk);
 		nv_wr32(priv, coef, info->pll);
 		nv_mask(priv, ctrl, 0x00000015, 0x00000015);
 		nv_mask(priv, ctrl, 0x00000010, 0x00000000);
-		nv_wait(priv, ctrl, 0x00020000, 0x00020000);
+		if (!nv_wait(priv, ctrl, 0x00020000, 0x00020000)) {
+			nv_mask(priv, ctrl, 0x00000010, 0x00000010);
+			nv_mask(priv, src0, 0x00000101, 0x00000000);
+			return;
+		}
 		nv_mask(priv, ctrl, 0x00000010, 0x00000010);
 		nv_mask(priv, ctrl, 0x00000008, 0x00000000);
-		nv_mask(priv, src1, 0x00000100, 0x00000000);
-		nv_mask(priv, src1, 0x00000001, 0x00000000);
+		disable_clk_src(priv, src1);
 	} else {
 		nv_mask(priv, src1, 0x003f3141, 0x00000101 | info->clk);
 		nv_mask(priv, ctrl, 0x00000018, 0x00000018);
 		udelay(20);
 		nv_mask(priv, ctrl, 0x00000001, 0x00000000);
-		nv_mask(priv, src0, 0x00000100, 0x00000000);
-		nv_mask(priv, src0, 0x00000001, 0x00000000);
+		disable_clk_src(priv, src0);
 	}
 }
 
@@ -251,18 +383,72 @@ prog_clk(struct nva3_clock_priv *priv, int clk, int idx)
 	nv_mask(priv, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | info->clk);
 }
 
+static void
+prog_host(struct nva3_clock_priv *priv)
+{
+	struct nva3_clock_info *info = &priv->eng[nv_clk_src_host];
+	u32 hsrc = (nv_rd32(priv, 0xc040));
+
+	switch (info->host_out) {
+	case NVA3_HOST_277:
+		if ((hsrc & 0x30000000) == 0) {
+			nv_wr32(priv, 0xc040, hsrc | 0x20000000);
+			disable_clk_src(priv, 0x4194);
+		}
+		break;
+	case NVA3_HOST_CLK:
+		prog_clk(priv, 0x1d, nv_clk_src_host);
+		if ((hsrc & 0x30000000) >= 0x20000000) {
+			nv_wr32(priv, 0xc040, hsrc & ~0x30000000);
+		}
+		break;
+	default:
+		break;
+	}
+
+	/* This seems to be a clock gating factor on idle, always set to 64 */
+	nv_wr32(priv, 0xc044, 0x3e);
+}
+
+static void
+prog_core(struct nva3_clock_priv *priv, int idx)
+{
+	struct nva3_clock_info *info = &priv->eng[idx];
+	u32 fb_delay = nv_rd32(priv, 0x10002c);
+
+	if (fb_delay < info->fb_delay)
+		nv_wr32(priv, 0x10002c, info->fb_delay);
+
+	prog_pll(priv, 0x00, 0x004200, idx);
+
+	if (fb_delay > info->fb_delay)
+		nv_wr32(priv, 0x10002c, info->fb_delay);
+}
+
 static int
 nva3_clock_calc(struct nouveau_clock *clk, struct nouveau_cstate *cstate)
 {
 	struct nva3_clock_priv *priv = (void *)clk;
+	struct nva3_clock_info *core = &priv->eng[nv_clk_src_core];
 	int ret;
 
 	if ((ret = calc_clk(priv, cstate, 0x10, 0x4200, nv_clk_src_core)) ||
 	    (ret = calc_clk(priv, cstate, 0x11, 0x4220, nv_clk_src_shader)) ||
 	    (ret = calc_clk(priv, cstate, 0x20, 0x0000, nv_clk_src_disp)) ||
-	    (ret = calc_clk(priv, cstate, 0x21, 0x0000, nv_clk_src_vdec)))
+	    (ret = calc_clk(priv, cstate, 0x21, 0x0000, nv_clk_src_vdec)) ||
+	    (ret = calc_host(priv, cstate)))
 		return ret;
 
+	/* XXX: Should be reading the highest bit in the VBIOS clock to decide
+	 * whether to use a PLL or not... but using a PLL defeats the purpose */
+	if (core->pll) {
+		ret = nva3_clk_info(clk, 0x10,
+				cstate->domain[nv_clk_src_core_intm],
+				&priv->eng[nv_clk_src_core_intm]);
+		if (ret < 0)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -270,11 +456,31 @@ static int
 nva3_clock_prog(struct nouveau_clock *clk)
 {
 	struct nva3_clock_priv *priv = (void *)clk;
-	prog_pll(priv, 0x00, 0x004200, nv_clk_src_core);
+	struct nva3_clock_info *core = &priv->eng[nv_clk_src_core];
+	int ret = 0;
+	unsigned long flags;
+	unsigned long *f = &flags;
+
+	ret = nva3_clock_pre(clk, f);
+	if (ret)
+		goto out;
+
+	if (core->pll)
+		prog_core(priv, nv_clk_src_core_intm);
+
+	prog_core(priv,  nv_clk_src_core);
 	prog_pll(priv, 0x01, 0x004220, nv_clk_src_shader);
 	prog_clk(priv, 0x20, nv_clk_src_disp);
 	prog_clk(priv, 0x21, nv_clk_src_vdec);
-	return 0;
+	prog_host(priv);
+
+out:
+	if (ret == -EBUSY)
+		f = NULL;
+
+	nva3_clock_post(clk, f);
+
+	return ret;
 }
 
 static void
@@ -284,13 +490,14 @@ nva3_clock_tidy(struct nouveau_clock *clk)
 
 static struct nouveau_clocks
 nva3_domain[] = {
-	{ nv_clk_src_crystal, 0xff },
-	{ nv_clk_src_href   , 0xff },
-	{ nv_clk_src_core   , 0x00, 0, "core", 1000 },
-	{ nv_clk_src_shader , 0x01, 0, "shader", 1000 },
-	{ nv_clk_src_mem    , 0x02, 0, "memory", 1000 },
-	{ nv_clk_src_vdec   , 0x03 },
-	{ nv_clk_src_disp   , 0x04 },
+	{ nv_clk_src_crystal  , 0xff },
+	{ nv_clk_src_core     , 0x00, 0, "core", 1000 },
+	{ nv_clk_src_shader   , 0x01, 0, "shader", 1000 },
+	{ nv_clk_src_mem      , 0x02, 0, "memory", 1000 },
+	{ nv_clk_src_vdec     , 0x03 },
+	{ nv_clk_src_disp     , 0x04 },
+	{ nv_clk_src_host     , 0x05 },
+	{ nv_clk_src_core_intm, 0x06 },
 	{ nv_clk_src_max }
 };
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h
index 6229a509b42e..a45a1038b12f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nva3.h
@@ -6,9 +6,15 @@
 struct nva3_clock_info {
 	u32 clk;
 	u32 pll;
+	enum {
+		NVA3_HOST_277,
+		NVA3_HOST_CLK,
+	} host_out;
+	u32 fb_delay;
 };
 
-int nva3_clock_info(struct nouveau_clock *, int, u32, u32,
+int nva3_pll_info(struct nouveau_clock *, int, u32, u32,
 		    struct nva3_clock_info *);
-
+int nva3_clock_pre(struct nouveau_clock *clk, unsigned long *flags);
+void nva3_clock_post(struct nouveau_clock *clk, unsigned long *flags);
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c
index 74e19731b1b7..54aeab8005a0 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/clock/nvaa.c
@@ -28,6 +28,7 @@
 #include <subdev/timer.h>
 #include <subdev/clock.h>
 
+#include "nva3.h"
 #include "pll.h"
 
 struct nvaa_clock_priv {
@@ -299,25 +300,14 @@ static int
 nvaa_clock_prog(struct nouveau_clock *clk)
 {
 	struct nvaa_clock_priv *priv = (void *)clk;
-	struct nouveau_fifo *pfifo = nouveau_fifo(clk);
+	u32 pllmask = 0, mast;
 	unsigned long flags;
-	u32 pllmask = 0, mast, ptherm_gate;
-	int ret = -EBUSY;
-
-	/* halt and idle execution engines */
-	ptherm_gate = nv_mask(clk, 0x020060, 0x00070000, 0x00000000);
-	nv_mask(clk, 0x002504, 0x00000001, 0x00000001);
-	/* Wait until the interrupt handler is finished */
-	if (!nv_wait(clk, 0x000100, 0xffffffff, 0x00000000))
-		goto resume;
-
-	if (pfifo)
-		pfifo->pause(pfifo, &flags);
+	unsigned long *f = &flags;
+	int ret = 0;
 
-	if (!nv_wait(clk, 0x002504, 0x00000010, 0x00000010))
-		goto resume;
-	if (!nv_wait(clk, 0x00251c, 0x0000003f, 0x0000003f))
-		goto resume;
+	ret = nva3_clock_pre(clk, f);
+	if (ret)
+		goto out;
 
 	/* First switch to safe clocks: href */
 	mast = nv_mask(clk, 0xc054, 0x03400e70, 0x03400640);
@@ -375,15 +365,8 @@ nvaa_clock_prog(struct nouveau_clock *clk)
 	}
 
 	nv_wr32(clk, 0xc054, mast);
-	ret = 0;
 
 resume:
-	if (pfifo)
-		pfifo->start(pfifo, &flags);
-
-	nv_mask(clk, 0x002504, 0x00000001, 0x00000000);
-	nv_wr32(clk, 0x020060, ptherm_gate);
-
 	/* Disable some PLLs and dividers when unused */
 	if (priv->csrc != nv_clk_src_core) {
 		nv_wr32(clk, 0x4040, 0x00000000);
@@ -395,6 +378,12 @@ resume:
 		nv_mask(clk, 0x4020, 0x80000000, 0x00000000);
 	}
 
+out:
+	if (ret == -EBUSY)
+		f = NULL;
+
+	nva3_clock_post(clk, f);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/devinit/fbmem.h b/drivers/gpu/drm/nouveau/core/subdev/devinit/fbmem.h
index 4fe49cf4c99a..6103484fea72 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/devinit/fbmem.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/devinit/fbmem.h
@@ -26,22 +26,8 @@
 
 #include <core/device.h>
 
-#define NV04_PFB_BOOT_0						0x00100000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
-#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
-#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
-#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
+#include <subdev/fb/regsnv04.h>
+
 #define NV04_PFB_DEBUG_0					0x00100080
 #	define NV04_PFB_DEBUG_0_PAGE_MODE			0x00000001
 #	define NV04_PFB_DEBUG_0_REFRESH_OFF			0x00000010
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/gddr5.c b/drivers/gpu/drm/nouveau/core/subdev/fb/gddr5.c
index 66fe959b4f74..7fbbe05d5c60 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/gddr5.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/gddr5.c
@@ -40,7 +40,7 @@ nouveau_gddr5_calc(struct nouveau_ram *ram, bool nuts)
 	int WL, CL, WR, at[2], dt, ds;
 	int rq = ram->freq < 1000000; /* XXX */
 
-	switch (ram->ramcfg.version) {
+	switch (ram->next->bios.ramcfg_ver) {
 	case 0x11:
 		pd =  ram->next->bios.ramcfg_11_01_80;
 		lf =  ram->next->bios.ramcfg_11_01_40;
@@ -54,7 +54,7 @@ nouveau_gddr5_calc(struct nouveau_ram *ram, bool nuts)
 		return -ENOSYS;
 	}
 
-	switch (ram->timing.version) {
+	switch (ram->next->bios.timing_ver) {
 	case 0x20:
 		WL = (ram->next->bios.timing[1] & 0x00000f80) >> 7;
 		CL = (ram->next->bios.timing[1] & 0x0000001f);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
index f003c1b1893f..2209ade63339 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv20.c
@@ -45,7 +45,7 @@ nv20_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
-	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	if (!nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		if (!(flags & 2)) tile->zcomp = 0x00000000; /* Z16 */
 		else              tile->zcomp = 0x04000000; /* Z24S8 */
 		tile->zcomp |= tile->tag->offset;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
index f34f4223210b..e2a66c355c50 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv25.c
@@ -32,7 +32,7 @@ nv25_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
-	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	if (!nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		if (!(flags & 2)) tile->zcomp = 0x00100000; /* Z16 */
 		else              tile->zcomp = 0x00200000; /* Z24S8 */
 		tile->zcomp |= tile->tag->offset;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
index 69093f7151f0..cbec402ba5b9 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv30.c
@@ -51,7 +51,7 @@ nv30_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
-	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	if (!nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x01000000; /* Z16 */
 		else           tile->zcomp |= 0x02000000; /* Z24S8 */
 		tile->zcomp |= ((tile->tag->offset           ) >> 6);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
index 161b06e8fc3f..b2cf8c69fb2e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv35.c
@@ -32,7 +32,7 @@ nv35_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
-	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	if (!nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x04000000; /* Z16 */
 		else           tile->zcomp |= 0x08000000; /* Z24S8 */
 		tile->zcomp |= ((tile->tag->offset           ) >> 6);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
index 2dd3d0aab6bb..b4cdae2a3b2f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv36.c
@@ -32,7 +32,7 @@ nv36_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 {
 	u32 tiles = DIV_ROUND_UP(size, 0x40);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x40);
-	if (!nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	if (!nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		if (flags & 2) tile->zcomp |= 0x10000000; /* Z16 */
 		else           tile->zcomp |= 0x20000000; /* Z24S8 */
 		tile->zcomp |= ((tile->tag->offset           ) >> 6);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c b/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
index 95a115ab0c86..52814258c212 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/nv40.c
@@ -33,7 +33,7 @@ nv40_fb_tile_comp(struct nouveau_fb *pfb, int i, u32 size, u32 flags,
 	u32 tiles = DIV_ROUND_UP(size, 0x80);
 	u32 tags  = round_up(tiles / pfb->ram->parts, 0x100);
 	if ( (flags & 2) &&
-	    !nouveau_mm_head(&pfb->tags, 1, tags, tags, 1, &tile->tag)) {
+	    !nouveau_mm_head(&pfb->tags, 0, 1, tags, tags, 1, &tile->tag)) {
 		tile->zcomp  = 0x28000000; /* Z24S8_SPLIT_GRAD */
 		tile->zcomp |= ((tile->tag->offset           ) >> 8);
 		tile->zcomp |= ((tile->tag->offset + tags - 1) >> 8) << 13;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
index 82273f832e42..60322e906dd4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/priv.h
@@ -35,6 +35,7 @@ extern struct nouveau_oclass nve0_ram_oclass;
 extern struct nouveau_oclass gk20a_ram_oclass;
 extern struct nouveau_oclass gm107_ram_oclass;
 
+int nouveau_sddr2_calc(struct nouveau_ram *ram);
 int nouveau_sddr3_calc(struct nouveau_ram *ram);
 int nouveau_gddr5_calc(struct nouveau_ram *ram, bool nuts);
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h
index 2af9cfd2c60f..d1fbbe4b00a2 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramfuc.h
@@ -12,16 +12,32 @@ struct ramfuc {
 struct ramfuc_reg {
 	int sequence;
 	bool force;
-	u32 addr[2];
+	u32 addr;
+	u32 stride; /* in bytes */
+	u32 mask;
 	u32 data;
 };
 
 static inline struct ramfuc_reg
+ramfuc_stride(u32 addr, u32 stride, u32 mask)
+{
+	return (struct ramfuc_reg) {
+		.sequence = 0,
+		.addr = addr,
+		.stride = stride,
+		.mask = mask,
+		.data = 0xdeadbeef,
+	};
+}
+
+static inline struct ramfuc_reg
 ramfuc_reg2(u32 addr1, u32 addr2)
 {
 	return (struct ramfuc_reg) {
 		.sequence = 0,
-		.addr = { addr1, addr2 },
+		.addr = addr1,
+		.stride = addr2 - addr1,
+		.mask = 0x3,
 		.data = 0xdeadbeef,
 	};
 }
@@ -29,7 +45,13 @@ ramfuc_reg2(u32 addr1, u32 addr2)
 static noinline struct ramfuc_reg
 ramfuc_reg(u32 addr)
 {
-	return ramfuc_reg2(addr, addr);
+	return (struct ramfuc_reg) {
+		.sequence = 0,
+		.addr = addr,
+		.stride = 0,
+		.mask = 0x1,
+		.data = 0xdeadbeef,
+	};
 }
 
 static inline int
@@ -62,18 +84,25 @@ static inline u32
 ramfuc_rd32(struct ramfuc *ram, struct ramfuc_reg *reg)
 {
 	if (reg->sequence != ram->sequence)
-		reg->data = nv_rd32(ram->pfb, reg->addr[0]);
+		reg->data = nv_rd32(ram->pfb, reg->addr);
 	return reg->data;
 }
 
 static inline void
 ramfuc_wr32(struct ramfuc *ram, struct ramfuc_reg *reg, u32 data)
 {
+	unsigned int mask, off = 0;
+
 	reg->sequence = ram->sequence;
 	reg->data = data;
-	if (reg->addr[0] != reg->addr[1])
-		nouveau_memx_wr32(ram->memx, reg->addr[1], reg->data);
-	nouveau_memx_wr32(ram->memx, reg->addr[0], reg->data);
+
+	for (mask = reg->mask; mask > 0; mask = (mask & ~1) >> 1) {
+		if (mask & 1) {
+			nouveau_memx_wr32(ram->memx, reg->addr+off, reg->data);
+		}
+
+		off += reg->stride;
+	}
 }
 
 static inline void
@@ -105,14 +134,35 @@ ramfuc_nsec(struct ramfuc *ram, u32 nsec)
 	nouveau_memx_nsec(ram->memx, nsec);
 }
 
-#define ram_init(s,p)       ramfuc_init(&(s)->base, (p))
-#define ram_exec(s,e)       ramfuc_exec(&(s)->base, (e))
-#define ram_have(s,r)       ((s)->r_##r.addr[0] != 0x000000)
-#define ram_rd32(s,r)       ramfuc_rd32(&(s)->base, &(s)->r_##r)
-#define ram_wr32(s,r,d)     ramfuc_wr32(&(s)->base, &(s)->r_##r, (d))
-#define ram_nuke(s,r)       ramfuc_nuke(&(s)->base, &(s)->r_##r)
-#define ram_mask(s,r,m,d)   ramfuc_mask(&(s)->base, &(s)->r_##r, (m), (d))
-#define ram_wait(s,r,m,d,n) ramfuc_wait(&(s)->base, (r), (m), (d), (n))
-#define ram_nsec(s,n)       ramfuc_nsec(&(s)->base, (n))
+static inline void
+ramfuc_wait_vblank(struct ramfuc *ram)
+{
+	nouveau_memx_wait_vblank(ram->memx);
+}
+
+static inline void
+ramfuc_block(struct ramfuc *ram)
+{
+	nouveau_memx_block(ram->memx);
+}
+
+static inline void
+ramfuc_unblock(struct ramfuc *ram)
+{
+	nouveau_memx_unblock(ram->memx);
+}
+
+#define ram_init(s,p)        ramfuc_init(&(s)->base, (p))
+#define ram_exec(s,e)        ramfuc_exec(&(s)->base, (e))
+#define ram_have(s,r)        ((s)->r_##r.addr != 0x000000)
+#define ram_rd32(s,r)        ramfuc_rd32(&(s)->base, &(s)->r_##r)
+#define ram_wr32(s,r,d)      ramfuc_wr32(&(s)->base, &(s)->r_##r, (d))
+#define ram_nuke(s,r)        ramfuc_nuke(&(s)->base, &(s)->r_##r)
+#define ram_mask(s,r,m,d)    ramfuc_mask(&(s)->base, &(s)->r_##r, (m), (d))
+#define ram_wait(s,r,m,d,n)  ramfuc_wait(&(s)->base, (r), (m), (d), (n))
+#define ram_nsec(s,n)        ramfuc_nsec(&(s)->base, (n))
+#define ram_wait_vblank(s)   ramfuc_wait_vblank(&(s)->base)
+#define ram_block(s)         ramfuc_block(&(s)->base)
+#define ram_unblock(s)       ramfuc_unblock(&(s)->base)
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv04.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv04.c
index e781080d3327..1972268d1410 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv04.c
@@ -22,22 +22,7 @@
  * Authors: Ben Skeggs
  */
 
-#define NV04_PFB_BOOT_0						0x00100000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_32MB			0x00000000
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_4MB			0x00000001
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_8MB			0x00000002
-#	define NV04_PFB_BOOT_0_RAM_AMOUNT_16MB			0x00000003
-#	define NV04_PFB_BOOT_0_RAM_WIDTH_128			0x00000004
-#	define NV04_PFB_BOOT_0_RAM_TYPE				0x00000028
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_8MBIT		0x00000000
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT		0x00000008
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SGRAM_16MBIT_4BANK	0x00000010
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_16MBIT		0x00000018
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBIT		0x00000020
-#	define NV04_PFB_BOOT_0_RAM_TYPE_SDRAM_64MBITX16		0x00000028
-#	define NV04_PFB_BOOT_0_UMA_ENABLE			0x00000100
-#	define NV04_PFB_BOOT_0_UMA_SIZE				0x0000f000
+#include <subdev/fb/regsnv04.h>
 
 #include "priv.h"
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
index e5d12c24cc43..64a983c96625 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnv50.c
@@ -280,7 +280,7 @@ nv50_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 		if (align == 16) {
 			int n = (max >> 4) * comp;
 
-			ret = nouveau_mm_head(tags, 1, n, n, 1, &mem->tag);
+			ret = nouveau_mm_head(tags, 0, 1, n, n, 1, &mem->tag);
 			if (ret)
 				mem->tag = NULL;
 		}
@@ -296,9 +296,9 @@ nv50_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	type = nv50_fb_memtype[type];
 	do {
 		if (back)
-			ret = nouveau_mm_tail(heap, type, max, min, align, &r);
+			ret = nouveau_mm_tail(heap, 0, type, max, min, align, &r);
 		else
-			ret = nouveau_mm_head(heap, type, max, min, align, &r);
+			ret = nouveau_mm_head(heap, 0, type, max, min, align, &r);
 		if (ret) {
 			mutex_unlock(&pfb->base.mutex);
 			pfb->ram->put(pfb, &mem);
@@ -319,27 +319,22 @@ nv50_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 static u32
 nv50_fb_vram_rblock(struct nouveau_fb *pfb, struct nouveau_ram *ram)
 {
-	int i, parts, colbits, rowbitsa, rowbitsb, banks;
+	int colbits, rowbitsa, rowbitsb, banks;
 	u64 rowsize, predicted;
-	u32 r0, r4, rt, ru, rblock_size;
+	u32 r0, r4, rt, rblock_size;
 
 	r0 = nv_rd32(pfb, 0x100200);
 	r4 = nv_rd32(pfb, 0x100204);
 	rt = nv_rd32(pfb, 0x100250);
-	ru = nv_rd32(pfb, 0x001540);
-	nv_debug(pfb, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt, ru);
-
-	for (i = 0, parts = 0; i < 8; i++) {
-		if (ru & (0x00010000 << i))
-			parts++;
-	}
+	nv_debug(pfb, "memcfg 0x%08x 0x%08x 0x%08x 0x%08x\n", r0, r4, rt,
+			nv_rd32(pfb, 0x001540));
 
 	colbits  =  (r4 & 0x0000f000) >> 12;
 	rowbitsa = ((r4 & 0x000f0000) >> 16) + 8;
 	rowbitsb = ((r4 & 0x00f00000) >> 20) + 8;
 	banks    = 1 << (((r4 & 0x03000000) >> 24) + 2);
 
-	rowsize = parts * banks * (1 << colbits) * 8;
+	rowsize = ram->parts * banks * (1 << colbits) * 8;
 	predicted = rowsize << rowbitsa;
 	if (r0 & 0x00000004)
 		predicted += rowsize << rowbitsb;
@@ -376,6 +371,9 @@ nv50_ram_create_(struct nouveau_object *parent, struct nouveau_object *engine,
 	ram->size = nv_rd32(pfb, 0x10020c);
 	ram->size = (ram->size & 0xffffff00) | ((ram->size & 0x000000ff) << 32);
 
+	ram->part_mask = (nv_rd32(pfb, 0x001540) & 0x00ff0000) >> 16;
+	ram->parts = hweight8(ram->part_mask);
+
 	switch (nv_rd32(pfb, 0x100714) & 0x00000007) {
 	case 0: ram->type = NV_MEM_TYPE_DDR1; break;
 	case 1:
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c
index 8076fb195dd5..3601deca0bd5 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnva3.c
@@ -79,20 +79,27 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 	struct nva3_ram *ram = (void *)pfb->ram;
 	struct nva3_ramfuc *fuc = &ram->fuc;
 	struct nva3_clock_info mclk;
-	u8  ver, cnt, len, strap;
+	struct nouveau_ram_data *next;
+	u8  ver, hdr, cnt, len, strap;
 	u32 data;
-	struct {
-		u32 data;
-		u8  size;
-	} rammap, ramcfg, timing;
 	u32 r004018, r100760, ctrl;
 	u32 unk714, unk718, unk71c;
-	int ret;
+	int ret, i;
+
+	next = &ram->base.target;
+	next->freq = freq;
+	ram->base.next = next;
 
 	/* lookup memory config data relevant to the target frequency */
-	rammap.data = nvbios_rammapEm(bios, freq / 1000, &ver, &rammap.size,
-				     &cnt, &ramcfg.size);
-	if (!rammap.data || ver != 0x10 || rammap.size < 0x0e) {
+	i = 0;
+	while ((data = nvbios_rammapEp(bios, i++, &ver, &hdr, &cnt, &len,
+				      &next->bios))) {
+		if (freq / 1000 >= next->bios.rammap_min &&
+		    freq / 1000 <= next->bios.rammap_max)
+			break;
+	}
+
+	if (!data || ver != 0x10 || hdr < 0x0e) {
 		nv_error(pfb, "invalid/missing rammap entry\n");
 		return -EINVAL;
 	}
@@ -104,26 +111,25 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 		return -EINVAL;
 	}
 
-	ramcfg.data = rammap.data + rammap.size + (strap * ramcfg.size);
-	if (!ramcfg.data || ver != 0x10 || ramcfg.size < 0x0e) {
+	data = nvbios_rammapSp(bios, data, ver, hdr, cnt, len, strap,
+			       &ver, &hdr, &next->bios);
+	if (!data || ver != 0x10 || hdr < 0x0e) {
 		nv_error(pfb, "invalid/missing ramcfg entry\n");
 		return -EINVAL;
 	}
 
 	/* lookup memory timings, if bios says they're present */
-	strap = nv_ro08(bios, ramcfg.data + 0x01);
-	if (strap != 0xff) {
-		timing.data = nvbios_timingEe(bios, strap, &ver, &timing.size,
-					     &cnt, &len);
-		if (!timing.data || ver != 0x10 || timing.size < 0x19) {
+	if (next->bios.ramcfg_timing != 0xff) {
+		data = nvbios_timingEp(bios, next->bios.ramcfg_timing,
+				       &ver, &hdr, &cnt, &len,
+				       &next->bios);
+		if (!data || ver != 0x10 || hdr < 0x19) {
 			nv_error(pfb, "invalid/missing timing entry\n");
 			return -EINVAL;
 		}
-	} else {
-		timing.data = 0;
 	}
 
-	ret = nva3_clock_info(nouveau_clock(pfb), 0x12, 0x4000, freq, &mclk);
+	ret = nva3_pll_info(nouveau_clock(pfb), 0x12, 0x4000, freq, &mclk);
 	if (ret < 0) {
 		nv_error(pfb, "failed mclk calculation\n");
 		return ret;
@@ -163,17 +169,17 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 		ram_mask(fuc, 0x004168, 0x003f3141, ctrl);
 	}
 
-	if ( (nv_ro08(bios, ramcfg.data + 0x02) & 0x10)) {
+	if (next->bios.ramcfg_10_02_10) {
 		ram_mask(fuc, 0x111104, 0x00000600, 0x00000000);
 	} else {
 		ram_mask(fuc, 0x111100, 0x40000000, 0x40000000);
 		ram_mask(fuc, 0x111104, 0x00000180, 0x00000000);
 	}
 
-	if (!(nv_ro08(bios, rammap.data + 0x04) & 0x02))
+	if (!next->bios.rammap_10_04_02)
 		ram_mask(fuc, 0x100200, 0x00000800, 0x00000000);
 	ram_wr32(fuc, 0x611200, 0x00003300);
-	if (!(nv_ro08(bios, ramcfg.data + 0x02) & 0x10))
+	if (!next->bios.ramcfg_10_02_10)
 		ram_wr32(fuc, 0x111100, 0x4c020000); /*XXX*/
 
 	ram_wr32(fuc, 0x1002d4, 0x00000001);
@@ -202,17 +208,16 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 		ram_wr32(fuc, 0x004018, 0x0000d000 | r004018);
 	}
 
-	if ( (nv_ro08(bios, rammap.data + 0x04) & 0x08)) {
-		u32 unk5a0 = (nv_ro16(bios, ramcfg.data + 0x05) << 8) |
-			      nv_ro08(bios, ramcfg.data + 0x05);
-		u32 unk5a4 = (nv_ro16(bios, ramcfg.data + 0x07));
-		u32 unk804 = (nv_ro08(bios, ramcfg.data + 0x09) & 0xf0) << 16 |
-			     (nv_ro08(bios, ramcfg.data + 0x03) & 0x0f) << 16 |
-			     (nv_ro08(bios, ramcfg.data + 0x09) & 0x0f) |
-			     0x80000000;
-		ram_wr32(fuc, 0x1005a0, unk5a0);
-		ram_wr32(fuc, 0x1005a4, unk5a4);
-		ram_wr32(fuc, 0x10f804, unk804);
+	if (next->bios.rammap_10_04_08) {
+		ram_wr32(fuc, 0x1005a0, next->bios.ramcfg_10_06 << 16 |
+					next->bios.ramcfg_10_05 << 8 |
+					next->bios.ramcfg_10_05);
+		ram_wr32(fuc, 0x1005a4, next->bios.ramcfg_10_08 << 8 |
+					next->bios.ramcfg_10_07);
+		ram_wr32(fuc, 0x10f804, next->bios.ramcfg_10_09_f0 << 20 |
+					next->bios.ramcfg_10_03_0f << 16 |
+					next->bios.ramcfg_10_09_0f |
+					0x80000000);
 		ram_mask(fuc, 0x10053c, 0x00001000, 0x00000000);
 	} else {
 		ram_mask(fuc, 0x10053c, 0x00001000, 0x00001000);
@@ -250,27 +255,26 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x100220[0], 0x00000000, 0x00000000);
 	ram_mask(fuc, 0x100220[8], 0x00000000, 0x00000000);
 
-	data = (nv_ro08(bios, ramcfg.data + 0x02) & 0x08) ? 0x00000000 : 0x00001000;
-	ram_mask(fuc, 0x100200, 0x00001000, data);
+	ram_mask(fuc, 0x100200, 0x00001000, !next->bios.ramcfg_10_02_08 << 12);
 
 	unk714 = ram_rd32(fuc, 0x100714) & ~0xf0000010;
 	unk718 = ram_rd32(fuc, 0x100718) & ~0x00000100;
 	unk71c = ram_rd32(fuc, 0x10071c) & ~0x00000100;
-	if ( (nv_ro08(bios, ramcfg.data + 0x02) & 0x20))
+	if (next->bios.ramcfg_10_02_20)
 		unk714 |= 0xf0000000;
-	if (!(nv_ro08(bios, ramcfg.data + 0x02) & 0x04))
+	if (!next->bios.ramcfg_10_02_04)
 		unk714 |= 0x00000010;
 	ram_wr32(fuc, 0x100714, unk714);
 
-	if (nv_ro08(bios, ramcfg.data + 0x02) & 0x01)
+	if (next->bios.ramcfg_10_02_01)
 		unk71c |= 0x00000100;
 	ram_wr32(fuc, 0x10071c, unk71c);
 
-	if (nv_ro08(bios, ramcfg.data + 0x02) & 0x02)
+	if (next->bios.ramcfg_10_02_02)
 		unk718 |= 0x00000100;
 	ram_wr32(fuc, 0x100718, unk718);
 
-	if (nv_ro08(bios, ramcfg.data + 0x02) & 0x10)
+	if (next->bios.ramcfg_10_02_10)
 		ram_wr32(fuc, 0x111100, 0x48000000); /*XXX*/
 
 	ram_mask(fuc, mr[0], 0x100, 0x100);
@@ -282,9 +286,9 @@ nva3_ram_calc(struct nouveau_fb *pfb, u32 freq)
 	ram_nsec(fuc, 12000);
 
 	ram_wr32(fuc, 0x611200, 0x00003330);
-	if ( (nv_ro08(bios, rammap.data + 0x04) & 0x02))
+	if (next->bios.rammap_10_04_02)
 		ram_mask(fuc, 0x100200, 0x00000800, 0x00000800);
-	if ( (nv_ro08(bios, ramcfg.data + 0x02) & 0x10)) {
+	if (next->bios.ramcfg_10_02_10) {
 		ram_mask(fuc, 0x111104, 0x00000180, 0x00000180);
 		ram_mask(fuc, 0x111100, 0x40000000, 0x00000000);
 	} else {
@@ -404,11 +408,11 @@ nva3_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	ram->fuc.r_0x100714 = ramfuc_reg(0x100714);
 	ram->fuc.r_0x100718 = ramfuc_reg(0x100718);
 	ram->fuc.r_0x10071c = ramfuc_reg(0x10071c);
-	ram->fuc.r_0x100760 = ramfuc_reg(0x100760);
-	ram->fuc.r_0x1007a0 = ramfuc_reg(0x1007a0);
-	ram->fuc.r_0x1007e0 = ramfuc_reg(0x1007e0);
+	ram->fuc.r_0x100760 = ramfuc_stride(0x100760, 4, ram->base.part_mask);
+	ram->fuc.r_0x1007a0 = ramfuc_stride(0x1007a0, 4, ram->base.part_mask);
+	ram->fuc.r_0x1007e0 = ramfuc_stride(0x1007e0, 4, ram->base.part_mask);
 	ram->fuc.r_0x10f804 = ramfuc_reg(0x10f804);
-	ram->fuc.r_0x1110e0 = ramfuc_reg(0x1110e0);
+	ram->fuc.r_0x1110e0 = ramfuc_stride(0x1110e0, 4, ram->base.part_mask);
 	ram->fuc.r_0x111100 = ramfuc_reg(0x111100);
 	ram->fuc.r_0x111104 = ramfuc_reg(0x111104);
 	ram->fuc.r_0x611200 = ramfuc_reg(0x611200);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
index 2b284b192763..735cb9580abe 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnvc0.c
@@ -133,6 +133,7 @@ nvc0_ram_calc(struct nouveau_fb *pfb, u32 freq)
 	struct nouveau_bios *bios = nouveau_bios(pfb);
 	struct nvc0_ram *ram = (void *)pfb->ram;
 	struct nvc0_ramfuc *fuc = &ram->fuc;
+	struct nvbios_ramcfg cfg;
 	u8  ver, cnt, len, strap;
 	struct {
 		u32 data;
@@ -145,7 +146,7 @@ nvc0_ram_calc(struct nouveau_fb *pfb, u32 freq)
 
 	/* lookup memory config data relevant to the target frequency */
 	rammap.data = nvbios_rammapEm(bios, freq / 1000, &ver, &rammap.size,
-				     &cnt, &ramcfg.size);
+				     &cnt, &ramcfg.size, &cfg);
 	if (!rammap.data || ver != 0x10 || rammap.size < 0x0e) {
 		nv_error(pfb, "invalid/missing rammap entry\n");
 		return -EINVAL;
@@ -483,9 +484,9 @@ nvc0_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 
 	do {
 		if (back)
-			ret = nouveau_mm_tail(mm, 1, size, ncmin, align, &r);
+			ret = nouveau_mm_tail(mm, 0, 1, size, ncmin, align, &r);
 		else
-			ret = nouveau_mm_head(mm, 1, size, ncmin, align, &r);
+			ret = nouveau_mm_head(mm, 0, 1, size, ncmin, align, &r);
 		if (ret) {
 			mutex_unlock(&pfb->base.mutex);
 			pfb->ram->put(pfb, &mem);
@@ -562,7 +563,7 @@ nvc0_ram_create_(struct nouveau_object *parent, struct nouveau_object *engine,
 		offset = (0x0200000000ULL >> 12) + (bsize << 8);
 		length = (ram->size >> 12) - ((bsize * parts) << 8) - rsvd_tail;
 
-		ret = nouveau_mm_init(&pfb->vram, offset, length, 0);
+		ret = nouveau_mm_init(&pfb->vram, offset, length, 1);
 		if (ret)
 			nouveau_mm_fini(&pfb->vram);
 	}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c
index c5b46e302319..6bae474abb44 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/ramnve0.c
@@ -29,6 +29,8 @@
 #include <subdev/bios/init.h>
 #include <subdev/bios/rammap.h>
 #include <subdev/bios/timing.h>
+#include <subdev/bios/M0205.h>
+#include <subdev/bios/M0209.h>
 
 #include <subdev/clock.h>
 #include <subdev/clock/pll.h>
@@ -41,14 +43,6 @@
 
 #include "ramfuc.h"
 
-/* binary driver only executes this path if the condition (a) is true
- * for any configuration (combination of rammap+ramcfg+timing) that
- * can be reached on a given card.  for now, we will execute the branch
- * unconditionally in the hope that a "false everywhere" in the bios
- * tables doesn't actually mean "don't touch this".
- */
-#define NOTE00(a) 1
-
 struct nve0_ramfuc {
 	struct ramfuc base;
 
@@ -134,10 +128,12 @@ struct nve0_ram {
 	struct nouveau_ram base;
 	struct nve0_ramfuc fuc;
 
+	struct list_head cfg;
 	u32 parts;
 	u32 pmask;
 	u32 pnuts;
 
+	struct nvbios_ramcfg diff;
 	int from;
 	int mode;
 	int N1, fN1, M1, P1;
@@ -241,7 +237,7 @@ nve0_ram_nuts(struct nve0_ram *ram, struct ramfuc_reg *reg,
 {
 	struct nve0_fb_priv *priv = (void *)nouveau_fb(ram);
 	struct ramfuc *fuc = &ram->fuc.base;
-	u32 addr = 0x110000 + (reg->addr[0] & 0xfff);
+	u32 addr = 0x110000 + (reg->addr & 0xfff);
 	u32 mask = _mask | _copy;
 	u32 data = (_data & _mask) | (reg->data & _copy);
 	u32 i;
@@ -268,6 +264,7 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	u32 mask, data;
 
 	ram_mask(fuc, 0x10f808, 0x40000000, 0x40000000);
+	ram_block(fuc);
 	ram_wr32(fuc, 0x62c000, 0x0f0f0000);
 
 	/* MR1: turn termination on early, for some reason.. */
@@ -478,7 +475,7 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x10f2e8, 0xffffffff, next->bios.timing[9]);
 
 	data = mask = 0x00000000;
-	if (NOTE00(ramcfg_08_20)) {
+	if (ram->diff.ramcfg_11_08_20) {
 		if (next->bios.ramcfg_11_08_20)
 			data |= 0x01000000;
 		mask |= 0x01000000;
@@ -486,11 +483,11 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x10f200, mask, data);
 
 	data = mask = 0x00000000;
-	if (NOTE00(ramcfg_02_03 != 0)) {
+	if (ram->diff.ramcfg_11_02_03) {
 		data |= next->bios.ramcfg_11_02_03 << 8;
 		mask |= 0x00000300;
 	}
-	if (NOTE00(ramcfg_01_10)) {
+	if (ram->diff.ramcfg_11_01_10) {
 		if (next->bios.ramcfg_11_01_10)
 			data |= 0x70000000;
 		mask |= 0x70000000;
@@ -498,11 +495,11 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x10f604, mask, data);
 
 	data = mask = 0x00000000;
-	if (NOTE00(timing_30_07 != 0)) {
+	if (ram->diff.timing_20_30_07) {
 		data |= next->bios.timing_20_30_07 << 28;
 		mask |= 0x70000000;
 	}
-	if (NOTE00(ramcfg_01_01)) {
+	if (ram->diff.ramcfg_11_01_01) {
 		if (next->bios.ramcfg_11_01_01)
 			data |= 0x00000100;
 		mask |= 0x00000100;
@@ -510,11 +507,11 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x10f614, mask, data);
 
 	data = mask = 0x00000000;
-	if (NOTE00(timing_30_07 != 0)) {
+	if (ram->diff.timing_20_30_07) {
 		data |= next->bios.timing_20_30_07 << 28;
 		mask |= 0x70000000;
 	}
-	if (NOTE00(ramcfg_01_02)) {
+	if (ram->diff.ramcfg_11_01_02) {
 		if (next->bios.ramcfg_11_01_02)
 			data |= 0x00000100;
 		mask |= 0x00000100;
@@ -548,11 +545,11 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	ram_wr32(fuc, 0x10f870, 0x11111111 * next->bios.ramcfg_11_03_0f);
 
 	data = mask = 0x00000000;
-	if (NOTE00(ramcfg_02_03 != 0)) {
+	if (ram->diff.ramcfg_11_02_03) {
 		data |= next->bios.ramcfg_11_02_03;
 		mask |= 0x00000003;
 	}
-	if (NOTE00(ramcfg_01_10)) {
+	if (ram->diff.ramcfg_11_01_10) {
 		if (next->bios.ramcfg_11_01_10)
 			data |= 0x00000004;
 		mask |= 0x00000004;
@@ -666,6 +663,7 @@ nve0_ram_calc_gddr5(struct nouveau_fb *pfb, u32 freq)
 	if (next->bios.ramcfg_11_07_02)
 		nve0_ram_train(fuc, 0x80020000, 0x01000000);
 
+	ram_unblock(fuc);
 	ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
 
 	if (next->bios.rammap_11_08_01)
@@ -695,6 +693,7 @@ nve0_ram_calc_sddr3(struct nouveau_fb *pfb, u32 freq)
 	u32 mask, data;
 
 	ram_mask(fuc, 0x10f808, 0x40000000, 0x40000000);
+	ram_block(fuc);
 	ram_wr32(fuc, 0x62c000, 0x0f0f0000);
 
 	if (vc == 1 && ram_have(fuc, gpio2E)) {
@@ -917,6 +916,7 @@ nve0_ram_calc_sddr3(struct nouveau_fb *pfb, u32 freq)
 	ram_mask(fuc, 0x10f200, 0x80000000, 0x00000000);
 	ram_nsec(fuc, 1000);
 
+	ram_unblock(fuc);
 	ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
 
 	if (next->bios.rammap_11_08_01)
@@ -932,58 +932,24 @@ nve0_ram_calc_sddr3(struct nouveau_fb *pfb, u32 freq)
  ******************************************************************************/
 
 static int
-nve0_ram_calc_data(struct nouveau_fb *pfb, u32 freq,
+nve0_ram_calc_data(struct nouveau_fb *pfb, u32 khz,
 		   struct nouveau_ram_data *data)
 {
-	struct nouveau_bios *bios = nouveau_bios(pfb);
 	struct nve0_ram *ram = (void *)pfb->ram;
-	u8 strap, cnt, len;
-
-	/* lookup memory config data relevant to the target frequency */
-	ram->base.rammap.data = nvbios_rammapEp(bios, freq / 1000,
-					       &ram->base.rammap.version,
-					       &ram->base.rammap.size,
-					       &cnt, &len, &data->bios);
-	if (!ram->base.rammap.data || ram->base.rammap.version != 0x11 ||
-	     ram->base.rammap.size < 0x09) {
-		nv_error(pfb, "invalid/missing rammap entry\n");
-		return -EINVAL;
-	}
-
-	/* locate specific data set for the attached memory */
-	strap = nvbios_ramcfg_index(nv_subdev(pfb));
-	ram->base.ramcfg.data = nvbios_rammapSp(bios, ram->base.rammap.data,
-						ram->base.rammap.version,
-						ram->base.rammap.size,
-						cnt, len, strap,
-						&ram->base.ramcfg.version,
-						&ram->base.ramcfg.size,
-						&data->bios);
-	if (!ram->base.ramcfg.data || ram->base.ramcfg.version != 0x11 ||
-	     ram->base.ramcfg.size < 0x08) {
-		nv_error(pfb, "invalid/missing ramcfg entry\n");
-		return -EINVAL;
-	}
-
-	/* lookup memory timings, if bios says they're present */
-	strap = nv_ro08(bios, ram->base.ramcfg.data + 0x00);
-	if (strap != 0xff) {
-		ram->base.timing.data =
-			nvbios_timingEp(bios, strap, &ram->base.timing.version,
-				       &ram->base.timing.size, &cnt, &len,
-				       &data->bios);
-		if (!ram->base.timing.data ||
-		     ram->base.timing.version != 0x20 ||
-		     ram->base.timing.size < 0x33) {
-			nv_error(pfb, "invalid/missing timing entry\n");
-			return -EINVAL;
+	struct nouveau_ram_data *cfg;
+	u32 mhz = khz / 1000;
+
+	list_for_each_entry(cfg, &ram->cfg, head) {
+		if (mhz >= cfg->bios.rammap_min &&
+		    mhz <= cfg->bios.rammap_max) {
+			*data = *cfg;
+			data->freq = khz;
+			return 0;
 		}
-	} else {
-		ram->base.timing.data = 0;
 	}
 
-	data->freq = freq;
-	return 0;
+	nv_error(ram, "ramcfg data for %dMHz not found\n", mhz);
+	return -EINVAL;
 }
 
 static int
@@ -1106,13 +1072,99 @@ nve0_ram_calc(struct nouveau_fb *pfb, u32 freq)
 	return nve0_ram_calc_xits(pfb, ram->base.next);
 }
 
+static void
+nve0_ram_prog_0(struct nouveau_fb *pfb, u32 freq)
+{
+	struct nve0_ram *ram = (void *)pfb->ram;
+	struct nouveau_ram_data *cfg;
+	u32 mhz = freq / 1000;
+	u32 mask, data;
+
+	list_for_each_entry(cfg, &ram->cfg, head) {
+		if (mhz >= cfg->bios.rammap_min &&
+		    mhz <= cfg->bios.rammap_max)
+			break;
+	}
+
+	if (&cfg->head == &ram->cfg)
+		return;
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0a_03fe) {
+		data |= cfg->bios.rammap_11_0a_03fe << 12;
+		mask |= 0x001ff000;
+	}
+	if (ram->diff.rammap_11_09_01ff) {
+		data |= cfg->bios.rammap_11_09_01ff;
+		mask |= 0x000001ff;
+	}
+	nv_mask(pfb, 0x10f468, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0a_0400) {
+		data |= cfg->bios.rammap_11_0a_0400;
+		mask |= 0x00000001;
+	}
+	nv_mask(pfb, 0x10f420, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0a_0800) {
+		data |= cfg->bios.rammap_11_0a_0800;
+		mask |= 0x00000001;
+	}
+	nv_mask(pfb, 0x10f430, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0b_01f0) {
+		data |= cfg->bios.rammap_11_0b_01f0;
+		mask |= 0x0000001f;
+	}
+	nv_mask(pfb, 0x10f400, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0b_0200) {
+		data |= cfg->bios.rammap_11_0b_0200 << 9;
+		mask |= 0x00000200;
+	}
+	nv_mask(pfb, 0x10f410, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0d) {
+		data |= cfg->bios.rammap_11_0d << 16;
+		mask |= 0x00ff0000;
+	}
+	if (ram->diff.rammap_11_0f) {
+		data |= cfg->bios.rammap_11_0f << 8;
+		mask |= 0x0000ff00;
+	}
+	nv_mask(pfb, 0x10f440, mask, data);
+
+	if (mask = 0, data = 0, ram->diff.rammap_11_0e) {
+		data |= cfg->bios.rammap_11_0e << 8;
+		mask |= 0x0000ff00;
+	}
+	if (ram->diff.rammap_11_0b_0800) {
+		data |= cfg->bios.rammap_11_0b_0800 << 7;
+		mask |= 0x00000080;
+	}
+	if (ram->diff.rammap_11_0b_0400) {
+		data |= cfg->bios.rammap_11_0b_0400 << 5;
+		mask |= 0x00000020;
+	}
+	nv_mask(pfb, 0x10f444, mask, data);
+}
+
 static int
 nve0_ram_prog(struct nouveau_fb *pfb)
 {
 	struct nouveau_device *device = nv_device(pfb);
 	struct nve0_ram *ram = (void *)pfb->ram;
 	struct nve0_ramfuc *fuc = &ram->fuc;
-	ram_exec(fuc, nouveau_boolopt(device->cfgopt, "NvMemExec", true));
+	struct nouveau_ram_data *next = ram->base.next;
+
+	if (!nouveau_boolopt(device->cfgopt, "NvMemExec", true)) {
+		ram_exec(fuc, false);
+		return (ram->base.next == &ram->base.xition);
+	}
+
+	nve0_ram_prog_0(pfb, 1000);
+	ram_exec(fuc, true);
+	nve0_ram_prog_0(pfb, next->freq);
+
 	return (ram->base.next == &ram->base.xition);
 }
 
@@ -1125,24 +1177,147 @@ nve0_ram_tidy(struct nouveau_fb *pfb)
 	ram_exec(fuc, false);
 }
 
+struct nve0_ram_train {
+	u16 mask;
+	struct nvbios_M0209S remap;
+	struct nvbios_M0209S type00;
+	struct nvbios_M0209S type01;
+	struct nvbios_M0209S type04;
+	struct nvbios_M0209S type06;
+	struct nvbios_M0209S type07;
+	struct nvbios_M0209S type08;
+	struct nvbios_M0209S type09;
+};
+
+static int
+nve0_ram_train_type(struct nouveau_fb *pfb, int i, u8 ramcfg,
+		    struct nve0_ram_train *train)
+{
+	struct nouveau_bios *bios = nouveau_bios(pfb);
+	struct nvbios_M0205E M0205E;
+	struct nvbios_M0205S M0205S;
+	struct nvbios_M0209E M0209E;
+	struct nvbios_M0209S *remap = &train->remap;
+	struct nvbios_M0209S *value;
+	u8  ver, hdr, cnt, len;
+	u32 data;
+
+	/* determine type of data for this index */
+	if (!(data = nvbios_M0205Ep(bios, i, &ver, &hdr, &cnt, &len, &M0205E)))
+		return -ENOENT;
+
+	switch (M0205E.type) {
+	case 0x00: value = &train->type00; break;
+	case 0x01: value = &train->type01; break;
+	case 0x04: value = &train->type04; break;
+	case 0x06: value = &train->type06; break;
+	case 0x07: value = &train->type07; break;
+	case 0x08: value = &train->type08; break;
+	case 0x09: value = &train->type09; break;
+	default:
+		return 0;
+	}
+
+	/* training data index determined by ramcfg strap */
+	if (!(data = nvbios_M0205Sp(bios, i, ramcfg, &ver, &hdr, &M0205S)))
+		return -EINVAL;
+	i = M0205S.data;
+
+	/* training data format information */
+	if (!(data = nvbios_M0209Ep(bios, i, &ver, &hdr, &cnt, &len, &M0209E)))
+		return -EINVAL;
+
+	/* ... and the raw data */
+	if (!(data = nvbios_M0209Sp(bios, i, 0, &ver, &hdr, value)))
+		return -EINVAL;
+
+	if (M0209E.v02_07 == 2) {
+		/* of course! why wouldn't we have a pointer to another entry
+		 * in the same table, and use the first one as an array of
+		 * remap indices...
+		 */
+		if (!(data = nvbios_M0209Sp(bios, M0209E.v03, 0, &ver, &hdr,
+					    remap)))
+			return -EINVAL;
+
+		for (i = 0; i < ARRAY_SIZE(value->data); i++)
+			value->data[i] = remap->data[value->data[i]];
+	} else
+	if (M0209E.v02_07 != 1)
+		return -EINVAL;
+
+	train->mask |= 1 << M0205E.type;
+	return 0;
+}
+
+static int
+nve0_ram_train_init_0(struct nouveau_fb *pfb, struct nve0_ram_train *train)
+{
+	int i, j;
+
+	if ((train->mask & 0x03d3) != 0x03d3) {
+		nv_warn(pfb, "missing link training data\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 0x30; i++) {
+		for (j = 0; j < 8; j += 4) {
+			nv_wr32(pfb, 0x10f968 + j, 0x00000000 | (i << 8));
+			nv_wr32(pfb, 0x10f920 + j, 0x00000000 |
+						   train->type08.data[i] << 4 |
+						   train->type06.data[i]);
+			nv_wr32(pfb, 0x10f918 + j, train->type00.data[i]);
+			nv_wr32(pfb, 0x10f920 + j, 0x00000100 |
+						   train->type09.data[i] << 4 |
+						   train->type07.data[i]);
+			nv_wr32(pfb, 0x10f918 + j, train->type01.data[i]);
+		}
+	}
+
+	for (j = 0; j < 8; j += 4) {
+		for (i = 0; i < 0x100; i++) {
+			nv_wr32(pfb, 0x10f968 + j, i);
+			nv_wr32(pfb, 0x10f900 + j, train->type04.data[i]);
+		}
+	}
+
+	return 0;
+}
+
+static int
+nve0_ram_train_init(struct nouveau_fb *pfb)
+{
+	u8 ramcfg = nvbios_ramcfg_index(nv_subdev(pfb));
+	struct nve0_ram_train *train;
+	int ret = -ENOMEM, i;
+
+	if ((train = kzalloc(sizeof(*train), GFP_KERNEL))) {
+		for (i = 0; i < 0x100; i++) {
+			ret = nve0_ram_train_type(pfb, i, ramcfg, train);
+			if (ret && ret != -ENOENT)
+				break;
+		}
+	}
+
+	switch (pfb->ram->type) {
+	case NV_MEM_TYPE_GDDR5:
+		ret = nve0_ram_train_init_0(pfb, train);
+		break;
+	default:
+		ret = 0;
+		break;
+	}
+
+	kfree(train);
+	return ret;
+}
+
 int
 nve0_ram_init(struct nouveau_object *object)
 {
 	struct nouveau_fb *pfb = (void *)object->parent;
 	struct nve0_ram *ram   = (void *)object;
 	struct nouveau_bios *bios = nouveau_bios(pfb);
-	static const u8  train0[] = {
-		0x00, 0xff, 0xff, 0x00, 0xff, 0x00,
-		0x00, 0xff, 0xff, 0x00, 0xff, 0x00,
-	};
-	static const u32 train1[] = {
-		0x00000000, 0xffffffff,
-		0x55555555, 0xaaaaaaaa,
-		0x33333333, 0xcccccccc,
-		0xf0f0f0f0, 0x0f0f0f0f,
-		0x00ff00ff, 0xff00ff00,
-		0x0000ffff, 0xffff0000,
-	};
 	u8  ver, hdr, cnt, len, snr, ssz;
 	u32 data, save;
 	int ret, i;
@@ -1168,51 +1343,107 @@ nve0_ram_init(struct nouveau_object *object)
 
 	cnt  = nv_ro08(bios, data + 0x14); /* guess at count */
 	data = nv_ro32(bios, data + 0x10); /* guess u32... */
-	save = nv_rd32(pfb, 0x10f65c);
-	for (i = 0; i < cnt; i++) {
-		nv_mask(pfb, 0x10f65c, 0x000000f0, i << 4);
-		nvbios_exec(&(struct nvbios_init) {
-				.subdev = nv_subdev(pfb),
-				.bios = bios,
-				.offset = nv_ro32(bios, data), /* guess u32 */
-				.execute = 1,
-			    });
-		data += 4;
-	}
-	nv_wr32(pfb, 0x10f65c, save);
+	save = nv_rd32(pfb, 0x10f65c) & 0x000000f0;
+	for (i = 0; i < cnt; i++, data += 4) {
+		if (i != save >> 4) {
+			nv_mask(pfb, 0x10f65c, 0x000000f0, i << 4);
+			nvbios_exec(&(struct nvbios_init) {
+					.subdev = nv_subdev(pfb),
+					.bios = bios,
+					.offset = nv_ro32(bios, data),
+					.execute = 1,
+				    });
+		}
+	}
+	nv_mask(pfb, 0x10f65c, 0x000000f0, save);
 	nv_mask(pfb, 0x10f584, 0x11000000, 0x00000000);
+	nv_wr32(pfb, 0x10ecc0, 0xffffffff);
+	nv_mask(pfb, 0x10f160, 0x00000010, 0x00000010);
 
-	switch (ram->base.type) {
-	case NV_MEM_TYPE_GDDR5:
-		for (i = 0; i < 0x30; i++) {
-			nv_wr32(pfb, 0x10f968, 0x00000000 | (i << 8));
-			nv_wr32(pfb, 0x10f920, 0x00000000 | train0[i % 12]);
-			nv_wr32(pfb, 0x10f918,              train1[i % 12]);
-			nv_wr32(pfb, 0x10f920, 0x00000100 | train0[i % 12]);
-			nv_wr32(pfb, 0x10f918,              train1[i % 12]);
-
-			nv_wr32(pfb, 0x10f96c, 0x00000000 | (i << 8));
-			nv_wr32(pfb, 0x10f924, 0x00000000 | train0[i % 12]);
-			nv_wr32(pfb, 0x10f91c,              train1[i % 12]);
-			nv_wr32(pfb, 0x10f924, 0x00000100 | train0[i % 12]);
-			nv_wr32(pfb, 0x10f91c,              train1[i % 12]);
-		}
+	return nve0_ram_train_init(pfb);
+}
 
-		for (i = 0; i < 0x100; i++) {
-			nv_wr32(pfb, 0x10f968, i);
-			nv_wr32(pfb, 0x10f900, train1[2 + (i & 1)]);
-		}
+static int
+nve0_ram_ctor_data(struct nve0_ram *ram, u8 ramcfg, int i)
+{
+	struct nouveau_fb *pfb = (void *)nv_object(ram)->parent;
+	struct nouveau_bios *bios = nouveau_bios(pfb);
+	struct nouveau_ram_data *cfg;
+	struct nvbios_ramcfg *d = &ram->diff;
+	struct nvbios_ramcfg *p, *n;
+	u8  ver, hdr, cnt, len;
+	u32 data;
+	int ret;
 
-		for (i = 0; i < 0x100; i++) {
-			nv_wr32(pfb, 0x10f96c, i);
-			nv_wr32(pfb, 0x10f900, train1[2 + (i & 1)]);
-		}
-		break;
-	default:
-		break;
+	if (!(cfg = kmalloc(sizeof(*cfg), GFP_KERNEL)))
+		return -ENOMEM;
+	p = &list_last_entry(&ram->cfg, typeof(*cfg), head)->bios;
+	n = &cfg->bios;
+
+	/* memory config data for a range of target frequencies */
+	data = nvbios_rammapEp(bios, i, &ver, &hdr, &cnt, &len, &cfg->bios);
+	if (ret = -ENOENT, !data)
+		goto done;
+	if (ret = -ENOSYS, ver != 0x11 || hdr < 0x12)
+		goto done;
+
+	/* ... and a portion specific to the attached memory */
+	data = nvbios_rammapSp(bios, data, ver, hdr, cnt, len, ramcfg,
+			       &ver, &hdr, &cfg->bios);
+	if (ret = -EINVAL, !data)
+		goto done;
+	if (ret = -ENOSYS, ver != 0x11 || hdr < 0x0a)
+		goto done;
+
+	/* lookup memory timings, if bios says they're present */
+	if (cfg->bios.ramcfg_timing != 0xff) {
+		data = nvbios_timingEp(bios, cfg->bios.ramcfg_timing,
+				       &ver, &hdr, &cnt, &len,
+				       &cfg->bios);
+		if (ret = -EINVAL, !data)
+			goto done;
+		if (ret = -ENOSYS, ver != 0x20 || hdr < 0x33)
+			goto done;
 	}
 
-	return 0;
+	list_add_tail(&cfg->head, &ram->cfg);
+	if (ret = 0, i == 0)
+		goto done;
+
+	d->rammap_11_0a_03fe |= p->rammap_11_0a_03fe != n->rammap_11_0a_03fe;
+	d->rammap_11_09_01ff |= p->rammap_11_09_01ff != n->rammap_11_09_01ff;
+	d->rammap_11_0a_0400 |= p->rammap_11_0a_0400 != n->rammap_11_0a_0400;
+	d->rammap_11_0a_0800 |= p->rammap_11_0a_0800 != n->rammap_11_0a_0800;
+	d->rammap_11_0b_01f0 |= p->rammap_11_0b_01f0 != n->rammap_11_0b_01f0;
+	d->rammap_11_0b_0200 |= p->rammap_11_0b_0200 != n->rammap_11_0b_0200;
+	d->rammap_11_0d |= p->rammap_11_0d != n->rammap_11_0d;
+	d->rammap_11_0f |= p->rammap_11_0f != n->rammap_11_0f;
+	d->rammap_11_0e |= p->rammap_11_0e != n->rammap_11_0e;
+	d->rammap_11_0b_0800 |= p->rammap_11_0b_0800 != n->rammap_11_0b_0800;
+	d->rammap_11_0b_0400 |= p->rammap_11_0b_0400 != n->rammap_11_0b_0400;
+	d->ramcfg_11_01_01 |= p->ramcfg_11_01_01 != n->ramcfg_11_01_01;
+	d->ramcfg_11_01_02 |= p->ramcfg_11_01_02 != n->ramcfg_11_01_02;
+	d->ramcfg_11_01_10 |= p->ramcfg_11_01_10 != n->ramcfg_11_01_10;
+	d->ramcfg_11_02_03 |= p->ramcfg_11_02_03 != n->ramcfg_11_02_03;
+	d->ramcfg_11_08_20 |= p->ramcfg_11_08_20 != n->ramcfg_11_08_20;
+	d->timing_20_30_07 |= p->timing_20_30_07 != n->timing_20_30_07;
+done:
+	if (ret)
+		kfree(cfg);
+	return ret;
+}
+
+static void
+nve0_ram_dtor(struct nouveau_object *object)
+{
+	struct nve0_ram *ram = (void *)object;
+	struct nouveau_ram_data *cfg, *tmp;
+
+	list_for_each_entry_safe(cfg, tmp, &ram->cfg, head) {
+		kfree(cfg);
+	}
+
+	nouveau_ram_destroy(&ram->base);
 }
 
 static int
@@ -1226,6 +1457,7 @@ nve0_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	struct dcb_gpio_func func;
 	struct nve0_ram *ram;
 	int ret, i;
+	u8  ramcfg = nvbios_ramcfg_index(nv_subdev(pfb));
 	u32 tmp;
 
 	ret = nvc0_ram_create(parent, engine, oclass, 0x022554, &ram);
@@ -1233,6 +1465,8 @@ nve0_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
+	INIT_LIST_HEAD(&ram->cfg);
+
 	switch (ram->base.type) {
 	case NV_MEM_TYPE_DDR3:
 	case NV_MEM_TYPE_GDDR5:
@@ -1264,7 +1498,26 @@ nve0_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		}
 	}
 
-	// parse bios data for both pll's
+	/* parse bios data for all rammap table entries up-front, and
+	 * build information on whether certain fields differ between
+	 * any of the entries.
+	 *
+	 * the binary driver appears to completely ignore some fields
+	 * when all entries contain the same value.  at first, it was
+	 * hoped that these were mere optimisations and the bios init
+	 * tables had configured as per the values here, but there is
+	 * evidence now to suggest that this isn't the case and we do
+	 * need to treat this condition as a "don't touch" indicator.
+	 */
+	for (i = 0; !ret; i++) {
+		ret = nve0_ram_ctor_data(ram, ramcfg, i);
+		if (ret && ret != -ENOENT) {
+			nv_error(pfb, "failed to parse ramcfg data\n");
+			return ret;
+		}
+	}
+
+	/* parse bios data for both pll's */
 	ret = nvbios_pll_parse(bios, 0x0c, &ram->fuc.refpll);
 	if (ret) {
 		nv_error(pfb, "mclk refpll data not found\n");
@@ -1277,6 +1530,7 @@ nve0_ram_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 		return ret;
 	}
 
+	/* lookup memory voltage gpios */
 	ret = gpio->find(gpio, 0, 0x18, DCB_GPIO_UNUSED, &func);
 	if (ret == 0) {
 		ram->fuc.r_gpioMV = ramfuc_reg(0x00d610 + (func.line * 0x04));
@@ -1385,7 +1639,7 @@ nve0_ram_oclass = {
 	.handle = 0,
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nve0_ram_ctor,
-		.dtor = _nouveau_ram_dtor,
+		.dtor = nve0_ram_dtor,
 		.init = nve0_ram_init,
 		.fini = _nouveau_ram_fini,
 	}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c
new file mode 100644
index 000000000000..bb1eb8f3e639
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr2.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2014 Roy Spliet
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Roy Spliet <rspliet@eclipso.eu>
+ *          Ben Skeggs
+ */
+
+#include "priv.h"
+
+struct ramxlat {
+	int id;
+	u8 enc;
+};
+
+static inline int
+ramxlat(const struct ramxlat *xlat, int id)
+{
+	while (xlat->id >= 0) {
+		if (xlat->id == id)
+			return xlat->enc;
+		xlat++;
+	}
+	return -EINVAL;
+}
+
+static const struct ramxlat
+ramddr2_cl[] = {
+	{ 2, 2 }, { 3, 3 }, { 4, 4 }, { 5, 5 }, { 6, 6 },
+	/* The following are available in some, but not all DDR2 docs */
+	{ 7, 7 },
+	{ -1 }
+};
+
+static const struct ramxlat
+ramddr2_wr[] = {
+	{ 2, 1 }, { 3, 2 }, { 4, 3 }, { 5, 4 }, { 6, 5 },
+	/* The following are available in some, but not all DDR2 docs */
+	{ 7, 6 },
+	{ -1 }
+};
+
+int
+nouveau_sddr2_calc(struct nouveau_ram *ram)
+{
+	int CL, WR, DLL = 0, ODT = 0;
+
+	switch (ram->next->bios.timing_ver) {
+	case 0x10:
+		CL  = ram->next->bios.timing_10_CL;
+		WR  = ram->next->bios.timing_10_WR;
+		DLL = !ram->next->bios.ramcfg_10_02_40;
+		ODT = ram->next->bios.timing_10_ODT & 3;
+		break;
+	case 0x20:
+		CL  = (ram->next->bios.timing[1] & 0x0000001f);
+		WR  = (ram->next->bios.timing[2] & 0x007f0000) >> 16;
+		break;
+	default:
+		return -ENOSYS;
+	}
+
+	CL  = ramxlat(ramddr2_cl, CL);
+	WR  = ramxlat(ramddr2_wr, WR);
+	if (CL < 0 || WR < 0)
+		return -EINVAL;
+
+	ram->mr[0] &= ~0xf70;
+	ram->mr[0] |= (WR & 0x07) << 9;
+	ram->mr[0] |= (CL & 0x07) << 4;
+
+	ram->mr[1] &= ~0x045;
+	ram->mr[1] |= (ODT & 0x1) << 2;
+	ram->mr[1] |= (ODT & 0x2) << 5;
+	ram->mr[1] |= !DLL;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c
index ebd4cd9c35d9..83949b11833a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/fb/sddr3.c
@@ -20,9 +20,9 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  * Authors: Ben Skeggs <bskeggs@redhat.com>
+ * 	    Roy Spliet <rspliet@eclipso.eu>
  */
 
-#include <subdev/bios.h>
 #include "priv.h"
 
 struct ramxlat {
@@ -69,31 +69,52 @@ ramddr3_cwl[] = {
 int
 nouveau_sddr3_calc(struct nouveau_ram *ram)
 {
-	struct nouveau_bios *bios = nouveau_bios(ram);
-	int WL, CL, WR;
+	int CWL, CL, WR, DLL = 0, ODT = 0;
 
-	switch (!!ram->timing.data * ram->timing.version) {
+	switch (ram->next->bios.timing_ver) {
+	case 0x10:
+		if (ram->next->bios.timing_hdr < 0x17) {
+			/* XXX: NV50: Get CWL from the timing register */
+			return -ENOSYS;
+		}
+		CWL = ram->next->bios.timing_10_CWL;
+		CL  = ram->next->bios.timing_10_CL;
+		WR  = ram->next->bios.timing_10_WR;
+		DLL = !ram->next->bios.ramcfg_10_02_40;
+		ODT = ram->next->bios.timing_10_ODT;
+		break;
 	case 0x20:
-		WL = (nv_ro16(bios, ram->timing.data + 0x04) & 0x0f80) >> 7;
-		CL =  nv_ro08(bios, ram->timing.data + 0x04) & 0x1f;
-		WR =  nv_ro08(bios, ram->timing.data + 0x0a) & 0x7f;
+		CWL = (ram->next->bios.timing[1] & 0x00000f80) >> 7;
+		CL  = (ram->next->bios.timing[1] & 0x0000001f) >> 0;
+		WR  = (ram->next->bios.timing[2] & 0x007f0000) >> 16;
+		/* XXX: Get these values from the VBIOS instead */
+		DLL = !(ram->mr[1] & 0x1);
+		ODT =   (ram->mr[1] & 0x004) >> 2 |
+			(ram->mr[1] & 0x040) >> 5 |
+		        (ram->mr[1] & 0x200) >> 7;
 		break;
 	default:
 		return -ENOSYS;
 	}
 
-	WL = ramxlat(ramddr3_cwl, WL);
-	CL = ramxlat(ramddr3_cl, CL);
-	WR = ramxlat(ramddr3_wr, WR);
-	if (WL < 0 || CL < 0 || WR < 0)
+	CWL = ramxlat(ramddr3_cwl, CWL);
+	CL  = ramxlat(ramddr3_cl, CL);
+	WR  = ramxlat(ramddr3_wr, WR);
+	if (CL < 0 || CWL < 0 || WR < 0)
 		return -EINVAL;
 
-	ram->mr[0] &= ~0xe74;
+	ram->mr[0] &= ~0xf74;
 	ram->mr[0] |= (WR & 0x07) << 9;
 	ram->mr[0] |= (CL & 0x0e) << 3;
 	ram->mr[0] |= (CL & 0x01) << 2;
 
+	ram->mr[1] &= ~0x245;
+	ram->mr[1] |= (ODT & 0x1) << 2;
+	ram->mr[1] |= (ODT & 0x2) << 5;
+	ram->mr[1] |= (ODT & 0x4) << 7;
+	ram->mr[1] |= !DLL;
+
 	ram->mr[2] &= ~0x038;
-	ram->mr[2] |= (WL & 0x07) << 3;
+	ram->mr[2] |= (CWL & 0x07) << 3;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fuse/base.c b/drivers/gpu/drm/nouveau/core/subdev/fuse/base.c
new file mode 100644
index 000000000000..9e8e92127715
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fuse/base.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include <subdev/fuse.h>
+
+int
+_nouveau_fuse_init(struct nouveau_object *object)
+{
+	struct nouveau_fuse *fuse = (void *)object;
+	return nouveau_subdev_init(&fuse->base);
+}
+
+void
+_nouveau_fuse_dtor(struct nouveau_object *object)
+{
+	struct nouveau_fuse *fuse = (void *)object;
+	nouveau_subdev_destroy(&fuse->base);
+}
+
+int
+nouveau_fuse_create_(struct nouveau_object *parent,
+		     struct nouveau_object *engine,
+		     struct nouveau_oclass *oclass, int length, void **pobject)
+{
+	struct nouveau_fuse *fuse;
+	int ret;
+
+	ret = nouveau_subdev_create_(parent, engine, oclass, 0, "FUSE",
+				     "fuse", length, pobject);
+	fuse = *pobject;
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fuse/g80.c b/drivers/gpu/drm/nouveau/core/subdev/fuse/g80.c
new file mode 100644
index 000000000000..a374ade485be
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fuse/g80.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include "priv.h"
+
+struct g80_fuse_priv {
+	struct nouveau_fuse base;
+
+	spinlock_t fuse_enable_lock;
+};
+
+static u32
+g80_fuse_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct g80_fuse_priv *priv = (void *)object;
+	unsigned long flags;
+	u32 fuse_enable, val;
+
+	spin_lock_irqsave(&priv->fuse_enable_lock, flags);
+
+	/* racy if another part of nouveau start writing to this reg */
+	fuse_enable = nv_mask(priv, 0x1084, 0x800, 0x800);
+	val = nv_rd32(priv, 0x21000 + addr);
+	nv_wr32(priv, 0x1084, fuse_enable);
+
+	spin_unlock_irqrestore(&priv->fuse_enable_lock, flags);
+
+	return val;
+}
+
+
+static int
+g80_fuse_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct g80_fuse_priv *priv;
+	int ret;
+
+	ret = nouveau_fuse_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&priv->fuse_enable_lock);
+
+	return 0;
+}
+
+struct nouveau_oclass
+g80_fuse_oclass = {
+	.handle = NV_SUBDEV(FUSE, 0x50),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = g80_fuse_ctor,
+		.dtor = _nouveau_fuse_dtor,
+		.init = _nouveau_fuse_init,
+		.fini = _nouveau_fuse_fini,
+		.rd32 = g80_fuse_rd32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fuse/gf100.c b/drivers/gpu/drm/nouveau/core/subdev/fuse/gf100.c
new file mode 100644
index 000000000000..5ed03f54b3d4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fuse/gf100.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include "priv.h"
+
+struct gf100_fuse_priv {
+	struct nouveau_fuse base;
+
+	spinlock_t fuse_enable_lock;
+};
+
+static u32
+gf100_fuse_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct gf100_fuse_priv *priv = (void *)object;
+	unsigned long flags;
+	u32 fuse_enable, unk, val;
+
+	spin_lock_irqsave(&priv->fuse_enable_lock, flags);
+
+	/* racy if another part of nouveau start writing to these regs */
+	fuse_enable = nv_mask(priv, 0x22400, 0x800, 0x800);
+	unk = nv_mask(priv, 0x21000, 0x1, 0x1);
+	val = nv_rd32(priv, 0x21100 + addr);
+	nv_wr32(priv, 0x21000, unk);
+	nv_wr32(priv, 0x22400, fuse_enable);
+
+	spin_unlock_irqrestore(&priv->fuse_enable_lock, flags);
+
+	return val;
+}
+
+
+static int
+gf100_fuse_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct gf100_fuse_priv *priv;
+	int ret;
+
+	ret = nouveau_fuse_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	spin_lock_init(&priv->fuse_enable_lock);
+
+	return 0;
+}
+
+struct nouveau_oclass
+gf100_fuse_oclass = {
+	.handle = NV_SUBDEV(FUSE, 0xC0),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gf100_fuse_ctor,
+		.dtor = _nouveau_fuse_dtor,
+		.init = _nouveau_fuse_init,
+		.fini = _nouveau_fuse_fini,
+		.rd32 = gf100_fuse_rd32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fuse/gm107.c b/drivers/gpu/drm/nouveau/core/subdev/fuse/gm107.c
new file mode 100644
index 000000000000..4f1a636c6538
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fuse/gm107.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include "priv.h"
+
+struct gm107_fuse_priv {
+	struct nouveau_fuse base;
+};
+
+static u32
+gm107_fuse_rd32(struct nouveau_object *object, u64 addr)
+{
+	struct gf100_fuse_priv *priv = (void *)object;
+
+	return nv_rd32(priv, 0x21100 + addr);
+}
+
+
+static int
+gm107_fuse_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
+	       struct nouveau_oclass *oclass, void *data, u32 size,
+	       struct nouveau_object **pobject)
+{
+	struct gm107_fuse_priv *priv;
+	int ret;
+
+	ret = nouveau_fuse_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+struct nouveau_oclass
+gm107_fuse_oclass = {
+	.handle = NV_SUBDEV(FUSE, 0x117),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gm107_fuse_ctor,
+		.dtor = _nouveau_fuse_dtor,
+		.init = _nouveau_fuse_init,
+		.fini = _nouveau_fuse_fini,
+		.rd32 = gm107_fuse_rd32,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/fuse/priv.h b/drivers/gpu/drm/nouveau/core/subdev/fuse/priv.h
new file mode 100644
index 000000000000..d2085411a5cb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/fuse/priv.h
@@ -0,0 +1,9 @@
+#ifndef __NVKM_FUSE_PRIV_H__
+#define __NVKM_FUSE_PRIV_H__
+
+#include <subdev/fuse.h>
+
+int _nouveau_fuse_init(struct nouveau_object *object);
+void _nouveau_fuse_dtor(struct nouveau_object *object);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
index b1e3ed7c8beb..7ad99b763f4c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c
@@ -122,7 +122,8 @@ nouveau_gpio_intr_init(struct nvkm_event *event, int type, int index)
 }
 
 static int
-nouveau_gpio_intr_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_gpio_intr_ctor(struct nouveau_object *object, void *data, u32 size,
+		       struct nvkm_notify *notify)
 {
 	struct nvkm_gpio_ntfy_req *req = data;
 	if (!WARN_ON(size != sizeof(*req))) {
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv92.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv94.c
index 252083d376f5..cae404ccadac 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv92.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv94.c
@@ -25,7 +25,7 @@
 #include "priv.h"
 
 void
-nv92_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
+nv94_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
 {
 	u32 intr0 = nv_rd32(gpio, 0x00e054);
 	u32 intr1 = nv_rd32(gpio, 0x00e074);
@@ -38,7 +38,7 @@ nv92_gpio_intr_stat(struct nouveau_gpio *gpio, u32 *hi, u32 *lo)
 }
 
 void
-nv92_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
+nv94_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
 {
 	u32 inte0 = nv_rd32(gpio, 0x00e050);
 	u32 inte1 = nv_rd32(gpio, 0x00e070);
@@ -57,8 +57,8 @@ nv92_gpio_intr_mask(struct nouveau_gpio *gpio, u32 type, u32 mask, u32 data)
 }
 
 struct nouveau_oclass *
-nv92_gpio_oclass = &(struct nouveau_gpio_impl) {
-	.base.handle = NV_SUBDEV(GPIO, 0x92),
+nv94_gpio_oclass = &(struct nouveau_gpio_impl) {
+	.base.handle = NV_SUBDEV(GPIO, 0x94),
 	.base.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = _nouveau_gpio_ctor,
 		.dtor = _nouveau_gpio_dtor,
@@ -66,8 +66,8 @@ nv92_gpio_oclass = &(struct nouveau_gpio_impl) {
 		.fini = _nouveau_gpio_fini,
 	},
 	.lines = 32,
-	.intr_stat = nv92_gpio_intr_stat,
-	.intr_mask = nv92_gpio_intr_mask,
+	.intr_stat = nv94_gpio_intr_stat,
+	.intr_mask = nv94_gpio_intr_mask,
 	.drive = nv50_gpio_drive,
 	.sense = nv50_gpio_sense,
 	.reset = nv50_gpio_reset,
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
index a4682b0956ad..480d6d2af770 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c
@@ -77,8 +77,8 @@ nvd0_gpio_oclass = &(struct nouveau_gpio_impl) {
 		.fini = _nouveau_gpio_fini,
 	},
 	.lines = 32,
-	.intr_stat = nv92_gpio_intr_stat,
-	.intr_mask = nv92_gpio_intr_mask,
+	.intr_stat = nv94_gpio_intr_stat,
+	.intr_mask = nv94_gpio_intr_mask,
 	.drive = nvd0_gpio_drive,
 	.sense = nvd0_gpio_sense,
 	.reset = nvd0_gpio_reset,
diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h b/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h
index e1724dfc86ae..bff98b86e2b5 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/priv.h
@@ -56,8 +56,8 @@ void nv50_gpio_reset(struct nouveau_gpio *, u8);
 int  nv50_gpio_drive(struct nouveau_gpio *, int, int, int);
 int  nv50_gpio_sense(struct nouveau_gpio *, int);
 
-void nv92_gpio_intr_stat(struct nouveau_gpio *, u32 *, u32 *);
-void nv92_gpio_intr_mask(struct nouveau_gpio *, u32, u32, u32);
+void nv94_gpio_intr_stat(struct nouveau_gpio *, u32 *, u32 *);
+void nv94_gpio_intr_mask(struct nouveau_gpio *, u32, u32, u32);
 
 void nvd0_gpio_reset(struct nouveau_gpio *, u8);
 int  nvd0_gpio_drive(struct nouveau_gpio *, int, int, int);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
index a652cafde3d6..2b1bf545e488 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/i2c/base.c
@@ -23,6 +23,7 @@
  */
 
 #include <core/option.h>
+#include <core/object.h>
 #include <core/event.h>
 
 #include <subdev/bios.h>
@@ -346,7 +347,8 @@ nouveau_i2c_intr_init(struct nvkm_event *event, int type, int index)
 }
 
 static int
-nouveau_i2c_intr_ctor(void *data, u32 size, struct nvkm_notify *notify)
+nouveau_i2c_intr_ctor(struct nouveau_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	struct nvkm_i2c_ntfy_req *req = data;
 	if (!WARN_ON(size != sizeof(*req))) {
diff --git a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
index 7b64befee48f..e8b1401c59c0 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/instmem/nv04.c
@@ -69,7 +69,7 @@ nv04_instobj_ctor(struct nouveau_object *parent, struct nouveau_object *engine,
 	if (ret)
 		return ret;
 
-	ret = nouveau_mm_head(&priv->heap, 1, args->size, args->size,
+	ret = nouveau_mm_head(&priv->heap, 0, 1, args->size, args->size,
 			      args->align, &node->mem);
 	if (ret)
 		return ret;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/base.c
index 32ed442c5913..7fa331516f84 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/base.c
@@ -31,7 +31,7 @@ nvkm_ltc_tags_alloc(struct nouveau_ltc *ltc, u32 n,
 	struct nvkm_ltc_priv *priv = (void *)ltc;
 	int ret;
 
-	ret = nouveau_mm_head(&priv->tags, 1, n, n, 1, pnode);
+	ret = nouveau_mm_head(&priv->tags, 0, 1, n, n, 1, pnode);
 	if (ret)
 		*pnode = NULL;
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
index b54b582e72c4..e7b7872481ef 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/gf100.c
@@ -62,16 +62,38 @@ gf100_ltc_zbc_clear_depth(struct nvkm_ltc_priv *priv, int i, const u32 depth)
 	nv_wr32(priv, 0x17ea58, depth);
 }
 
+static const struct nouveau_bitfield
+gf100_ltc_lts_intr_name[] = {
+	{ 0x00000001, "IDLE_ERROR_IQ" },
+	{ 0x00000002, "IDLE_ERROR_CBC" },
+	{ 0x00000004, "IDLE_ERROR_TSTG" },
+	{ 0x00000008, "IDLE_ERROR_DSTG" },
+	{ 0x00000010, "EVICTED_CB" },
+	{ 0x00000020, "ILLEGAL_COMPSTAT" },
+	{ 0x00000040, "BLOCKLINEAR_CB" },
+	{ 0x00000100, "ECC_SEC_ERROR" },
+	{ 0x00000200, "ECC_DED_ERROR" },
+	{ 0x00000400, "DEBUG" },
+	{ 0x00000800, "ATOMIC_TO_Z" },
+	{ 0x00001000, "ILLEGAL_ATOMIC" },
+	{ 0x00002000, "BLKACTIVITY_ERR" },
+	{}
+};
+
 static void
-gf100_ltc_lts_isr(struct nvkm_ltc_priv *priv, int ltc, int lts)
+gf100_ltc_lts_intr(struct nvkm_ltc_priv *priv, int ltc, int lts)
 {
 	u32 base = 0x141000 + (ltc * 0x2000) + (lts * 0x400);
-	u32 stat = nv_rd32(priv, base + 0x020);
+	u32 intr = nv_rd32(priv, base + 0x020);
+	u32 stat = intr & 0x0000ffff;
 
 	if (stat) {
-		nv_info(priv, "LTC%d_LTS%d: 0x%08x\n", ltc, lts, stat);
-		nv_wr32(priv, base + 0x020, stat);
+		nv_info(priv, "LTC%d_LTS%d:", ltc, lts);
+		nouveau_bitfield_print(gf100_ltc_lts_intr_name, stat);
+		pr_cont("\n");
 	}
+
+	nv_wr32(priv, base + 0x020, intr);
 }
 
 void
@@ -84,14 +106,9 @@ gf100_ltc_intr(struct nouveau_subdev *subdev)
 	while (mask) {
 		u32 lts, ltc = __ffs(mask);
 		for (lts = 0; lts < priv->lts_nr; lts++)
-			gf100_ltc_lts_isr(priv, ltc, lts);
+			gf100_ltc_lts_intr(priv, ltc, lts);
 		mask &= ~(1 << ltc);
 	}
-
-	/* we do something horribly wrong and upset PMFB a lot, so mask off
-	 * interrupts from it after the first one until it's fixed
-	 */
-	nv_mask(priv, 0x000640, 0x02000000, 0x00000000);
 }
 
 static int
@@ -151,7 +168,7 @@ gf100_ltc_init_tag_ram(struct nouveau_fb *pfb, struct nvkm_ltc_priv *priv)
 	tag_size += tag_align;
 	tag_size  = (tag_size + 0xfff) >> 12; /* round up */
 
-	ret = nouveau_mm_tail(&pfb->vram, 1, tag_size, tag_size, 1,
+	ret = nouveau_mm_tail(&pfb->vram, 1, 1, tag_size, tag_size, 1,
 	                      &priv->tag_ram);
 	if (ret) {
 		priv->num_tags = 0;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
index 4761b2e9af00..a26bed86f384 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/gm107.c
@@ -87,11 +87,6 @@ gm107_ltc_intr(struct nouveau_subdev *subdev)
 			gm107_ltc_lts_isr(priv, ltc, lts);
 		mask &= ~(1 << ltc);
 	}
-
-	/* we do something horribly wrong and upset PMFB a lot, so mask off
-	 * interrupts from it after the first one until it's fixed
-	 */
-	nv_mask(priv, 0x000640, 0x02000000, 0x00000000);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/core/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/core/subdev/ltc/priv.h
index 594924f39126..41f179d93da6 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/ltc/priv.h
@@ -4,6 +4,8 @@
 #include <subdev/ltc.h>
 #include <subdev/fb.h>
 
+#include <core/enum.h>
+
 struct nvkm_ltc_priv {
 	struct nouveau_ltc base;
 	u32 ltc_nr;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/base.c b/drivers/gpu/drm/nouveau/core/subdev/pwr/base.c
index 69f1f34f6931..0ab55f27ec45 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/base.c
@@ -203,6 +203,8 @@ _nouveau_pwr_init(struct nouveau_object *object)
 	nv_wait(ppwr, 0x10a04c, 0xffffffff, 0x00000000);
 	nv_mask(ppwr, 0x000200, 0x00002000, 0x00000000);
 	nv_mask(ppwr, 0x000200, 0x00002000, 0x00002000);
+	nv_rd32(ppwr, 0x000200);
+	nv_wait(ppwr, 0x10a10c, 0x00000006, 0x00000000);
 
 	/* upload data segment */
 	nv_wr32(ppwr, 0x10a1c0, 0x01000000);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/arith.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/arith.fuc
new file mode 100644
index 000000000000..214a6d9e088d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/arith.fuc
@@ -0,0 +1,94 @@
+/*
+ * Copyright 2014 Martin Peres <martin.peres@free.fr>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the folloing conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+/******************************************************************************
+ * arith data segment
+ *****************************************************************************/
+#ifdef INCLUDE_PROC
+#endif
+
+#ifdef INCLUDE_DATA
+#endif
+
+/******************************************************************************
+ * arith code segment
+ *****************************************************************************/
+#ifdef INCLUDE_CODE
+
+// does a 32x32 -> 64 multiplication
+//
+// A * B = A_lo * B_lo
+//        + ( A_hi * B_lo ) << 16
+//        + ( A_lo * B_hi ) << 16
+//        + ( A_hi * B_hi ) << 32
+//
+// $r15 - current
+// $r14 - A
+// $r13 - B
+// $r12 - mul_lo (return)
+// $r11 - mul_hi (return)
+// $r0  - zero
+mulu32_32_64:
+	push $r1 // A_hi
+	push $r2 // B_hi
+	push $r3 // tmp0
+	push $r4 // tmp1
+
+	shr b32 $r1 $r14 16
+	shr b32 $r2 $r13 16
+
+	clear b32 $r12
+	clear b32 $r11
+
+	// A_lo * B_lo
+	mulu $r12 $r14 $r13
+
+	// ( A_hi * B_lo ) << 16
+	mulu $r3 $r1 $r13 // tmp0 = A_hi * B_lo
+	mov b32 $r4 $r3
+	and $r3 0xffff // tmp0 = tmp0_lo
+	shl b32 $r3 16
+	shr b32 $r4 16 // tmp1 = tmp0_hi
+	add b32 $r12 $r3
+	adc b32 $r11 $r4
+
+	// ( A_lo * B_hi ) << 16
+	mulu $r3 $r14 $r2 // tmp0 = A_lo * B_hi
+	mov b32 $r4 $r3
+	and $r3 0xffff // tmp0 = tmp0_lo
+	shl b32 $r3 16
+	shr b32 $r4 16 // tmp1 = tmp0_hi
+	add b32 $r12 $r3
+	adc b32 $r11 $r4
+
+	// ( A_hi * B_hi ) << 32
+	mulu $r3 $r1 $r2 // tmp0 = A_hi * B_hi
+	add b32 $r11 $r3
+
+	pop $r4
+	pop $r3
+	pop $r2
+	pop $r1
+	ret
+#endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/kernel.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/kernel.fuc
index 8f29badd785f..5cf5be63cbef 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/kernel.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/kernel.fuc
@@ -98,12 +98,16 @@ wr32:
 // $r14 - ns
 // $r0  - zero
 nsec:
+	push $r9
+	push $r8
 	nv_iord($r8, NV_PPWR_TIMER_LOW)
 	nsec_loop:
 		nv_iord($r9, NV_PPWR_TIMER_LOW)
 		sub b32 $r9 $r8
 		cmp b32 $r9 $r14
 		bra l #nsec_loop
+	pop $r8
+	pop $r9
 	ret
 
 // busy-wait for a period of time
@@ -115,6 +119,8 @@ nsec:
 // $r11 - timeout (ns)
 // $r0  - zero
 wait:
+	push $r9
+	push $r8
 	nv_iord($r8, NV_PPWR_TIMER_LOW)
 	wait_loop:
 		nv_rd32($r10, $r14)
@@ -126,6 +132,8 @@ wait:
 		cmp b32 $r9 $r11
 		bra l #wait_loop
 	wait_done:
+	pop $r8
+	pop $r9
 	ret
 
 // $r15 - current (kern)
@@ -242,12 +250,89 @@ intr:
 	bclr $flags $p0
 	iret
 
-// request the current process be sent a message after a timeout expires
+// calculate the number of ticks in the specified nanoseconds delay
+//
+// $r15 - current
+// $r14 - ns
+// $r14 - ticks (return)
+// $r0  - zero
+ticks_from_ns:
+	push $r12
+	push $r11
+
+	/* try not losing precision (multiply then divide) */
+	imm32($r13, HW_TICKS_PER_US)
+	call #mulu32_32_64
+
+	/* use an immeditate, it's ok because HW_TICKS_PER_US < 16 bits */
+	div $r12 $r12 1000
+
+	/* check if there wasn't any overflow */
+	cmpu b32 $r11 0
+	bra e #ticks_from_ns_quit
+
+	/* let's divide then multiply, too bad for the precision! */
+	div $r14 $r14 1000
+	imm32($r13, HW_TICKS_PER_US)
+	call #mulu32_32_64
+
+	/* this cannot overflow as long as HW_TICKS_PER_US < 1000 */
+
+ticks_from_ns_quit:
+	mov b32 $r14 $r12
+	pop $r11
+	pop $r12
+	ret
+
+// calculate the number of ticks in the specified microsecond delay
+//
+// $r15 - current
+// $r14 - us
+// $r14 - ticks (return)
+// $r0  - zero
+ticks_from_us:
+	push $r12
+	push $r11
+
+	/* simply multiply $us by HW_TICKS_PER_US */
+	imm32($r13, HW_TICKS_PER_US)
+	call #mulu32_32_64
+	mov b32 $r14 $r12
+
+	/* check if there wasn't any overflow */
+	cmpu b32 $r11 0
+	bra e #ticks_from_us_quit
+
+	/* Overflow! */
+	clear b32 $r14
+
+ticks_from_us_quit:
+	pop $r11
+	pop $r12
+	ret
+
+// calculate the number of ticks in the specified microsecond delay
 //
 // $r15 - current
 // $r14 - ticks
+// $r14 - us (return)
+// $r0  - zero
+ticks_to_us:
+	/* simply divide $ticks by HW_TICKS_PER_US */
+	imm32($r13, HW_TICKS_PER_US)
+	div $r14 $r14 $r13
+
+	ret
+
+// request the current process be sent a message after a timeout expires
+//
+// $r15 - current
+// $r14 - ticks (make sure it is < 2^31 to avoid any possible overflow)
 // $r0  - zero
 timer:
+	push $r9
+	push $r8
+
 	// interrupts off to prevent racing with timer isr
 	bclr $flags ie0
 
@@ -255,13 +340,22 @@ timer:
 	ld b32 $r8 D[$r15 + #proc_time]
 	cmp b32 $r8 0
 	bra g #timer_done
-	st b32 D[$r15 + #proc_time] $r14
 
-	// halt watchdog timer temporarily and check for a pending
-	// interrupt.  if there's one already pending, we can just
-	// bail since the timer isr will queue the next soonest
-	// right after it's done
+	// halt watchdog timer temporarily
+	clear b32 $r8
 	nv_iowr(NV_PPWR_WATCHDOG_ENABLE, $r8)
+
+	// find out how much time elapsed since the last update
+	// of the watchdog and add this time to the wanted ticks
+	nv_iord($r8, NV_PPWR_WATCHDOG_TIME)
+	ld b32 $r9 D[$r0 + #time_prev]
+	sub b32 $r9 $r8
+	add b32 $r14 $r9
+	st b32 D[$r15 + #proc_time] $r14
+
+	// check for a pending interrupt.  if there's one already
+	// pending, we can just bail since the timer isr will
+	// queue the next soonest right after it's done
 	nv_iord($r8, NV_PPWR_INTR)
 	and $r8 NV_PPWR_INTR_WATCHDOG
 	bra nz #timer_enable
@@ -272,10 +366,10 @@ timer:
 	cmp b32 $r14 $r0
 	bra e #timer_reset
 	cmp b32 $r14 $r8
-	bra l #timer_done
-	timer_reset:
-	nv_iowr(NV_PPWR_WATCHDOG_TIME, $r14)
-	st b32 D[$r0 + #time_prev] $r14
+	bra g #timer_enable
+		timer_reset:
+		nv_iowr(NV_PPWR_WATCHDOG_TIME, $r14)
+		st b32 D[$r0 + #time_prev] $r14
 
 	// re-enable the watchdog timer
 	timer_enable:
@@ -285,6 +379,9 @@ timer:
 	// interrupts back on
 	timer_done:
 	bset $flags ie0
+
+	pop $r8
+	pop $r9
 	ret
 
 // send message to another process
@@ -371,6 +468,9 @@ send:
 // $r14 - process
 // $r0  - zero
 recv:
+	push $r9
+	push $r8
+
 	ld b32 $r8 D[$r14 + #proc_qget]
 	ld b32 $r9 D[$r14 + #proc_qput]
 	bclr $flags $p1
@@ -403,6 +503,8 @@ recv:
 		bset $flags $p1
 		pop $r15
 	recv_done:
+	pop $r8
+	pop $r9
 	ret
 
 init:
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/macros.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/macros.fuc
index 5668e045bac1..96fc984dafdc 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/macros.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/macros.fuc
@@ -250,3 +250,23 @@
 */	st b32 D[$r0] reg /*
 */	clear b32 $r0
 #endif
+
+#define st(size, addr, reg) /*
+*/	movw $r0 addr /*
+*/	st size D[$r0] reg /*
+*/	clear b32 $r0
+
+#define ld(size, reg, addr) /*
+*/	movw $r0 addr /*
+*/	ld size reg D[$r0] /*
+*/	clear b32 $r0
+
+// does a 64+64 -> 64 unsigned addition (C = A + B)
+#define addu64(reg_a_c_hi, reg_a_c_lo, b_hi, b_lo) /*
+*/    add b32 reg_a_c_lo b_lo /*
+*/    adc b32 reg_a_c_hi b_hi
+
+// does a 64+64 -> 64 substraction (C = A - B)
+#define subu64(reg_a_c_hi, reg_a_c_lo, b_hi, b_lo) /*
+*/    sub b32 reg_a_c_lo b_lo /*
+*/    sbb b32 reg_a_c_hi b_hi
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc
index d43741eccb11..e89789a53b80 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/memx.fuc
@@ -43,17 +43,23 @@ process(PROC_MEMX, #memx_init, #memx_recv)
 */	.b32 func
 
 memx_func_head:
-handler(ENTER , 0x0001, 0x0000, #memx_func_enter)
+handler(ENTER , 0x0000, 0x0000, #memx_func_enter)
 memx_func_next:
 handler(LEAVE , 0x0000, 0x0000, #memx_func_leave)
 handler(WR32  , 0x0000, 0x0002, #memx_func_wr32)
 handler(WAIT  , 0x0004, 0x0000, #memx_func_wait)
 handler(DELAY , 0x0001, 0x0000, #memx_func_delay)
+handler(VBLANK, 0x0001, 0x0000, #memx_func_wait_vblank)
 memx_func_tail:
 
 .equ #memx_func_size #memx_func_next - #memx_func_head
 .equ #memx_func_num (#memx_func_tail - #memx_func_head) / #memx_func_size
 
+memx_ts_start:
+.b32 0
+memx_ts_end:
+.b32 0
+
 memx_data_head:
 .skip 0x0800
 memx_data_tail:
@@ -67,19 +73,44 @@ memx_data_tail:
 //
 // $r15 - current (memx)
 // $r4  - packet length
-//	+00: bitmask of heads to wait for vblank on
 // $r3  - opcode desciption
 // $r0  - zero
 memx_func_enter:
+#if NVKM_PPWR_CHIPSET == GT215
+	movw $r8 0x1610
+	nv_rd32($r7, $r8)
+	imm32($r6, 0xfffffffc)
+	and $r7 $r6
+	movw $r6 0x2
+	or $r7 $r6
+	nv_wr32($r8, $r7)
+#else
+	movw $r6 0x001620
+	imm32($r7, ~0x00000aa2);
+	nv_rd32($r8, $r6)
+	and $r8 $r7
+	nv_wr32($r6, $r8)
+
+	imm32($r7, ~0x00000001)
+	nv_rd32($r8, $r6)
+	and $r8 $r7
+	nv_wr32($r6, $r8)
+
+	movw $r6 0x0026f0
+	nv_rd32($r8, $r6)
+	and $r8 $r7
+	nv_wr32($r6, $r8)
+#endif
+
 	mov $r6 NV_PPWR_OUTPUT_SET_FB_PAUSE
 	nv_iowr(NV_PPWR_OUTPUT_SET, $r6)
 	memx_func_enter_wait:
 		nv_iord($r6, NV_PPWR_OUTPUT)
 		and $r6 NV_PPWR_OUTPUT_FB_PAUSE
 		bra z #memx_func_enter_wait
-	//XXX: TODO
-	ld b32 $r6 D[$r1 + 0x00]
-	add b32 $r1 0x04
+
+	nv_iord($r6, NV_PPWR_TIMER_LOW)
+	st b32 D[$r0 + #memx_ts_start] $r6
 	ret
 
 // description
@@ -89,14 +120,93 @@ memx_func_enter:
 // $r3  - opcode desciption
 // $r0  - zero
 memx_func_leave:
+	nv_iord($r6, NV_PPWR_TIMER_LOW)
+	st b32 D[$r0 + #memx_ts_end] $r6
+
 	mov $r6 NV_PPWR_OUTPUT_CLR_FB_PAUSE
 	nv_iowr(NV_PPWR_OUTPUT_CLR, $r6)
 	memx_func_leave_wait:
 		nv_iord($r6, NV_PPWR_OUTPUT)
 		and $r6 NV_PPWR_OUTPUT_FB_PAUSE
 		bra nz #memx_func_leave_wait
+
+#if NVKM_PPWR_CHIPSET == GT215
+	movw $r8 0x1610
+	nv_rd32($r7, $r8)
+	imm32($r6, 0xffffffcc)
+	and $r7 $r6
+	nv_wr32($r8, $r7)
+#else
+	movw $r6 0x0026f0
+	imm32($r7, 0x00000001)
+	nv_rd32($r8, $r6)
+	or $r8 $r7
+	nv_wr32($r6, $r8)
+
+	movw $r6 0x001620
+	nv_rd32($r8, $r6)
+	or $r8 $r7
+	nv_wr32($r6, $r8)
+
+	imm32($r7, 0x00000aa2);
+	nv_rd32($r8, $r6)
+	or $r8 $r7
+	nv_wr32($r6, $r8)
+#endif
+	ret
+
+#if NVKM_PPWR_CHIPSET < GF119
+// description
+//
+// $r15 - current (memx)
+// $r4  - packet length
+//	+00: head to wait for vblank on
+// $r3  - opcode desciption
+// $r0  - zero
+memx_func_wait_vblank:
+	ld b32 $r6 D[$r1 + 0x00]
+	cmp b32 $r6 0x0
+	bra z #memx_func_wait_vblank_head0
+	cmp b32 $r6 0x1
+	bra z #memx_func_wait_vblank_head1
+	bra #memx_func_wait_vblank_fini
+
+	memx_func_wait_vblank_head1:
+	movw $r7 0x20
+	bra #memx_func_wait_vblank_0
+
+	memx_func_wait_vblank_head0:
+	movw $r7 0x8
+
+	memx_func_wait_vblank_0:
+		nv_iord($r6, NV_PPWR_INPUT)
+		and $r6 $r7
+		bra nz #memx_func_wait_vblank_0
+
+	memx_func_wait_vblank_1:
+		nv_iord($r6, NV_PPWR_INPUT)
+		and $r6 $r7
+		bra z #memx_func_wait_vblank_1
+
+	memx_func_wait_vblank_fini:
+	add b32 $r1 0x4
+	ret
+
+#else
+
+// XXX: currently no-op
+//
+// $r15 - current (memx)
+// $r4  - packet length
+//	+00: head to wait for vblank on
+// $r3  - opcode desciption
+// $r0  - zero
+memx_func_wait_vblank:
+	add b32 $r1 0x4
 	ret
 
+#endif
+
 // description
 //
 // $r15 - current (memx)
@@ -160,14 +270,17 @@ memx_exec:
 	push $r13
 	mov b32 $r1 $r12
 	mov b32 $r2 $r11
+
 	memx_exec_next:
-		// fetch the packet header, and locate opcode info
+		// fetch the packet header
 		ld b32 $r3 D[$r1]
 		add b32 $r1 4
-		shr b32 $r4 $r3 16
-		mulu $r3 #memx_func_size
+		extr $r4 $r3 16:31
+		extr $r3 $r3 0:15
 
 		// execute the opcode handler
+		sub b32 $r3 1
+		mulu $r3 #memx_func_size
 		ld b32 $r5 D[$r3 + #memx_func_head + #memx_func]
 		call $r5
 
@@ -176,6 +289,10 @@ memx_exec:
 		bra l #memx_exec_next
 
 	// send completion reply
+	ld b32 $r11 D[$r0 + #memx_ts_start]
+	ld b32 $r12 D[$r0 + #memx_ts_end]
+	sub b32 $r12 $r11
+	nv_iord($r11, NV_PPWR_INPUT)
 	pop $r13
 	pop $r14
 	call(send)
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc
index 17a8a383d91a..b439519ec866 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc
@@ -23,6 +23,7 @@
  */
 
 #define NVKM_PPWR_CHIPSET GK208
+#define HW_TICKS_PER_US 324
 
 #define NVKM_FALCON_PC24
 #define NVKM_FALCON_UNSHIFTED_IO
@@ -34,6 +35,7 @@
 .section #nv108_pwr_data
 #define INCLUDE_PROC
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -44,6 +46,7 @@
 
 #define INCLUDE_DATA
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -56,6 +59,7 @@
 .section #nv108_pwr_code
 #define INCLUDE_CODE
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc.h
index 986495d533dd..4d278a96b2bb 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nv108.fuc.h
@@ -24,8 +24,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000379,
-	0x0000032a,
+	0x00000453,
+	0x00000404,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000464,
-	0x00000456,
+	0x0000061c,
+	0x0000060e,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000468,
-	0x00000466,
+	0x00000620,
+	0x0000061e,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x0000086c,
-	0x00000713,
+	0x00000a24,
+	0x000008cb,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x0000088d,
-	0x0000086e,
+	0x00000a45,
+	0x00000a26,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000898,
-	0x00000896,
+	0x00000a50,
+	0x00000a4e,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -227,25 +227,31 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 /* 0x0370: memx_func_head */
-	0x00010000,
-	0x00000000,
-	0x000003a9,
-/* 0x037c: memx_func_next */
 	0x00000001,
 	0x00000000,
-	0x000003c7,
+	0x00000483,
+/* 0x037c: memx_func_next */
 	0x00000002,
+	0x00000000,
+	0x00000500,
+	0x00000003,
 	0x00000002,
-	0x000003df,
-	0x00040003,
+	0x00000580,
+	0x00040004,
+	0x00000000,
+	0x0000059d,
+	0x00010005,
+	0x00000000,
+	0x000005b7,
+	0x00010006,
 	0x00000000,
-	0x000003fc,
-	0x00010004,
+	0x0000057b,
+/* 0x03b8: memx_func_tail */
+/* 0x03b8: memx_ts_start */
 	0x00000000,
-	0x00000416,
-/* 0x03ac: memx_func_tail */
-/* 0x03ac: memx_data_head */
+/* 0x03bc: memx_ts_end */
 	0x00000000,
+/* 0x03c0: memx_data_head */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -757,8 +763,9 @@ uint32_t nv108_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0bac: memx_data_tail */
-/* 0x0bac: i2c_scl_map */
+	0x00000000,
+/* 0x0bc0: memx_data_tail */
+/* 0x0bc0: i2c_scl_map */
 	0x00000400,
 	0x00000800,
 	0x00001000,
@@ -769,7 +776,7 @@ uint32_t nv108_pwr_data[] = {
 	0x00020000,
 	0x00040000,
 	0x00080000,
-/* 0x0bd4: i2c_sda_map */
+/* 0x0be8: i2c_sda_map */
 	0x00100000,
 	0x00200000,
 	0x00400000,
@@ -781,10 +788,69 @@ uint32_t nv108_pwr_data[] = {
 	0x10000000,
 	0x20000000,
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 };
 
 uint32_t nv108_pwr_code[] = {
-	0x02910ef5,
+	0x031c0ef5,
 /* 0x0004: rd32 */
 	0xf607a040,
 	0x04bd000e,
@@ -812,15 +878,18 @@ uint32_t nv108_pwr_code[] = {
 	0x7000d4f1,
 	0xf8f61bf4,
 /* 0x005d: nsec */
-	0xcf2c0800,
-/* 0x0062: nsec_loop */
+	0xf990f900,
+	0xcf2c0880,
+/* 0x0066: nsec_loop */
 	0x2c090088,
 	0xbb0099cf,
 	0x9ea60298,
-	0xf8f61ef4,
-/* 0x0071: wait */
-	0xcf2c0800,
-/* 0x0076: wait_loop */
+	0xfcf61ef4,
+	0xf890fc80,
+/* 0x0079: wait */
+	0xf990f900,
+	0xcf2c0880,
+/* 0x0082: wait_loop */
 	0xeeb20088,
 	0x0000047e,
 	0xadfddab2,
@@ -828,28 +897,29 @@ uint32_t nv108_pwr_code[] = {
 	0x2c09100b,
 	0xbb0099cf,
 	0x9ba60298,
-/* 0x0093: wait_done */
-	0xf8e61ef4,
-/* 0x0095: intr_watchdog */
+/* 0x009f: wait_done */
+	0xfce61ef4,
+	0xf890fc80,
+/* 0x00a5: intr_watchdog */
 	0x03e99800,
 	0xf40096b0,
 	0x0a98280b,
 	0x029abb9a,
 	0x0d0e1cf4,
-	0x01de7e01,
+	0x02617e01,
 	0xf494bd00,
-/* 0x00b2: intr_watchdog_next_time */
+/* 0x00c2: intr_watchdog_next_time */
 	0x0a98140e,
 	0x00a6b09b,
 	0xa6080bf4,
 	0x061cf49a,
-/* 0x00c0: intr_watchdog_next_time_set */
-/* 0x00c3: intr_watchdog_next_proc */
+/* 0x00d0: intr_watchdog_next_time_set */
+/* 0x00d3: intr_watchdog_next_proc */
 	0xb59b09b5,
 	0xe0b603e9,
 	0x68e6b158,
 	0xc81bf402,
-/* 0x00d2: intr */
+/* 0x00e2: intr */
 	0x00f900f8,
 	0x80f904bd,
 	0xa0f990f9,
@@ -865,13 +935,13 @@ uint32_t nv108_pwr_code[] = {
 	0xc40088cf,
 	0x0bf40289,
 	0x9b00b51f,
-	0x957e580e,
+	0xa57e580e,
 	0x09980000,
 	0x0096b09b,
 	0x000d0bf4,
 	0x0009f634,
 	0x09b504bd,
-/* 0x0125: intr_skip_watchdog */
+/* 0x0135: intr_skip_watchdog */
 	0x0089e49a,
 	0x360bf408,
 	0xcf068849,
@@ -881,20 +951,20 @@ uint32_t nv108_pwr_code[] = {
 	0xc0f900cc,
 	0xf14f484e,
 	0x0d5453e3,
-	0x023f7e00,
+	0x02c27e00,
 	0x40c0fc00,
 	0x0cf604c0,
-/* 0x0157: intr_subintr_skip_fifo */
+/* 0x0167: intr_subintr_skip_fifo */
 	0x4004bd00,
 	0x09f60688,
-/* 0x015f: intr_skip_subintr */
+/* 0x016f: intr_skip_subintr */
 	0xc404bd00,
 	0x0bf42089,
 	0xbfa4f107,
-/* 0x0169: intr_skip_pause */
+/* 0x0179: intr_skip_pause */
 	0x4089c4ff,
 	0xf1070bf4,
-/* 0x0173: intr_skip_user0 */
+/* 0x0183: intr_skip_user0 */
 	0x00ffbfa4,
 	0x0008f604,
 	0x80fc04bd,
@@ -904,304 +974,417 @@ uint32_t nv108_pwr_code[] = {
 	0xfca0fcb0,
 	0xfc80fc90,
 	0x0032f400,
-/* 0x0196: timer */
-	0x32f401f8,
-	0x03f89810,
-	0xf40086b0,
-	0xfeb53a1c,
-	0xf6380003,
+/* 0x01a6: ticks_from_ns */
+	0xc0f901f8,
+	0xd7f1b0f9,
+	0xd3f00144,
+	0x7721f500,
+	0xe8ccec03,
+	0x00b4b003,
+	0xec120bf4,
+	0xf103e8ee,
+	0xf00144d7,
+	0x21f500d3,
+/* 0x01ce: ticks_from_ns_quit */
+	0xceb20377,
+	0xc0fcb0fc,
+/* 0x01d6: ticks_from_us */
+	0xc0f900f8,
+	0xd7f1b0f9,
+	0xd3f00144,
+	0x7721f500,
+	0xb0ceb203,
+	0x0bf400b4,
+/* 0x01ef: ticks_from_us_quit */
+	0xfce4bd05,
+	0xf8c0fcb0,
+/* 0x01f5: ticks_to_us */
+	0x44d7f100,
+	0x00d3f001,
+	0xf8ecedff,
+/* 0x0201: timer */
+	0xf990f900,
+	0x1032f480,
+	0xb003f898,
+	0x1cf40086,
+	0x0084bd4a,
+	0x0008f638,
+	0x340804bd,
+	0x980088cf,
+	0x98bb9a09,
+	0x00e9bb02,
+	0x0803feb5,
+	0x0088cf08,
+	0xf40284f0,
+	0x34081c1b,
+	0xa60088cf,
+	0x080bf4e0,
+	0x1cf4e8a6,
+/* 0x0245: timer_reset */
+	0xf634000d,
+	0x04bd000e,
+/* 0x024f: timer_enable */
+	0x089a0eb5,
+	0xf6380001,
 	0x04bd0008,
-	0x88cf0808,
-	0x0284f000,
-	0x081c1bf4,
-	0x0088cf34,
-	0x0bf4e0a6,
-	0xf4e8a608,
-/* 0x01c6: timer_reset */
-	0x3400161e,
-	0xbd000ef6,
-	0x9a0eb504,
-/* 0x01d0: timer_enable */
-	0x38000108,
-	0xbd0008f6,
-/* 0x01d9: timer_done */
-	0x1031f404,
-/* 0x01de: send_proc */
-	0x80f900f8,
-	0xe89890f9,
-	0x04e99805,
-	0xa60486f0,
-	0x2a0bf489,
-	0x940398c4,
-	0x80b60488,
-	0x008ebb18,
-	0xb500fa98,
-	0x8db5008a,
-	0x028cb501,
-	0xb6038bb5,
-	0x94f00190,
-	0x04e9b507,
-/* 0x0217: send_done */
-	0xfc0231f4,
-	0xf880fc90,
-/* 0x021d: find */
-	0x0880f900,
-	0x0131f458,
-/* 0x0224: find_loop */
-	0xa6008a98,
-	0x100bf4ae,
-	0xb15880b6,
-	0xf4026886,
-	0x32f4f11b,
-/* 0x0239: find_done */
-	0xfc8eb201,
-/* 0x023f: send */
-	0x7e00f880,
-	0xf400021d,
-	0x00f89b01,
-/* 0x0248: recv */
-	0x9805e898,
-	0x32f404e9,
-	0xf489a601,
-	0x89c43c0b,
-	0x0180b603,
-	0xb50784f0,
-	0xea9805e8,
-	0xfef0f902,
-	0xf0f9018f,
-	0x9994efb2,
-	0x00e9bb04,
-	0x9818e0b6,
-	0xec9803eb,
-	0x01ed9802,
-	0xf900ee98,
-	0xfef0fca5,
-	0x31f400f8,
-/* 0x028f: recv_done */
-	0xf8f0fc01,
-/* 0x0291: init */
-	0x01084100,
-	0xe70011cf,
-	0xb6010911,
-	0x14fe0814,
-	0x00e04100,
-	0x000013f0,
-	0x0001f61c,
-	0xff0104bd,
-	0x01f61400,
-	0x0104bd00,
-	0x0015f102,
-	0xf6100008,
-	0x04bd0001,
-	0xf000d241,
-	0x10fe0013,
-	0x1031f400,
-	0x38000101,
+/* 0x0258: timer_done */
+	0xfc1031f4,
+	0xf890fc80,
+/* 0x0261: send_proc */
+	0xf980f900,
+	0x05e89890,
+	0xf004e998,
+	0x89a60486,
+	0xc42a0bf4,
+	0x88940398,
+	0x1880b604,
+	0x98008ebb,
+	0x8ab500fa,
+	0x018db500,
+	0xb5028cb5,
+	0x90b6038b,
+	0x0794f001,
+	0xf404e9b5,
+/* 0x029a: send_done */
+	0x90fc0231,
+	0x00f880fc,
+/* 0x02a0: find */
+	0x580880f9,
+/* 0x02a7: find_loop */
+	0x980131f4,
+	0xaea6008a,
+	0xb6100bf4,
+	0x86b15880,
+	0x1bf40268,
+	0x0132f4f1,
+/* 0x02bc: find_done */
+	0x80fc8eb2,
+/* 0x02c2: send */
+	0xa07e00f8,
+	0x01f40002,
+/* 0x02cb: recv */
+	0xf900f89b,
+	0x9880f990,
+	0xe99805e8,
+	0x0132f404,
+	0x0bf489a6,
+	0x0389c43c,
+	0xf00180b6,
+	0xe8b50784,
+	0x02ea9805,
+	0x8ffef0f9,
+	0xb2f0f901,
+	0x049994ef,
+	0xb600e9bb,
+	0xeb9818e0,
+	0x02ec9803,
+	0x9801ed98,
+	0xa5f900ee,
+	0xf8fef0fc,
+	0x0131f400,
+/* 0x0316: recv_done */
+	0x80fcf0fc,
+	0x00f890fc,
+/* 0x031c: init */
+	0xcf010841,
+	0x11e70011,
+	0x14b60109,
+	0x0014fe08,
+	0xf000e041,
+	0x1c000013,
 	0xbd0001f6,
-/* 0x02db: init_proc */
-	0x98580f04,
-	0x16b001f1,
-	0xfa0bf400,
-	0xf0b615f9,
-	0xf20ef458,
-/* 0x02ec: host_send */
-	0xcf04b041,
-	0xa0420011,
-	0x0022cf04,
-	0x0bf412a6,
-	0x071ec42e,
-	0xb704ee94,
-	0x980270e0,
-	0xec9803eb,
-	0x01ed9802,
-	0x7e00ee98,
-	0xb600023f,
-	0x1ec40110,
-	0x04b0400f,
-	0xbd000ef6,
-	0xc70ef404,
-/* 0x0328: host_send_done */
-/* 0x032a: host_recv */
-	0x494100f8,
-	0x5413f14e,
-	0xf4e1a652,
-/* 0x0336: host_recv_wait */
-	0xcc41b90b,
-	0x0011cf04,
-	0xcf04c842,
-	0x16f00022,
-	0xf412a608,
-	0x23c4ef0b,
-	0x0434b607,
-	0x02f030b7,
-	0xb5033bb5,
-	0x3db5023c,
-	0x003eb501,
-	0xf00120b6,
-	0xc8400f24,
-	0x0002f604,
-	0x400204bd,
-	0x02f60000,
-	0xf804bd00,
-/* 0x0379: host_init */
-	0x00804100,
-	0xf11014b6,
-	0x40027015,
-	0x01f604d0,
+	0x00ff0104,
+	0x0001f614,
+	0x020104bd,
+	0x080015f1,
+	0x01f61000,
 	0x4104bd00,
+	0x13f000e2,
+	0x0010fe00,
+	0x011031f4,
+	0xf6380001,
+	0x04bd0001,
+/* 0x0366: init_proc */
+	0xf198580f,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x0377: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb2301dff,
+	0xff34f134,
+	0x1034b6ff,
+	0xbb1045b6,
+	0xb4bb00c3,
+	0x30e2ff01,
+	0x34f134b2,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0x12ff01b4,
+	0x00b3bb30,
+	0x30fc40fc,
+	0x10fc20fc,
+/* 0x03c6: host_send */
+	0xb04100f8,
+	0x0011cf04,
+	0xcf04a042,
+	0x12a60022,
+	0xc42e0bf4,
+	0xee94071e,
+	0x70e0b704,
+	0x03eb9802,
+	0x9802ec98,
+	0xee9801ed,
+	0x02c27e00,
+	0x0110b600,
+	0x400f1ec4,
+	0x0ef604b0,
+	0xf404bd00,
+/* 0x0402: host_send_done */
+	0x00f8c70e,
+/* 0x0404: host_recv */
+	0xf14e4941,
+	0xa6525413,
+	0xb90bf4e1,
+/* 0x0410: host_recv_wait */
+	0xcf04cc41,
+	0xc8420011,
+	0x0022cf04,
+	0xa60816f0,
+	0xef0bf412,
+	0xb60723c4,
+	0x30b70434,
+	0x3bb502f0,
+	0x023cb503,
+	0xb5013db5,
+	0x20b6003e,
+	0x0f24f001,
+	0xf604c840,
+	0x04bd0002,
+	0x00004002,
+	0xbd0002f6,
+/* 0x0453: host_init */
+	0x4100f804,
 	0x14b60080,
-	0xf015f110,
-	0x04dc4002,
+	0x7015f110,
+	0x04d04002,
+	0xbd0001f6,
+	0x00804104,
+	0xf11014b6,
+	0x4002f015,
+	0x01f604dc,
+	0x0104bd00,
+	0x04c44001,
 	0xbd0001f6,
-	0x40010104,
-	0x01f604c4,
-	0xf804bd00,
-/* 0x03a9: memx_func_enter */
-	0x40040600,
-	0x06f607e0,
-/* 0x03b3: memx_func_enter_wait */
-	0x4604bd00,
-	0x66cf07c0,
-	0x0464f000,
-	0x98f70bf4,
-	0x10b60016,
-/* 0x03c7: memx_func_leave */
-	0x0600f804,
-	0x07e44004,
-	0xbd0006f6,
-/* 0x03d1: memx_func_leave_wait */
-	0x07c04604,
-	0xf00066cf,
-	0x1bf40464,
-/* 0x03df: memx_func_wr32 */
-	0x9800f8f7,
-	0x15980016,
-	0x0810b601,
-	0x50f960f9,
+/* 0x0483: memx_func_enter */
+	0xf100f804,
+	0xf1162067,
+	0xf1f55d77,
+	0xb2ffff73,
+	0x00047e6e,
+	0xfdd8b200,
+	0x60f90487,
+	0xd0fc80f9,
+	0x2e7ee0fc,
+	0x77f10000,
+	0x73f1fffe,
+	0x6eb2ffff,
+	0x0000047e,
+	0x87fdd8b2,
+	0xf960f904,
+	0xfcd0fc80,
+	0x002e7ee0,
+	0xf067f100,
+	0x7e6eb226,
+	0xb2000004,
+	0x0487fdd8,
+	0x80f960f9,
 	0xe0fcd0fc,
 	0x00002e7e,
-	0xf40242b6,
-	0x00f8e81b,
-/* 0x03fc: memx_func_wait */
-	0x88cf2c08,
-	0x001e9800,
-	0x98011d98,
-	0x1b98021c,
-	0x1010b603,
-	0x0000717e,
-/* 0x0416: memx_func_delay */
-	0x1e9800f8,
-	0x0410b600,
-	0x00005d7e,
-/* 0x0422: memx_exec */
-	0xe0f900f8,
-	0xc1b2d0f9,
-/* 0x042a: memx_exec_next */
-	0x1398b2b2,
-	0x0410b600,
-	0xf0103495,
-	0x35980c30,
-	0xa655f9de,
-	0xed1ef412,
+	0xe0400406,
+	0x0006f607,
+/* 0x04ea: memx_func_enter_wait */
+	0xc04604bd,
+	0x0066cf07,
+	0xf40464f0,
+	0x2c06f70b,
+	0xb50066cf,
+	0x00f8ee06,
+/* 0x0500: memx_func_leave */
+	0x66cf2c06,
+	0xef06b500,
+	0xe4400406,
+	0x0006f607,
+/* 0x0512: memx_func_leave_wait */
+	0xc04604bd,
+	0x0066cf07,
+	0xf40464f0,
+	0x67f1f71b,
+	0x77f126f0,
+	0x73f00001,
+	0x7e6eb200,
+	0xb2000004,
+	0x0587fdd8,
+	0x80f960f9,
 	0xe0fcd0fc,
-	0x00023f7e,
-/* 0x044a: memx_info */
-	0xac4c00f8,
+	0x00002e7e,
+	0x162067f1,
+	0x047e6eb2,
+	0xd8b20000,
+	0xf90587fd,
+	0xfc80f960,
+	0x7ee0fcd0,
+	0xf100002e,
+	0xf00aa277,
+	0x6eb20073,
+	0x0000047e,
+	0x87fdd8b2,
+	0xf960f905,
+	0xfcd0fc80,
+	0x002e7ee0,
+/* 0x057b: memx_func_wait_vblank */
+	0xb600f800,
+	0x00f80410,
+/* 0x0580: memx_func_wr32 */
+	0x98001698,
+	0x10b60115,
+	0xf960f908,
+	0xfcd0fc50,
+	0x002e7ee0,
+	0x0242b600,
+	0xf8e81bf4,
+/* 0x059d: memx_func_wait */
+	0xcf2c0800,
+	0x1e980088,
+	0x011d9800,
+	0x98021c98,
+	0x10b6031b,
+	0x00797e10,
+/* 0x05b7: memx_func_delay */
+	0x9800f800,
+	0x10b6001e,
+	0x005d7e04,
+/* 0x05c3: memx_exec */
+	0xf900f800,
+	0xb2d0f9e0,
+/* 0x05cb: memx_exec_next */
+	0x98b2b2c1,
+	0x10b60013,
+	0xf034e704,
+	0xe033e701,
+	0x0132b601,
+	0x980c30f0,
+	0x55f9de35,
+	0x1ef412a6,
+	0xee0b98e5,
+	0xbbef0c98,
+	0xc44b02cb,
+	0x00bbcf07,
+	0xe0fcd0fc,
+	0x0002c27e,
+/* 0x0602: memx_info */
+	0xc04c00f8,
 	0x08004b03,
-	0x00023f7e,
-/* 0x0456: memx_recv */
+	0x0002c27e,
+/* 0x060e: memx_recv */
 	0xd6b000f8,
-	0xc90bf401,
+	0xb20bf401,
 	0xf400d6b0,
 	0x00f8eb0b,
-/* 0x0464: memx_init */
-/* 0x0466: perf_recv */
+/* 0x061c: memx_init */
+/* 0x061e: perf_recv */
 	0x00f800f8,
-/* 0x0468: perf_init */
-/* 0x046a: i2c_drive_scl */
+/* 0x0620: perf_init */
+/* 0x0622: i2c_drive_scl */
 	0x36b000f8,
 	0x0d0bf400,
 	0xf607e040,
 	0x04bd0001,
-/* 0x047a: i2c_drive_scl_lo */
+/* 0x0632: i2c_drive_scl_lo */
 	0xe44000f8,
 	0x0001f607,
 	0x00f804bd,
-/* 0x0484: i2c_drive_sda */
+/* 0x063c: i2c_drive_sda */
 	0xf40036b0,
 	0xe0400d0b,
 	0x0002f607,
 	0x00f804bd,
-/* 0x0494: i2c_drive_sda_lo */
+/* 0x064c: i2c_drive_sda_lo */
 	0xf607e440,
 	0x04bd0002,
-/* 0x049e: i2c_sense_scl */
+/* 0x0656: i2c_sense_scl */
 	0x32f400f8,
 	0x07c44301,
 	0xfd0033cf,
 	0x0bf40431,
 	0x0131f406,
-/* 0x04b0: i2c_sense_scl_done */
-/* 0x04b2: i2c_sense_sda */
+/* 0x0668: i2c_sense_scl_done */
+/* 0x066a: i2c_sense_sda */
 	0x32f400f8,
 	0x07c44301,
 	0xfd0033cf,
 	0x0bf40432,
 	0x0131f406,
-/* 0x04c4: i2c_sense_sda_done */
-/* 0x04c6: i2c_raise_scl */
+/* 0x067c: i2c_sense_sda_done */
+/* 0x067e: i2c_raise_scl */
 	0x40f900f8,
 	0x03089844,
-	0x046a7e01,
-/* 0x04d1: i2c_raise_scl_wait */
+	0x06227e01,
+/* 0x0689: i2c_raise_scl_wait */
 	0x03e84e00,
 	0x00005d7e,
-	0x00049e7e,
+	0x0006567e,
 	0xb60901f4,
 	0x1bf40142,
-/* 0x04e5: i2c_raise_scl_done */
+/* 0x069d: i2c_raise_scl_done */
 	0xf840fcef,
-/* 0x04e9: i2c_start */
-	0x049e7e00,
+/* 0x06a1: i2c_start */
+	0x06567e00,
 	0x0d11f400,
-	0x0004b27e,
+	0x00066a7e,
 	0xf40611f4,
-/* 0x04fa: i2c_start_rep */
+/* 0x06b2: i2c_start_rep */
 	0x00032e0e,
-	0x00046a7e,
-	0x847e0103,
-	0x76bb0004,
+	0x0006227e,
+	0x3c7e0103,
+	0x76bb0006,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb60004c6,
+	0xb600067e,
 	0x11f40464,
-/* 0x0525: i2c_start_send */
+/* 0x06dd: i2c_start_send */
 	0x7e00031d,
-	0x4e000484,
+	0x4e00063c,
 	0x5d7e1388,
 	0x00030000,
-	0x00046a7e,
+	0x0006227e,
 	0x7e13884e,
-/* 0x053f: i2c_start_out */
+/* 0x06f7: i2c_start_out */
 	0xf800005d,
-/* 0x0541: i2c_stop */
+/* 0x06f9: i2c_stop */
 	0x7e000300,
-	0x0300046a,
-	0x04847e00,
+	0x03000622,
+	0x063c7e00,
 	0x03e84e00,
 	0x00005d7e,
-	0x6a7e0103,
-	0x884e0004,
+	0x227e0103,
+	0x884e0006,
 	0x005d7e13,
 	0x7e010300,
-	0x4e000484,
+	0x4e00063c,
 	0x5d7e1388,
 	0x00f80000,
-/* 0x0570: i2c_bitw */
-	0x0004847e,
+/* 0x0728: i2c_bitw */
+	0x00063c7e,
 	0x7e03e84e,
 	0xbb00005d,
 	0x65b60076,
@@ -1209,44 +1392,44 @@ uint32_t nv108_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0004c67e,
+	0x00067e7e,
 	0xf40464b6,
 	0x884e1711,
 	0x005d7e13,
 	0x7e000300,
-	0x4e00046a,
+	0x4e000622,
 	0x5d7e1388,
-/* 0x05ae: i2c_bitw_out */
+/* 0x0766: i2c_bitw_out */
 	0x00f80000,
-/* 0x05b0: i2c_bitr */
-	0x847e0103,
-	0xe84e0004,
+/* 0x0768: i2c_bitr */
+	0x3c7e0103,
+	0xe84e0006,
 	0x005d7e03,
 	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0xc67e50fc,
-	0x64b60004,
+	0x7e7e50fc,
+	0x64b60006,
 	0x1a11f404,
-	0x0004b27e,
-	0x6a7e0003,
-	0x884e0004,
+	0x00066a7e,
+	0x227e0003,
+	0x884e0006,
 	0x005d7e13,
 	0x013cf000,
-/* 0x05f3: i2c_bitr_done */
+/* 0x07ab: i2c_bitr_done */
 	0xf80131f4,
-/* 0x05f5: i2c_get_byte */
+/* 0x07ad: i2c_get_byte */
 	0x04000500,
-/* 0x05f9: i2c_get_byte_next */
+/* 0x07b1: i2c_get_byte_next */
 	0x0154b608,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x05b07e50,
+	0x07687e50,
 	0x0464b600,
 	0xfd2a11f4,
 	0x42b60553,
@@ -1257,11 +1440,11 @@ uint32_t nv108_pwr_code[] = {
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb6000570,
-/* 0x0642: i2c_get_byte_done */
+	0xb6000728,
+/* 0x07fa: i2c_get_byte_done */
 	0x00f80464,
-/* 0x0644: i2c_put_byte */
-/* 0x0646: i2c_put_byte_next */
+/* 0x07fc: i2c_put_byte */
+/* 0x07fe: i2c_put_byte_next */
 	0x42b60804,
 	0x3854ff01,
 	0xb60076bb,
@@ -1269,7 +1452,7 @@ uint32_t nv108_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x05707e50,
+	0x07287e50,
 	0x0464b600,
 	0xb03411f4,
 	0x1bf40046,
@@ -1278,21 +1461,21 @@ uint32_t nv108_pwr_code[] = {
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0xb07e50fc,
-	0x64b60005,
+	0x687e50fc,
+	0x64b60007,
 	0x0f11f404,
 	0xb00076bb,
 	0x1bf40136,
 	0x0132f406,
-/* 0x069c: i2c_put_byte_done */
-/* 0x069e: i2c_addr */
+/* 0x0854: i2c_put_byte_done */
+/* 0x0856: i2c_addr */
 	0x76bb00f8,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb60004e9,
+	0xb60006a1,
 	0x11f40464,
 	0x2ec3e729,
 	0x0134b601,
@@ -1302,32 +1485,32 @@ uint32_t nv108_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0006447e,
-/* 0x06e3: i2c_addr_done */
+	0x0007fc7e,
+/* 0x089b: i2c_addr_done */
 	0xf80464b6,
-/* 0x06e5: i2c_acquire_addr */
+/* 0x089d: i2c_acquire_addr */
 	0xf8cec700,
 	0xb705e4b6,
 	0xf8d014e0,
-/* 0x06f1: i2c_acquire */
-	0x06e57e00,
+/* 0x08a9: i2c_acquire */
+	0x089d7e00,
 	0x00047e00,
 	0x03d9f000,
 	0x00002e7e,
-/* 0x0702: i2c_release */
-	0xe57e00f8,
-	0x047e0006,
+/* 0x08ba: i2c_release */
+	0x9d7e00f8,
+	0x047e0008,
 	0xdaf00000,
 	0x002e7e03,
-/* 0x0713: i2c_recv */
+/* 0x08cb: i2c_recv */
 	0xf400f800,
 	0xc1c70132,
 	0x0214b6f8,
 	0xf52816b0,
 	0xb801371f,
-	0x000bd413,
+	0x000be813,
 	0xb8003298,
-	0x000bac13,
+	0x000bc013,
 	0xf4003198,
 	0xd0f90231,
 	0xd0f9e0f9,
@@ -1339,7 +1522,7 @@ uint32_t nv108_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0006f17e,
+	0x0008a97e,
 	0xfc0464b6,
 	0x00d6b0d0,
 	0x00b01bf5,
@@ -1349,7 +1532,7 @@ uint32_t nv108_pwr_code[] = {
 	0x0256bb04,
 	0x75fd50bd,
 	0x7e50fc04,
-	0xb600069e,
+	0xb6000856,
 	0x11f50464,
 	0xc5c700cc,
 	0x0076bbe0,
@@ -1357,8 +1540,8 @@ uint32_t nv108_pwr_code[] = {
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0x447e50fc,
-	0x64b60006,
+	0xfc7e50fc,
+	0x64b60007,
 	0xa911f504,
 	0xbb010500,
 	0x65b60076,
@@ -1366,7 +1549,7 @@ uint32_t nv108_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x00069e7e,
+	0x0008567e,
 	0xf50464b6,
 	0xbb008711,
 	0x65b60076,
@@ -1374,7 +1557,7 @@ uint32_t nv108_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x0005f57e,
+	0x0007ad7e,
 	0xf40464b6,
 	0x5bcb6711,
 	0x0076bbe0,
@@ -1382,37 +1565,37 @@ uint32_t nv108_pwr_code[] = {
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
-	0x417e50fc,
-	0x64b60005,
+	0xf97e50fc,
+	0x64b60006,
 	0xbd5bb204,
 	0x410ef474,
-/* 0x0818: i2c_recv_not_rd08 */
+/* 0x09d0: i2c_recv_not_rd08 */
 	0xf401d6b0,
 	0x00053b1b,
-	0x00069e7e,
+	0x0008567e,
 	0xc73211f4,
-	0x447ee0c5,
-	0x11f40006,
+	0xfc7ee0c5,
+	0x11f40007,
 	0x7e000528,
-	0xf400069e,
+	0xf4000856,
 	0xb5c71f11,
-	0x06447ee0,
+	0x07fc7ee0,
 	0x1511f400,
-	0x0005417e,
+	0x0006f97e,
 	0xc5c774bd,
 	0x091bf408,
 	0xf40232f4,
-/* 0x0856: i2c_recv_not_wr08 */
-/* 0x0856: i2c_recv_done */
+/* 0x0a0e: i2c_recv_not_wr08 */
+/* 0x0a0e: i2c_recv_done */
 	0xcec7030e,
-	0x07027ef8,
+	0x08ba7ef8,
 	0xfce0fc00,
 	0x0912f4d0,
-	0x3f7e7cb2,
-/* 0x086a: i2c_recv_exit */
+	0xc27e7cb2,
+/* 0x0a22: i2c_recv_exit */
 	0x00f80002,
-/* 0x086c: i2c_init */
-/* 0x086e: test_recv */
+/* 0x0a24: i2c_init */
+/* 0x0a26: test_recv */
 	0x584100f8,
 	0x0011cf04,
 	0x400110b6,
@@ -1420,28 +1603,28 @@ uint32_t nv108_pwr_code[] = {
 	0xf104bd00,
 	0xf1d900e7,
 	0x7e134fe3,
-	0xf8000196,
-/* 0x088d: test_init */
+	0xf8000201,
+/* 0x0a45: test_init */
 	0x08004e00,
-	0x0001967e,
-/* 0x0896: idle_recv */
+	0x0002017e,
+/* 0x0a4e: idle_recv */
 	0x00f800f8,
-/* 0x0898: idle */
+/* 0x0a50: idle */
 	0x410031f4,
 	0x11cf0454,
 	0x0110b600,
 	0xf6045440,
 	0x04bd0001,
-/* 0x08ac: idle_loop */
+/* 0x0a64: idle_loop */
 	0x32f45801,
-/* 0x08b1: idle_proc */
-/* 0x08b1: idle_proc_exec */
+/* 0x0a69: idle_proc */
+/* 0x0a69: idle_proc_exec */
 	0xb210f902,
-	0x02487e1e,
+	0x02cb7e1e,
 	0xf410fc00,
 	0x31f40911,
 	0xf00ef402,
-/* 0x08c4: idle_proc_next */
+/* 0x0a7c: idle_proc_next */
 	0xa65810b6,
 	0xe81bf41f,
 	0xf4e002f4,
@@ -1457,4 +1640,22 @@ uint32_t nv108_pwr_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc
index 6744fcc06151..daa06c1c655e 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc
@@ -23,6 +23,7 @@
  */
 
 #define NVKM_PPWR_CHIPSET GT215
+#define HW_TICKS_PER_US 203 // should be 202.5
 
 //#define NVKM_FALCON_PC24
 //#define NVKM_FALCON_UNSHIFTED_IO
@@ -34,6 +35,7 @@
 .section #nva3_pwr_data
 #define INCLUDE_PROC
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -44,6 +46,7 @@
 
 #define INCLUDE_DATA
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -56,6 +59,7 @@
 .section #nva3_pwr_code
 #define INCLUDE_CODE
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc.h
index e087ce3041be..64e97baabc3c 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nva3.fuc.h
@@ -24,8 +24,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000430,
-	0x000003cd,
+	0x00000512,
+	0x000004af,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000542,
-	0x00000534,
+	0x000006e0,
+	0x000006d2,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000546,
-	0x00000544,
+	0x000006e4,
+	0x000006e2,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000976,
-	0x00000819,
+	0x00000b14,
+	0x000009b7,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x0000099f,
-	0x00000978,
+	0x00000b3d,
+	0x00000b16,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x000009ab,
-	0x000009a9,
+	0x00000b49,
+	0x00000b47,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -227,25 +227,31 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 /* 0x0370: memx_func_head */
-	0x00010000,
-	0x00000000,
-	0x0000046f,
-/* 0x037c: memx_func_next */
 	0x00000001,
 	0x00000000,
-	0x00000496,
+	0x00000551,
+/* 0x037c: memx_func_next */
 	0x00000002,
+	0x00000000,
+	0x000005a8,
+	0x00000003,
 	0x00000002,
-	0x000004b7,
-	0x00040003,
+	0x0000063a,
+	0x00040004,
+	0x00000000,
+	0x00000656,
+	0x00010005,
+	0x00000000,
+	0x00000673,
+	0x00010006,
 	0x00000000,
-	0x000004d3,
-	0x00010004,
+	0x000005f8,
+/* 0x03b8: memx_func_tail */
+/* 0x03b8: memx_ts_start */
 	0x00000000,
-	0x000004f0,
-/* 0x03ac: memx_func_tail */
-/* 0x03ac: memx_data_head */
+/* 0x03bc: memx_ts_end */
 	0x00000000,
+/* 0x03c0: memx_data_head */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -757,8 +763,9 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0bac: memx_data_tail */
-/* 0x0bac: i2c_scl_map */
+	0x00000000,
+/* 0x0bc0: memx_data_tail */
+/* 0x0bc0: i2c_scl_map */
 	0x00001000,
 	0x00004000,
 	0x00010000,
@@ -769,7 +776,7 @@ uint32_t nva3_pwr_data[] = {
 	0x01000000,
 	0x04000000,
 	0x10000000,
-/* 0x0bd4: i2c_sda_map */
+/* 0x0be8: i2c_sda_map */
 	0x00002000,
 	0x00008000,
 	0x00020000,
@@ -780,7 +787,7 @@ uint32_t nva3_pwr_data[] = {
 	0x02000000,
 	0x08000000,
 	0x20000000,
-/* 0x0bfc: i2c_ctrl */
+/* 0x0c10: i2c_ctrl */
 	0x0000e138,
 	0x0000e150,
 	0x0000e168,
@@ -841,15 +848,10 @@ uint32_t nva3_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
 };
 
 uint32_t nva3_pwr_code[] = {
-	0x030d0ef5,
+	0x039e0ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xd00604b6,
@@ -885,19 +887,22 @@ uint32_t nva3_pwr_code[] = {
 	0xd4f100dd,
 	0x1bf47000,
 /* 0x007f: nsec */
-	0xf000f8f2,
+	0xf900f8f2,
+	0xf080f990,
 	0x84b62c87,
 	0x0088cf06,
-/* 0x0088: nsec_loop */
+/* 0x008c: nsec_loop */
 	0xb62c97f0,
 	0x99cf0694,
 	0x0298bb00,
 	0xf4069eb8,
-	0x00f8f11e,
-/* 0x009c: wait */
+	0x80fcf11e,
+	0x00f890fc,
+/* 0x00a4: wait */
+	0x80f990f9,
 	0xb62c87f0,
 	0x88cf0684,
-/* 0x00a5: wait_loop */
+/* 0x00b1: wait_loop */
 	0x02eeb900,
 	0xb90421f4,
 	0xadfd02da,
@@ -907,28 +912,29 @@ uint32_t nva3_pwr_code[] = {
 	0x0099cf06,
 	0xb80298bb,
 	0x1ef4069b,
-/* 0x00c9: wait_done */
-/* 0x00cb: intr_watchdog */
-	0x9800f8df,
+/* 0x00d5: wait_done */
+	0xfc80fcdf,
+/* 0x00db: intr_watchdog */
+	0x9800f890,
 	0x96b003e9,
 	0x2a0bf400,
 	0xbb9a0a98,
 	0x1cf4029a,
 	0x01d7f00f,
-	0x025421f5,
+	0x02dd21f5,
 	0x0ef494bd,
-/* 0x00e9: intr_watchdog_next_time */
+/* 0x00f9: intr_watchdog_next_time */
 	0x9b0a9815,
 	0xf400a6b0,
 	0x9ab8090b,
 	0x061cf406,
-/* 0x00f8: intr_watchdog_next_time_set */
-/* 0x00fb: intr_watchdog_next_proc */
+/* 0x0108: intr_watchdog_next_time_set */
+/* 0x010b: intr_watchdog_next_proc */
 	0x809b0980,
 	0xe0b603e9,
 	0x68e6b158,
 	0xc61bf402,
-/* 0x010a: intr */
+/* 0x011a: intr */
 	0x00f900f8,
 	0x80f904bd,
 	0xa0f990f9,
@@ -948,13 +954,13 @@ uint32_t nva3_pwr_code[] = {
 	0xf40289c4,
 	0x0080230b,
 	0x58e7f09b,
-	0x98cb21f4,
+	0x98db21f4,
 	0x96b09b09,
 	0x110bf400,
 	0xb63407f0,
 	0x09d00604,
 	0x8004bd00,
-/* 0x016e: intr_skip_watchdog */
+/* 0x017e: intr_skip_watchdog */
 	0x89e49a09,
 	0x0bf40800,
 	0x8897f148,
@@ -967,22 +973,22 @@ uint32_t nva3_pwr_code[] = {
 	0x48e7f1c0,
 	0x53e3f14f,
 	0x00d7f054,
-	0x02b921f5,
+	0x034221f5,
 	0x07f1c0fc,
 	0x04b604c0,
 	0x000cd006,
-/* 0x01ae: intr_subintr_skip_fifo */
+/* 0x01be: intr_subintr_skip_fifo */
 	0x07f104bd,
 	0x04b60688,
 	0x0009d006,
-/* 0x01ba: intr_skip_subintr */
+/* 0x01ca: intr_skip_subintr */
 	0x89c404bd,
 	0x070bf420,
 	0xffbfa4f1,
-/* 0x01c4: intr_skip_pause */
+/* 0x01d4: intr_skip_pause */
 	0xf44089c4,
 	0xa4f1070b,
-/* 0x01ce: intr_skip_user0 */
+/* 0x01de: intr_skip_user0 */
 	0x07f0ffbf,
 	0x0604b604,
 	0xbd0008d0,
@@ -993,596 +999,732 @@ uint32_t nva3_pwr_code[] = {
 	0x90fca0fc,
 	0x00fc80fc,
 	0xf80032f4,
-/* 0x01f5: timer */
-	0x1032f401,
-	0xb003f898,
-	0x1cf40086,
-	0x03fe8051,
+/* 0x0205: ticks_from_ns */
+	0xf9c0f901,
+	0xcbd7f1b0,
+	0x00d3f000,
+	0x041321f5,
+	0x03e8ccec,
+	0xf400b4b0,
+	0xeeec120b,
+	0xd7f103e8,
+	0xd3f000cb,
+	0x1321f500,
+/* 0x022d: ticks_from_ns_quit */
+	0x02ceb904,
+	0xc0fcb0fc,
+/* 0x0236: ticks_from_us */
+	0xc0f900f8,
+	0xd7f1b0f9,
+	0xd3f000cb,
+	0x1321f500,
+	0x02ceb904,
+	0xf400b4b0,
+	0xe4bd050b,
+/* 0x0250: ticks_from_us_quit */
+	0xc0fcb0fc,
+/* 0x0256: ticks_to_us */
+	0xd7f100f8,
+	0xd3f000cb,
+	0xecedff00,
+/* 0x0262: timer */
+	0x90f900f8,
+	0x32f480f9,
+	0x03f89810,
+	0xf40086b0,
+	0x84bd651c,
 	0xb63807f0,
 	0x08d00604,
 	0xf004bd00,
-	0x84b60887,
+	0x84b63487,
 	0x0088cf06,
-	0xf40284f0,
-	0x87f0261b,
-	0x0684b634,
-	0xb80088cf,
-	0x0bf406e0,
-	0x06e8b809,
-/* 0x0233: timer_reset */
-	0xf01f1ef4,
-	0x04b63407,
-	0x000ed006,
-	0x0e8004bd,
-/* 0x0241: timer_enable */
-	0x0187f09a,
+	0xbb9a0998,
+	0xe9bb0298,
+	0x03fe8000,
+	0xb60887f0,
+	0x88cf0684,
+	0x0284f000,
+	0xf0261bf4,
+	0x84b63487,
+	0x0088cf06,
+	0xf406e0b8,
+	0xe8b8090b,
+	0x111cf406,
+/* 0x02b8: timer_reset */
+	0xb63407f0,
+	0x0ed00604,
+	0x8004bd00,
+/* 0x02c6: timer_enable */
+	0x87f09a0e,
+	0x3807f001,
+	0xd00604b6,
+	0x04bd0008,
+/* 0x02d4: timer_done */
+	0xfc1031f4,
+	0xf890fc80,
+/* 0x02dd: send_proc */
+	0xf980f900,
+	0x05e89890,
+	0xf004e998,
+	0x89b80486,
+	0x2a0bf406,
+	0x940398c4,
+	0x80b60488,
+	0x008ebb18,
+	0x8000fa98,
+	0x8d80008a,
+	0x028c8001,
+	0xb6038b80,
+	0x94f00190,
+	0x04e98007,
+/* 0x0317: send_done */
+	0xfc0231f4,
+	0xf880fc90,
+/* 0x031d: find */
+	0xf080f900,
+	0x31f45887,
+/* 0x0325: find_loop */
+	0x008a9801,
+	0xf406aeb8,
+	0x80b6100b,
+	0x6886b158,
+	0xf01bf402,
+/* 0x033b: find_done */
+	0xb90132f4,
+	0x80fc028e,
+/* 0x0342: send */
+	0x21f500f8,
+	0x01f4031d,
+/* 0x034b: recv */
+	0xf900f897,
+	0x9880f990,
+	0xe99805e8,
+	0x0132f404,
+	0xf40689b8,
+	0x89c43d0b,
+	0x0180b603,
+	0x800784f0,
+	0xea9805e8,
+	0xfef0f902,
+	0xf0f9018f,
+	0x9402efb9,
+	0xe9bb0499,
+	0x18e0b600,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0xf0fca5f9,
+	0xf400f8fe,
+	0xf0fc0131,
+/* 0x0398: recv_done */
+	0x90fc80fc,
+/* 0x039e: init */
+	0x17f100f8,
+	0x14b60108,
+	0x0011cf06,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
+	0xd00604b6,
+	0x04bd0001,
+	0xf0ff17f0,
+	0x04b61407,
+	0x0001d006,
+	0x17f004bd,
+	0x0015f102,
+	0x1007f008,
+	0xd00604b6,
+	0x04bd0001,
+	0x011a17f1,
+	0xfe0013f0,
+	0x31f40010,
+	0x0117f010,
 	0xb63807f0,
-	0x08d00604,
-/* 0x024f: timer_done */
-	0xf404bd00,
-	0x00f81031,
-/* 0x0254: send_proc */
-	0x90f980f9,
-	0x9805e898,
-	0x86f004e9,
-	0x0689b804,
-	0xc42a0bf4,
-	0x88940398,
-	0x1880b604,
-	0x98008ebb,
-	0x8a8000fa,
-	0x018d8000,
-	0x80028c80,
-	0x90b6038b,
-	0x0794f001,
-	0xf404e980,
-/* 0x028e: send_done */
-	0x90fc0231,
-	0x00f880fc,
-/* 0x0294: find */
-	0x87f080f9,
-	0x0131f458,
-/* 0x029c: find_loop */
-	0xb8008a98,
-	0x0bf406ae,
-	0x5880b610,
-	0x026886b1,
-	0xf4f01bf4,
-/* 0x02b2: find_done */
-	0x8eb90132,
-	0xf880fc02,
-/* 0x02b9: send */
-	0x9421f500,
-	0x9701f402,
-/* 0x02c2: recv */
-	0xe89800f8,
-	0x04e99805,
-	0xb80132f4,
-	0x0bf40689,
-	0x0389c43d,
-	0xf00180b6,
-	0xe8800784,
-	0x02ea9805,
-	0x8ffef0f9,
-	0xb9f0f901,
-	0x999402ef,
-	0x00e9bb04,
-	0x9818e0b6,
-	0xec9803eb,
-	0x01ed9802,
-	0xf900ee98,
-	0xfef0fca5,
-	0x31f400f8,
-/* 0x030b: recv_done */
-	0xf8f0fc01,
-/* 0x030d: init */
-	0x0817f100,
-	0x0614b601,
-	0xe70011cf,
-	0xb6010911,
-	0x14fe0814,
-	0xe017f100,
-	0x0013f000,
-	0xb61c07f0,
 	0x01d00604,
 	0xf004bd00,
-	0x07f0ff17,
-	0x0604b614,
-	0xbd0001d0,
-	0x0217f004,
-	0x080015f1,
-	0xb61007f0,
-	0x01d00604,
-	0xf104bd00,
-	0xf0010a17,
-	0x10fe0013,
-	0x1031f400,
-	0xf00117f0,
-	0x04b63807,
-	0x0001d006,
-	0xf7f004bd,
-/* 0x0371: init_proc */
-	0x01f19858,
-	0xf40016b0,
-	0x15f9fa0b,
-	0xf458f0b6,
-/* 0x0382: host_send */
-	0x17f1f20e,
-	0x14b604b0,
-	0x0011cf06,
-	0x04a027f1,
-	0xcf0624b6,
-	0x12b80022,
-	0x320bf406,
-	0x94071ec4,
-	0xe0b704ee,
-	0xeb980270,
-	0x02ec9803,
-	0x9801ed98,
-	0x21f500ee,
-	0x10b602b9,
-	0x0f1ec401,
-	0x04b007f1,
-	0xd00604b6,
-	0x04bd000e,
-/* 0x03cb: host_send_done */
-	0xf8ba0ef4,
-/* 0x03cd: host_recv */
-	0x4917f100,
-	0x5413f14e,
-	0x06e1b852,
-/* 0x03db: host_recv_wait */
-	0xf1aa0bf4,
-	0xb604cc17,
-	0x11cf0614,
-	0xc827f100,
-	0x0624b604,
-	0xf00022cf,
-	0x12b80816,
-	0xe60bf406,
-	0xb60723c4,
-	0x30b70434,
-	0x3b8002f0,
-	0x023c8003,
-	0x80013d80,
-	0x20b6003e,
-	0x0f24f001,
-	0x04c807f1,
+/* 0x0402: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x0413: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x0464: host_send */
+	0x04b017f1,
+	0xcf0614b6,
+	0x27f10011,
+	0x24b604a0,
+	0x0022cf06,
+	0xf40612b8,
+	0x1ec4320b,
+	0x04ee9407,
+	0x0270e0b7,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0x034221f5,
+	0xc40110b6,
+	0x07f10f1e,
+	0x04b604b0,
+	0x000ed006,
+	0x0ef404bd,
+/* 0x04ad: host_send_done */
+/* 0x04af: host_recv */
+	0xf100f8ba,
+	0xf14e4917,
+	0xb8525413,
+	0x0bf406e1,
+/* 0x04bd: host_recv_wait */
+	0xcc17f1aa,
+	0x0614b604,
+	0xf10011cf,
+	0xb604c827,
+	0x22cf0624,
+	0x0816f000,
+	0xf40612b8,
+	0x23c4e60b,
+	0x0434b607,
+	0x02f030b7,
+	0x80033b80,
+	0x3d80023c,
+	0x003e8001,
+	0xf00120b6,
+	0x07f10f24,
+	0x04b604c8,
+	0x0002d006,
+	0x27f004bd,
+	0x0007f040,
 	0xd00604b6,
 	0x04bd0002,
-	0xf04027f0,
-	0x04b60007,
-	0x0002d006,
-	0x00f804bd,
-/* 0x0430: host_init */
-	0x008017f1,
-	0xf11014b6,
-	0xf1027015,
-	0xb604d007,
-	0x01d00604,
-	0xf104bd00,
-	0xb6008017,
-	0x15f11014,
-	0x07f102f0,
-	0x04b604dc,
-	0x0001d006,
-	0x17f004bd,
-	0xc407f101,
+/* 0x0512: host_init */
+	0x17f100f8,
+	0x14b60080,
+	0x7015f110,
+	0xd007f102,
 	0x0604b604,
 	0xbd0001d0,
-/* 0x046f: memx_func_enter */
-	0xf000f804,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
+	0xd00604b6,
+	0x04bd0001,
+	0xf10117f0,
+	0xb604c407,
+	0x01d00604,
+	0xf804bd00,
+/* 0x0551: memx_func_enter */
+	0x1087f100,
+	0x028eb916,
+	0xb90421f4,
+	0x67f102d7,
+	0x63f1fffc,
+	0x76fdffff,
+	0x0267f104,
+	0x0576fd00,
+	0x70f980f9,
+	0xe0fcd0fc,
+	0xf03f21f4,
 	0x07f10467,
 	0x04b607e0,
 	0x0006d006,
-/* 0x047e: memx_func_enter_wait */
+/* 0x058a: memx_func_enter_wait */
 	0x67f104bd,
 	0x64b607c0,
 	0x0066cf06,
 	0xf40464f0,
-	0x1698f30b,
-	0x0410b600,
-/* 0x0496: memx_func_leave */
-	0x67f000f8,
-	0xe407f104,
-	0x0604b607,
-	0xbd0006d0,
-/* 0x04a5: memx_func_leave_wait */
-	0xc067f104,
+	0x67f0f30b,
+	0x0664b62c,
+	0x800066cf,
+	0x00f8ee06,
+/* 0x05a8: memx_func_leave */
+	0xb62c67f0,
+	0x66cf0664,
+	0xef068000,
+	0xf10467f0,
+	0xb607e407,
+	0x06d00604,
+/* 0x05c3: memx_func_leave_wait */
+	0xf104bd00,
+	0xb607c067,
+	0x66cf0664,
+	0x0464f000,
+	0xf1f31bf4,
+	0xb9161087,
+	0x21f4028e,
+	0x02d7b904,
+	0xffcc67f1,
+	0xffff63f1,
+	0xf90476fd,
+	0xfc70f980,
+	0xf4e0fcd0,
+	0x00f83f21,
+/* 0x05f8: memx_func_wait_vblank */
+	0xb0001698,
+	0x0bf40066,
+	0x0166b013,
+	0xf4060bf4,
+/* 0x060a: memx_func_wait_vblank_head1 */
+	0x77f12e0e,
+	0x0ef40020,
+/* 0x0611: memx_func_wait_vblank_head0 */
+	0x0877f107,
+/* 0x0615: memx_func_wait_vblank_0 */
+	0xc467f100,
 	0x0664b607,
-	0xf00066cf,
-	0x1bf40464,
-/* 0x04b7: memx_func_wr32 */
-	0x9800f8f3,
-	0x15980016,
-	0x0810b601,
-	0x50f960f9,
-	0xe0fcd0fc,
-	0xb63f21f4,
-	0x1bf40242,
-/* 0x04d3: memx_func_wait */
-	0xf000f8e9,
-	0x84b62c87,
-	0x0088cf06,
-	0x98001e98,
-	0x1c98011d,
-	0x031b9802,
-	0xf41010b6,
-	0x00f89c21,
-/* 0x04f0: memx_func_delay */
-	0xb6001e98,
-	0x21f40410,
-/* 0x04fb: memx_exec */
-	0xf900f87f,
-	0xb9d0f9e0,
-	0xb2b902c1,
-/* 0x0505: memx_exec_next */
-	0x00139802,
-	0x950410b6,
-	0x30f01034,
-	0xde35980c,
-	0x12b855f9,
-	0xec1ef406,
-	0xe0fcd0fc,
-	0x02b921f5,
-/* 0x0526: memx_info */
-	0xc7f100f8,
-	0xb7f103ac,
-	0x21f50800,
-	0x00f802b9,
-/* 0x0534: memx_recv */
-	0xf401d6b0,
-	0xd6b0c40b,
-	0xe90bf400,
-/* 0x0542: memx_init */
-	0x00f800f8,
-/* 0x0544: perf_recv */
-/* 0x0546: perf_init */
+	0xfd0066cf,
+	0x1bf40467,
+/* 0x0625: memx_func_wait_vblank_1 */
+	0xc467f1f3,
+	0x0664b607,
+	0xfd0066cf,
+	0x0bf40467,
+/* 0x0635: memx_func_wait_vblank_fini */
+	0x0410b6f3,
+/* 0x063a: memx_func_wr32 */
+	0x169800f8,
+	0x01159800,
+	0xf90810b6,
+	0xfc50f960,
+	0xf4e0fcd0,
+	0x42b63f21,
+	0xe91bf402,
+/* 0x0656: memx_func_wait */
+	0x87f000f8,
+	0x0684b62c,
+	0x980088cf,
+	0x1d98001e,
+	0x021c9801,
+	0xb6031b98,
+	0x21f41010,
+/* 0x0673: memx_func_delay */
+	0x9800f8a4,
+	0x10b6001e,
+	0x7f21f404,
+/* 0x067e: memx_exec */
+	0xe0f900f8,
+	0xc1b9d0f9,
+	0x02b2b902,
+/* 0x0688: memx_exec_next */
+	0xb6001398,
+	0x34e70410,
+	0x33e701f0,
+	0x32b601e0,
+	0x0c30f001,
+	0xf9de3598,
+	0x0612b855,
+	0x98e41ef4,
+	0x0c98ee0b,
+	0x02cbbbef,
+	0x07c4b7f1,
+	0xcf06b4b6,
+	0xd0fc00bb,
+	0x21f5e0fc,
+	0x00f80342,
+/* 0x06c4: memx_info */
+	0x03c0c7f1,
+	0x0800b7f1,
+	0x034221f5,
+/* 0x06d2: memx_recv */
+	0xd6b000f8,
+	0xa90bf401,
+	0xf400d6b0,
+	0x00f8e90b,
+/* 0x06e0: memx_init */
+/* 0x06e2: perf_recv */
 	0x00f800f8,
-/* 0x0548: i2c_drive_scl */
-	0xf40036b0,
-	0x07f1110b,
-	0x04b607e0,
-	0x0001d006,
-	0x00f804bd,
-/* 0x055c: i2c_drive_scl_lo */
-	0x07e407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x056a: i2c_drive_sda */
+/* 0x06e4: perf_init */
+/* 0x06e6: i2c_drive_scl */
 	0x36b000f8,
 	0x110bf400,
 	0x07e007f1,
 	0xd00604b6,
-	0x04bd0002,
-/* 0x057e: i2c_drive_sda_lo */
+	0x04bd0001,
+/* 0x06fa: i2c_drive_scl_lo */
 	0x07f100f8,
 	0x04b607e4,
+	0x0001d006,
+	0x00f804bd,
+/* 0x0708: i2c_drive_sda */
+	0xf40036b0,
+	0x07f1110b,
+	0x04b607e0,
 	0x0002d006,
 	0x00f804bd,
-/* 0x058c: i2c_sense_scl */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0431fd00,
-	0xf4060bf4,
-/* 0x05a2: i2c_sense_scl_done */
-	0x00f80131,
-/* 0x05a4: i2c_sense_sda */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0432fd00,
-	0xf4060bf4,
-/* 0x05ba: i2c_sense_sda_done */
-	0x00f80131,
-/* 0x05bc: i2c_raise_scl */
-	0x47f140f9,
-	0x37f00898,
-	0x4821f501,
-/* 0x05c9: i2c_raise_scl_wait */
-	0xe8e7f105,
-	0x7f21f403,
-	0x058c21f5,
-	0xb60901f4,
-	0x1bf40142,
-/* 0x05dd: i2c_raise_scl_done */
-	0xf840fcef,
-/* 0x05e1: i2c_start */
-	0x8c21f500,
-	0x0d11f405,
-	0x05a421f5,
-	0xf40611f4,
-/* 0x05f2: i2c_start_rep */
-	0x37f0300e,
-	0x4821f500,
-	0x0137f005,
-	0x056a21f5,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xbc21f550,
-	0x0464b605,
-/* 0x061f: i2c_start_send */
-	0xf01f11f4,
+/* 0x071c: i2c_drive_sda_lo */
+	0x07e407f1,
+	0xd00604b6,
+	0x04bd0002,
+/* 0x072a: i2c_sense_scl */
+	0x32f400f8,
+	0xc437f101,
+	0x0634b607,
+	0xfd0033cf,
+	0x0bf40431,
+	0x0131f406,
+/* 0x0740: i2c_sense_scl_done */
+/* 0x0742: i2c_sense_sda */
+	0x32f400f8,
+	0xc437f101,
+	0x0634b607,
+	0xfd0033cf,
+	0x0bf40432,
+	0x0131f406,
+/* 0x0758: i2c_sense_sda_done */
+/* 0x075a: i2c_raise_scl */
+	0x40f900f8,
+	0x089847f1,
+	0xf50137f0,
+/* 0x0767: i2c_raise_scl_wait */
+	0xf106e621,
+	0xf403e8e7,
+	0x21f57f21,
+	0x01f4072a,
+	0x0142b609,
+/* 0x077b: i2c_raise_scl_done */
+	0xfcef1bf4,
+/* 0x077f: i2c_start */
+	0xf500f840,
+	0xf4072a21,
+	0x21f50d11,
+	0x11f40742,
+	0x300ef406,
+/* 0x0790: i2c_start_rep */
+	0xf50037f0,
+	0xf006e621,
+	0x21f50137,
+	0x76bb0708,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb6075a21,
+	0x11f40464,
+/* 0x07bd: i2c_start_send */
+	0x0037f01f,
+	0x070821f5,
+	0x1388e7f1,
+	0xf07f21f4,
 	0x21f50037,
-	0xe7f1056a,
+	0xe7f106e6,
 	0x21f41388,
-	0x0037f07f,
-	0x054821f5,
-	0x1388e7f1,
-/* 0x063b: i2c_start_out */
-	0xf87f21f4,
-/* 0x063d: i2c_stop */
-	0x0037f000,
-	0x054821f5,
-	0xf50037f0,
-	0xf1056a21,
-	0xf403e8e7,
-	0x37f07f21,
-	0x4821f501,
-	0x88e7f105,
-	0x7f21f413,
+/* 0x07d9: i2c_start_out */
+/* 0x07db: i2c_stop */
+	0xf000f87f,
+	0x21f50037,
+	0x37f006e6,
+	0x0821f500,
+	0xe8e7f107,
+	0x7f21f403,
 	0xf50137f0,
-	0xf1056a21,
+	0xf106e621,
 	0xf41388e7,
-	0x00f87f21,
-/* 0x0670: i2c_bitw */
-	0x056a21f5,
-	0x03e8e7f1,
-	0xbb7f21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x05bc21f5,
-	0xf40464b6,
-	0xe7f11811,
-	0x21f41388,
-	0x0037f07f,
-	0x054821f5,
-	0x1388e7f1,
-/* 0x06af: i2c_bitw_out */
-	0xf87f21f4,
-/* 0x06b1: i2c_bitr */
-	0x0137f000,
-	0x056a21f5,
-	0x03e8e7f1,
-	0xbb7f21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x05bc21f5,
-	0xf40464b6,
-	0x21f51b11,
-	0x37f005a4,
-	0x4821f500,
-	0x88e7f105,
+	0x37f07f21,
+	0x0821f501,
+	0x88e7f107,
 	0x7f21f413,
-	0xf4013cf0,
-/* 0x06f6: i2c_bitr_done */
-	0x00f80131,
-/* 0x06f8: i2c_get_byte */
-	0xf00057f0,
-/* 0x06fe: i2c_get_byte_next */
-	0x54b60847,
-	0x0076bb01,
+/* 0x080e: i2c_bitw */
+	0x21f500f8,
+	0xe7f10708,
+	0x21f403e8,
+	0x0076bb7f,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b606b1,
-	0x2b11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0x0137f0d8,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x7021f550,
-	0x0464b606,
-/* 0x0748: i2c_get_byte_done */
-/* 0x074a: i2c_put_byte */
-	0x47f000f8,
-/* 0x074d: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
+	0x64b6075a,
+	0x1811f404,
+	0x1388e7f1,
+	0xf07f21f4,
+	0x21f50037,
+	0xe7f106e6,
+	0x21f41388,
+/* 0x084d: i2c_bitw_out */
+/* 0x084f: i2c_bitr */
+	0xf000f87f,
+	0x21f50137,
+	0xe7f10708,
+	0x21f403e8,
+	0x0076bb7f,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b6075a,
+	0x1b11f404,
+	0x074221f5,
+	0xf50037f0,
+	0xf106e621,
+	0xf41388e7,
+	0x3cf07f21,
+	0x0131f401,
+/* 0x0894: i2c_bitr_done */
+/* 0x0896: i2c_get_byte */
+	0x57f000f8,
+	0x0847f000,
+/* 0x089c: i2c_get_byte_next */
+	0xbb0154b6,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x067021f5,
+	0x084f21f5,
 	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xb121f550,
-	0x0464b606,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x07a3: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x07a5: i2c_addr */
-	0x0076bb00,
+	0x53fd2b11,
+	0x0142b605,
+	0xf0d81bf4,
+	0x76bb0137,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb6080e21,
+/* 0x08e6: i2c_get_byte_done */
+	0x00f80464,
+/* 0x08e8: i2c_put_byte */
+/* 0x08eb: i2c_put_byte_next */
+	0xb60847f0,
+	0x54ff0142,
+	0x0076bb38,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b605e1,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
+	0x64b6080e,
+	0x3411f404,
+	0xf40046b0,
+	0x76bbd81b,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6074a21,
-/* 0x07ea: i2c_addr_done */
-	0x00f80464,
-/* 0x07ec: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b702e4,
-	0xee980bfc,
-/* 0x07fb: i2c_acquire */
-	0xf500f800,
-	0xf407ec21,
-	0xd9f00421,
-	0x3f21f403,
-/* 0x080a: i2c_release */
-	0x21f500f8,
-	0x21f407ec,
-	0x03daf004,
-	0xf83f21f4,
-/* 0x0819: i2c_recv */
-	0x0132f400,
-	0xb6f8c1c7,
-	0x16b00214,
-	0x3a1ff528,
-	0xd413a001,
-	0x0032980b,
-	0x0bac13a0,
-	0xf4003198,
-	0xd0f90231,
-	0xd0f9e0f9,
-	0x000067f1,
-	0x100063f1,
-	0xbb016792,
+	0xb6084f21,
+	0x11f40464,
+	0x0076bb0f,
+	0xf40136b0,
+	0x32f4061b,
+/* 0x0941: i2c_put_byte_done */
+/* 0x0943: i2c_addr */
+	0xbb00f801,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x07fb21f5,
-	0xfc0464b6,
-	0x00d6b0d0,
-	0x00b31bf5,
-	0xbb0057f0,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x07a521f5,
-	0xf50464b6,
-	0xc700d011,
-	0x76bbe0c5,
-	0x0465b600,
-	0x659450f9,
-	0x0256bb04,
-	0x75fd50bd,
-	0xf550fc04,
-	0xb6074a21,
-	0x11f50464,
-	0x57f000ad,
+	0x077f21f5,
+	0xf40464b6,
+	0xc3e72911,
+	0x34b6012e,
+	0x0553fd01,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xe821f550,
+	0x0464b608,
+/* 0x0988: i2c_addr_done */
+/* 0x098a: i2c_acquire_addr */
+	0xcec700f8,
+	0x02e4b6f8,
+	0x0c10e0b7,
+	0xf800ee98,
+/* 0x0999: i2c_acquire */
+	0x8a21f500,
+	0x0421f409,
+	0xf403d9f0,
+	0x00f83f21,
+/* 0x09a8: i2c_release */
+	0x098a21f5,
+	0xf00421f4,
+	0x21f403da,
+/* 0x09b7: i2c_recv */
+	0xf400f83f,
+	0xc1c70132,
+	0x0214b6f8,
+	0xf52816b0,
+	0xa0013a1f,
+	0x980be813,
+	0x13a00032,
+	0x31980bc0,
+	0x0231f400,
+	0xe0f9d0f9,
+	0x67f1d0f9,
+	0x63f10000,
+	0x67921000,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b607a5,
-	0x8a11f504,
+	0x64b60999,
+	0xb0d0fc04,
+	0x1bf500d6,
+	0x57f000b3,
 	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b606f8,
-	0x6a11f404,
-	0xbbe05bcb,
+	0x64b60943,
+	0xd011f504,
+	0xe0c5c700,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xe821f550,
+	0x0464b608,
+	0x00ad11f5,
+	0xbb0157f0,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x063d21f5,
-	0xb90464b6,
-	0x74bd025b,
-/* 0x091f: i2c_recv_not_rd08 */
-	0xb0430ef4,
-	0x1bf401d6,
-	0x0057f03d,
-	0x07a521f5,
-	0xc73311f4,
-	0x21f5e0c5,
-	0x11f4074a,
-	0x0057f029,
-	0x07a521f5,
-	0xc71f11f4,
-	0x21f5e0b5,
-	0x11f4074a,
-	0x3d21f515,
-	0xc774bd06,
-	0x1bf408c5,
-	0x0232f409,
-/* 0x095f: i2c_recv_not_wr08 */
-/* 0x095f: i2c_recv_done */
-	0xc7030ef4,
-	0x21f5f8ce,
-	0xe0fc080a,
-	0x12f4d0fc,
-	0x027cb90a,
-	0x02b921f5,
-/* 0x0974: i2c_recv_exit */
-/* 0x0976: i2c_init */
-	0x00f800f8,
-/* 0x0978: test_recv */
-	0x05d817f1,
+	0x094321f5,
+	0xf50464b6,
+	0xbb008a11,
+	0x65b60076,
+	0x9450f904,
+	0x56bb0465,
+	0xfd50bd02,
+	0x50fc0475,
+	0x089621f5,
+	0xf40464b6,
+	0x5bcb6a11,
+	0x0076bbe0,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b607db,
+	0x025bb904,
+	0x0ef474bd,
+/* 0x0abd: i2c_recv_not_rd08 */
+	0x01d6b043,
+	0xf03d1bf4,
+	0x21f50057,
+	0x11f40943,
+	0xe0c5c733,
+	0x08e821f5,
+	0xf02911f4,
+	0x21f50057,
+	0x11f40943,
+	0xe0b5c71f,
+	0x08e821f5,
+	0xf51511f4,
+	0xbd07db21,
+	0x08c5c774,
+	0xf4091bf4,
+	0x0ef40232,
+/* 0x0afd: i2c_recv_not_wr08 */
+/* 0x0afd: i2c_recv_done */
+	0xf8cec703,
+	0x09a821f5,
+	0xd0fce0fc,
+	0xb90a12f4,
+	0x21f5027c,
+/* 0x0b12: i2c_recv_exit */
+	0x00f80342,
+/* 0x0b14: i2c_init */
+/* 0x0b16: test_recv */
+	0x17f100f8,
+	0x14b605d8,
+	0x0011cf06,
+	0xf10110b6,
+	0xb605d807,
+	0x01d00604,
+	0xf104bd00,
+	0xf1d900e7,
+	0xf5134fe3,
+	0xf8026221,
+/* 0x0b3d: test_init */
+	0x00e7f100,
+	0x6221f508,
+/* 0x0b47: idle_recv */
+	0xf800f802,
+/* 0x0b49: idle */
+	0x0031f400,
+	0x05d417f1,
 	0xcf0614b6,
 	0x10b60011,
-	0xd807f101,
+	0xd407f101,
 	0x0604b605,
 	0xbd0001d0,
-	0x00e7f104,
-	0x4fe3f1d9,
-	0xf521f513,
-/* 0x099f: test_init */
-	0xf100f801,
-	0xf50800e7,
-	0xf801f521,
-/* 0x09a9: idle_recv */
-/* 0x09ab: idle */
-	0xf400f800,
-	0x17f10031,
-	0x14b605d4,
-	0x0011cf06,
-	0xf10110b6,
-	0xb605d407,
-	0x01d00604,
-/* 0x09c7: idle_loop */
-	0xf004bd00,
-	0x32f45817,
-/* 0x09cd: idle_proc */
-/* 0x09cd: idle_proc_exec */
-	0xb910f902,
-	0x21f5021e,
-	0x10fc02c2,
-	0xf40911f4,
-	0x0ef40231,
-/* 0x09e1: idle_proc_next */
-	0x5810b6ef,
-	0xf4061fb8,
-	0x02f4e61b,
-	0x0028f4dd,
-	0x00bb0ef4,
+/* 0x0b65: idle_loop */
+	0x5817f004,
+/* 0x0b6b: idle_proc */
+/* 0x0b6b: idle_proc_exec */
+	0xf90232f4,
+	0x021eb910,
+	0x034b21f5,
+	0x11f410fc,
+	0x0231f409,
+/* 0x0b7f: idle_proc_next */
+	0xb6ef0ef4,
+	0x1fb85810,
+	0xe61bf406,
+	0xf4dd02f4,
+	0x0ef40028,
+	0x000000bb,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 	0x00000000,
 	0x00000000,
 	0x00000000,
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc
index 48f79434a449..21bf8cc7618f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc
@@ -23,6 +23,7 @@
  */
 
 #define NVKM_PPWR_CHIPSET GF100
+#define HW_TICKS_PER_US 203 // should be 202.5
 
 //#define NVKM_FALCON_PC24
 //#define NVKM_FALCON_UNSHIFTED_IO
@@ -34,6 +35,7 @@
 .section #nvc0_pwr_data
 #define INCLUDE_PROC
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -44,6 +46,7 @@
 
 #define INCLUDE_DATA
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -56,6 +59,7 @@
 .section #nvc0_pwr_code
 #define INCLUDE_CODE
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc.h
index 0773ff0e3dc3..ca30fa4011b5 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvc0.fuc.h
@@ -24,8 +24,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x00000430,
-	0x000003cd,
+	0x00000512,
+	0x000004af,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x00000542,
-	0x00000534,
+	0x0000074b,
+	0x0000073d,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x00000546,
-	0x00000544,
+	0x0000074f,
+	0x0000074d,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x00000976,
-	0x00000819,
+	0x00000b7f,
+	0x00000a22,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x0000099f,
-	0x00000978,
+	0x00000ba8,
+	0x00000b81,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x000009ab,
-	0x000009a9,
+	0x00000bb4,
+	0x00000bb2,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -227,25 +227,31 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 /* 0x0370: memx_func_head */
-	0x00010000,
-	0x00000000,
-	0x0000046f,
-/* 0x037c: memx_func_next */
 	0x00000001,
 	0x00000000,
-	0x00000496,
+	0x00000551,
+/* 0x037c: memx_func_next */
 	0x00000002,
+	0x00000000,
+	0x000005db,
+	0x00000003,
 	0x00000002,
-	0x000004b7,
-	0x00040003,
+	0x000006a5,
+	0x00040004,
+	0x00000000,
+	0x000006c1,
+	0x00010005,
+	0x00000000,
+	0x000006de,
+	0x00010006,
 	0x00000000,
-	0x000004d3,
-	0x00010004,
+	0x00000663,
+/* 0x03b8: memx_func_tail */
+/* 0x03b8: memx_ts_start */
 	0x00000000,
-	0x000004f0,
-/* 0x03ac: memx_func_tail */
-/* 0x03ac: memx_data_head */
+/* 0x03bc: memx_ts_end */
 	0x00000000,
+/* 0x03c0: memx_data_head */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -757,8 +763,9 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0bac: memx_data_tail */
-/* 0x0bac: i2c_scl_map */
+	0x00000000,
+/* 0x0bc0: memx_data_tail */
+/* 0x0bc0: i2c_scl_map */
 	0x00001000,
 	0x00004000,
 	0x00010000,
@@ -769,7 +776,7 @@ uint32_t nvc0_pwr_data[] = {
 	0x01000000,
 	0x04000000,
 	0x10000000,
-/* 0x0bd4: i2c_sda_map */
+/* 0x0be8: i2c_sda_map */
 	0x00002000,
 	0x00008000,
 	0x00020000,
@@ -780,7 +787,7 @@ uint32_t nvc0_pwr_data[] = {
 	0x02000000,
 	0x08000000,
 	0x20000000,
-/* 0x0bfc: i2c_ctrl */
+/* 0x0c10: i2c_ctrl */
 	0x0000e138,
 	0x0000e150,
 	0x0000e168,
@@ -841,15 +848,10 @@ uint32_t nvc0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
-	0x00000000,
 };
 
 uint32_t nvc0_pwr_code[] = {
-	0x030d0ef5,
+	0x039e0ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xd00604b6,
@@ -885,19 +887,22 @@ uint32_t nvc0_pwr_code[] = {
 	0xd4f100dd,
 	0x1bf47000,
 /* 0x007f: nsec */
-	0xf000f8f2,
+	0xf900f8f2,
+	0xf080f990,
 	0x84b62c87,
 	0x0088cf06,
-/* 0x0088: nsec_loop */
+/* 0x008c: nsec_loop */
 	0xb62c97f0,
 	0x99cf0694,
 	0x0298bb00,
 	0xf4069eb8,
-	0x00f8f11e,
-/* 0x009c: wait */
+	0x80fcf11e,
+	0x00f890fc,
+/* 0x00a4: wait */
+	0x80f990f9,
 	0xb62c87f0,
 	0x88cf0684,
-/* 0x00a5: wait_loop */
+/* 0x00b1: wait_loop */
 	0x02eeb900,
 	0xb90421f4,
 	0xadfd02da,
@@ -907,28 +912,29 @@ uint32_t nvc0_pwr_code[] = {
 	0x0099cf06,
 	0xb80298bb,
 	0x1ef4069b,
-/* 0x00c9: wait_done */
-/* 0x00cb: intr_watchdog */
-	0x9800f8df,
+/* 0x00d5: wait_done */
+	0xfc80fcdf,
+/* 0x00db: intr_watchdog */
+	0x9800f890,
 	0x96b003e9,
 	0x2a0bf400,
 	0xbb9a0a98,
 	0x1cf4029a,
 	0x01d7f00f,
-	0x025421f5,
+	0x02dd21f5,
 	0x0ef494bd,
-/* 0x00e9: intr_watchdog_next_time */
+/* 0x00f9: intr_watchdog_next_time */
 	0x9b0a9815,
 	0xf400a6b0,
 	0x9ab8090b,
 	0x061cf406,
-/* 0x00f8: intr_watchdog_next_time_set */
-/* 0x00fb: intr_watchdog_next_proc */
+/* 0x0108: intr_watchdog_next_time_set */
+/* 0x010b: intr_watchdog_next_proc */
 	0x809b0980,
 	0xe0b603e9,
 	0x68e6b158,
 	0xc61bf402,
-/* 0x010a: intr */
+/* 0x011a: intr */
 	0x00f900f8,
 	0x80f904bd,
 	0xa0f990f9,
@@ -948,13 +954,13 @@ uint32_t nvc0_pwr_code[] = {
 	0xf40289c4,
 	0x0080230b,
 	0x58e7f09b,
-	0x98cb21f4,
+	0x98db21f4,
 	0x96b09b09,
 	0x110bf400,
 	0xb63407f0,
 	0x09d00604,
 	0x8004bd00,
-/* 0x016e: intr_skip_watchdog */
+/* 0x017e: intr_skip_watchdog */
 	0x89e49a09,
 	0x0bf40800,
 	0x8897f148,
@@ -967,22 +973,22 @@ uint32_t nvc0_pwr_code[] = {
 	0x48e7f1c0,
 	0x53e3f14f,
 	0x00d7f054,
-	0x02b921f5,
+	0x034221f5,
 	0x07f1c0fc,
 	0x04b604c0,
 	0x000cd006,
-/* 0x01ae: intr_subintr_skip_fifo */
+/* 0x01be: intr_subintr_skip_fifo */
 	0x07f104bd,
 	0x04b60688,
 	0x0009d006,
-/* 0x01ba: intr_skip_subintr */
+/* 0x01ca: intr_skip_subintr */
 	0x89c404bd,
 	0x070bf420,
 	0xffbfa4f1,
-/* 0x01c4: intr_skip_pause */
+/* 0x01d4: intr_skip_pause */
 	0xf44089c4,
 	0xa4f1070b,
-/* 0x01ce: intr_skip_user0 */
+/* 0x01de: intr_skip_user0 */
 	0x07f0ffbf,
 	0x0604b604,
 	0xbd0008d0,
@@ -993,597 +999,733 @@ uint32_t nvc0_pwr_code[] = {
 	0x90fca0fc,
 	0x00fc80fc,
 	0xf80032f4,
-/* 0x01f5: timer */
-	0x1032f401,
-	0xb003f898,
-	0x1cf40086,
-	0x03fe8051,
+/* 0x0205: ticks_from_ns */
+	0xf9c0f901,
+	0xcbd7f1b0,
+	0x00d3f000,
+	0x041321f5,
+	0x03e8ccec,
+	0xf400b4b0,
+	0xeeec120b,
+	0xd7f103e8,
+	0xd3f000cb,
+	0x1321f500,
+/* 0x022d: ticks_from_ns_quit */
+	0x02ceb904,
+	0xc0fcb0fc,
+/* 0x0236: ticks_from_us */
+	0xc0f900f8,
+	0xd7f1b0f9,
+	0xd3f000cb,
+	0x1321f500,
+	0x02ceb904,
+	0xf400b4b0,
+	0xe4bd050b,
+/* 0x0250: ticks_from_us_quit */
+	0xc0fcb0fc,
+/* 0x0256: ticks_to_us */
+	0xd7f100f8,
+	0xd3f000cb,
+	0xecedff00,
+/* 0x0262: timer */
+	0x90f900f8,
+	0x32f480f9,
+	0x03f89810,
+	0xf40086b0,
+	0x84bd651c,
 	0xb63807f0,
 	0x08d00604,
 	0xf004bd00,
-	0x84b60887,
+	0x84b63487,
 	0x0088cf06,
-	0xf40284f0,
-	0x87f0261b,
-	0x0684b634,
-	0xb80088cf,
-	0x0bf406e0,
-	0x06e8b809,
-/* 0x0233: timer_reset */
-	0xf01f1ef4,
-	0x04b63407,
-	0x000ed006,
-	0x0e8004bd,
-/* 0x0241: timer_enable */
-	0x0187f09a,
+	0xbb9a0998,
+	0xe9bb0298,
+	0x03fe8000,
+	0xb60887f0,
+	0x88cf0684,
+	0x0284f000,
+	0xf0261bf4,
+	0x84b63487,
+	0x0088cf06,
+	0xf406e0b8,
+	0xe8b8090b,
+	0x111cf406,
+/* 0x02b8: timer_reset */
+	0xb63407f0,
+	0x0ed00604,
+	0x8004bd00,
+/* 0x02c6: timer_enable */
+	0x87f09a0e,
+	0x3807f001,
+	0xd00604b6,
+	0x04bd0008,
+/* 0x02d4: timer_done */
+	0xfc1031f4,
+	0xf890fc80,
+/* 0x02dd: send_proc */
+	0xf980f900,
+	0x05e89890,
+	0xf004e998,
+	0x89b80486,
+	0x2a0bf406,
+	0x940398c4,
+	0x80b60488,
+	0x008ebb18,
+	0x8000fa98,
+	0x8d80008a,
+	0x028c8001,
+	0xb6038b80,
+	0x94f00190,
+	0x04e98007,
+/* 0x0317: send_done */
+	0xfc0231f4,
+	0xf880fc90,
+/* 0x031d: find */
+	0xf080f900,
+	0x31f45887,
+/* 0x0325: find_loop */
+	0x008a9801,
+	0xf406aeb8,
+	0x80b6100b,
+	0x6886b158,
+	0xf01bf402,
+/* 0x033b: find_done */
+	0xb90132f4,
+	0x80fc028e,
+/* 0x0342: send */
+	0x21f500f8,
+	0x01f4031d,
+/* 0x034b: recv */
+	0xf900f897,
+	0x9880f990,
+	0xe99805e8,
+	0x0132f404,
+	0xf40689b8,
+	0x89c43d0b,
+	0x0180b603,
+	0x800784f0,
+	0xea9805e8,
+	0xfef0f902,
+	0xf0f9018f,
+	0x9402efb9,
+	0xe9bb0499,
+	0x18e0b600,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0xf0fca5f9,
+	0xf400f8fe,
+	0xf0fc0131,
+/* 0x0398: recv_done */
+	0x90fc80fc,
+/* 0x039e: init */
+	0x17f100f8,
+	0x14b60108,
+	0x0011cf06,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
+	0xd00604b6,
+	0x04bd0001,
+	0xf0ff17f0,
+	0x04b61407,
+	0x0001d006,
+	0x17f004bd,
+	0x0015f102,
+	0x1007f008,
+	0xd00604b6,
+	0x04bd0001,
+	0x011a17f1,
+	0xfe0013f0,
+	0x31f40010,
+	0x0117f010,
 	0xb63807f0,
-	0x08d00604,
-/* 0x024f: timer_done */
-	0xf404bd00,
-	0x00f81031,
-/* 0x0254: send_proc */
-	0x90f980f9,
-	0x9805e898,
-	0x86f004e9,
-	0x0689b804,
-	0xc42a0bf4,
-	0x88940398,
-	0x1880b604,
-	0x98008ebb,
-	0x8a8000fa,
-	0x018d8000,
-	0x80028c80,
-	0x90b6038b,
-	0x0794f001,
-	0xf404e980,
-/* 0x028e: send_done */
-	0x90fc0231,
-	0x00f880fc,
-/* 0x0294: find */
-	0x87f080f9,
-	0x0131f458,
-/* 0x029c: find_loop */
-	0xb8008a98,
-	0x0bf406ae,
-	0x5880b610,
-	0x026886b1,
-	0xf4f01bf4,
-/* 0x02b2: find_done */
-	0x8eb90132,
-	0xf880fc02,
-/* 0x02b9: send */
-	0x9421f500,
-	0x9701f402,
-/* 0x02c2: recv */
-	0xe89800f8,
-	0x04e99805,
-	0xb80132f4,
-	0x0bf40689,
-	0x0389c43d,
-	0xf00180b6,
-	0xe8800784,
-	0x02ea9805,
-	0x8ffef0f9,
-	0xb9f0f901,
-	0x999402ef,
-	0x00e9bb04,
-	0x9818e0b6,
-	0xec9803eb,
-	0x01ed9802,
-	0xf900ee98,
-	0xfef0fca5,
-	0x31f400f8,
-/* 0x030b: recv_done */
-	0xf8f0fc01,
-/* 0x030d: init */
-	0x0817f100,
-	0x0614b601,
-	0xe70011cf,
-	0xb6010911,
-	0x14fe0814,
-	0xe017f100,
-	0x0013f000,
-	0xb61c07f0,
 	0x01d00604,
 	0xf004bd00,
-	0x07f0ff17,
-	0x0604b614,
-	0xbd0001d0,
-	0x0217f004,
-	0x080015f1,
-	0xb61007f0,
-	0x01d00604,
-	0xf104bd00,
-	0xf0010a17,
-	0x10fe0013,
-	0x1031f400,
-	0xf00117f0,
-	0x04b63807,
-	0x0001d006,
-	0xf7f004bd,
-/* 0x0371: init_proc */
-	0x01f19858,
-	0xf40016b0,
-	0x15f9fa0b,
-	0xf458f0b6,
-/* 0x0382: host_send */
-	0x17f1f20e,
-	0x14b604b0,
-	0x0011cf06,
-	0x04a027f1,
-	0xcf0624b6,
-	0x12b80022,
-	0x320bf406,
-	0x94071ec4,
-	0xe0b704ee,
-	0xeb980270,
-	0x02ec9803,
-	0x9801ed98,
-	0x21f500ee,
-	0x10b602b9,
-	0x0f1ec401,
-	0x04b007f1,
-	0xd00604b6,
-	0x04bd000e,
-/* 0x03cb: host_send_done */
-	0xf8ba0ef4,
-/* 0x03cd: host_recv */
-	0x4917f100,
-	0x5413f14e,
-	0x06e1b852,
-/* 0x03db: host_recv_wait */
-	0xf1aa0bf4,
-	0xb604cc17,
-	0x11cf0614,
-	0xc827f100,
-	0x0624b604,
-	0xf00022cf,
-	0x12b80816,
-	0xe60bf406,
-	0xb60723c4,
-	0x30b70434,
-	0x3b8002f0,
-	0x023c8003,
-	0x80013d80,
-	0x20b6003e,
-	0x0f24f001,
-	0x04c807f1,
+/* 0x0402: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x0413: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x0464: host_send */
+	0x04b017f1,
+	0xcf0614b6,
+	0x27f10011,
+	0x24b604a0,
+	0x0022cf06,
+	0xf40612b8,
+	0x1ec4320b,
+	0x04ee9407,
+	0x0270e0b7,
+	0x9803eb98,
+	0xed9802ec,
+	0x00ee9801,
+	0x034221f5,
+	0xc40110b6,
+	0x07f10f1e,
+	0x04b604b0,
+	0x000ed006,
+	0x0ef404bd,
+/* 0x04ad: host_send_done */
+/* 0x04af: host_recv */
+	0xf100f8ba,
+	0xf14e4917,
+	0xb8525413,
+	0x0bf406e1,
+/* 0x04bd: host_recv_wait */
+	0xcc17f1aa,
+	0x0614b604,
+	0xf10011cf,
+	0xb604c827,
+	0x22cf0624,
+	0x0816f000,
+	0xf40612b8,
+	0x23c4e60b,
+	0x0434b607,
+	0x02f030b7,
+	0x80033b80,
+	0x3d80023c,
+	0x003e8001,
+	0xf00120b6,
+	0x07f10f24,
+	0x04b604c8,
+	0x0002d006,
+	0x27f004bd,
+	0x0007f040,
 	0xd00604b6,
 	0x04bd0002,
-	0xf04027f0,
-	0x04b60007,
-	0x0002d006,
-	0x00f804bd,
-/* 0x0430: host_init */
-	0x008017f1,
-	0xf11014b6,
-	0xf1027015,
-	0xb604d007,
-	0x01d00604,
-	0xf104bd00,
-	0xb6008017,
-	0x15f11014,
-	0x07f102f0,
-	0x04b604dc,
-	0x0001d006,
-	0x17f004bd,
-	0xc407f101,
+/* 0x0512: host_init */
+	0x17f100f8,
+	0x14b60080,
+	0x7015f110,
+	0xd007f102,
 	0x0604b604,
 	0xbd0001d0,
-/* 0x046f: memx_func_enter */
-	0xf000f804,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
+	0xd00604b6,
+	0x04bd0001,
+	0xf10117f0,
+	0xb604c407,
+	0x01d00604,
+	0xf804bd00,
+/* 0x0551: memx_func_enter */
+	0x2067f100,
+	0x5d77f116,
+	0xff73f1f5,
+	0x026eb9ff,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f904,
+	0xfcd0fc80,
+	0x3f21f4e0,
+	0xfffe77f1,
+	0xffff73f1,
+	0xf4026eb9,
+	0xd8b90421,
+	0x0487fd02,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0xf13f21f4,
+	0xb926f067,
+	0x21f4026e,
+	0x02d8b904,
+	0xf90487fd,
+	0xfc80f960,
+	0xf4e0fcd0,
+	0x67f03f21,
+	0xe007f104,
+	0x0604b607,
+	0xbd0006d0,
+/* 0x05bd: memx_func_enter_wait */
+	0xc067f104,
+	0x0664b607,
+	0xf00066cf,
+	0x0bf40464,
+	0x2c67f0f3,
+	0xcf0664b6,
+	0x06800066,
+/* 0x05db: memx_func_leave */
+	0xf000f8ee,
+	0x64b62c67,
+	0x0066cf06,
+	0xf0ef0680,
 	0x07f10467,
-	0x04b607e0,
+	0x04b607e4,
 	0x0006d006,
-/* 0x047e: memx_func_enter_wait */
+/* 0x05f6: memx_func_leave_wait */
 	0x67f104bd,
 	0x64b607c0,
 	0x0066cf06,
 	0xf40464f0,
-	0x1698f30b,
+	0x67f1f31b,
+	0x77f126f0,
+	0x73f00001,
+	0x026eb900,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f905,
+	0xfcd0fc80,
+	0x3f21f4e0,
+	0x162067f1,
+	0xf4026eb9,
+	0xd8b90421,
+	0x0587fd02,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0xf13f21f4,
+	0xf00aa277,
+	0x6eb90073,
+	0x0421f402,
+	0xfd02d8b9,
+	0x60f90587,
+	0xd0fc80f9,
+	0x21f4e0fc,
+/* 0x0663: memx_func_wait_vblank */
+	0x9800f83f,
+	0x66b00016,
+	0x130bf400,
+	0xf40166b0,
+	0x0ef4060b,
+/* 0x0675: memx_func_wait_vblank_head1 */
+	0x2077f12e,
+	0x070ef400,
+/* 0x067c: memx_func_wait_vblank_head0 */
+	0x000877f1,
+/* 0x0680: memx_func_wait_vblank_0 */
+	0x07c467f1,
+	0xcf0664b6,
+	0x67fd0066,
+	0xf31bf404,
+/* 0x0690: memx_func_wait_vblank_1 */
+	0x07c467f1,
+	0xcf0664b6,
+	0x67fd0066,
+	0xf30bf404,
+/* 0x06a0: memx_func_wait_vblank_fini */
+	0xf80410b6,
+/* 0x06a5: memx_func_wr32 */
+	0x00169800,
+	0xb6011598,
+	0x60f90810,
+	0xd0fc50f9,
+	0x21f4e0fc,
+	0x0242b63f,
+	0xf8e91bf4,
+/* 0x06c1: memx_func_wait */
+	0x2c87f000,
+	0xcf0684b6,
+	0x1e980088,
+	0x011d9800,
+	0x98021c98,
+	0x10b6031b,
+	0xa421f410,
+/* 0x06de: memx_func_delay */
+	0x1e9800f8,
 	0x0410b600,
-/* 0x0496: memx_func_leave */
-	0x67f000f8,
-	0xe407f104,
+	0xf87f21f4,
+/* 0x06e9: memx_exec */
+	0xf9e0f900,
+	0x02c1b9d0,
+/* 0x06f3: memx_exec_next */
+	0x9802b2b9,
+	0x10b60013,
+	0xf034e704,
+	0xe033e701,
+	0x0132b601,
+	0x980c30f0,
+	0x55f9de35,
+	0xf40612b8,
+	0x0b98e41e,
+	0xef0c98ee,
+	0xf102cbbb,
+	0xb607c4b7,
+	0xbbcf06b4,
+	0xfcd0fc00,
+	0x4221f5e0,
+/* 0x072f: memx_info */
+	0xf100f803,
+	0xf103c0c7,
+	0xf50800b7,
+	0xf8034221,
+/* 0x073d: memx_recv */
+	0x01d6b000,
+	0xb0a90bf4,
+	0x0bf400d6,
+/* 0x074b: memx_init */
+	0xf800f8e9,
+/* 0x074d: perf_recv */
+/* 0x074f: perf_init */
+	0xf800f800,
+/* 0x0751: i2c_drive_scl */
+	0x0036b000,
+	0xf1110bf4,
+	0xb607e007,
+	0x01d00604,
+	0xf804bd00,
+/* 0x0765: i2c_drive_scl_lo */
+	0xe407f100,
 	0x0604b607,
-	0xbd0006d0,
-/* 0x04a5: memx_func_leave_wait */
-	0xc067f104,
-	0x0664b607,
-	0xf00066cf,
-	0x1bf40464,
-/* 0x04b7: memx_func_wr32 */
-	0x9800f8f3,
-	0x15980016,
-	0x0810b601,
-	0x50f960f9,
-	0xe0fcd0fc,
-	0xb63f21f4,
-	0x1bf40242,
-/* 0x04d3: memx_func_wait */
-	0xf000f8e9,
-	0x84b62c87,
-	0x0088cf06,
-	0x98001e98,
-	0x1c98011d,
-	0x031b9802,
-	0xf41010b6,
-	0x00f89c21,
-/* 0x04f0: memx_func_delay */
-	0xb6001e98,
-	0x21f40410,
-/* 0x04fb: memx_exec */
-	0xf900f87f,
-	0xb9d0f9e0,
-	0xb2b902c1,
-/* 0x0505: memx_exec_next */
-	0x00139802,
-	0x950410b6,
-	0x30f01034,
-	0xde35980c,
-	0x12b855f9,
-	0xec1ef406,
-	0xe0fcd0fc,
-	0x02b921f5,
-/* 0x0526: memx_info */
-	0xc7f100f8,
-	0xb7f103ac,
-	0x21f50800,
-	0x00f802b9,
-/* 0x0534: memx_recv */
-	0xf401d6b0,
-	0xd6b0c40b,
-	0xe90bf400,
-/* 0x0542: memx_init */
-	0x00f800f8,
-/* 0x0544: perf_recv */
-/* 0x0546: perf_init */
-	0x00f800f8,
-/* 0x0548: i2c_drive_scl */
-	0xf40036b0,
-	0x07f1110b,
-	0x04b607e0,
-	0x0001d006,
-	0x00f804bd,
-/* 0x055c: i2c_drive_scl_lo */
-	0x07e407f1,
-	0xd00604b6,
-	0x04bd0001,
-/* 0x056a: i2c_drive_sda */
-	0x36b000f8,
-	0x110bf400,
-	0x07e007f1,
-	0xd00604b6,
-	0x04bd0002,
-/* 0x057e: i2c_drive_sda_lo */
-	0x07f100f8,
-	0x04b607e4,
-	0x0002d006,
-	0x00f804bd,
-/* 0x058c: i2c_sense_scl */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0431fd00,
-	0xf4060bf4,
-/* 0x05a2: i2c_sense_scl_done */
-	0x00f80131,
-/* 0x05a4: i2c_sense_sda */
-	0xf10132f4,
-	0xb607c437,
-	0x33cf0634,
-	0x0432fd00,
-	0xf4060bf4,
-/* 0x05ba: i2c_sense_sda_done */
-	0x00f80131,
-/* 0x05bc: i2c_raise_scl */
-	0x47f140f9,
-	0x37f00898,
-	0x4821f501,
-/* 0x05c9: i2c_raise_scl_wait */
-	0xe8e7f105,
-	0x7f21f403,
-	0x058c21f5,
-	0xb60901f4,
-	0x1bf40142,
-/* 0x05dd: i2c_raise_scl_done */
-	0xf840fcef,
-/* 0x05e1: i2c_start */
-	0x8c21f500,
-	0x0d11f405,
-	0x05a421f5,
-	0xf40611f4,
-/* 0x05f2: i2c_start_rep */
-	0x37f0300e,
-	0x4821f500,
-	0x0137f005,
-	0x056a21f5,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0xbc21f550,
-	0x0464b605,
-/* 0x061f: i2c_start_send */
-	0xf01f11f4,
+	0xbd0001d0,
+/* 0x0773: i2c_drive_sda */
+	0xb000f804,
+	0x0bf40036,
+	0xe007f111,
+	0x0604b607,
+	0xbd0002d0,
+/* 0x0787: i2c_drive_sda_lo */
+	0xf100f804,
+	0xb607e407,
+	0x02d00604,
+	0xf804bd00,
+/* 0x0795: i2c_sense_scl */
+	0x0132f400,
+	0x07c437f1,
+	0xcf0634b6,
+	0x31fd0033,
+	0x060bf404,
+/* 0x07ab: i2c_sense_scl_done */
+	0xf80131f4,
+/* 0x07ad: i2c_sense_sda */
+	0x0132f400,
+	0x07c437f1,
+	0xcf0634b6,
+	0x32fd0033,
+	0x060bf404,
+/* 0x07c3: i2c_sense_sda_done */
+	0xf80131f4,
+/* 0x07c5: i2c_raise_scl */
+	0xf140f900,
+	0xf0089847,
+	0x21f50137,
+/* 0x07d2: i2c_raise_scl_wait */
+	0xe7f10751,
+	0x21f403e8,
+	0x9521f57f,
+	0x0901f407,
+	0xf40142b6,
+/* 0x07e6: i2c_raise_scl_done */
+	0x40fcef1b,
+/* 0x07ea: i2c_start */
+	0x21f500f8,
+	0x11f40795,
+	0xad21f50d,
+	0x0611f407,
+/* 0x07fb: i2c_start_rep */
+	0xf0300ef4,
 	0x21f50037,
-	0xe7f1056a,
-	0x21f41388,
-	0x0037f07f,
-	0x054821f5,
-	0x1388e7f1,
-/* 0x063b: i2c_start_out */
-	0xf87f21f4,
-/* 0x063d: i2c_stop */
-	0x0037f000,
-	0x054821f5,
+	0x37f00751,
+	0x7321f501,
+	0x0076bb07,
+	0xf90465b6,
+	0x04659450,
+	0xbd0256bb,
+	0x0475fd50,
+	0x21f550fc,
+	0x64b607c5,
+	0x1f11f404,
+/* 0x0828: i2c_start_send */
 	0xf50037f0,
-	0xf1056a21,
-	0xf403e8e7,
+	0xf1077321,
+	0xf41388e7,
 	0x37f07f21,
-	0x4821f501,
-	0x88e7f105,
+	0x5121f500,
+	0x88e7f107,
 	0x7f21f413,
-	0xf50137f0,
-	0xf1056a21,
-	0xf41388e7,
-	0x00f87f21,
-/* 0x0670: i2c_bitw */
-	0x056a21f5,
+/* 0x0844: i2c_start_out */
+/* 0x0846: i2c_stop */
+	0x37f000f8,
+	0x5121f500,
+	0x0037f007,
+	0x077321f5,
 	0x03e8e7f1,
-	0xbb7f21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x05bc21f5,
-	0xf40464b6,
-	0xe7f11811,
+	0xf07f21f4,
+	0x21f50137,
+	0xe7f10751,
 	0x21f41388,
-	0x0037f07f,
-	0x054821f5,
+	0x0137f07f,
+	0x077321f5,
 	0x1388e7f1,
-/* 0x06af: i2c_bitw_out */
 	0xf87f21f4,
-/* 0x06b1: i2c_bitr */
-	0x0137f000,
-	0x056a21f5,
-	0x03e8e7f1,
-	0xbb7f21f4,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x05bc21f5,
-	0xf40464b6,
-	0x21f51b11,
-	0x37f005a4,
-	0x4821f500,
-	0x88e7f105,
+/* 0x0879: i2c_bitw */
+	0x7321f500,
+	0xe8e7f107,
+	0x7f21f403,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xc521f550,
+	0x0464b607,
+	0xf11811f4,
+	0xf41388e7,
+	0x37f07f21,
+	0x5121f500,
+	0x88e7f107,
 	0x7f21f413,
-	0xf4013cf0,
-/* 0x06f6: i2c_bitr_done */
-	0x00f80131,
-/* 0x06f8: i2c_get_byte */
-	0xf00057f0,
-/* 0x06fe: i2c_get_byte_next */
-	0x54b60847,
+/* 0x08b8: i2c_bitw_out */
+/* 0x08ba: i2c_bitr */
+	0x37f000f8,
+	0x7321f501,
+	0xe8e7f107,
+	0x7f21f403,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xc521f550,
+	0x0464b607,
+	0xf51b11f4,
+	0xf007ad21,
+	0x21f50037,
+	0xe7f10751,
+	0x21f41388,
+	0x013cf07f,
+/* 0x08ff: i2c_bitr_done */
+	0xf80131f4,
+/* 0x0901: i2c_get_byte */
+	0x0057f000,
+/* 0x0907: i2c_get_byte_next */
+	0xb60847f0,
+	0x76bb0154,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb608ba21,
+	0x11f40464,
+	0x0553fd2b,
+	0xf40142b6,
+	0x37f0d81b,
 	0x0076bb01,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b606b1,
-	0x2b11f404,
-	0xb60553fd,
-	0x1bf40142,
-	0x0137f0d8,
-	0xb60076bb,
-	0x50f90465,
-	0xbb046594,
-	0x50bd0256,
-	0xfc0475fd,
-	0x7021f550,
-	0x0464b606,
-/* 0x0748: i2c_get_byte_done */
-/* 0x074a: i2c_put_byte */
-	0x47f000f8,
-/* 0x074d: i2c_put_byte_next */
-	0x0142b608,
-	0xbb3854ff,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x067021f5,
-	0xf40464b6,
-	0x46b03411,
-	0xd81bf400,
+	0x64b60879,
+/* 0x0951: i2c_get_byte_done */
+/* 0x0953: i2c_put_byte */
+	0xf000f804,
+/* 0x0956: i2c_put_byte_next */
+	0x42b60847,
+	0x3854ff01,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xb121f550,
-	0x0464b606,
-	0xbb0f11f4,
-	0x36b00076,
-	0x061bf401,
-/* 0x07a3: i2c_put_byte_done */
-	0xf80132f4,
-/* 0x07a5: i2c_addr */
-	0x0076bb00,
+	0x7921f550,
+	0x0464b608,
+	0xb03411f4,
+	0x1bf40046,
+	0x0076bbd8,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b605e1,
-	0x2911f404,
-	0x012ec3e7,
-	0xfd0134b6,
-	0x76bb0553,
+	0x64b608ba,
+	0x0f11f404,
+	0xb00076bb,
+	0x1bf40136,
+	0x0132f406,
+/* 0x09ac: i2c_put_byte_done */
+/* 0x09ae: i2c_addr */
+	0x76bb00f8,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6074a21,
-/* 0x07ea: i2c_addr_done */
-	0x00f80464,
-/* 0x07ec: i2c_acquire_addr */
-	0xb6f8cec7,
-	0xe0b702e4,
-	0xee980bfc,
-/* 0x07fb: i2c_acquire */
-	0xf500f800,
-	0xf407ec21,
-	0xd9f00421,
-	0x3f21f403,
-/* 0x080a: i2c_release */
-	0x21f500f8,
-	0x21f407ec,
-	0x03daf004,
-	0xf83f21f4,
-/* 0x0819: i2c_recv */
-	0x0132f400,
-	0xb6f8c1c7,
-	0x16b00214,
-	0x3a1ff528,
-	0xd413a001,
-	0x0032980b,
-	0x0bac13a0,
-	0xf4003198,
-	0xd0f90231,
-	0xd0f9e0f9,
-	0x000067f1,
-	0x100063f1,
-	0xbb016792,
+	0xb607ea21,
+	0x11f40464,
+	0x2ec3e729,
+	0x0134b601,
+	0xbb0553fd,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x07fb21f5,
-	0xfc0464b6,
-	0x00d6b0d0,
-	0x00b31bf5,
-	0xbb0057f0,
+	0x095321f5,
+/* 0x09f3: i2c_addr_done */
+	0xf80464b6,
+/* 0x09f5: i2c_acquire_addr */
+	0xf8cec700,
+	0xb702e4b6,
+	0x980c10e0,
+	0x00f800ee,
+/* 0x0a04: i2c_acquire */
+	0x09f521f5,
+	0xf00421f4,
+	0x21f403d9,
+/* 0x0a13: i2c_release */
+	0xf500f83f,
+	0xf409f521,
+	0xdaf00421,
+	0x3f21f403,
+/* 0x0a22: i2c_recv */
+	0x32f400f8,
+	0xf8c1c701,
+	0xb00214b6,
+	0x1ff52816,
+	0x13a0013a,
+	0x32980be8,
+	0xc013a000,
+	0x0031980b,
+	0xf90231f4,
+	0xf9e0f9d0,
+	0x0067f1d0,
+	0x0063f100,
+	0x01679210,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x0421f550,
+	0x0464b60a,
+	0xd6b0d0fc,
+	0xb31bf500,
+	0x0057f000,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0xae21f550,
+	0x0464b609,
+	0x00d011f5,
+	0xbbe0c5c7,
 	0x65b60076,
 	0x9450f904,
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x07a521f5,
+	0x095321f5,
 	0xf50464b6,
-	0xc700d011,
-	0x76bbe0c5,
+	0xf000ad11,
+	0x76bb0157,
 	0x0465b600,
 	0x659450f9,
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6074a21,
+	0xb609ae21,
 	0x11f50464,
-	0x57f000ad,
-	0x0076bb01,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b607a5,
-	0x8a11f504,
-	0x0076bb00,
-	0xf90465b6,
-	0x04659450,
-	0xbd0256bb,
-	0x0475fd50,
-	0x21f550fc,
-	0x64b606f8,
-	0x6a11f404,
-	0xbbe05bcb,
-	0x65b60076,
-	0x9450f904,
-	0x56bb0465,
-	0xfd50bd02,
-	0x50fc0475,
-	0x063d21f5,
-	0xb90464b6,
-	0x74bd025b,
-/* 0x091f: i2c_recv_not_rd08 */
-	0xb0430ef4,
-	0x1bf401d6,
-	0x0057f03d,
-	0x07a521f5,
-	0xc73311f4,
-	0x21f5e0c5,
-	0x11f4074a,
-	0x0057f029,
-	0x07a521f5,
-	0xc71f11f4,
-	0x21f5e0b5,
-	0x11f4074a,
-	0x3d21f515,
-	0xc774bd06,
-	0x1bf408c5,
-	0x0232f409,
-/* 0x095f: i2c_recv_not_wr08 */
-/* 0x095f: i2c_recv_done */
-	0xc7030ef4,
-	0x21f5f8ce,
-	0xe0fc080a,
-	0x12f4d0fc,
-	0x027cb90a,
-	0x02b921f5,
-/* 0x0974: i2c_recv_exit */
-/* 0x0976: i2c_init */
+	0x76bb008a,
+	0x0465b600,
+	0x659450f9,
+	0x0256bb04,
+	0x75fd50bd,
+	0xf550fc04,
+	0xb6090121,
+	0x11f40464,
+	0xe05bcb6a,
+	0xb60076bb,
+	0x50f90465,
+	0xbb046594,
+	0x50bd0256,
+	0xfc0475fd,
+	0x4621f550,
+	0x0464b608,
+	0xbd025bb9,
+	0x430ef474,
+/* 0x0b28: i2c_recv_not_rd08 */
+	0xf401d6b0,
+	0x57f03d1b,
+	0xae21f500,
+	0x3311f409,
+	0xf5e0c5c7,
+	0xf4095321,
+	0x57f02911,
+	0xae21f500,
+	0x1f11f409,
+	0xf5e0b5c7,
+	0xf4095321,
+	0x21f51511,
+	0x74bd0846,
+	0xf408c5c7,
+	0x32f4091b,
+	0x030ef402,
+/* 0x0b68: i2c_recv_not_wr08 */
+/* 0x0b68: i2c_recv_done */
+	0xf5f8cec7,
+	0xfc0a1321,
+	0xf4d0fce0,
+	0x7cb90a12,
+	0x4221f502,
+/* 0x0b7d: i2c_recv_exit */
+/* 0x0b7f: i2c_init */
+	0xf800f803,
+/* 0x0b81: test_recv */
+	0xd817f100,
+	0x0614b605,
+	0xb60011cf,
+	0x07f10110,
+	0x04b605d8,
+	0x0001d006,
+	0xe7f104bd,
+	0xe3f1d900,
+	0x21f5134f,
+	0x00f80262,
+/* 0x0ba8: test_init */
+	0x0800e7f1,
+	0x026221f5,
+/* 0x0bb2: idle_recv */
 	0x00f800f8,
-/* 0x0978: test_recv */
-	0x05d817f1,
-	0xcf0614b6,
-	0x10b60011,
-	0xd807f101,
-	0x0604b605,
-	0xbd0001d0,
-	0x00e7f104,
-	0x4fe3f1d9,
-	0xf521f513,
-/* 0x099f: test_init */
-	0xf100f801,
-	0xf50800e7,
-	0xf801f521,
-/* 0x09a9: idle_recv */
-/* 0x09ab: idle */
-	0xf400f800,
-	0x17f10031,
-	0x14b605d4,
-	0x0011cf06,
-	0xf10110b6,
-	0xb605d407,
-	0x01d00604,
-/* 0x09c7: idle_loop */
-	0xf004bd00,
-	0x32f45817,
-/* 0x09cd: idle_proc */
-/* 0x09cd: idle_proc_exec */
-	0xb910f902,
-	0x21f5021e,
-	0x10fc02c2,
-	0xf40911f4,
-	0x0ef40231,
-/* 0x09e1: idle_proc_next */
-	0x5810b6ef,
-	0xf4061fb8,
-	0x02f4e61b,
-	0x0028f4dd,
-	0x00bb0ef4,
-	0x00000000,
-	0x00000000,
+/* 0x0bb4: idle */
+	0xf10031f4,
+	0xb605d417,
+	0x11cf0614,
+	0x0110b600,
+	0x05d407f1,
+	0xd00604b6,
+	0x04bd0001,
+/* 0x0bd0: idle_loop */
+	0xf45817f0,
+/* 0x0bd6: idle_proc */
+/* 0x0bd6: idle_proc_exec */
+	0x10f90232,
+	0xf5021eb9,
+	0xfc034b21,
+	0x0911f410,
+	0xf40231f4,
+/* 0x0bea: idle_proc_next */
+	0x10b6ef0e,
+	0x061fb858,
+	0xf4e61bf4,
+	0x28f4dd02,
+	0xbb0ef400,
 	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc
index 8a89dfe41ce1..b85443261569 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc
@@ -23,6 +23,7 @@
  */
 
 #define NVKM_PPWR_CHIPSET GF119
+#define HW_TICKS_PER_US 324
 
 //#define NVKM_FALCON_PC24
 #define NVKM_FALCON_UNSHIFTED_IO
@@ -34,6 +35,7 @@
 .section #nvd0_pwr_data
 #define INCLUDE_PROC
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -44,6 +46,7 @@
 
 #define INCLUDE_DATA
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
@@ -56,6 +59,7 @@
 .section #nvd0_pwr_code
 #define INCLUDE_CODE
 #include "kernel.fuc"
+#include "arith.fuc"
 #include "host.fuc"
 #include "memx.fuc"
 #include "perf.fuc"
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc.h
index 8d369b3faaba..12d86f72ad10 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/nvd0.fuc.h
@@ -24,8 +24,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 /* 0x0058: proc_list_head */
 	0x54534f48,
-	0x000003be,
-	0x00000367,
+	0x0000049d,
+	0x00000446,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -46,8 +46,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x584d454d,
-	0x000004b8,
-	0x000004aa,
+	0x00000678,
+	0x0000066a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -68,8 +68,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x46524550,
-	0x000004bc,
-	0x000004ba,
+	0x0000067c,
+	0x0000067a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -90,8 +90,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x5f433249,
-	0x000008d7,
-	0x0000077a,
+	0x00000a97,
+	0x0000093a,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -112,8 +112,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x54534554,
-	0x000008fa,
-	0x000008d9,
+	0x00000aba,
+	0x00000a99,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -134,8 +134,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x454c4449,
-	0x00000906,
-	0x00000904,
+	0x00000ac6,
+	0x00000ac4,
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -227,24 +227,31 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 /* 0x0370: memx_func_head */
-	0x00010000,
-	0x00000000,
-	0x000003f4,
-/* 0x037c: memx_func_next */
 	0x00000001,
 	0x00000000,
-	0x00000415,
+	0x000004d3,
+/* 0x037c: memx_func_next */
 	0x00000002,
+	0x00000000,
+	0x00000554,
+	0x00000003,
 	0x00000002,
-	0x00000430,
-	0x00040003,
+	0x000005d8,
+	0x00040004,
+	0x00000000,
+	0x000005f4,
+	0x00010005,
+	0x00000000,
+	0x0000060e,
+	0x00010006,
+	0x00000000,
+	0x000005d3,
+/* 0x03b8: memx_func_tail */
+/* 0x03b8: memx_ts_start */
 	0x00000000,
-	0x0000044c,
-	0x00010004,
+/* 0x03bc: memx_ts_end */
 	0x00000000,
-	0x00000466,
-/* 0x03ac: memx_func_tail */
-/* 0x03ac: memx_data_head */
+/* 0x03c0: memx_data_head */
 	0x00000000,
 	0x00000000,
 	0x00000000,
@@ -757,8 +764,8 @@ uint32_t nvd0_pwr_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-/* 0x0bac: memx_data_tail */
-/* 0x0bac: i2c_scl_map */
+/* 0x0bc0: memx_data_tail */
+/* 0x0bc0: i2c_scl_map */
 	0x00000400,
 	0x00000800,
 	0x00001000,
@@ -769,7 +776,7 @@ uint32_t nvd0_pwr_data[] = {
 	0x00020000,
 	0x00040000,
 	0x00080000,
-/* 0x0bd4: i2c_sda_map */
+/* 0x0be8: i2c_sda_map */
 	0x00100000,
 	0x00200000,
 	0x00400000,
@@ -781,10 +788,69 @@ uint32_t nvd0_pwr_data[] = {
 	0x10000000,
 	0x20000000,
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 };
 
 uint32_t nvd0_pwr_code[] = {
-	0x02bf0ef5,
+	0x034d0ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
 	0xbd000ed0,
@@ -814,17 +880,20 @@ uint32_t nvd0_pwr_code[] = {
 	0xd4f100dd,
 	0x1bf47000,
 /* 0x0067: nsec */
-	0xf000f8f5,
+	0xf900f8f5,
+	0xf080f990,
 	0x88cf2c87,
-/* 0x006d: nsec_loop */
+/* 0x0071: nsec_loop */
 	0x2c97f000,
 	0xbb0099cf,
 	0x9eb80298,
 	0xf41ef406,
-/* 0x007e: wait */
-	0x87f000f8,
+	0x90fc80fc,
+/* 0x0086: wait */
+	0x90f900f8,
+	0x87f080f9,
 	0x0088cf2c,
-/* 0x0084: wait_loop */
+/* 0x0090: wait_loop */
 	0xf402eeb9,
 	0xdab90421,
 	0x04adfd02,
@@ -833,28 +902,29 @@ uint32_t nvd0_pwr_code[] = {
 	0x0099cf2c,
 	0xb80298bb,
 	0x1ef4069b,
-/* 0x00a5: wait_done */
-/* 0x00a7: intr_watchdog */
-	0x9800f8e2,
+/* 0x00b1: wait_done */
+	0xfc80fce2,
+/* 0x00b7: intr_watchdog */
+	0x9800f890,
 	0x96b003e9,
 	0x2a0bf400,
 	0xbb9a0a98,
 	0x1cf4029a,
 	0x01d7f00f,
-	0x020621f5,
+	0x028c21f5,
 	0x0ef494bd,
-/* 0x00c5: intr_watchdog_next_time */
+/* 0x00d5: intr_watchdog_next_time */
 	0x9b0a9815,
 	0xf400a6b0,
 	0x9ab8090b,
 	0x061cf406,
-/* 0x00d4: intr_watchdog_next_time_set */
-/* 0x00d7: intr_watchdog_next_proc */
+/* 0x00e4: intr_watchdog_next_time_set */
+/* 0x00e7: intr_watchdog_next_proc */
 	0x809b0980,
 	0xe0b603e9,
 	0x68e6b158,
 	0xc61bf402,
-/* 0x00e6: intr */
+/* 0x00f6: intr */
 	0x00f900f8,
 	0x80f904bd,
 	0xa0f990f9,
@@ -872,12 +942,12 @@ uint32_t nvd0_pwr_code[] = {
 	0x0bf40289,
 	0x9b008020,
 	0xf458e7f0,
-	0x0998a721,
+	0x0998b721,
 	0x0096b09b,
 	0xf00e0bf4,
 	0x09d03407,
 	0x8004bd00,
-/* 0x013e: intr_skip_watchdog */
+/* 0x014e: intr_skip_watchdog */
 	0x89e49a09,
 	0x0bf40800,
 	0x8897f13c,
@@ -889,20 +959,20 @@ uint32_t nvd0_pwr_code[] = {
 	0xf14f48e7,
 	0xf05453e3,
 	0x21f500d7,
-	0xc0fc026b,
+	0xc0fc02f1,
 	0x04c007f1,
 	0xbd000cd0,
-/* 0x0175: intr_subintr_skip_fifo */
+/* 0x0185: intr_subintr_skip_fifo */
 	0x8807f104,
 	0x0009d006,
-/* 0x017e: intr_skip_subintr */
+/* 0x018e: intr_skip_subintr */
 	0x89c404bd,
 	0x070bf420,
 	0xffbfa4f1,
-/* 0x0188: intr_skip_pause */
+/* 0x0198: intr_skip_pause */
 	0xf44089c4,
 	0xa4f1070b,
-/* 0x0192: intr_skip_user0 */
+/* 0x01a2: intr_skip_user0 */
 	0x07f0ffbf,
 	0x0008d004,
 	0x80fc04bd,
@@ -912,189 +982,298 @@ uint32_t nvd0_pwr_code[] = {
 	0xfca0fcb0,
 	0xfc80fc90,
 	0x0032f400,
-/* 0x01b6: timer */
-	0x32f401f8,
-	0x03f89810,
-	0xf40086b0,
-	0xfe80421c,
-	0x3807f003,
+/* 0x01c6: ticks_from_ns */
+	0xc0f901f8,
+	0xd7f1b0f9,
+	0xd3f00144,
+	0xb321f500,
+	0xe8ccec03,
+	0x00b4b003,
+	0xec120bf4,
+	0xf103e8ee,
+	0xf00144d7,
+	0x21f500d3,
+/* 0x01ee: ticks_from_ns_quit */
+	0xceb903b3,
+	0xfcb0fc02,
+/* 0x01f7: ticks_from_us */
+	0xf900f8c0,
+	0xf1b0f9c0,
+	0xf00144d7,
+	0x21f500d3,
+	0xceb903b3,
+	0x00b4b002,
+	0xbd050bf4,
+/* 0x0211: ticks_from_us_quit */
+	0xfcb0fce4,
+/* 0x0217: ticks_to_us */
+	0xf100f8c0,
+	0xf00144d7,
+	0xedff00d3,
+/* 0x0223: timer */
+	0xf900f8ec,
+	0xf480f990,
+	0xf8981032,
+	0x0086b003,
+	0xbd531cf4,
+	0x3807f084,
 	0xbd0008d0,
-	0x0887f004,
-	0xf00088cf,
-	0x1bf40284,
-	0x3487f020,
-	0xb80088cf,
-	0x0bf406e0,
-	0x06e8b809,
-/* 0x01eb: timer_reset */
-	0xf0191ef4,
-	0x0ed03407,
-	0x8004bd00,
-/* 0x01f6: timer_enable */
-	0x87f09a0e,
-	0x3807f001,
-	0xbd0008d0,
-/* 0x0201: timer_done */
-	0x1031f404,
-/* 0x0206: send_proc */
-	0x80f900f8,
-	0xe89890f9,
+	0x3487f004,
+	0x980088cf,
+	0x98bb9a09,
+	0x00e9bb02,
+	0xf003fe80,
+	0x88cf0887,
+	0x0284f000,
+	0xf0201bf4,
+	0x88cf3487,
+	0x06e0b800,
+	0xb8090bf4,
+	0x1cf406e8,
+/* 0x026d: timer_reset */
+	0x3407f00e,
+	0xbd000ed0,
+	0x9a0e8004,
+/* 0x0278: timer_enable */
+	0xf00187f0,
+	0x08d03807,
+/* 0x0283: timer_done */
+	0xf404bd00,
+	0x80fc1031,
+	0x00f890fc,
+/* 0x028c: send_proc */
+	0x90f980f9,
+	0x9805e898,
+	0x86f004e9,
+	0x0689b804,
+	0xc42a0bf4,
+	0x88940398,
+	0x1880b604,
+	0x98008ebb,
+	0x8a8000fa,
+	0x018d8000,
+	0x80028c80,
+	0x90b6038b,
+	0x0794f001,
+	0xf404e980,
+/* 0x02c6: send_done */
+	0x90fc0231,
+	0x00f880fc,
+/* 0x02cc: find */
+	0x87f080f9,
+	0x0131f458,
+/* 0x02d4: find_loop */
+	0xb8008a98,
+	0x0bf406ae,
+	0x5880b610,
+	0x026886b1,
+	0xf4f01bf4,
+/* 0x02ea: find_done */
+	0x8eb90132,
+	0xf880fc02,
+/* 0x02f1: send */
+	0xcc21f500,
+	0x9701f402,
+/* 0x02fa: recv */
+	0x90f900f8,
+	0xe89880f9,
 	0x04e99805,
-	0xb80486f0,
+	0xb80132f4,
 	0x0bf40689,
-	0x0398c42a,
-	0xb6048894,
-	0x8ebb1880,
-	0x00fa9800,
-	0x80008a80,
-	0x8c80018d,
-	0x038b8002,
-	0xf00190b6,
-	0xe9800794,
-	0x0231f404,
-/* 0x0240: send_done */
-	0x80fc90fc,
-/* 0x0246: find */
-	0x80f900f8,
-	0xf45887f0,
-/* 0x024e: find_loop */
-	0x8a980131,
-	0x06aeb800,
-	0xb6100bf4,
-	0x86b15880,
-	0x1bf40268,
-	0x0132f4f0,
-/* 0x0264: find_done */
-	0xfc028eb9,
-/* 0x026b: send */
-	0xf500f880,
-	0xf4024621,
-	0x00f89701,
-/* 0x0274: recv */
-	0x9805e898,
-	0x32f404e9,
-	0x0689b801,
-	0xc43d0bf4,
-	0x80b60389,
-	0x0784f001,
-	0x9805e880,
-	0xf0f902ea,
-	0xf9018ffe,
-	0x02efb9f0,
-	0xbb049994,
-	0xe0b600e9,
-	0x03eb9818,
-	0x9802ec98,
-	0xee9801ed,
-	0xfca5f900,
-	0x00f8fef0,
-	0xfc0131f4,
-/* 0x02bd: recv_done */
-/* 0x02bf: init */
-	0xf100f8f0,
-	0xcf010817,
-	0x11e70011,
-	0x14b60109,
-	0x0014fe08,
-	0x00e017f1,
-	0xf00013f0,
-	0x01d01c07,
-	0xf004bd00,
-	0x07f0ff17,
-	0x0001d014,
-	0x17f004bd,
-	0x0015f102,
-	0x1007f008,
-	0xbd0001d0,
-	0xe617f104,
-	0x0013f000,
-	0xf40010fe,
-	0x17f01031,
-	0x3807f001,
-	0xbd0001d0,
-	0x58f7f004,
-/* 0x0314: init_proc */
-	0xb001f198,
-	0x0bf40016,
-	0xb615f9fa,
-	0x0ef458f0,
-/* 0x0325: host_send */
-	0xb017f1f2,
-	0x0011cf04,
-	0x04a027f1,
-	0xb80022cf,
-	0x0bf40612,
-	0x071ec42f,
-	0xb704ee94,
-	0x980270e0,
+	0x0389c43d,
+	0xf00180b6,
+	0xe8800784,
+	0x02ea9805,
+	0x8ffef0f9,
+	0xb9f0f901,
+	0x999402ef,
+	0x00e9bb04,
+	0x9818e0b6,
 	0xec9803eb,
 	0x01ed9802,
-	0xf500ee98,
-	0xb6026b21,
-	0x1ec40110,
-	0xb007f10f,
-	0x000ed004,
-	0x0ef404bd,
-/* 0x0365: host_send_done */
-/* 0x0367: host_recv */
-	0xf100f8c3,
-	0xf14e4917,
-	0xb8525413,
-	0x0bf406e1,
-/* 0x0375: host_recv_wait */
-	0xcc17f1b3,
-	0x0011cf04,
-	0x04c827f1,
-	0xf00022cf,
-	0x12b80816,
-	0xec0bf406,
-	0xb60723c4,
-	0x30b70434,
-	0x3b8002f0,
-	0x023c8003,
-	0x80013d80,
-	0x20b6003e,
-	0x0f24f001,
-	0x04c807f1,
-	0xbd0002d0,
-	0x4027f004,
-	0xd00007f0,
-	0x04bd0002,
-/* 0x03be: host_init */
+	0xf900ee98,
+	0xfef0fca5,
+	0x31f400f8,
+/* 0x0347: recv_done */
+	0xfcf0fc01,
+	0xf890fc80,
+/* 0x034d: init */
+	0x0817f100,
+	0x0011cf01,
+	0x010911e7,
+	0xfe0814b6,
+	0x17f10014,
+	0x13f000e0,
+	0x1c07f000,
+	0xbd0001d0,
+	0xff17f004,
+	0xd01407f0,
+	0x04bd0001,
+	0xf10217f0,
+	0xf0080015,
+	0x01d01007,
+	0xf104bd00,
+	0xf000f617,
+	0x10fe0013,
+	0x1031f400,
+	0xf00117f0,
+	0x01d03807,
+	0xf004bd00,
+/* 0x03a2: init_proc */
+	0xf19858f7,
+	0x0016b001,
+	0xf9fa0bf4,
+	0x58f0b615,
+/* 0x03b3: mulu32_32_64 */
+	0xf9f20ef4,
+	0xf920f910,
+	0x9540f930,
+	0xd29510e1,
+	0xbdc4bd10,
+	0xc0edffb4,
+	0xb9301dff,
+	0x34f10234,
+	0x34b6ffff,
+	0x1045b610,
+	0xbb00c3bb,
+	0xe2ff01b4,
+	0x0234b930,
+	0xffff34f1,
+	0xb61034b6,
+	0xc3bb1045,
+	0x01b4bb00,
+	0xbb3012ff,
+	0x40fc00b3,
+	0x20fc30fc,
+	0x00f810fc,
+/* 0x0404: host_send */
+	0x04b017f1,
+	0xf10011cf,
+	0xcf04a027,
+	0x12b80022,
+	0x2f0bf406,
+	0x94071ec4,
+	0xe0b704ee,
+	0xeb980270,
+	0x02ec9803,
+	0x9801ed98,
+	0x21f500ee,
+	0x10b602f1,
+	0x0f1ec401,
+	0x04b007f1,
+	0xbd000ed0,
+	0xc30ef404,
+/* 0x0444: host_send_done */
+/* 0x0446: host_recv */
 	0x17f100f8,
-	0x14b60080,
-	0x7015f110,
-	0xd007f102,
-	0x0001d004,
-	0x17f104bd,
-	0x14b60080,
-	0xf015f110,
-	0xdc07f102,
-	0x0001d004,
-	0x17f004bd,
-	0xc407f101,
-	0x0001d004,
-	0x00f804bd,
-/* 0x03f4: memx_func_enter */
+	0x13f14e49,
+	0xe1b85254,
+	0xb30bf406,
+/* 0x0454: host_recv_wait */
+	0x04cc17f1,
+	0xf10011cf,
+	0xcf04c827,
+	0x16f00022,
+	0x0612b808,
+	0xc4ec0bf4,
+	0x34b60723,
+	0xf030b704,
+	0x033b8002,
+	0x80023c80,
+	0x3e80013d,
+	0x0120b600,
+	0xf10f24f0,
+	0xd004c807,
+	0x04bd0002,
+	0xf04027f0,
+	0x02d00007,
+	0xf804bd00,
+/* 0x049d: host_init */
+	0x8017f100,
+	0x1014b600,
+	0x027015f1,
+	0x04d007f1,
+	0xbd0001d0,
+	0x8017f104,
+	0x1014b600,
+	0x02f015f1,
+	0x04dc07f1,
+	0xbd0001d0,
+	0x0117f004,
+	0x04c407f1,
+	0xbd0001d0,
+/* 0x04d3: memx_func_enter */
+	0xf100f804,
+	0xf1162067,
+	0xf1f55d77,
+	0xb9ffff73,
+	0x21f4026e,
+	0x02d8b904,
+	0xf90487fd,
+	0xfc80f960,
+	0xf4e0fcd0,
+	0x77f13321,
+	0x73f1fffe,
+	0x6eb9ffff,
+	0x0421f402,
+	0xfd02d8b9,
+	0x60f90487,
+	0xd0fc80f9,
+	0x21f4e0fc,
+	0xf067f133,
+	0x026eb926,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f904,
+	0xfcd0fc80,
+	0x3321f4e0,
 	0xf10467f0,
 	0xd007e007,
 	0x04bd0006,
-/* 0x0400: memx_func_enter_wait */
+/* 0x053c: memx_func_enter_wait */
 	0x07c067f1,
 	0xf00066cf,
 	0x0bf40464,
-	0x001698f6,
-	0xf80410b6,
-/* 0x0415: memx_func_leave */
-	0x0467f000,
+	0x2c67f0f6,
+	0x800066cf,
+	0x00f8ee06,
+/* 0x0554: memx_func_leave */
+	0xcf2c67f0,
+	0x06800066,
+	0x0467f0ef,
 	0x07e407f1,
 	0xbd0006d0,
-/* 0x0421: memx_func_leave_wait */
+/* 0x0569: memx_func_leave_wait */
 	0xc067f104,
 	0x0066cf07,
 	0xf40464f0,
-	0x00f8f61b,
-/* 0x0430: memx_func_wr32 */
+	0x67f1f61b,
+	0x77f126f0,
+	0x73f00001,
+	0x026eb900,
+	0xb90421f4,
+	0x87fd02d8,
+	0xf960f905,
+	0xfcd0fc80,
+	0x3321f4e0,
+	0x162067f1,
+	0xf4026eb9,
+	0xd8b90421,
+	0x0587fd02,
+	0x80f960f9,
+	0xe0fcd0fc,
+	0xf13321f4,
+	0xf00aa277,
+	0x6eb90073,
+	0x0421f402,
+	0xfd02d8b9,
+	0x60f90587,
+	0xd0fc80f9,
+	0x21f4e0fc,
+/* 0x05d3: memx_func_wait_vblank */
+	0xb600f833,
+	0x00f80410,
+/* 0x05d8: memx_func_wr32 */
 	0x98001698,
 	0x10b60115,
 	0xf960f908,
@@ -1102,131 +1281,137 @@ uint32_t nvd0_pwr_code[] = {
 	0x3321f4e0,
 	0xf40242b6,
 	0x00f8e91b,
-/* 0x044c: memx_func_wait */
+/* 0x05f4: memx_func_wait */
 	0xcf2c87f0,
 	0x1e980088,
 	0x011d9800,
 	0x98021c98,
 	0x10b6031b,
-	0x7e21f410,
-/* 0x0466: memx_func_delay */
+	0x8621f410,
+/* 0x060e: memx_func_delay */
 	0x1e9800f8,
 	0x0410b600,
 	0xf86721f4,
-/* 0x0471: memx_exec */
+/* 0x0619: memx_exec */
 	0xf9e0f900,
 	0x02c1b9d0,
-/* 0x047b: memx_exec_next */
+/* 0x0623: memx_exec_next */
 	0x9802b2b9,
 	0x10b60013,
-	0x10349504,
+	0xf034e704,
+	0xe033e701,
+	0x0132b601,
 	0x980c30f0,
 	0x55f9de35,
 	0xf40612b8,
-	0xd0fcec1e,
+	0x0b98e41e,
+	0xef0c98ee,
+	0xf102cbbb,
+	0xcf07c4b7,
+	0xd0fc00bb,
 	0x21f5e0fc,
-	0x00f8026b,
-/* 0x049c: memx_info */
-	0x03acc7f1,
+	0x00f802f1,
+/* 0x065c: memx_info */
+	0x03c0c7f1,
 	0x0800b7f1,
-	0x026b21f5,
-/* 0x04aa: memx_recv */
+	0x02f121f5,
+/* 0x066a: memx_recv */
 	0xd6b000f8,
-	0xc40bf401,
+	0xac0bf401,
 	0xf400d6b0,
 	0x00f8e90b,
-/* 0x04b8: memx_init */
-/* 0x04ba: perf_recv */
+/* 0x0678: memx_init */
+/* 0x067a: perf_recv */
 	0x00f800f8,
-/* 0x04bc: perf_init */
-/* 0x04be: i2c_drive_scl */
+/* 0x067c: perf_init */
+/* 0x067e: i2c_drive_scl */
 	0x36b000f8,
 	0x0e0bf400,
 	0x07e007f1,
 	0xbd0001d0,
-/* 0x04cf: i2c_drive_scl_lo */
+/* 0x068f: i2c_drive_scl_lo */
 	0xf100f804,
 	0xd007e407,
 	0x04bd0001,
-/* 0x04da: i2c_drive_sda */
+/* 0x069a: i2c_drive_sda */
 	0x36b000f8,
 	0x0e0bf400,
 	0x07e007f1,
 	0xbd0002d0,
-/* 0x04eb: i2c_drive_sda_lo */
+/* 0x06ab: i2c_drive_sda_lo */
 	0xf100f804,
 	0xd007e407,
 	0x04bd0002,
-/* 0x04f6: i2c_sense_scl */
+/* 0x06b6: i2c_sense_scl */
 	0x32f400f8,
 	0xc437f101,
 	0x0033cf07,
 	0xf40431fd,
 	0x31f4060b,
-/* 0x0509: i2c_sense_scl_done */
-/* 0x050b: i2c_sense_sda */
+/* 0x06c9: i2c_sense_scl_done */
+/* 0x06cb: i2c_sense_sda */
 	0xf400f801,
 	0x37f10132,
 	0x33cf07c4,
 	0x0432fd00,
 	0xf4060bf4,
-/* 0x051e: i2c_sense_sda_done */
+/* 0x06de: i2c_sense_sda_done */
 	0x00f80131,
-/* 0x0520: i2c_raise_scl */
+/* 0x06e0: i2c_raise_scl */
 	0x47f140f9,
 	0x37f00898,
-	0xbe21f501,
-/* 0x052d: i2c_raise_scl_wait */
-	0xe8e7f104,
+	0x7e21f501,
+/* 0x06ed: i2c_raise_scl_wait */
+	0xe8e7f106,
 	0x6721f403,
-	0x04f621f5,
+	0x06b621f5,
 	0xb60901f4,
 	0x1bf40142,
-/* 0x0541: i2c_raise_scl_done */
+/* 0x0701: i2c_raise_scl_done */
 	0xf840fcef,
-/* 0x0545: i2c_start */
-	0xf621f500,
-	0x0d11f404,
-	0x050b21f5,
+/* 0x0705: i2c_start */
+	0xb621f500,
+	0x0d11f406,
+	0x06cb21f5,
 	0xf40611f4,
-/* 0x0556: i2c_start_rep */
+/* 0x0716: i2c_start_rep */
 	0x37f0300e,
-	0xbe21f500,
-	0x0137f004,
-	0x04da21f5,
+	0x7e21f500,
+	0x0137f006,
+	0x069a21f5,
 	0xb60076bb,
 	0x50f90465,
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x2021f550,
-	0x0464b605,
-/* 0x0583: i2c_start_send */
+	0xe021f550,
+	0x0464b606,
+/* 0x0743: i2c_start_send */
 	0xf01f11f4,
 	0x21f50037,
-	0xe7f104da,
+	0xe7f1069a,
 	0x21f41388,
 	0x0037f067,
-	0x04be21f5,
+	0x067e21f5,
 	0x1388e7f1,
-/* 0x059f: i2c_start_out */
+/* 0x075f: i2c_start_out */
 	0xf86721f4,
-/* 0x05a1: i2c_stop */
+/* 0x0761: i2c_stop */
 	0x0037f000,
-	0x04be21f5,
+	0x067e21f5,
 	0xf50037f0,
-	0xf104da21,
+	0xf1069a21,
 	0xf403e8e7,
 	0x37f06721,
-	0xbe21f501,
-	0x88e7f104,
+	0x7e21f501,
+	0x88e7f106,
 	0x6721f413,
 	0xf50137f0,
-	0xf104da21,
+	0xf1069a21,
 	0xf41388e7,
 	0x00f86721,
-/* 0x05d4: i2c_bitw */
-	0x04da21f5,
+/* 0x0794: i2c_bitw */
+	0x069a21f5,
 	0x03e8e7f1,
 	0xbb6721f4,
 	0x65b60076,
@@ -1234,18 +1419,18 @@ uint32_t nvd0_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x052021f5,
+	0x06e021f5,
 	0xf40464b6,
 	0xe7f11811,
 	0x21f41388,
 	0x0037f067,
-	0x04be21f5,
+	0x067e21f5,
 	0x1388e7f1,
-/* 0x0613: i2c_bitw_out */
+/* 0x07d3: i2c_bitw_out */
 	0xf86721f4,
-/* 0x0615: i2c_bitr */
+/* 0x07d5: i2c_bitr */
 	0x0137f000,
-	0x04da21f5,
+	0x069a21f5,
 	0x03e8e7f1,
 	0xbb6721f4,
 	0x65b60076,
@@ -1253,19 +1438,19 @@ uint32_t nvd0_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x052021f5,
+	0x06e021f5,
 	0xf40464b6,
 	0x21f51b11,
-	0x37f0050b,
-	0xbe21f500,
-	0x88e7f104,
+	0x37f006cb,
+	0x7e21f500,
+	0x88e7f106,
 	0x6721f413,
 	0xf4013cf0,
-/* 0x065a: i2c_bitr_done */
+/* 0x081a: i2c_bitr_done */
 	0x00f80131,
-/* 0x065c: i2c_get_byte */
+/* 0x081c: i2c_get_byte */
 	0xf00057f0,
-/* 0x0662: i2c_get_byte_next */
+/* 0x0822: i2c_get_byte_next */
 	0x54b60847,
 	0x0076bb01,
 	0xf90465b6,
@@ -1273,7 +1458,7 @@ uint32_t nvd0_pwr_code[] = {
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b60615,
+	0x64b607d5,
 	0x2b11f404,
 	0xb60553fd,
 	0x1bf40142,
@@ -1283,12 +1468,12 @@ uint32_t nvd0_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xd421f550,
-	0x0464b605,
-/* 0x06ac: i2c_get_byte_done */
-/* 0x06ae: i2c_put_byte */
+	0x9421f550,
+	0x0464b607,
+/* 0x086c: i2c_get_byte_done */
+/* 0x086e: i2c_put_byte */
 	0x47f000f8,
-/* 0x06b1: i2c_put_byte_next */
+/* 0x0871: i2c_put_byte_next */
 	0x0142b608,
 	0xbb3854ff,
 	0x65b60076,
@@ -1296,7 +1481,7 @@ uint32_t nvd0_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x05d421f5,
+	0x079421f5,
 	0xf40464b6,
 	0x46b03411,
 	0xd81bf400,
@@ -1305,21 +1490,21 @@ uint32_t nvd0_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x1521f550,
-	0x0464b606,
+	0xd521f550,
+	0x0464b607,
 	0xbb0f11f4,
 	0x36b00076,
 	0x061bf401,
-/* 0x0707: i2c_put_byte_done */
+/* 0x08c7: i2c_put_byte_done */
 	0xf80132f4,
-/* 0x0709: i2c_addr */
+/* 0x08c9: i2c_addr */
 	0x0076bb00,
 	0xf90465b6,
 	0x04659450,
 	0xbd0256bb,
 	0x0475fd50,
 	0x21f550fc,
-	0x64b60545,
+	0x64b60705,
 	0x2911f404,
 	0x012ec3e7,
 	0xfd0134b6,
@@ -1329,30 +1514,30 @@ uint32_t nvd0_pwr_code[] = {
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb606ae21,
-/* 0x074e: i2c_addr_done */
+	0xb6086e21,
+/* 0x090e: i2c_addr_done */
 	0x00f80464,
-/* 0x0750: i2c_acquire_addr */
+/* 0x0910: i2c_acquire_addr */
 	0xb6f8cec7,
 	0xe0b705e4,
 	0x00f8d014,
-/* 0x075c: i2c_acquire */
-	0x075021f5,
+/* 0x091c: i2c_acquire */
+	0x091021f5,
 	0xf00421f4,
 	0x21f403d9,
-/* 0x076b: i2c_release */
+/* 0x092b: i2c_release */
 	0xf500f833,
-	0xf4075021,
+	0xf4091021,
 	0xdaf00421,
 	0x3321f403,
-/* 0x077a: i2c_recv */
+/* 0x093a: i2c_recv */
 	0x32f400f8,
 	0xf8c1c701,
 	0xb00214b6,
 	0x1ff52816,
 	0x13a0013a,
-	0x32980bd4,
-	0xac13a000,
+	0x32980be8,
+	0xc013a000,
 	0x0031980b,
 	0xf90231f4,
 	0xf9e0f9d0,
@@ -1364,8 +1549,8 @@ uint32_t nvd0_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x5c21f550,
-	0x0464b607,
+	0x1c21f550,
+	0x0464b609,
 	0xd6b0d0fc,
 	0xb31bf500,
 	0x0057f000,
@@ -1374,8 +1559,8 @@ uint32_t nvd0_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0x0921f550,
-	0x0464b607,
+	0xc921f550,
+	0x0464b608,
 	0x00d011f5,
 	0xbbe0c5c7,
 	0x65b60076,
@@ -1383,7 +1568,7 @@ uint32_t nvd0_pwr_code[] = {
 	0x56bb0465,
 	0xfd50bd02,
 	0x50fc0475,
-	0x06ae21f5,
+	0x086e21f5,
 	0xf50464b6,
 	0xf000ad11,
 	0x76bb0157,
@@ -1392,7 +1577,7 @@ uint32_t nvd0_pwr_code[] = {
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6070921,
+	0xb608c921,
 	0x11f50464,
 	0x76bb008a,
 	0x0465b600,
@@ -1400,7 +1585,7 @@ uint32_t nvd0_pwr_code[] = {
 	0x0256bb04,
 	0x75fd50bd,
 	0xf550fc04,
-	0xb6065c21,
+	0xb6081c21,
 	0x11f40464,
 	0xe05bcb6a,
 	0xb60076bb,
@@ -1408,38 +1593,38 @@ uint32_t nvd0_pwr_code[] = {
 	0xbb046594,
 	0x50bd0256,
 	0xfc0475fd,
-	0xa121f550,
-	0x0464b605,
+	0x6121f550,
+	0x0464b607,
 	0xbd025bb9,
 	0x430ef474,
-/* 0x0880: i2c_recv_not_rd08 */
+/* 0x0a40: i2c_recv_not_rd08 */
 	0xf401d6b0,
 	0x57f03d1b,
-	0x0921f500,
-	0x3311f407,
+	0xc921f500,
+	0x3311f408,
 	0xf5e0c5c7,
-	0xf406ae21,
+	0xf4086e21,
 	0x57f02911,
-	0x0921f500,
-	0x1f11f407,
+	0xc921f500,
+	0x1f11f408,
 	0xf5e0b5c7,
-	0xf406ae21,
+	0xf4086e21,
 	0x21f51511,
-	0x74bd05a1,
+	0x74bd0761,
 	0xf408c5c7,
 	0x32f4091b,
 	0x030ef402,
-/* 0x08c0: i2c_recv_not_wr08 */
-/* 0x08c0: i2c_recv_done */
+/* 0x0a80: i2c_recv_not_wr08 */
+/* 0x0a80: i2c_recv_done */
 	0xf5f8cec7,
-	0xfc076b21,
+	0xfc092b21,
 	0xf4d0fce0,
 	0x7cb90a12,
-	0x6b21f502,
-/* 0x08d5: i2c_recv_exit */
-/* 0x08d7: i2c_init */
+	0xf121f502,
+/* 0x0a95: i2c_recv_exit */
+/* 0x0a97: i2c_init */
 	0xf800f802,
-/* 0x08d9: test_recv */
+/* 0x0a99: test_recv */
 	0xd817f100,
 	0x0011cf05,
 	0xf10110b6,
@@ -1447,29 +1632,29 @@ uint32_t nvd0_pwr_code[] = {
 	0x04bd0001,
 	0xd900e7f1,
 	0x134fe3f1,
-	0x01b621f5,
-/* 0x08fa: test_init */
+	0x022321f5,
+/* 0x0aba: test_init */
 	0xe7f100f8,
 	0x21f50800,
-	0x00f801b6,
-/* 0x0904: idle_recv */
-/* 0x0906: idle */
+	0x00f80223,
+/* 0x0ac4: idle_recv */
+/* 0x0ac6: idle */
 	0x31f400f8,
 	0xd417f100,
 	0x0011cf05,
 	0xf10110b6,
 	0xd005d407,
 	0x04bd0001,
-/* 0x091c: idle_loop */
+/* 0x0adc: idle_loop */
 	0xf45817f0,
-/* 0x0922: idle_proc */
-/* 0x0922: idle_proc_exec */
+/* 0x0ae2: idle_proc */
+/* 0x0ae2: idle_proc_exec */
 	0x10f90232,
 	0xf5021eb9,
-	0xfc027421,
+	0xfc02fa21,
 	0x0911f410,
 	0xf40231f4,
-/* 0x0936: idle_proc_next */
+/* 0x0af6: idle_proc_next */
 	0x10b6ef0e,
 	0x061fb858,
 	0xf4e61bf4,
@@ -1521,4 +1706,20 @@ uint32_t nvd0_pwr_code[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
 };
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h
index 574acfa44c8c..522e3079f824 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/fuc/os.h
@@ -19,11 +19,12 @@
 #define MEMX_MSG_EXEC 1
 
 /* MEMX: script opcode definitions */
-#define MEMX_ENTER  0
-#define MEMX_LEAVE  1
-#define MEMX_WR32   2
-#define MEMX_WAIT   3
-#define MEMX_DELAY  4
+#define MEMX_ENTER  1
+#define MEMX_LEAVE  2
+#define MEMX_WR32   3
+#define MEMX_WAIT   4
+#define MEMX_DELAY  5
+#define MEMX_VBLANK 6
 
 /* I2C_: message identifiers */
 #define I2C__MSG_RD08 0
diff --git a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c
index def6a9ac68cf..65eaa2546cad 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/pwr/memx.c
@@ -20,10 +20,11 @@ memx_out(struct nouveau_memx *memx)
 	struct nouveau_pwr *ppwr = memx->ppwr;
 	int i;
 
-	if (memx->c.size) {
+	if (memx->c.mthd) {
 		nv_wr32(ppwr, 0x10a1c4, (memx->c.size << 16) | memx->c.mthd);
 		for (i = 0; i < memx->c.size; i++)
 			nv_wr32(ppwr, 0x10a1c4, memx->c.data[i]);
+		memx->c.mthd = 0;
 		memx->c.size = 0;
 	}
 }
@@ -32,7 +33,7 @@ static void
 memx_cmd(struct nouveau_memx *memx, u32 mthd, u32 size, u32 data[])
 {
 	if ((memx->c.size + size >= ARRAY_SIZE(memx->c.data)) ||
-	    (memx->c.size && memx->c.mthd != mthd))
+	    (memx->c.mthd && memx->c.mthd != mthd))
 		memx_out(memx);
 	memcpy(&memx->c.data[memx->c.size], data, size * sizeof(data[0]));
 	memx->c.size += size;
@@ -62,8 +63,7 @@ nouveau_memx_init(struct nouveau_pwr *ppwr, struct nouveau_memx **pmemx)
 		nv_wr32(ppwr, 0x10a580, 0x00000003);
 	} while (nv_rd32(ppwr, 0x10a580) != 0x00000003);
 	nv_wr32(ppwr, 0x10a1c0, 0x01000000 | memx->base);
-	nv_wr32(ppwr, 0x10a1c4, 0x00010000 | MEMX_ENTER);
-	nv_wr32(ppwr, 0x10a1c4, 0x00000000);
+
 	return 0;
 }
 
@@ -78,7 +78,6 @@ nouveau_memx_fini(struct nouveau_memx **pmemx, bool exec)
 	memx_out(memx);
 
 	/* release data segment access */
-	nv_wr32(ppwr, 0x10a1c4, 0x00000000 | MEMX_LEAVE);
 	finish = nv_rd32(ppwr, 0x10a1c0) & 0x00ffffff;
 	nv_wr32(ppwr, 0x10a580, 0x00000000);
 
@@ -88,6 +87,8 @@ nouveau_memx_fini(struct nouveau_memx **pmemx, bool exec)
 				 memx->base, finish);
 	}
 
+	nv_debug(memx->ppwr, "Exec took %uns, PPWR_IN %08x\n",
+		 reply[0], reply[1]);
 	kfree(memx);
 	return 0;
 }
@@ -117,4 +118,51 @@ nouveau_memx_nsec(struct nouveau_memx *memx, u32 nsec)
 	memx_out(memx); /* fuc can't handle multiple */
 }
 
+void
+nouveau_memx_wait_vblank(struct nouveau_memx *memx)
+{
+	struct nouveau_pwr *ppwr = memx->ppwr;
+	u32 heads, x, y, px = 0;
+	int i, head_sync;
+
+	if (nv_device(ppwr)->chipset < 0xd0) {
+		heads = nv_rd32(ppwr, 0x610050);
+		for (i = 0; i < 2; i++) {
+			/* Heuristic: sync to head with biggest resolution */
+			if (heads & (2 << (i << 3))) {
+				x = nv_rd32(ppwr, 0x610b40 + (0x540 * i));
+				y = (x & 0xffff0000) >> 16;
+				x &= 0x0000ffff;
+				if ((x * y) > px) {
+					px = (x * y);
+					head_sync = i;
+				}
+			}
+		}
+	}
+
+	if (px == 0) {
+		nv_debug(memx->ppwr, "WAIT VBLANK !NO ACTIVE HEAD\n");
+		return;
+	}
+
+	nv_debug(memx->ppwr, "WAIT VBLANK HEAD%d\n", head_sync);
+	memx_cmd(memx, MEMX_VBLANK, 1, (u32[]){ head_sync });
+	memx_out(memx); /* fuc can't handle multiple */
+}
+
+void
+nouveau_memx_block(struct nouveau_memx *memx)
+{
+	nv_debug(memx->ppwr, "   HOST BLOCKED\n");
+	memx_cmd(memx, MEMX_ENTER, 0, NULL);
+}
+
+void
+nouveau_memx_unblock(struct nouveau_memx *memx)
+{
+	nv_debug(memx->ppwr, "   HOST UNBLOCKED\n");
+	memx_cmd(memx, MEMX_LEAVE, 0, NULL);
+}
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c b/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c
index 016990a8252c..3656d605168f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/fan.c
@@ -31,6 +31,8 @@
 #include <subdev/gpio.h>
 #include <subdev/timer.h>
 
+#include <subdev/bios/fan.h>
+
 static int
 nouveau_fan_update(struct nouveau_fan *fan, bool immediate, int target)
 {
@@ -275,8 +277,11 @@ nouveau_therm_fan_ctor(struct nouveau_therm *therm)
 	/* other random init... */
 	nouveau_therm_fan_set_defaults(therm);
 	nvbios_perf_fan_parse(bios, &priv->fan->perf);
-	if (nvbios_therm_fan_parse(bios, &priv->fan->bios))
-		nv_error(therm, "parsing the thermal table failed\n");
+	if (!nvbios_fan_parse(bios, &priv->fan->bios)) {
+		nv_debug(therm, "parsing the fan table failed\n");
+		if (nvbios_therm_fan_parse(bios, &priv->fan->bios))
+			nv_error(therm, "parsing both fan tables failed\n");
+	}
 	nouveau_therm_fan_safety_checks(therm);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/fanpwm.c b/drivers/gpu/drm/nouveau/core/subdev/therm/fanpwm.c
index 9a5c07340263..c629d7f2a6a4 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/fanpwm.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/fanpwm.c
@@ -25,6 +25,8 @@
 
 #include <core/option.h>
 #include <subdev/gpio.h>
+#include <subdev/bios.h>
+#include <subdev/bios/fan.h>
 
 #include "priv.h"
 
@@ -86,11 +88,15 @@ nouveau_fanpwm_create(struct nouveau_therm *therm, struct dcb_gpio_func *func)
 {
 	struct nouveau_device *device = nv_device(therm);
 	struct nouveau_therm_priv *tpriv = (void *)therm;
+	struct nouveau_bios *bios = nouveau_bios(therm);
 	struct nouveau_fanpwm_priv *priv;
+	struct nvbios_therm_fan fan;
 	u32 divs, duty;
 
+	nvbios_fan_parse(bios, &fan);
+
 	if (!nouveau_boolopt(device->cfgopt, "NvFanPWM", func->param) ||
-	    !therm->pwm_ctrl ||
+	    !therm->pwm_ctrl || fan.type == NVBIOS_THERM_FAN_TOGGLE ||
 	     therm->pwm_get(therm, func->line, &divs, &duty) == -ENODEV)
 		return -ENODEV;
 
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/gm107.c b/drivers/gpu/drm/nouveau/core/subdev/therm/gm107.c
new file mode 100644
index 000000000000..668cf3322285
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/gm107.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres
+ */
+
+#include "priv.h"
+
+struct gm107_therm_priv {
+	struct nouveau_therm_priv base;
+};
+
+static int
+gm107_fan_pwm_ctrl(struct nouveau_therm *therm, int line, bool enable)
+{
+	/* nothing to do, it seems hardwired */
+	return 0;
+}
+
+static int
+gm107_fan_pwm_get(struct nouveau_therm *therm, int line, u32 *divs, u32 *duty)
+{
+	*divs = nv_rd32(therm, 0x10eb20) & 0x1fff;
+	*duty = nv_rd32(therm, 0x10eb24) & 0x1fff;
+	return 0;
+}
+
+static int
+gm107_fan_pwm_set(struct nouveau_therm *therm, int line, u32 divs, u32 duty)
+{
+	nv_mask(therm, 0x10eb10, 0x1fff, divs); /* keep the high bits */
+	nv_wr32(therm, 0x10eb14, duty | 0x80000000);
+	return 0;
+}
+
+static int
+gm107_fan_pwm_clock(struct nouveau_therm *therm, int line)
+{
+	return nv_device(therm)->crystal * 1000;
+}
+
+static int
+gm107_therm_ctor(struct nouveau_object *parent,
+		struct nouveau_object *engine,
+		struct nouveau_oclass *oclass, void *data, u32 size,
+		struct nouveau_object **pobject)
+{
+	struct gm107_therm_priv *priv;
+	int ret;
+
+	ret = nouveau_therm_create(parent, engine, oclass, &priv);
+	*pobject = nv_object(priv);
+	if (ret)
+		return ret;
+
+	priv->base.base.pwm_ctrl = gm107_fan_pwm_ctrl;
+	priv->base.base.pwm_get = gm107_fan_pwm_get;
+	priv->base.base.pwm_set = gm107_fan_pwm_set;
+	priv->base.base.pwm_clock = gm107_fan_pwm_clock;
+	priv->base.base.temp_get = nv84_temp_get;
+	priv->base.base.fan_sense = nva3_therm_fan_sense;
+	priv->base.sensor.program_alarms = nouveau_therm_program_alarms_polling;
+	return nouveau_therm_preinit(&priv->base.base);
+}
+
+struct nouveau_oclass
+gm107_therm_oclass = {
+	.handle = NV_SUBDEV(THERM, 0x117),
+	.ofuncs = &(struct nouveau_ofuncs) {
+		.ctor = gm107_therm_ctor,
+		.dtor = _nouveau_therm_dtor,
+		.init = nvd0_therm_init,
+		.fini = nv84_therm_fini,
+	},
+};
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nv84.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nv84.c
index 1d15c52fad0c..14e2e09bfc24 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/nv84.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nv84.c
@@ -24,6 +24,7 @@
  */
 
 #include "priv.h"
+#include <subdev/fuse.h>
 
 struct nv84_therm_priv {
 	struct nouveau_therm_priv base;
@@ -32,7 +33,25 @@ struct nv84_therm_priv {
 int
 nv84_temp_get(struct nouveau_therm *therm)
 {
-	return nv_rd32(therm, 0x20400);
+	struct nouveau_fuse *fuse = nouveau_fuse(therm);
+
+	if (nv_ro32(fuse, 0x1a8) == 1)
+		return nv_rd32(therm, 0x20400);
+	else
+		return -ENODEV;
+}
+
+void
+nv84_sensor_setup(struct nouveau_therm *therm)
+{
+	struct nouveau_fuse *fuse = nouveau_fuse(therm);
+
+	/* enable temperature reading for cards with insane defaults */
+	if (nv_ro32(fuse, 0x1a8) == 1) {
+		nv_mask(therm, 0x20008, 0x80008000, 0x80000000);
+		nv_mask(therm, 0x2000c, 0x80000003, 0x00000000);
+		mdelay(20); /* wait for the temperature to stabilize */
+	}
 }
 
 static void
@@ -171,6 +190,21 @@ nv84_therm_intr(struct nouveau_subdev *subdev)
 }
 
 static int
+nv84_therm_init(struct nouveau_object *object)
+{
+	struct nv84_therm_priv *priv = (void *)object;
+	int ret;
+
+	ret = nouveau_therm_init(&priv->base.base);
+	if (ret)
+		return ret;
+
+	nv84_sensor_setup(&priv->base.base);
+
+	return 0;
+}
+
+static int
 nv84_therm_ctor(struct nouveau_object *parent,
 		struct nouveau_object *engine,
 		struct nouveau_oclass *oclass, void *data, u32 size,
@@ -228,7 +262,7 @@ nv84_therm_oclass = {
 	.ofuncs = &(struct nouveau_ofuncs) {
 		.ctor = nv84_therm_ctor,
 		.dtor = _nouveau_therm_dtor,
-		.init = _nouveau_therm_init,
+		.init = nv84_therm_init,
 		.fini = nv84_therm_fini,
 	},
 };
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c
index 0478b2e3fb1d..7893357a7e9f 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nva3.c
@@ -51,6 +51,8 @@ nva3_therm_init(struct nouveau_object *object)
 	if (ret)
 		return ret;
 
+	nv84_sensor_setup(&priv->base.base);
+
 	/* enable fan tach, count revolutions per-second */
 	nv_mask(priv, 0x00e720, 0x00000003, 0x00000002);
 	if (tach->func != DCB_GPIO_UNUSED) {
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c
index bbf117be572f..b70f7cc649b8 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/nvd0.c
@@ -114,7 +114,7 @@ nvd0_fan_pwm_clock(struct nouveau_therm *therm, int line)
 		return nv_device(therm)->crystal * 1000 / 10;
 }
 
-static int
+int
 nvd0_therm_init(struct nouveau_object *object)
 {
 	struct nvd0_therm_priv *priv = (void *)object;
@@ -150,6 +150,8 @@ nvd0_therm_ctor(struct nouveau_object *parent,
 	if (ret)
 		return ret;
 
+	nv84_sensor_setup(&priv->base.base);
+
 	priv->base.base.pwm_ctrl = nvd0_fan_pwm_ctrl;
 	priv->base.base.pwm_get = nvd0_fan_pwm_get;
 	priv->base.base.pwm_set = nvd0_fan_pwm_set;
diff --git a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h
index 916fca5c7816..7dba8c281a0b 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h
+++ b/drivers/gpu/drm/nouveau/core/subdev/therm/priv.h
@@ -145,10 +145,13 @@ int nv50_fan_pwm_get(struct nouveau_therm *, int, u32 *, u32 *);
 int nv50_fan_pwm_set(struct nouveau_therm *, int, u32, u32);
 int nv50_fan_pwm_clock(struct nouveau_therm *, int);
 int nv84_temp_get(struct nouveau_therm *therm);
+void nv84_sensor_setup(struct nouveau_therm *therm);
 int nv84_therm_fini(struct nouveau_object *object, bool suspend);
 
 int nva3_therm_fan_sense(struct nouveau_therm *);
 
+int nvd0_therm_init(struct nouveau_object *object);
+
 int nouveau_fanpwm_create(struct nouveau_therm *, struct dcb_gpio_func *);
 int nouveau_fantog_create(struct nouveau_therm *, struct dcb_gpio_func *);
 int nouveau_fannil_create(struct nouveau_therm *);
diff --git a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
index 7dd680ff2f6f..f75a683bd47a 100644
--- a/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
+++ b/drivers/gpu/drm/nouveau/core/subdev/vm/base.c
@@ -296,7 +296,7 @@ nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift,
 	int ret;
 
 	mutex_lock(&nv_subdev(vmm)->mutex);
-	ret = nouveau_mm_head(&vm->mm, page_shift, msize, msize, align,
+	ret = nouveau_mm_head(&vm->mm, 0, page_shift, msize, msize, align,
 			     &vma->node);
 	if (unlikely(ret != 0)) {
 		mutex_unlock(&nv_subdev(vmm)->mutex);
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 615714c1727d..a24faa5e2a2a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -448,7 +448,7 @@ nouveau_abi16_ioctl_notifierobj_alloc(ABI16_IOCTL_ARGS)
 	list_add(&ntfy->head, &chan->notifiers);
 	ntfy->handle = info->handle;
 
-	ret = nouveau_mm_head(&chan->heap, 1, info->size, info->size, 1,
+	ret = nouveau_mm_head(&chan->heap, 0, 1, info->size, info->size, 1,
 			      &ntfy->node);
 	if (ret)
 		goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 01da508625f2..eea74b127b03 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -88,13 +88,13 @@ nv10_bo_get_tile_region(struct drm_device *dev, int i)
 
 static void
 nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
-			struct nouveau_fence *fence)
+			struct fence *fence)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	if (tile) {
 		spin_lock(&drm->tile.lock);
-		tile->fence = nouveau_fence_ref(fence);
+		tile->fence = (struct nouveau_fence *)fence_get(fence);
 		tile->used = false;
 		spin_unlock(&drm->tile.lock);
 	}
@@ -241,16 +241,16 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 }
 
 static void
-set_placement_list(uint32_t *pl, unsigned *n, uint32_t type, uint32_t flags)
+set_placement_list(struct ttm_place *pl, unsigned *n, uint32_t type, uint32_t flags)
 {
 	*n = 0;
 
 	if (type & TTM_PL_FLAG_VRAM)
-		pl[(*n)++] = TTM_PL_FLAG_VRAM | flags;
+		pl[(*n)++].flags = TTM_PL_FLAG_VRAM | flags;
 	if (type & TTM_PL_FLAG_TT)
-		pl[(*n)++] = TTM_PL_FLAG_TT | flags;
+		pl[(*n)++].flags = TTM_PL_FLAG_TT | flags;
 	if (type & TTM_PL_FLAG_SYSTEM)
-		pl[(*n)++] = TTM_PL_FLAG_SYSTEM | flags;
+		pl[(*n)++].flags = TTM_PL_FLAG_SYSTEM | flags;
 }
 
 static void
@@ -258,6 +258,7 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
 	u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT;
+	unsigned i, fpfn, lpfn;
 
 	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
@@ -269,11 +270,19 @@ set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
 		 * at the same time.
 		 */
 		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_ZETA) {
-			nvbo->placement.fpfn = vram_pages / 2;
-			nvbo->placement.lpfn = ~0;
+			fpfn = vram_pages / 2;
+			lpfn = ~0;
 		} else {
-			nvbo->placement.fpfn = 0;
-			nvbo->placement.lpfn = vram_pages / 2;
+			fpfn = 0;
+			lpfn = vram_pages / 2;
+		}
+		for (i = 0; i < nvbo->placement.num_placement; ++i) {
+			nvbo->placements[i].fpfn = fpfn;
+			nvbo->placements[i].lpfn = lpfn;
+		}
+		for (i = 0; i < nvbo->placement.num_busy_placement; ++i) {
+			nvbo->busy_placements[i].fpfn = fpfn;
+			nvbo->busy_placements[i].lpfn = lpfn;
 		}
 	}
 }
@@ -961,13 +970,14 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	}
 
 	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
-	ret = nouveau_fence_sync(bo->sync_obj, chan);
+	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true);
 	if (ret == 0) {
 		ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
 		if (ret == 0) {
 			ret = nouveau_fence_new(chan, false, &fence);
 			if (ret == 0) {
-				ret = ttm_bo_move_accel_cleanup(bo, fence,
+				ret = ttm_bo_move_accel_cleanup(bo,
+								&fence->base,
 								evict,
 								no_wait_gpu,
 								new_mem);
@@ -1041,12 +1051,15 @@ static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
-	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
+	struct ttm_place placement_memtype = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
+	};
 	struct ttm_placement placement;
 	struct ttm_mem_reg tmp_mem;
 	int ret;
 
-	placement.fpfn = placement.lpfn = 0;
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
@@ -1074,12 +1087,15 @@ static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
 {
-	u32 placement_memtype = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
+	struct ttm_place placement_memtype = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
+	};
 	struct ttm_placement placement;
 	struct ttm_mem_reg tmp_mem;
 	int ret;
 
-	placement.fpfn = placement.lpfn = 0;
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
@@ -1152,8 +1168,9 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
+	struct fence *fence = reservation_object_get_excl(bo->resv);
 
-	nv10_bo_put_tile_region(dev, *old_tile, bo->sync_obj);
+	nv10_bo_put_tile_region(dev, *old_tile, fence);
 	*old_tile = new_tile;
 }
 
@@ -1197,9 +1214,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	}
 
 	/* Fallback to software copy. */
-	spin_lock(&bo->bdev->fence_lock);
 	ret = ttm_bo_wait(bo, true, intr, no_wait_gpu);
-	spin_unlock(&bo->bdev->fence_lock);
 	if (ret == 0)
 		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 
@@ -1294,7 +1309,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvif_device *device = &drm->device;
 	u32 mappable = nv_device_resource_len(nvkm_device(device), 1) >> PAGE_SHIFT;
-	int ret;
+	int i, ret;
 
 	/* as long as the bo isn't in vram, and isn't tiled, we've got
 	 * nothing to do here.
@@ -1319,9 +1334,16 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	    bo->mem.start + bo->mem.num_pages < mappable)
 		return 0;
 
+	for (i = 0; i < nvbo->placement.num_placement; ++i) {
+		nvbo->placements[i].fpfn = 0;
+		nvbo->placements[i].lpfn = mappable;
+	}
+
+	for (i = 0; i < nvbo->placement.num_busy_placement; ++i) {
+		nvbo->busy_placements[i].fpfn = 0;
+		nvbo->busy_placements[i].lpfn = mappable;
+	}
 
-	nvbo->placement.fpfn = 0;
-	nvbo->placement.lpfn = mappable;
 	nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0);
 	return nouveau_bo_validate(nvbo, false, false);
 }
@@ -1436,47 +1458,14 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 }
 
 void
-nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence)
-{
-	struct nouveau_fence *new_fence = nouveau_fence_ref(fence);
-	struct nouveau_fence *old_fence = NULL;
-
-	spin_lock(&nvbo->bo.bdev->fence_lock);
-	old_fence = nvbo->bo.sync_obj;
-	nvbo->bo.sync_obj = new_fence;
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
-
-	nouveau_fence_unref(&old_fence);
-}
-
-static void
-nouveau_bo_fence_unref(void **sync_obj)
-{
-	nouveau_fence_unref((struct nouveau_fence **)sync_obj);
-}
-
-static void *
-nouveau_bo_fence_ref(void *sync_obj)
+nouveau_bo_fence(struct nouveau_bo *nvbo, struct nouveau_fence *fence, bool exclusive)
 {
-	return nouveau_fence_ref(sync_obj);
-}
+	struct reservation_object *resv = nvbo->bo.resv;
 
-static bool
-nouveau_bo_fence_signalled(void *sync_obj)
-{
-	return nouveau_fence_done(sync_obj);
-}
-
-static int
-nouveau_bo_fence_wait(void *sync_obj, bool lazy, bool intr)
-{
-	return nouveau_fence_wait(sync_obj, lazy, intr);
-}
-
-static int
-nouveau_bo_fence_flush(void *sync_obj)
-{
-	return 0;
+	if (exclusive)
+		reservation_object_add_excl_fence(resv, &fence->base);
+	else if (fence)
+		reservation_object_add_shared_fence(resv, &fence->base);
 }
 
 struct ttm_bo_driver nouveau_bo_driver = {
@@ -1489,11 +1478,6 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.move_notify = nouveau_bo_move_ntfy,
 	.move = nouveau_bo_move,
 	.verify_access = nouveau_bo_verify_access,
-	.sync_obj_signaled = nouveau_bo_fence_signalled,
-	.sync_obj_wait = nouveau_bo_fence_wait,
-	.sync_obj_flush = nouveau_bo_fence_flush,
-	.sync_obj_unref = nouveau_bo_fence_unref,
-	.sync_obj_ref = nouveau_bo_fence_ref,
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index ff17c1f432fc..ae95b2d43b36 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -9,8 +9,8 @@ struct nouveau_bo {
 	struct ttm_buffer_object bo;
 	struct ttm_placement placement;
 	u32 valid_domains;
-	u32 placements[3];
-	u32 busy_placements[3];
+	struct ttm_place placements[3];
+	struct ttm_place busy_placements[3];
 	struct ttm_bo_kmap_obj kmap;
 	struct list_head head;
 
@@ -78,7 +78,7 @@ u16  nouveau_bo_rd16(struct nouveau_bo *, unsigned index);
 void nouveau_bo_wr16(struct nouveau_bo *, unsigned index, u16 val);
 u32  nouveau_bo_rd32(struct nouveau_bo *, unsigned index);
 void nouveau_bo_wr32(struct nouveau_bo *, unsigned index, u32 val);
-void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *);
+void nouveau_bo_fence(struct nouveau_bo *, struct nouveau_fence *, bool exclusive);
 int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
 			 bool no_wait_gpu);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 99cd9e4a2aa6..9a362ddd8225 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -36,7 +36,7 @@
 #include "nouveau_abi16.h"
 
 MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
-static int nouveau_vram_pushbuf;
+int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
 int
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 20163709d608..8309c24ee698 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -47,4 +47,6 @@ int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *,
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
 
+extern int nouveau_vram_pushbuf;
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 1ec44c83e919..c8ac9482cf2e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -45,15 +45,15 @@
 #include <nvif/event.h>
 
 MODULE_PARM_DESC(tv_disable, "Disable TV-out detection");
-static int nouveau_tv_disable = 0;
+int nouveau_tv_disable = 0;
 module_param_named(tv_disable, nouveau_tv_disable, int, 0400);
 
 MODULE_PARM_DESC(ignorelid, "Ignore ACPI lid status");
-static int nouveau_ignorelid = 0;
+int nouveau_ignorelid = 0;
 module_param_named(ignorelid, nouveau_ignorelid, int, 0400);
 
 MODULE_PARM_DESC(duallink, "Allow dual-link TMDS (default: enabled)");
-static int nouveau_duallink = 1;
+int nouveau_duallink = 1;
 module_param_named(duallink, nouveau_duallink, int, 0400);
 
 struct nouveau_encoder *
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index 68029d041dd2..629a380c7085 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -105,4 +105,8 @@ nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc)
 struct drm_connector *
 nouveau_connector_create(struct drm_device *, int index);
 
+extern int nouveau_tv_disable;
+extern int nouveau_ignorelid;
+extern int nouveau_duallink;
+
 #endif /* __NOUVEAU_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 65b4fd53dd4e..6d0a3cdc752b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -126,7 +126,7 @@ nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos,
 	if (etime) *etime = ns_to_ktime(args.scan.time[1]);
 
 	if (*vpos < 0)
-		ret |= DRM_SCANOUTPOS_INVBL;
+		ret |= DRM_SCANOUTPOS_IN_VBLANK;
 	return ret;
 }
 
@@ -658,7 +658,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	/* Synchronize with the old framebuffer */
-	ret = nouveau_fence_sync(old_bo->bo.sync_obj, chan);
+	ret = nouveau_fence_sync(old_bo, chan, false);
 	if (ret)
 		goto fail;
 
@@ -717,19 +717,24 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	}
 
 	mutex_lock(&cli->mutex);
-
-	/* synchronise rendering channel with the kernel's channel */
-	spin_lock(&new_bo->bo.bdev->fence_lock);
-	fence = nouveau_fence_ref(new_bo->bo.sync_obj);
-	spin_unlock(&new_bo->bo.bdev->fence_lock);
-	ret = nouveau_fence_sync(fence, chan);
-	nouveau_fence_unref(&fence);
+	ret = ttm_bo_reserve(&new_bo->bo, true, false, false, NULL);
 	if (ret)
 		goto fail_unpin;
 
-	ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
-	if (ret)
+	/* synchronise rendering channel with the kernel's channel */
+	ret = nouveau_fence_sync(new_bo, chan, false);
+	if (ret) {
+		ttm_bo_unreserve(&new_bo->bo);
 		goto fail_unpin;
+	}
+
+	if (new_bo != old_bo) {
+		ttm_bo_unreserve(&new_bo->bo);
+
+		ret = ttm_bo_reserve(&old_bo->bo, true, false, false, NULL);
+		if (ret)
+			goto fail_unpin;
+	}
 
 	/* Initialize a page flip struct */
 	*s = (struct nouveau_page_flip_state)
@@ -775,7 +780,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	/* Update the crtc struct and cleanup */
 	crtc->primary->fb = fb;
 
-	nouveau_bo_fence(old_bo, fence);
+	nouveau_bo_fence(old_bo, fence, false);
 	ttm_bo_unreserve(&old_bo->bo);
 	if (old_bo != new_bo)
 		nouveau_bo_unpin(old_bo);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 250a5e88c751..244d78fc0cb5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -51,6 +51,7 @@
 #include "nouveau_fence.h"
 #include "nouveau_debugfs.h"
 #include "nouveau_usif.h"
+#include "nouveau_connector.h"
 
 MODULE_PARM_DESC(config, "option string to pass to driver core");
 static char *nouveau_config;
@@ -73,7 +74,9 @@ MODULE_PARM_DESC(runpm, "disable (0), force enable (1), optimus only default (-1
 int nouveau_runtime_pm = -1;
 module_param_named(runpm, nouveau_runtime_pm, int, 0400);
 
-static struct drm_driver driver;
+static struct drm_driver driver_stub;
+static struct drm_driver driver_pci;
+static struct drm_driver driver_platform;
 
 static u64
 nouveau_pci_name(struct pci_dev *pdev)
@@ -322,7 +325,7 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
-	ret = drm_get_pci_dev(pdev, pent, &driver);
+	ret = drm_get_pci_dev(pdev, pent, &driver_pci);
 	if (ret) {
 		nouveau_object_ref(NULL, (struct nouveau_object **)&device);
 		return ret;
@@ -855,7 +858,7 @@ nouveau_driver_fops = {
 };
 
 static struct drm_driver
-driver = {
+driver_stub = {
 	.driver_features =
 		DRIVER_USE_AGP |
 		DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME | DRIVER_RENDER,
@@ -1026,6 +1029,23 @@ static int nouveau_pmops_runtime_idle(struct device *dev)
 	return 1;
 }
 
+static void nouveau_display_options(void)
+{
+	DRM_DEBUG_DRIVER("Loading Nouveau with parameters:\n");
+
+	DRM_DEBUG_DRIVER("... tv_disable   : %d\n", nouveau_tv_disable);
+	DRM_DEBUG_DRIVER("... ignorelid    : %d\n", nouveau_ignorelid);
+	DRM_DEBUG_DRIVER("... duallink     : %d\n", nouveau_duallink);
+	DRM_DEBUG_DRIVER("... nofbaccel    : %d\n", nouveau_nofbaccel);
+	DRM_DEBUG_DRIVER("... config       : %s\n", nouveau_config);
+	DRM_DEBUG_DRIVER("... debug        : %s\n", nouveau_debug);
+	DRM_DEBUG_DRIVER("... noaccel      : %d\n", nouveau_noaccel);
+	DRM_DEBUG_DRIVER("... modeset      : %d\n", nouveau_modeset);
+	DRM_DEBUG_DRIVER("... runpm        : %d\n", nouveau_runtime_pm);
+	DRM_DEBUG_DRIVER("... vram_pushbuf : %d\n", nouveau_vram_pushbuf);
+	DRM_DEBUG_DRIVER("... pstate       : %d\n", nouveau_pstate);
+}
+
 static const struct dev_pm_ops nouveau_pm_ops = {
 	.suspend = nouveau_pmops_suspend,
 	.resume = nouveau_pmops_resume,
@@ -1061,7 +1081,7 @@ nouveau_platform_device_create_(struct platform_device *pdev, int size,
 	if (err)
 		return ERR_PTR(err);
 
-	drm = drm_dev_alloc(&driver, &pdev->dev);
+	drm = drm_dev_alloc(&driver_platform, &pdev->dev);
 	if (!drm) {
 		err = -ENOMEM;
 		goto err_free;
@@ -1086,6 +1106,13 @@ EXPORT_SYMBOL(nouveau_platform_device_create_);
 static int __init
 nouveau_drm_init(void)
 {
+	driver_pci = driver_stub;
+	driver_pci.set_busid = drm_pci_set_busid;
+	driver_platform = driver_stub;
+	driver_platform.set_busid = drm_platform_set_busid;
+
+	nouveau_display_options();
+
 	if (nouveau_modeset == -1) {
 #ifdef CONFIG_VGA_CONSOLE
 		if (vgacon_text_force())
@@ -1097,7 +1124,7 @@ nouveau_drm_init(void)
 		return 0;
 
 	nouveau_register_dsm_handler();
-	return drm_pci_init(&driver, &nouveau_drm_pci_driver);
+	return drm_pci_init(&driver_pci, &nouveau_drm_pci_driver);
 }
 
 static void __exit
@@ -1106,7 +1133,7 @@ nouveau_drm_exit(void)
 	if (!nouveau_modeset)
 		return;
 
-	drm_pci_exit(&driver, &nouveau_drm_pci_driver);
+	drm_pci_exit(&driver_pci, &nouveau_drm_pci_driver);
 	nouveau_unregister_dsm_handler();
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 8bdd27091db8..f0ae10ca3ba9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -52,7 +52,7 @@
 #include "nouveau_crtc.h"
 
 MODULE_PARM_DESC(nofbaccel, "Disable fbcon acceleration");
-static int nouveau_nofbaccel = 0;
+int nouveau_nofbaccel = 0;
 module_param_named(nofbaccel, nouveau_nofbaccel, int, 0400);
 
 static void
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 34658cfa8f5d..1e2e9e27a03b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -72,5 +72,8 @@ void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
 void nouveau_fbcon_accel_restore(struct drm_device *dev);
 
 void nouveau_fbcon_output_poll_changed(struct drm_device *dev);
+
+extern int nouveau_nofbaccel;
+
 #endif /* __NV50_FBCON_H__ */
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 0a93114158cd..decfe6c4ac07 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -28,6 +28,7 @@
 
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
+#include <trace/events/fence.h>
 
 #include <nvif/notify.h>
 #include <nvif/event.h>
@@ -36,123 +37,210 @@
 #include "nouveau_dma.h"
 #include "nouveau_fence.h"
 
-struct fence_work {
-	struct work_struct base;
-	struct list_head head;
-	void (*func)(void *);
-	void *data;
-};
+static const struct fence_ops nouveau_fence_ops_uevent;
+static const struct fence_ops nouveau_fence_ops_legacy;
+
+static inline struct nouveau_fence *
+from_fence(struct fence *fence)
+{
+	return container_of(fence, struct nouveau_fence, base);
+}
+
+static inline struct nouveau_fence_chan *
+nouveau_fctx(struct nouveau_fence *fence)
+{
+	return container_of(fence->base.lock, struct nouveau_fence_chan, lock);
+}
 
 static void
 nouveau_fence_signal(struct nouveau_fence *fence)
 {
-	struct fence_work *work, *temp;
+	fence_signal_locked(&fence->base);
+	list_del(&fence->head);
+
+	if (test_bit(FENCE_FLAG_USER_BITS, &fence->base.flags)) {
+		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
-	list_for_each_entry_safe(work, temp, &fence->work, head) {
-		schedule_work(&work->base);
-		list_del(&work->head);
+		if (!--fctx->notify_ref)
+			nvif_notify_put(&fctx->notify);
 	}
 
-	fence->channel = NULL;
-	list_del(&fence->head);
+	fence_put(&fence->base);
+}
+
+static struct nouveau_fence *
+nouveau_local_fence(struct fence *fence, struct nouveau_drm *drm) {
+	struct nouveau_fence_priv *priv = (void*)drm->fence;
+
+	if (fence->ops != &nouveau_fence_ops_legacy &&
+	    fence->ops != &nouveau_fence_ops_uevent)
+		return NULL;
+
+	if (fence->context < priv->context_base ||
+	    fence->context >= priv->context_base + priv->contexts)
+		return NULL;
+
+	return from_fence(fence);
 }
 
 void
 nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
 {
-	struct nouveau_fence *fence, *fnext;
-	spin_lock(&fctx->lock);
-	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
+	struct nouveau_fence *fence;
+
+	nvif_notify_fini(&fctx->notify);
+
+	spin_lock_irq(&fctx->lock);
+	while (!list_empty(&fctx->pending)) {
+		fence = list_entry(fctx->pending.next, typeof(*fence), head);
+
+		nouveau_fence_signal(fence);
+		fence->channel = NULL;
+	}
+	spin_unlock_irq(&fctx->lock);
+}
+
+static void
+nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
+{
+	struct nouveau_fence *fence;
+
+	u32 seq = fctx->read(chan);
+
+	while (!list_empty(&fctx->pending)) {
+		fence = list_entry(fctx->pending.next, typeof(*fence), head);
+
+		if ((int)(seq - fence->base.seqno) < 0)
+			return;
+
 		nouveau_fence_signal(fence);
 	}
-	spin_unlock(&fctx->lock);
+}
+
+static int
+nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
+{
+	struct nouveau_fence_chan *fctx =
+		container_of(notify, typeof(*fctx), notify);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fctx->lock, flags);
+	if (!list_empty(&fctx->pending)) {
+		struct nouveau_fence *fence;
+
+		fence = list_entry(fctx->pending.next, typeof(*fence), head);
+		nouveau_fence_update(fence->channel, fctx);
+	}
+	spin_unlock_irqrestore(&fctx->lock, flags);
+
+	/* Always return keep here. NVIF refcount is handled with nouveau_fence_update */
+	return NVIF_NOTIFY_KEEP;
 }
 
 void
-nouveau_fence_context_new(struct nouveau_fence_chan *fctx)
+nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
 {
+	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
+	int ret;
+
 	INIT_LIST_HEAD(&fctx->flip);
 	INIT_LIST_HEAD(&fctx->pending);
 	spin_lock_init(&fctx->lock);
+	fctx->context = priv->context_base + chan->chid;
+
+	if (!priv->uevent)
+		return;
+
+	ret = nvif_notify_init(chan->object, NULL,
+			 nouveau_fence_wait_uevent_handler, false,
+			 G82_CHANNEL_DMA_V0_NTFY_UEVENT,
+			 &(struct nvif_notify_uevent_req) { },
+			 sizeof(struct nvif_notify_uevent_req),
+			 sizeof(struct nvif_notify_uevent_rep),
+			 &fctx->notify);
+
+	WARN_ON(ret);
 }
 
+struct nouveau_fence_work {
+	struct work_struct work;
+	struct fence_cb cb;
+	void (*func)(void *);
+	void *data;
+};
+
 static void
 nouveau_fence_work_handler(struct work_struct *kwork)
 {
-	struct fence_work *work = container_of(kwork, typeof(*work), base);
+	struct nouveau_fence_work *work = container_of(kwork, typeof(*work), work);
 	work->func(work->data);
 	kfree(work);
 }
 
+static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb);
+
+	schedule_work(&work->work);
+}
+
 void
-nouveau_fence_work(struct nouveau_fence *fence,
+nouveau_fence_work(struct fence *fence,
 		   void (*func)(void *), void *data)
 {
-	struct nouveau_channel *chan = fence->channel;
-	struct nouveau_fence_chan *fctx;
-	struct fence_work *work = NULL;
+	struct nouveau_fence_work *work;
 
-	if (nouveau_fence_done(fence)) {
-		func(data);
-		return;
-	}
+	if (fence_is_signaled(fence))
+		goto err;
 
-	fctx = chan->fence;
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
 	if (!work) {
-		WARN_ON(nouveau_fence_wait(fence, false, false));
-		func(data);
-		return;
+		WARN_ON(nouveau_fence_wait((struct nouveau_fence *)fence,
+					   false, false));
+		goto err;
 	}
 
-	spin_lock(&fctx->lock);
-	if (!fence->channel) {
-		spin_unlock(&fctx->lock);
-		kfree(work);
-		func(data);
-		return;
-	}
-
-	INIT_WORK(&work->base, nouveau_fence_work_handler);
+	INIT_WORK(&work->work, nouveau_fence_work_handler);
 	work->func = func;
 	work->data = data;
-	list_add(&work->head, &fence->work);
-	spin_unlock(&fctx->lock);
-}
-
-static void
-nouveau_fence_update(struct nouveau_channel *chan)
-{
-	struct nouveau_fence_chan *fctx = chan->fence;
-	struct nouveau_fence *fence, *fnext;
 
-	spin_lock(&fctx->lock);
-	list_for_each_entry_safe(fence, fnext, &fctx->pending, head) {
-		if (fctx->read(chan) < fence->sequence)
-			break;
+	if (fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
+		goto err_free;
+	return;
 
-		nouveau_fence_signal(fence);
-		nouveau_fence_unref(&fence);
-	}
-	spin_unlock(&fctx->lock);
+err_free:
+	kfree(work);
+err:
+	func(data);
 }
 
 int
 nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
+	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
 	int ret;
 
 	fence->channel  = chan;
 	fence->timeout  = jiffies + (15 * HZ);
-	fence->sequence = ++fctx->sequence;
 
+	if (priv->uevent)
+		fence_init(&fence->base, &nouveau_fence_ops_uevent,
+			   &fctx->lock,
+			   priv->context_base + chan->chid, ++fctx->sequence);
+	else
+		fence_init(&fence->base, &nouveau_fence_ops_legacy,
+			   &fctx->lock,
+			   priv->context_base + chan->chid, ++fctx->sequence);
+
+	trace_fence_emit(&fence->base);
 	ret = fctx->emit(fence);
 	if (!ret) {
-		kref_get(&fence->kref);
-		spin_lock(&fctx->lock);
+		fence_get(&fence->base);
+		spin_lock_irq(&fctx->lock);
+		nouveau_fence_update(chan, fctx);
 		list_add_tail(&fence->head, &fctx->pending);
-		spin_unlock(&fctx->lock);
+		spin_unlock_irq(&fctx->lock);
 	}
 
 	return ret;
@@ -161,114 +249,70 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 bool
 nouveau_fence_done(struct nouveau_fence *fence)
 {
-	if (fence->channel)
-		nouveau_fence_update(fence->channel);
-	return !fence->channel;
-}
+	if (fence->base.ops == &nouveau_fence_ops_legacy ||
+	    fence->base.ops == &nouveau_fence_ops_uevent) {
+		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+		unsigned long flags;
 
-struct nouveau_fence_wait {
-	struct nouveau_fence_priv *priv;
-	struct nvif_notify notify;
-};
+		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+			return true;
 
-static int
-nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
-{
-	struct nouveau_fence_wait *wait =
-		container_of(notify, typeof(*wait), notify);
-	wake_up_all(&wait->priv->waiting);
-	return NVIF_NOTIFY_KEEP;
+		spin_lock_irqsave(&fctx->lock, flags);
+		nouveau_fence_update(fence->channel, fctx);
+		spin_unlock_irqrestore(&fctx->lock, flags);
+	}
+	return fence_is_signaled(&fence->base);
 }
 
-static int
-nouveau_fence_wait_uevent(struct nouveau_fence *fence, bool intr)
-
+static long
+nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
 {
-	struct nouveau_channel *chan = fence->channel;
-	struct nouveau_fence_priv *priv = chan->drm->fence;
-	struct nouveau_fence_wait wait = { .priv = priv };
-	int ret = 0;
+	struct nouveau_fence *fence = from_fence(f);
+	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
+	unsigned long t = jiffies, timeout = t + wait;
 
-	ret = nvif_notify_init(chan->object, NULL,
-			       nouveau_fence_wait_uevent_handler, false,
-			       G82_CHANNEL_DMA_V0_NTFY_UEVENT,
-			       &(struct nvif_notify_uevent_req) {
-			       },
-			       sizeof(struct nvif_notify_uevent_req),
-			       sizeof(struct nvif_notify_uevent_rep),
-			       &wait.notify);
-	if (ret)
-		return ret;
+	while (!nouveau_fence_done(fence)) {
+		ktime_t kt;
 
-	nvif_notify_get(&wait.notify);
-
-	if (fence->timeout) {
-		unsigned long timeout = fence->timeout - jiffies;
-
-		if (time_before(jiffies, fence->timeout)) {
-			if (intr) {
-				ret = wait_event_interruptible_timeout(
-						priv->waiting,
-						nouveau_fence_done(fence),
-						timeout);
-			} else {
-				ret = wait_event_timeout(priv->waiting,
-						nouveau_fence_done(fence),
-						timeout);
-			}
-		}
+		t = jiffies;
 
-		if (ret >= 0) {
-			fence->timeout = jiffies + ret;
-			if (time_after_eq(jiffies, fence->timeout))
-				ret = -EBUSY;
-		}
-	} else {
-		if (intr) {
-			ret = wait_event_interruptible(priv->waiting,
-					nouveau_fence_done(fence));
-		} else {
-			wait_event(priv->waiting, nouveau_fence_done(fence));
+		if (wait != MAX_SCHEDULE_TIMEOUT && time_after_eq(t, timeout)) {
+			__set_current_state(TASK_RUNNING);
+			return 0;
 		}
+
+		__set_current_state(intr ? TASK_INTERRUPTIBLE :
+					   TASK_UNINTERRUPTIBLE);
+
+		kt = ktime_set(0, sleep_time);
+		schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
+		sleep_time *= 2;
+		if (sleep_time > NSEC_PER_MSEC)
+			sleep_time = NSEC_PER_MSEC;
+
+		if (intr && signal_pending(current))
+			return -ERESTARTSYS;
 	}
 
-	nvif_notify_fini(&wait.notify);
-	if (unlikely(ret < 0))
-		return ret;
+	__set_current_state(TASK_RUNNING);
 
-	return 0;
+	return timeout - t;
 }
 
-int
-nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
+static int
+nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr)
 {
-	struct nouveau_channel *chan = fence->channel;
-	struct nouveau_fence_priv *priv = chan ? chan->drm->fence : NULL;
-	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
-	ktime_t t;
 	int ret = 0;
 
-	while (priv && priv->uevent && lazy && !nouveau_fence_done(fence)) {
-		ret = nouveau_fence_wait_uevent(fence, intr);
-		if (ret < 0)
-			return ret;
-	}
-
 	while (!nouveau_fence_done(fence)) {
-		if (fence->timeout && time_after_eq(jiffies, fence->timeout)) {
+		if (time_after_eq(jiffies, fence->timeout)) {
 			ret = -EBUSY;
 			break;
 		}
 
-		__set_current_state(intr ? TASK_INTERRUPTIBLE :
-					   TASK_UNINTERRUPTIBLE);
-		if (lazy) {
-			t = ktime_set(0, sleep_time);
-			schedule_hrtimeout(&t, HRTIMER_MODE_REL);
-			sleep_time *= 2;
-			if (sleep_time > NSEC_PER_MSEC)
-				sleep_time = NSEC_PER_MSEC;
-		}
+		__set_current_state(intr ?
+				    TASK_INTERRUPTIBLE :
+				    TASK_UNINTERRUPTIBLE);
 
 		if (intr && signal_pending(current)) {
 			ret = -ERESTARTSYS;
@@ -281,47 +325,86 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 }
 
 int
-nouveau_fence_sync(struct nouveau_fence *fence, struct nouveau_channel *chan)
+nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 {
-	struct nouveau_fence_chan *fctx = chan->fence;
-	struct nouveau_channel *prev;
-	int ret = 0;
+	long ret;
 
-	prev = fence ? fence->channel : NULL;
-	if (prev) {
-		if (unlikely(prev != chan && !nouveau_fence_done(fence))) {
-			ret = fctx->sync(fence, prev, chan);
-			if (unlikely(ret))
-				ret = nouveau_fence_wait(fence, true, false);
-		}
-	}
+	if (!lazy)
+		return nouveau_fence_wait_busy(fence, intr);
 
-	return ret;
+	ret = fence_wait_timeout(&fence->base, intr, 15 * HZ);
+	if (ret < 0)
+		return ret;
+	else if (!ret)
+		return -EBUSY;
+	else
+		return 0;
 }
 
-static void
-nouveau_fence_del(struct kref *kref)
+int
+nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive)
 {
-	struct nouveau_fence *fence = container_of(kref, typeof(*fence), kref);
-	kfree(fence);
+	struct nouveau_fence_chan *fctx = chan->fence;
+	struct fence *fence;
+	struct reservation_object *resv = nvbo->bo.resv;
+	struct reservation_object_list *fobj;
+	struct nouveau_fence *f;
+	int ret = 0, i;
+
+	if (!exclusive) {
+		ret = reservation_object_reserve_shared(resv);
+
+		if (ret)
+			return ret;
+	}
+
+	fobj = reservation_object_get_list(resv);
+	fence = reservation_object_get_excl(resv);
+
+	if (fence && (!exclusive || !fobj || !fobj->shared_count)) {
+		struct nouveau_channel *prev = NULL;
+
+		f = nouveau_local_fence(fence, chan->drm);
+		if (f)
+			prev = f->channel;
+
+		if (!prev || (prev != chan && (ret = fctx->sync(f, prev, chan))))
+			ret = fence_wait(fence, true);
+
+		return ret;
+	}
+
+	if (!exclusive || !fobj)
+		return ret;
+
+	for (i = 0; i < fobj->shared_count && !ret; ++i) {
+		struct nouveau_channel *prev = NULL;
+
+		fence = rcu_dereference_protected(fobj->shared[i],
+						reservation_object_held(resv));
+
+		f = nouveau_local_fence(fence, chan->drm);
+		if (f)
+			prev = f->channel;
+
+		if (!prev || (ret = fctx->sync(f, prev, chan)))
+			ret = fence_wait(fence, true);
+
+		if (ret)
+			break;
+	}
+
+	return ret;
 }
 
 void
 nouveau_fence_unref(struct nouveau_fence **pfence)
 {
 	if (*pfence)
-		kref_put(&(*pfence)->kref, nouveau_fence_del);
+		fence_put(&(*pfence)->base);
 	*pfence = NULL;
 }
 
-struct nouveau_fence *
-nouveau_fence_ref(struct nouveau_fence *fence)
-{
-	if (fence)
-		kref_get(&fence->kref);
-	return fence;
-}
-
 int
 nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
 		  struct nouveau_fence **pfence)
@@ -336,9 +419,7 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
 	if (!fence)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&fence->work);
 	fence->sysmem = sysmem;
-	kref_init(&fence->kref);
 
 	ret = nouveau_fence_emit(fence, chan);
 	if (ret)
@@ -347,3 +428,92 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
 	*pfence = fence;
 	return ret;
 }
+
+static const char *nouveau_fence_get_get_driver_name(struct fence *fence)
+{
+	return "nouveau";
+}
+
+static const char *nouveau_fence_get_timeline_name(struct fence *f)
+{
+	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+
+	return fence->channel ? fctx->name : "dead channel";
+}
+
+/*
+ * In an ideal world, read would not assume the channel context is still alive.
+ * This function may be called from another device, running into free memory as a
+ * result. The drm node should still be there, so we can derive the index from
+ * the fence context.
+ */
+static bool nouveau_fence_is_signaled(struct fence *f)
+{
+	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+	struct nouveau_channel *chan = fence->channel;
+
+	return (int)(fctx->read(chan) - fence->base.seqno) >= 0;
+}
+
+static bool nouveau_fence_no_signaling(struct fence *f)
+{
+	struct nouveau_fence *fence = from_fence(f);
+
+	/*
+	 * caller should have a reference on the fence,
+	 * else fence could get freed here
+	 */
+	WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
+
+	/*
+	 * This needs uevents to work correctly, but fence_add_callback relies on
+	 * being able to enable signaling. It will still get signaled eventually,
+	 * just not right away.
+	 */
+	if (nouveau_fence_is_signaled(f)) {
+		list_del(&fence->head);
+
+		fence_put(&fence->base);
+		return false;
+	}
+
+	return true;
+}
+
+static const struct fence_ops nouveau_fence_ops_legacy = {
+	.get_driver_name = nouveau_fence_get_get_driver_name,
+	.get_timeline_name = nouveau_fence_get_timeline_name,
+	.enable_signaling = nouveau_fence_no_signaling,
+	.signaled = nouveau_fence_is_signaled,
+	.wait = nouveau_fence_wait_legacy,
+	.release = NULL
+};
+
+static bool nouveau_fence_enable_signaling(struct fence *f)
+{
+	struct nouveau_fence *fence = from_fence(f);
+	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
+	bool ret;
+
+	if (!fctx->notify_ref++)
+		nvif_notify_get(&fctx->notify);
+
+	ret = nouveau_fence_no_signaling(f);
+	if (ret)
+		set_bit(FENCE_FLAG_USER_BITS, &fence->base.flags);
+	else if (!--fctx->notify_ref)
+		nvif_notify_put(&fctx->notify);
+
+	return ret;
+}
+
+static const struct fence_ops nouveau_fence_ops_uevent = {
+	.get_driver_name = nouveau_fence_get_get_driver_name,
+	.get_timeline_name = nouveau_fence_get_timeline_name,
+	.enable_signaling = nouveau_fence_enable_signaling,
+	.signaled = nouveau_fence_is_signaled,
+	.wait = fence_default_wait,
+	.release = NULL
+};
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index c57bb61da58c..986c8135e564 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -1,33 +1,35 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__
 
+#include <linux/fence.h>
+#include <nvif/notify.h>
+
 struct nouveau_drm;
+struct nouveau_bo;
 
 struct nouveau_fence {
+	struct fence base;
+
 	struct list_head head;
-	struct list_head work;
-	struct kref kref;
 
 	bool sysmem;
 
 	struct nouveau_channel *channel;
 	unsigned long timeout;
-	u32 sequence;
 };
 
 int  nouveau_fence_new(struct nouveau_channel *, bool sysmem,
 		       struct nouveau_fence **);
-struct nouveau_fence *
-nouveau_fence_ref(struct nouveau_fence *);
 void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
 bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_work(struct nouveau_fence *, void (*)(void *), void *);
+void nouveau_fence_work(struct fence *, void (*)(void *), void *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
-int  nouveau_fence_sync(struct nouveau_fence *, struct nouveau_channel *);
+int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive);
 
 struct nouveau_fence_chan {
+	spinlock_t lock;
 	struct list_head pending;
 	struct list_head flip;
 
@@ -38,8 +40,12 @@ struct nouveau_fence_chan {
 	int  (*emit32)(struct nouveau_channel *, u64, u32);
 	int  (*sync32)(struct nouveau_channel *, u64, u32);
 
-	spinlock_t lock;
 	u32 sequence;
+	u32 context;
+	char name[24];
+
+	struct nvif_notify notify;
+	int notify_ref;
 };
 
 struct nouveau_fence_priv {
@@ -49,13 +55,13 @@ struct nouveau_fence_priv {
 	int  (*context_new)(struct nouveau_channel *);
 	void (*context_del)(struct nouveau_channel *);
 
-	wait_queue_head_t waiting;
+	u32 contexts, context_base;
 	bool uevent;
 };
 
 #define nouveau_fence(drm) ((struct nouveau_fence_priv *)(drm)->fence)
 
-void nouveau_fence_context_new(struct nouveau_fence_chan *);
+void nouveau_fence_context_new(struct nouveau_channel *, struct nouveau_fence_chan *);
 void nouveau_fence_context_del(struct nouveau_fence_chan *);
 
 int nv04_fence_create(struct nouveau_drm *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 292a677bfed4..b7dbd16904e0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -98,17 +98,23 @@ static void
 nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
 {
 	const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
-	struct nouveau_fence *fence = NULL;
+	struct reservation_object *resv = nvbo->bo.resv;
+	struct reservation_object_list *fobj;
+	struct fence *fence = NULL;
+
+	fobj = reservation_object_get_list(resv);
 
 	list_del(&vma->head);
 
-	if (mapped) {
-		spin_lock(&nvbo->bo.bdev->fence_lock);
-		fence = nouveau_fence_ref(nvbo->bo.sync_obj);
-		spin_unlock(&nvbo->bo.bdev->fence_lock);
-	}
+	if (fobj && fobj->shared_count > 1)
+		ttm_bo_wait(&nvbo->bo, true, false, false);
+	else if (fobj && fobj->shared_count == 1)
+		fence = rcu_dereference_protected(fobj->shared[0],
+						reservation_object_held(resv));
+	else
+		fence = reservation_object_get_excl(nvbo->bo.resv);
 
-	if (fence) {
+	if (fence && mapped) {
 		nouveau_fence_work(fence, nouveau_gem_object_delete, vma);
 	} else {
 		if (mapped)
@@ -116,7 +122,6 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
 		nouveau_vm_put(vma);
 		kfree(vma);
 	}
-	nouveau_fence_unref(&fence);
 }
 
 void
@@ -288,24 +293,23 @@ nouveau_gem_set_domain(struct drm_gem_object *gem, uint32_t read_domains,
 }
 
 struct validate_op {
-	struct list_head vram_list;
-	struct list_head gart_list;
-	struct list_head both_list;
+	struct list_head list;
 	struct ww_acquire_ctx ticket;
 };
 
 static void
-validate_fini_list(struct list_head *list, struct nouveau_fence *fence,
-		   struct ww_acquire_ctx *ticket)
+validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence,
+			struct drm_nouveau_gem_pushbuf_bo *pbbo)
 {
-	struct list_head *entry, *tmp;
 	struct nouveau_bo *nvbo;
+	struct drm_nouveau_gem_pushbuf_bo *b;
 
-	list_for_each_safe(entry, tmp, list) {
-		nvbo = list_entry(entry, struct nouveau_bo, entry);
+	while (!list_empty(&op->list)) {
+		nvbo = list_entry(op->list.next, struct nouveau_bo, entry);
+		b = &pbbo[nvbo->pbbo_index];
 
 		if (likely(fence))
-			nouveau_bo_fence(nvbo, fence);
+			nouveau_bo_fence(nvbo, fence, !!b->write_domains);
 
 		if (unlikely(nvbo->validate_mapped)) {
 			ttm_bo_kunmap(&nvbo->kmap);
@@ -314,23 +318,16 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence,
 
 		list_del(&nvbo->entry);
 		nvbo->reserved_by = NULL;
-		ttm_bo_unreserve_ticket(&nvbo->bo, ticket);
+		ttm_bo_unreserve_ticket(&nvbo->bo, &op->ticket);
 		drm_gem_object_unreference_unlocked(&nvbo->gem);
 	}
 }
 
 static void
-validate_fini_no_ticket(struct validate_op *op, struct nouveau_fence *fence)
+validate_fini(struct validate_op *op, struct nouveau_fence *fence,
+	      struct drm_nouveau_gem_pushbuf_bo *pbbo)
 {
-	validate_fini_list(&op->vram_list, fence, &op->ticket);
-	validate_fini_list(&op->gart_list, fence, &op->ticket);
-	validate_fini_list(&op->both_list, fence, &op->ticket);
-}
-
-static void
-validate_fini(struct validate_op *op, struct nouveau_fence *fence)
-{
-	validate_fini_no_ticket(op, fence);
+	validate_fini_no_ticket(op, fence, pbbo);
 	ww_acquire_fini(&op->ticket);
 }
 
@@ -344,6 +341,9 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
 	int trycnt = 0;
 	int ret, i;
 	struct nouveau_bo *res_bo = NULL;
+	LIST_HEAD(gart_list);
+	LIST_HEAD(vram_list);
+	LIST_HEAD(both_list);
 
 	ww_acquire_init(&op->ticket, &reservation_ww_class);
 retry:
@@ -360,9 +360,8 @@ retry:
 		gem = drm_gem_object_lookup(dev, file_priv, b->handle);
 		if (!gem) {
 			NV_PRINTK(error, cli, "Unknown handle 0x%08x\n", b->handle);
-			ww_acquire_done(&op->ticket);
-			validate_fini(op, NULL);
-			return -ENOENT;
+			ret = -ENOENT;
+			break;
 		}
 		nvbo = nouveau_gem_object(gem);
 		if (nvbo == res_bo) {
@@ -375,14 +374,16 @@ retry:
 			NV_PRINTK(error, cli, "multiple instances of buffer %d on "
 				      "validation list\n", b->handle);
 			drm_gem_object_unreference_unlocked(gem);
-			ww_acquire_done(&op->ticket);
-			validate_fini(op, NULL);
-			return -EINVAL;
+			ret = -EINVAL;
+			break;
 		}
 
 		ret = ttm_bo_reserve(&nvbo->bo, true, false, true, &op->ticket);
 		if (ret) {
-			validate_fini_no_ticket(op, NULL);
+			list_splice_tail_init(&vram_list, &op->list);
+			list_splice_tail_init(&gart_list, &op->list);
+			list_splice_tail_init(&both_list, &op->list);
+			validate_fini_no_ticket(op, NULL, NULL);
 			if (unlikely(ret == -EDEADLK)) {
 				ret = ttm_bo_reserve_slowpath(&nvbo->bo, true,
 							      &op->ticket);
@@ -390,12 +391,9 @@ retry:
 					res_bo = nvbo;
 			}
 			if (unlikely(ret)) {
-				ww_acquire_done(&op->ticket);
-				ww_acquire_fini(&op->ticket);
-				drm_gem_object_unreference_unlocked(gem);
 				if (ret != -ERESTARTSYS)
 					NV_PRINTK(error, cli, "fail reserve\n");
-				return ret;
+				break;
 			}
 		}
 
@@ -404,45 +402,32 @@ retry:
 		nvbo->pbbo_index = i;
 		if ((b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM) &&
 		    (b->valid_domains & NOUVEAU_GEM_DOMAIN_GART))
-			list_add_tail(&nvbo->entry, &op->both_list);
+			list_add_tail(&nvbo->entry, &both_list);
 		else
 		if (b->valid_domains & NOUVEAU_GEM_DOMAIN_VRAM)
-			list_add_tail(&nvbo->entry, &op->vram_list);
+			list_add_tail(&nvbo->entry, &vram_list);
 		else
 		if (b->valid_domains & NOUVEAU_GEM_DOMAIN_GART)
-			list_add_tail(&nvbo->entry, &op->gart_list);
+			list_add_tail(&nvbo->entry, &gart_list);
 		else {
 			NV_PRINTK(error, cli, "invalid valid domains: 0x%08x\n",
 				 b->valid_domains);
-			list_add_tail(&nvbo->entry, &op->both_list);
-			ww_acquire_done(&op->ticket);
-			validate_fini(op, NULL);
-			return -EINVAL;
+			list_add_tail(&nvbo->entry, &both_list);
+			ret = -EINVAL;
+			break;
 		}
 		if (nvbo == res_bo)
 			goto retry;
 	}
 
 	ww_acquire_done(&op->ticket);
-	return 0;
-}
-
-static int
-validate_sync(struct nouveau_channel *chan, struct nouveau_bo *nvbo)
-{
-	struct nouveau_fence *fence = NULL;
-	int ret = 0;
-
-	spin_lock(&nvbo->bo.bdev->fence_lock);
-	fence = nouveau_fence_ref(nvbo->bo.sync_obj);
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
-
-	if (fence) {
-		ret = nouveau_fence_sync(fence, chan);
-		nouveau_fence_unref(&fence);
-	}
-
+	list_splice_tail(&vram_list, &op->list);
+	list_splice_tail(&gart_list, &op->list);
+	list_splice_tail(&both_list, &op->list);
+	if (ret)
+		validate_fini(op, NULL, NULL);
 	return ret;
+
 }
 
 static int
@@ -474,9 +459,10 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		ret = validate_sync(chan, nvbo);
+		ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains);
 		if (unlikely(ret)) {
-			NV_PRINTK(error, cli, "fail post-validate sync\n");
+			if (ret != -ERESTARTSYS)
+				NV_PRINTK(error, cli, "fail post-validate sync\n");
 			return ret;
 		}
 
@@ -513,11 +499,9 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 			     struct validate_op *op, int *apply_relocs)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	int ret, relocs = 0;
+	int ret;
 
-	INIT_LIST_HEAD(&op->vram_list);
-	INIT_LIST_HEAD(&op->gart_list);
-	INIT_LIST_HEAD(&op->both_list);
+	INIT_LIST_HEAD(&op->list);
 
 	if (nr_buffers == 0)
 		return 0;
@@ -529,34 +513,14 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 		return ret;
 	}
 
-	ret = validate_list(chan, cli, &op->vram_list, pbbo, user_buffers);
-	if (unlikely(ret < 0)) {
-		if (ret != -ERESTARTSYS)
-			NV_PRINTK(error, cli, "validate vram_list\n");
-		validate_fini(op, NULL);
-		return ret;
-	}
-	relocs += ret;
-
-	ret = validate_list(chan, cli, &op->gart_list, pbbo, user_buffers);
-	if (unlikely(ret < 0)) {
-		if (ret != -ERESTARTSYS)
-			NV_PRINTK(error, cli, "validate gart_list\n");
-		validate_fini(op, NULL);
-		return ret;
-	}
-	relocs += ret;
-
-	ret = validate_list(chan, cli, &op->both_list, pbbo, user_buffers);
+	ret = validate_list(chan, cli, &op->list, pbbo, user_buffers);
 	if (unlikely(ret < 0)) {
 		if (ret != -ERESTARTSYS)
-			NV_PRINTK(error, cli, "validate both_list\n");
-		validate_fini(op, NULL);
+			NV_PRINTK(error, cli, "validating bo list\n");
+		validate_fini(op, NULL, NULL);
 		return ret;
 	}
-	relocs += ret;
-
-	*apply_relocs = relocs;
+	*apply_relocs = ret;
 	return 0;
 }
 
@@ -659,9 +623,7 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 				data |= r->vor;
 		}
 
-		spin_lock(&nvbo->bo.bdev->fence_lock);
-		ret = ttm_bo_wait(&nvbo->bo, false, false, false);
-		spin_unlock(&nvbo->bo.bdev->fence_lock);
+		ret = ttm_bo_wait(&nvbo->bo, true, false, false);
 		if (ret) {
 			NV_PRINTK(error, cli, "reloc wait_idle failed: %d\n", ret);
 			break;
@@ -839,7 +801,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	}
 
 out:
-	validate_fini(&op, fence);
+	validate_fini(&op, fence, bo);
 	nouveau_fence_unref(&fence);
 
 out_prevalid:
@@ -884,17 +846,29 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	struct drm_gem_object *gem;
 	struct nouveau_bo *nvbo;
 	bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT);
-	int ret = -EINVAL;
+	bool write = !!(req->flags & NOUVEAU_GEM_CPU_PREP_WRITE);
+	int ret;
 
 	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
 	if (!gem)
 		return -ENOENT;
 	nvbo = nouveau_gem_object(gem);
 
-	spin_lock(&nvbo->bo.bdev->fence_lock);
-	ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
-	spin_unlock(&nvbo->bo.bdev->fence_lock);
+	if (no_wait)
+		ret = reservation_object_test_signaled_rcu(nvbo->bo.resv, write) ? 0 : -EBUSY;
+	else {
+		long lret;
+
+		lret = reservation_object_wait_timeout_rcu(nvbo->bo.resv, write, true, 30 * HZ);
+		if (!lret)
+			ret = -EBUSY;
+		else if (lret > 0)
+			ret = 0;
+		else
+			ret = lret;
+	}
 	drm_gem_object_unreference_unlocked(gem);
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_sysfs.c b/drivers/gpu/drm/nouveau/nouveau_sysfs.c
index 3c6962d15b26..8fbbf3093d86 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sysfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sysfs.c
@@ -29,7 +29,7 @@
 #include "nouveau_sysfs.h"
 
 MODULE_PARM_DESC(pstate, "enable sysfs pstate file, which will be moved in the future");
-static int nouveau_pstate;
+int nouveau_pstate;
 module_param_named(pstate, nouveau_pstate, int, 0400);
 
 static inline struct drm_device *
diff --git a/drivers/gpu/drm/nouveau/nouveau_sysfs.h b/drivers/gpu/drm/nouveau/nouveau_sysfs.h
index f973378160f8..4e5ea9241b28 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sysfs.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sysfs.h
@@ -16,4 +16,6 @@ nouveau_sysfs(struct drm_device *dev)
 int  nouveau_sysfs_init(struct drm_device *);
 void nouveau_sysfs_fini(struct drm_device *);
 
+extern int nouveau_pstate;
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 53874b76b031..e81d086577ce 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -71,8 +71,7 @@ nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
 static int
 nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
-			 struct ttm_placement *placement,
-			 uint32_t flags,
+			 const struct ttm_place *place,
 			 struct ttm_mem_reg *mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
@@ -158,8 +157,7 @@ nouveau_gart_manager_del(struct ttm_mem_type_manager *man,
 static int
 nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
-			 struct ttm_placement *placement,
-			 uint32_t flags,
+			 const struct ttm_place *place,
 			 struct ttm_mem_reg *mem)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
@@ -239,8 +237,7 @@ nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem)
 static int
 nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 		      struct ttm_buffer_object *bo,
-		      struct ttm_placement *placement,
-		      uint32_t flags,
+		      const struct ttm_place *place,
 		      struct ttm_mem_reg *mem)
 {
 	struct nouveau_mem *node;
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 239c2c5a9615..4484131d826a 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -41,7 +41,7 @@ nv04_fence_emit(struct nouveau_fence *fence)
 	int ret = RING_SPACE(chan, 2);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubSw, 0x0150, 1);
-		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, fence->base.seqno);
 		FIRE_RING (chan);
 	}
 	return ret;
@@ -75,7 +75,7 @@ nv04_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv04_fence_chan *fctx = kzalloc(sizeof(*fctx), GFP_KERNEL);
 	if (fctx) {
-		nouveau_fence_context_new(&fctx->base);
+		nouveau_fence_context_new(chan, &fctx->base);
 		fctx->base.emit = nv04_fence_emit;
 		fctx->base.sync = nv04_fence_sync;
 		fctx->base.read = nv04_fence_read;
@@ -105,5 +105,7 @@ nv04_fence_create(struct nouveau_drm *drm)
 	priv->base.dtor = nv04_fence_destroy;
 	priv->base.context_new = nv04_fence_context_new;
 	priv->base.context_del = nv04_fence_context_del;
+	priv->base.contexts = 15;
+	priv->base.context_base = fence_context_alloc(priv->base.contexts);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index 4faaf0acf5d7..737d066ffc60 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -33,7 +33,7 @@ nv10_fence_emit(struct nouveau_fence *fence)
 	int ret = RING_SPACE(chan, 2);
 	if (ret == 0) {
 		BEGIN_NV04(chan, 0, NV10_SUBCHAN_REF_CNT, 1);
-		OUT_RING  (chan, fence->sequence);
+		OUT_RING  (chan, fence->base.seqno);
 		FIRE_RING (chan);
 	}
 	return ret;
@@ -75,7 +75,7 @@ nv10_fence_context_new(struct nouveau_channel *chan)
 	if (!fctx)
 		return -ENOMEM;
 
-	nouveau_fence_context_new(&fctx->base);
+	nouveau_fence_context_new(chan, &fctx->base);
 	fctx->base.emit = nv10_fence_emit;
 	fctx->base.read = nv10_fence_read;
 	fctx->base.sync = nv10_fence_sync;
@@ -106,6 +106,8 @@ nv10_fence_create(struct nouveau_drm *drm)
 	priv->base.dtor = nv10_fence_destroy;
 	priv->base.context_new = nv10_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
+	priv->base.contexts = 31;
+	priv->base.context_base = fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index ca907479f92f..6f9a1f8e2d0f 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -84,7 +84,7 @@ nv17_fence_context_new(struct nouveau_channel *chan)
 	if (!fctx)
 		return -ENOMEM;
 
-	nouveau_fence_context_new(&fctx->base);
+	nouveau_fence_context_new(chan, &fctx->base);
 	fctx->base.emit = nv10_fence_emit;
 	fctx->base.read = nv10_fence_read;
 	fctx->base.sync = nv17_fence_sync;
@@ -124,6 +124,8 @@ nv17_fence_create(struct nouveau_drm *drm)
 	priv->base.resume = nv17_fence_resume;
 	priv->base.context_new = nv17_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
+	priv->base.contexts = 31;
+	priv->base.context_base = fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 03949eaa629f..fdb3e1adea1e 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -1066,7 +1066,7 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
 	u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
 	u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks;
 	u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks;
-	u32 vblan2e = 0, vblan2s = 1;
+	u32 vblan2e = 0, vblan2s = 1, vblankus = 0;
 	u32 *push;
 	int ret;
 
@@ -1083,6 +1083,11 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
 	vblanke = vsynce + vbackp;
 	vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
 	vblanks = vactive - vfrontp - 1;
+	/* XXX: Safe underestimate, even "0" works */
+	vblankus = (vactive - mode->vdisplay - 2) * hactive;
+	vblankus *= 1000;
+	vblankus /= mode->clock;
+
 	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
 		vblan2e = vactive + vsynce + vbackp;
 		vblan2s = vblan2e + (mode->vdisplay * vscan / ilace);
@@ -1099,14 +1104,14 @@ nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
 			evo_mthd(push, 0x0804 + (nv_crtc->index * 0x400), 2);
 			evo_data(push, 0x00800000 | mode->clock);
 			evo_data(push, (ilace == 2) ? 2 : 0);
-			evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 6);
+			evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 8);
 			evo_data(push, 0x00000000);
 			evo_data(push, (vactive << 16) | hactive);
 			evo_data(push, ( vsynce << 16) | hsynce);
 			evo_data(push, (vblanke << 16) | hblanke);
 			evo_data(push, (vblanks << 16) | hblanks);
 			evo_data(push, (vblan2e << 16) | vblan2s);
-			evo_mthd(push, 0x082c + (nv_crtc->index * 0x400), 1);
+			evo_data(push, vblankus);
 			evo_data(push, 0x00000000);
 			evo_mthd(push, 0x0900 + (nv_crtc->index * 0x400), 2);
 			evo_data(push, 0x00000311);
@@ -1651,17 +1656,21 @@ static void
 nv50_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nouveau_connector *nv_connector;
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hda_eld_v0 eld;
+	struct __packed {
+		struct {
+			struct nv50_disp_mthd_v1 mthd;
+			struct nv50_disp_sor_hda_eld_v0 eld;
+		} base;
 		u8 data[sizeof(nv_connector->base.eld)];
 	} args = {
-		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = nv_encoder->dcb->hashm,
+		.base.mthd.version = 1,
+		.base.mthd.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
+		.base.mthd.hasht   = nv_encoder->dcb->hasht,
+		.base.mthd.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
+				     (0x0100 << nv_crtc->index),
 	};
 
 	nv_connector = nouveau_encoder_connector_get(nv_encoder);
@@ -1671,11 +1680,11 @@ nv50_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
 	drm_edid_to_eld(&nv_connector->base, nv_connector->edid);
 	memcpy(args.data, nv_connector->base.eld, sizeof(args.data));
 
-	nvif_mthd(disp->disp, 0, &args, sizeof(args));
+	nvif_mthd(disp->disp, 0, &args, sizeof(args.base) + args.data[2] * 4);
 }
 
 static void
-nv50_audio_disconnect(struct drm_encoder *encoder)
+nv50_audio_disconnect(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
@@ -1686,7 +1695,8 @@ nv50_audio_disconnect(struct drm_encoder *encoder)
 		.base.version = 1,
 		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
 		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = nv_encoder->dcb->hashm,
+		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
+				(0x0100 << nv_crtc->index),
 	};
 
 	nvif_mthd(disp->disp, 0, &args, sizeof(args));
@@ -1745,8 +1755,6 @@ nv50_hdmi_disconnect(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
 			       (0x0100 << nv_crtc->index),
 	};
 
-	nv50_audio_disconnect(encoder);
-
 	nvif_mthd(disp->disp, 0, &args, sizeof(args));
 }
 
@@ -1855,6 +1863,7 @@ nv50_sor_disconnect(struct drm_encoder *encoder)
 	if (nv_crtc) {
 		nv50_crtc_prepare(&nv_crtc->base);
 		nv50_sor_ctrl(nv_encoder, 1 << nv_crtc->index, 0);
+		nv50_audio_disconnect(encoder, nv_crtc);
 		nv50_hdmi_disconnect(&nv_encoder->base.base, nv_crtc);
 	}
 }
@@ -1954,6 +1963,7 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
 			proto = 0x8;
 		else
 			proto = 0x9;
+		nv50_audio_mode_set(encoder, mode);
 		break;
 	default:
 		BUG_ON(1);
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index 195cf51a7c31..08fad3668a1c 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -46,7 +46,7 @@ nv50_fence_context_new(struct nouveau_channel *chan)
 	if (!fctx)
 		return -ENOMEM;
 
-	nouveau_fence_context_new(&fctx->base);
+	nouveau_fence_context_new(chan, &fctx->base);
 	fctx->base.emit = nv10_fence_emit;
 	fctx->base.read = nv10_fence_read;
 	fctx->base.sync = nv17_fence_sync;
@@ -95,6 +95,8 @@ nv50_fence_create(struct nouveau_drm *drm)
 	priv->base.resume = nv17_fence_resume;
 	priv->base.context_new = nv50_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
+	priv->base.contexts = 127;
+	priv->base.context_base = fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
 	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 933a779c93ab..a2f28082c272 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -82,7 +82,7 @@ nv84_fence_emit(struct nouveau_fence *fence)
 	else
 		addr += fctx->vma.offset;
 
-	return fctx->base.emit32(chan, addr, fence->sequence);
+	return fctx->base.emit32(chan, addr, fence->base.seqno);
 }
 
 static int
@@ -97,7 +97,7 @@ nv84_fence_sync(struct nouveau_fence *fence,
 	else
 		addr += fctx->vma.offset;
 
-	return fctx->base.sync32(chan, addr, fence->sequence);
+	return fctx->base.sync32(chan, addr, fence->base.seqno);
 }
 
 static u32
@@ -139,12 +139,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	if (!fctx)
 		return -ENOMEM;
 
-	nouveau_fence_context_new(&fctx->base);
+	nouveau_fence_context_new(chan, &fctx->base);
 	fctx->base.emit = nv84_fence_emit;
 	fctx->base.sync = nv84_fence_sync;
 	fctx->base.read = nv84_fence_read;
 	fctx->base.emit32 = nv84_fence_emit32;
 	fctx->base.sync32 = nv84_fence_sync32;
+	fctx->base.sequence = nv84_fence_read(chan);
 
 	ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
 	if (ret == 0) {
@@ -168,13 +169,12 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 static bool
 nv84_fence_suspend(struct nouveau_drm *drm)
 {
-	struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
 	struct nv84_fence_priv *priv = drm->fence;
 	int i;
 
-	priv->suspend = vmalloc((pfifo->max + 1) * sizeof(u32));
+	priv->suspend = vmalloc(priv->base.contexts * sizeof(u32));
 	if (priv->suspend) {
-		for (i = 0; i <= pfifo->max; i++)
+		for (i = 0; i < priv->base.contexts; i++)
 			priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4);
 	}
 
@@ -184,12 +184,11 @@ nv84_fence_suspend(struct nouveau_drm *drm)
 static void
 nv84_fence_resume(struct nouveau_drm *drm)
 {
-	struct nouveau_fifo *pfifo = nvkm_fifo(&drm->device);
 	struct nv84_fence_priv *priv = drm->fence;
 	int i;
 
 	if (priv->suspend) {
-		for (i = 0; i <= pfifo->max; i++)
+		for (i = 0; i < priv->base.contexts; i++)
 			nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]);
 		vfree(priv->suspend);
 		priv->suspend = NULL;
@@ -229,10 +228,11 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv84_fence_context_new;
 	priv->base.context_del = nv84_fence_context_del;
 
-	init_waitqueue_head(&priv->base.waiting);
+	priv->base.contexts = pfifo->max + 1;
+	priv->base.context_base = fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
-	ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
+	ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
 			     TTM_PL_FLAG_VRAM, 0, 0, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM);
@@ -246,7 +246,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	}
 
 	if (ret == 0)
-		ret = nouveau_bo_new(drm->dev, 16 * (pfifo->max + 1), 0,
+		ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
 				     TTM_PL_FLAG_TT, 0, 0, NULL,
 				     &priv->bo_gart);
 	if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nvif/class.h b/drivers/gpu/drm/nouveau/nvif/class.h
index 573491f84792..e5a27df0672b 100644
--- a/drivers/gpu/drm/nouveau/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/nvif/class.h
@@ -479,6 +479,8 @@ struct nv50_disp_core_channel_dma_v0 {
 	__u32 pushbuf;
 };
 
+#define NV50_DISP_CORE_CHANNEL_DMA_V0_NTFY_UEVENT                          0x00
+
 /* cursor immediate */
 struct nv50_disp_cursor_v0 {
 	__u8  version;
@@ -486,6 +488,8 @@ struct nv50_disp_cursor_v0 {
 	__u8  pad02[6];
 };
 
+#define NV50_DISP_CURSOR_V0_NTFY_UEVENT                                    0x00
+
 /* base */
 struct nv50_disp_base_channel_dma_v0 {
 	__u8  version;
@@ -494,6 +498,8 @@ struct nv50_disp_base_channel_dma_v0 {
 	__u32 pushbuf;
 };
 
+#define NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT                          0x00
+
 /* overlay */
 struct nv50_disp_overlay_channel_dma_v0 {
 	__u8  version;
@@ -502,6 +508,8 @@ struct nv50_disp_overlay_channel_dma_v0 {
 	__u32 pushbuf;
 };
 
+#define NV50_DISP_OVERLAY_CHANNEL_DMA_V0_NTFY_UEVENT                       0x00
+
 /* overlay immediate */
 struct nv50_disp_overlay_v0 {
 	__u8  version;
@@ -509,6 +517,7 @@ struct nv50_disp_overlay_v0 {
 	__u8  pad02[6];
 };
 
+#define NV50_DISP_OVERLAY_V0_NTFY_UEVENT                                   0x00
 
 /*******************************************************************************
  * fermi
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index 002b9721e85a..862ba03c236c 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -629,6 +629,7 @@ static struct drm_driver omap_drm_driver = {
 		.lastclose = dev_lastclose,
 		.preclose = dev_preclose,
 		.postclose = dev_postclose,
+		.set_busid = drm_platform_set_busid,
 		.get_vblank_counter = drm_vblank_count,
 		.enable_vblank = omap_irq_enable_vblank,
 		.disable_vblank = omap_irq_disable_vblank,
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 4ce1db0a68ff..23de22f8c820 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -352,6 +352,30 @@ static const struct panel_desc auo_b101aw03 = {
 	},
 };
 
+static const struct drm_display_mode auo_b101xtn01_mode = {
+	.clock = 72000,
+	.hdisplay = 1366,
+	.hsync_start = 1366 + 20,
+	.hsync_end = 1366 + 20 + 70,
+	.htotal = 1366 + 20 + 70,
+	.vdisplay = 768,
+	.vsync_start = 768 + 14,
+	.vsync_end = 768 + 14 + 42,
+	.vtotal = 768 + 14 + 42,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc auo_b101xtn01 = {
+	.modes = &auo_b101xtn01_mode,
+	.num_modes = 1,
+	.bpc = 6,
+	.size = {
+		.width = 223,
+		.height = 125,
+	},
+};
+
 static const struct drm_display_mode auo_b133xtn01_mode = {
 	.clock = 69500,
 	.hdisplay = 1366,
@@ -616,6 +640,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "auo,b101aw03",
 		.data = &auo_b101aw03,
 	}, {
+		.compatible = "auo,b101xtn01",
+		.data = &auo_b101xtn01,
+	}, {
 		.compatible = "auo,b133htn01",
 		.data = &auo_b133htn01,
 	}, {
diff --git a/drivers/gpu/drm/qxl/Makefile b/drivers/gpu/drm/qxl/Makefile
index ea046ba691d2..bacc4aff1201 100644
--- a/drivers/gpu/drm/qxl/Makefile
+++ b/drivers/gpu/drm/qxl/Makefile
@@ -4,6 +4,6 @@
 
 ccflags-y := -Iinclude/drm
 
-qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_fence.o qxl_release.o
+qxl-y := qxl_drv.o qxl_kms.o qxl_display.o qxl_ttm.o qxl_fb.o qxl_object.o qxl_gem.o qxl_cmd.o qxl_image.o qxl_draw.o qxl_debugfs.o qxl_irq.o qxl_dumb.o qxl_ioctl.o qxl_release.o qxl_prime.o
 
 obj-$(CONFIG_DRM_QXL)+= qxl.o
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index eb89653a7a17..97823644d347 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -620,17 +620,10 @@ static int qxl_reap_surf(struct qxl_device *qdev, struct qxl_bo *surf, bool stal
 	if (ret == -EBUSY)
 		return -EBUSY;
 
-	if (surf->fence.num_active_releases > 0 && stall == false) {
-		qxl_bo_unreserve(surf);
-		return -EBUSY;
-	}
-
 	if (stall)
 		mutex_unlock(&qdev->surf_evict_mutex);
 
-	spin_lock(&surf->tbo.bdev->fence_lock);
 	ret = ttm_bo_wait(&surf->tbo, true, true, !stall);
-	spin_unlock(&surf->tbo.bdev->fence_lock);
 
 	if (stall)
 		mutex_lock(&qdev->surf_evict_mutex);
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index c3c2bbdc6674..6911b8c44492 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -58,9 +58,17 @@ qxl_debugfs_buffers_info(struct seq_file *m, void *data)
 	struct qxl_bo *bo;
 
 	list_for_each_entry(bo, &qdev->gem.objects, list) {
-		seq_printf(m, "size %ld, pc %d, sync obj %p, num releases %d\n",
-			   (unsigned long)bo->gem_base.size, bo->pin_count,
-			   bo->tbo.sync_obj, bo->fence.num_active_releases);
+		struct reservation_object_list *fobj;
+		int rel;
+
+		rcu_read_lock();
+		fobj = rcu_dereference(bo->tbo.resv->fence);
+		rel = fobj ? fobj->shared_count : 0;
+		rcu_read_unlock();
+
+		seq_printf(m, "size %ld, pc %d, num releases %d\n",
+			   (unsigned long)bo->gem_base.size,
+			   bo->pin_count, rel);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index b8ced08b6291..af9e78546688 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -187,6 +187,54 @@ static void qxl_crtc_destroy(struct drm_crtc *crtc)
 	kfree(qxl_crtc);
 }
 
+static int qxl_crtc_page_flip(struct drm_crtc *crtc,
+                              struct drm_framebuffer *fb,
+                              struct drm_pending_vblank_event *event,
+                              uint32_t page_flip_flags)
+{
+	struct drm_device *dev = crtc->dev;
+	struct qxl_device *qdev = dev->dev_private;
+	struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
+	struct qxl_framebuffer *qfb_src = to_qxl_framebuffer(fb);
+	struct qxl_framebuffer *qfb_old = to_qxl_framebuffer(crtc->primary->fb);
+	struct qxl_bo *bo_old = gem_to_qxl_bo(qfb_old->obj);
+	struct qxl_bo *bo = gem_to_qxl_bo(qfb_src->obj);
+	unsigned long flags;
+	struct drm_clip_rect norect = {
+	    .x1 = 0,
+	    .y1 = 0,
+	    .x2 = fb->width,
+	    .y2 = fb->height
+	};
+	int inc = 1;
+	int one_clip_rect = 1;
+	int ret = 0;
+
+	crtc->primary->fb = fb;
+	bo_old->is_primary = false;
+	bo->is_primary = true;
+
+	ret = qxl_bo_reserve(bo, false);
+	if (ret)
+		return ret;
+
+	qxl_draw_dirty_fb(qdev, qfb_src, bo, 0, 0,
+			  &norect, one_clip_rect, inc);
+
+	drm_vblank_get(dev, qcrtc->index);
+
+	if (event) {
+		spin_lock_irqsave(&dev->event_lock, flags);
+		drm_send_vblank_event(dev, qcrtc->index, event);
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	}
+	drm_vblank_put(dev, qcrtc->index);
+
+	qxl_bo_unreserve(bo);
+
+	return 0;
+}
+
 static int
 qxl_hide_cursor(struct qxl_device *qdev)
 {
@@ -374,6 +422,7 @@ static const struct drm_crtc_funcs qxl_crtc_funcs = {
 	.cursor_move = qxl_crtc_cursor_move,
 	.set_config = drm_crtc_helper_set_config,
 	.destroy = qxl_crtc_destroy,
+	.page_flip = qxl_crtc_page_flip,
 };
 
 static void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index a3fd92029a14..1d9b80c91a15 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -84,6 +84,7 @@ static const struct file_operations qxl_fops = {
 	.release = drm_release,
 	.unlocked_ioctl = drm_ioctl,
 	.poll = drm_poll,
+	.read = drm_read,
 	.mmap = qxl_mmap,
 };
 
@@ -195,6 +196,20 @@ static int qxl_pm_restore(struct device *dev)
 	return qxl_drm_resume(drm_dev, false);
 }
 
+static u32 qxl_noop_get_vblank_counter(struct drm_device *dev, int crtc)
+{
+	return dev->vblank[crtc].count.counter;
+}
+
+static int qxl_noop_enable_vblank(struct drm_device *dev, int crtc)
+{
+	return 0;
+}
+
+static void qxl_noop_disable_vblank(struct drm_device *dev, int crtc)
+{
+}
+
 static const struct dev_pm_ops qxl_pm_ops = {
 	.suspend = qxl_pm_suspend,
 	.resume = qxl_pm_resume,
@@ -212,10 +227,15 @@ static struct pci_driver qxl_pci_driver = {
 };
 
 static struct drm_driver qxl_driver = {
-	.driver_features = DRIVER_GEM | DRIVER_MODESET |
+	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
 			   DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
 	.load = qxl_driver_load,
 	.unload = qxl_driver_unload,
+	.get_vblank_counter = qxl_noop_get_vblank_counter,
+	.enable_vblank = qxl_noop_enable_vblank,
+	.disable_vblank = qxl_noop_disable_vblank,
+
+	.set_busid = drm_pci_set_busid,
 
 	.dumb_create = qxl_mode_dumb_create,
 	.dumb_map_offset = qxl_mode_dumb_mmap,
@@ -224,6 +244,17 @@ static struct drm_driver qxl_driver = {
 	.debugfs_init = qxl_debugfs_init,
 	.debugfs_cleanup = qxl_debugfs_takedown,
 #endif
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_import = drm_gem_prime_import,
+	.gem_prime_pin = qxl_gem_prime_pin,
+	.gem_prime_unpin = qxl_gem_prime_unpin,
+	.gem_prime_get_sg_table = qxl_gem_prime_get_sg_table,
+	.gem_prime_import_sg_table = qxl_gem_prime_import_sg_table,
+	.gem_prime_vmap = qxl_gem_prime_vmap,
+	.gem_prime_vunmap = qxl_gem_prime_vunmap,
+	.gem_prime_mmap = qxl_gem_prime_mmap,
 	.gem_free_object = qxl_gem_object_free,
 	.gem_open_object = qxl_gem_object_open,
 	.gem_close_object = qxl_gem_object_close,
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 36ed40ba773f..d75c0a9f674f 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -31,6 +31,7 @@
  * Definitions taken from spice-protocol, plus kernel driver specific bits.
  */
 
+#include <linux/fence.h>
 #include <linux/workqueue.h>
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
@@ -95,31 +96,24 @@ enum {
 	QXL_INTERRUPT_IO_CMD |\
 	QXL_INTERRUPT_CLIENT_MONITORS_CONFIG)
 
-struct qxl_fence {
-	struct qxl_device *qdev;
-	uint32_t num_active_releases;
-	uint32_t *release_ids;
-	struct radix_tree_root tree;
-};
-
 struct qxl_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
-	u32				placements[3];
+	struct ttm_place		placements[3];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
 	unsigned			pin_count;
 	void				*kptr;
 	int                             type;
+
 	/* Constant after initialization */
 	struct drm_gem_object		gem_base;
 	bool is_primary; /* is this now a primary surface */
 	bool hw_surf_alloc;
 	struct qxl_surface surf;
 	uint32_t surface_id;
-	struct qxl_fence fence; /* per bo fence  - list of releases */
 	struct qxl_release *surf_create;
 };
 #define gem_to_qxl_bo(gobj) container_of((gobj), struct qxl_bo, gem_base)
@@ -191,6 +185,8 @@ enum {
  * spice-protocol/qxl_dev.h */
 #define QXL_MAX_RES 96
 struct qxl_release {
+	struct fence base;
+
 	int id;
 	int type;
 	uint32_t release_offset;
@@ -284,7 +280,9 @@ struct qxl_device {
 	uint8_t		slot_gen_bits;
 	uint64_t	va_slot_mask;
 
+	spinlock_t	release_lock;
 	struct idr	release_idr;
+	uint32_t	release_seqno;
 	spinlock_t release_idr_lock;
 	struct mutex	async_io_mutex;
 	unsigned int last_sent_io_cmd;
@@ -532,6 +530,18 @@ int qxl_garbage_collect(struct qxl_device *qdev);
 int qxl_debugfs_init(struct drm_minor *minor);
 void qxl_debugfs_takedown(struct drm_minor *minor);
 
+/* qxl_prime.c */
+int qxl_gem_prime_pin(struct drm_gem_object *obj);
+void qxl_gem_prime_unpin(struct drm_gem_object *obj);
+struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *qxl_gem_prime_import_sg_table(
+	struct drm_device *dev, size_t size,
+	struct sg_table *sgt);
+void *qxl_gem_prime_vmap(struct drm_gem_object *obj);
+void qxl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
+int qxl_gem_prime_mmap(struct drm_gem_object *obj,
+				struct vm_area_struct *vma);
+
 /* qxl_irq.c */
 int qxl_irq_init(struct qxl_device *qdev);
 irqreturn_t qxl_irq_handler(int irq, void *arg);
@@ -561,10 +571,4 @@ qxl_surface_lookup(struct drm_device *dev, int surface_id);
 void qxl_surface_evict(struct qxl_device *qdev, struct qxl_bo *surf, bool freeing);
 int qxl_update_surface(struct qxl_device *qdev, struct qxl_bo *surf);
 
-/* qxl_fence.c */
-void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id);
-int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id);
-int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence);
-void qxl_fence_fini(struct qxl_fence *qfence);
-
 #endif
diff --git a/drivers/gpu/drm/qxl/qxl_fence.c b/drivers/gpu/drm/qxl/qxl_fence.c
deleted file mode 100644
index ae59e91cfb9a..000000000000
--- a/drivers/gpu/drm/qxl/qxl_fence.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright 2013 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Dave Airlie
- *          Alon Levy
- */
-
-
-#include "qxl_drv.h"
-
-/* QXL fencing-
-
-   When we submit operations to the GPU we pass a release reference to the GPU
-   with them, the release reference is then added to the release ring when
-   the GPU is finished with that particular operation and has removed it from
-   its tree.
-
-   So we have can have multiple outstanding non linear fences per object.
-
-   From a TTM POV we only care if the object has any outstanding releases on
-   it.
-
-   we wait until all outstanding releases are processeed.
-
-   sync object is just a list of release ids that represent that fence on
-   that buffer.
-
-   we just add new releases onto the sync object attached to the object.
-
-   This currently uses a radix tree to store the list of release ids.
-
-   For some reason every so often qxl hw fails to release, things go wrong.
-*/
-/* must be called with the fence lock held */
-void qxl_fence_add_release_locked(struct qxl_fence *qfence, uint32_t rel_id)
-{
-	radix_tree_insert(&qfence->tree, rel_id, qfence);
-	qfence->num_active_releases++;
-}
-
-int qxl_fence_remove_release(struct qxl_fence *qfence, uint32_t rel_id)
-{
-	void *ret;
-	int retval = 0;
-	struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
-	spin_lock(&bo->tbo.bdev->fence_lock);
-
-	ret = radix_tree_delete(&qfence->tree, rel_id);
-	if (ret == qfence)
-		qfence->num_active_releases--;
-	else {
-		DRM_DEBUG("didn't find fence in radix tree for %d\n", rel_id);
-		retval = -ENOENT;
-	}
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	return retval;
-}
-
-
-int qxl_fence_init(struct qxl_device *qdev, struct qxl_fence *qfence)
-{
-	qfence->qdev = qdev;
-	qfence->num_active_releases = 0;
-	INIT_RADIX_TREE(&qfence->tree, GFP_ATOMIC);
-	return 0;
-}
-
-void qxl_fence_fini(struct qxl_fence *qfence)
-{
-	kfree(qfence->release_ids);
-	qfence->num_active_releases = 0;
-}
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index fd88eb4a3f79..b2977a181935 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -223,6 +223,7 @@ static int qxl_device_init(struct qxl_device *qdev,
 
 	idr_init(&qdev->release_idr);
 	spin_lock_init(&qdev->release_idr_lock);
+	spin_lock_init(&qdev->release_lock);
 
 	idr_init(&qdev->surf_id_idr);
 	spin_lock_init(&qdev->surf_id_idr_lock);
@@ -297,6 +298,9 @@ int qxl_driver_unload(struct drm_device *dev)
 
 	if (qdev == NULL)
 		return 0;
+
+	drm_vblank_cleanup(dev);
+
 	qxl_modeset_fini(qdev);
 	qxl_device_fini(qdev);
 
@@ -324,15 +328,20 @@ int qxl_driver_load(struct drm_device *dev, unsigned long flags)
 	if (r)
 		goto out;
 
+	r = drm_vblank_init(dev, 1);
+	if (r)
+		goto unload;
+
 	r = qxl_modeset_init(qdev);
-	if (r) {
-		qxl_driver_unload(dev);
-		goto out;
-	}
+	if (r)
+		goto unload;
 
 	drm_kms_helper_poll_init(qdev->ddev);
 
 	return 0;
+unload:
+	qxl_driver_unload(dev);
+
 out:
 	kfree(qdev);
 	return r;
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index b95f144f0b49..69c104c3240f 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -36,7 +36,6 @@ static void qxl_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
 
 	qxl_surface_evict(qdev, bo, false);
-	qxl_fence_fini(&bo->fence);
 	mutex_lock(&qdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&qdev->gem.mutex);
@@ -55,21 +54,24 @@ void qxl_ttm_placement_from_domain(struct qxl_bo *qbo, u32 domain, bool pinned)
 {
 	u32 c = 0;
 	u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0;
+	unsigned i;
 
-	qbo->placement.fpfn = 0;
-	qbo->placement.lpfn = 0;
 	qbo->placement.placement = qbo->placements;
 	qbo->placement.busy_placement = qbo->placements;
 	if (domain == QXL_GEM_DOMAIN_VRAM)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag;
+		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_VRAM | pflag;
 	if (domain == QXL_GEM_DOMAIN_SURFACE)
-		qbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag;
+		qbo->placements[c++].flags = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_PRIV0 | pflag;
 	if (domain == QXL_GEM_DOMAIN_CPU)
-		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag;
+		qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM | pflag;
 	if (!c)
-		qbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		qbo->placements[c++].flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 	qbo->placement.num_placement = c;
 	qbo->placement.num_busy_placement = c;
+	for (i = 0; i < c; ++i) {
+		qbo->placements[i].fpfn = 0;
+		qbo->placements[i].lpfn = 0;
+	}
 }
 
 
@@ -99,7 +101,6 @@ int qxl_bo_create(struct qxl_device *qdev,
 	bo->type = domain;
 	bo->pin_count = pinned ? 1 : 0;
 	bo->surface_id = 0;
-	qxl_fence_init(qdev, &bo->fence);
 	INIT_LIST_HEAD(&bo->list);
 
 	if (surf)
@@ -259,7 +260,7 @@ int qxl_bo_unpin(struct qxl_bo *bo)
 	if (bo->pin_count)
 		return 0;
 	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r != 0))
 		dev_err(qdev->dev, "%p validate failed for unpin\n", bo);
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index 83a423293afd..37af1bc0dd00 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -76,12 +76,10 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type,
 		}
 		return r;
 	}
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
+
+	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c
new file mode 100644
index 000000000000..ba0689c728e8
--- /dev/null
+++ b/drivers/gpu/drm/qxl/qxl_prime.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2014 Canonical
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Andreas Pokorny
+ */
+
+#include "qxl_drv.h"
+
+/* Empty Implementations as there should not be any other driver for a virtual
+ * device that might share buffers with qxl */
+
+int qxl_gem_prime_pin(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return -ENOSYS;
+}
+
+void qxl_gem_prime_unpin(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+}
+
+
+struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+struct drm_gem_object *qxl_gem_prime_import_sg_table(
+	struct drm_device *dev, size_t size,
+	struct sg_table *table)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+void *qxl_gem_prime_vmap(struct drm_gem_object *obj)
+{
+	WARN_ONCE(1, "not implemented");
+	return ERR_PTR(-ENOSYS);
+}
+
+void qxl_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	WARN_ONCE(1, "not implemented");
+}
+
+int qxl_gem_prime_mmap(struct drm_gem_object *obj,
+		       struct vm_area_struct *area)
+{
+	WARN_ONCE(1, "not implemented");
+	return ENOSYS;
+}
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 14e776f1d14e..446e71ca36cb 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -21,6 +21,7 @@
  */
 #include "qxl_drv.h"
 #include "qxl_object.h"
+#include <trace/events/fence.h>
 
 /*
  * drawable cmd cache - allocate a bunch of VRAM pages, suballocate
@@ -39,6 +40,88 @@
 static const int release_size_per_bo[] = { RELEASE_SIZE, SURFACE_RELEASE_SIZE, RELEASE_SIZE };
 static const int releases_per_bo[] = { RELEASES_PER_BO, SURFACE_RELEASES_PER_BO, RELEASES_PER_BO };
 
+static const char *qxl_get_driver_name(struct fence *fence)
+{
+	return "qxl";
+}
+
+static const char *qxl_get_timeline_name(struct fence *fence)
+{
+	return "release";
+}
+
+static bool qxl_nop_signaling(struct fence *fence)
+{
+	/* fences are always automatically signaled, so just pretend we did this.. */
+	return true;
+}
+
+static long qxl_fence_wait(struct fence *fence, bool intr, signed long timeout)
+{
+	struct qxl_device *qdev;
+	struct qxl_release *release;
+	int count = 0, sc = 0;
+	bool have_drawable_releases;
+	unsigned long cur, end = jiffies + timeout;
+
+	qdev = container_of(fence->lock, struct qxl_device, release_lock);
+	release = container_of(fence, struct qxl_release, base);
+	have_drawable_releases = release->type == QXL_RELEASE_DRAWABLE;
+
+retry:
+	sc++;
+
+	if (fence_is_signaled(fence))
+		goto signaled;
+
+	qxl_io_notify_oom(qdev);
+
+	for (count = 0; count < 11; count++) {
+		if (!qxl_queue_garbage_collect(qdev, true))
+			break;
+
+		if (fence_is_signaled(fence))
+			goto signaled;
+	}
+
+	if (fence_is_signaled(fence))
+		goto signaled;
+
+	if (have_drawable_releases || sc < 4) {
+		if (sc > 2)
+			/* back off */
+			usleep_range(500, 1000);
+
+		if (time_after(jiffies, end))
+			return 0;
+
+		if (have_drawable_releases && sc > 300) {
+			FENCE_WARN(fence, "failed to wait on release %d "
+					  "after spincount %d\n",
+					  fence->context & ~0xf0000000, sc);
+			goto signaled;
+		}
+		goto retry;
+	}
+	/*
+	 * yeah, original sync_obj_wait gave up after 3 spins when
+	 * have_drawable_releases is not set.
+	 */
+
+signaled:
+	cur = jiffies;
+	if (time_after(cur, end))
+		return 0;
+	return end - cur;
+}
+
+static const struct fence_ops qxl_fence_ops = {
+	.get_driver_name = qxl_get_driver_name,
+	.get_timeline_name = qxl_get_timeline_name,
+	.enable_signaling = qxl_nop_signaling,
+	.wait = qxl_fence_wait,
+};
+
 static uint64_t
 qxl_release_alloc(struct qxl_device *qdev, int type,
 		  struct qxl_release **ret)
@@ -46,13 +129,13 @@ qxl_release_alloc(struct qxl_device *qdev, int type,
 	struct qxl_release *release;
 	int handle;
 	size_t size = sizeof(*release);
-	int idr_ret;
 
 	release = kmalloc(size, GFP_KERNEL);
 	if (!release) {
 		DRM_ERROR("Out of memory\n");
 		return 0;
 	}
+	release->base.ops = NULL;
 	release->type = type;
 	release->release_offset = 0;
 	release->surface_release_id = 0;
@@ -60,44 +143,61 @@ qxl_release_alloc(struct qxl_device *qdev, int type,
 
 	idr_preload(GFP_KERNEL);
 	spin_lock(&qdev->release_idr_lock);
-	idr_ret = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
+	handle = idr_alloc(&qdev->release_idr, release, 1, 0, GFP_NOWAIT);
+	release->base.seqno = ++qdev->release_seqno;
 	spin_unlock(&qdev->release_idr_lock);
 	idr_preload_end();
-	handle = idr_ret;
-	if (idr_ret < 0)
-		goto release_fail;
+	if (handle < 0) {
+		kfree(release);
+		*ret = NULL;
+		return handle;
+	}
 	*ret = release;
 	QXL_INFO(qdev, "allocated release %lld\n", handle);
 	release->id = handle;
-release_fail:
-
 	return handle;
 }
 
+static void
+qxl_release_free_list(struct qxl_release *release)
+{
+	while (!list_empty(&release->bos)) {
+		struct qxl_bo_list *entry;
+		struct qxl_bo *bo;
+
+		entry = container_of(release->bos.next,
+				     struct qxl_bo_list, tv.head);
+		bo = to_qxl_bo(entry->tv.bo);
+		qxl_bo_unref(&bo);
+		list_del(&entry->tv.head);
+		kfree(entry);
+	}
+}
+
 void
 qxl_release_free(struct qxl_device *qdev,
 		 struct qxl_release *release)
 {
-	struct qxl_bo_list *entry, *tmp;
 	QXL_INFO(qdev, "release %d, type %d\n", release->id,
 		 release->type);
 
 	if (release->surface_release_id)
 		qxl_surface_id_dealloc(qdev, release->surface_release_id);
 
-	list_for_each_entry_safe(entry, tmp, &release->bos, tv.head) {
-		struct qxl_bo *bo = to_qxl_bo(entry->tv.bo);
-		QXL_INFO(qdev, "release %llx\n",
-			drm_vma_node_offset_addr(&entry->tv.bo->vma_node)
-						- DRM_FILE_OFFSET);
-		qxl_fence_remove_release(&bo->fence, release->id);
-		qxl_bo_unref(&bo);
-		kfree(entry);
-	}
 	spin_lock(&qdev->release_idr_lock);
 	idr_remove(&qdev->release_idr, release->id);
 	spin_unlock(&qdev->release_idr_lock);
-	kfree(release);
+
+	if (release->base.ops) {
+		WARN_ON(list_empty(&release->bos));
+		qxl_release_free_list(release);
+
+		fence_signal(&release->base);
+		fence_put(&release->base);
+	} else {
+		qxl_release_free_list(release);
+		kfree(release);
+	}
 }
 
 static int qxl_release_bo_alloc(struct qxl_device *qdev,
@@ -126,6 +226,7 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
 
 	qxl_bo_ref(bo);
 	entry->tv.bo = &bo->tbo;
+	entry->tv.shared = false;
 	list_add_tail(&entry->tv.head, &release->bos);
 	return 0;
 }
@@ -142,6 +243,10 @@ static int qxl_release_validate_bo(struct qxl_bo *bo)
 			return ret;
 	}
 
+	ret = reservation_object_reserve_shared(bo->tbo.resv);
+	if (ret)
+		return ret;
+
 	/* allocate a surface for reserved + validated buffers */
 	ret = qxl_bo_check_id(bo->gem_base.dev->dev_private, bo);
 	if (ret)
@@ -159,7 +264,7 @@ int qxl_release_reserve_list(struct qxl_release *release, bool no_intr)
 	if (list_is_singular(&release->bos))
 		return 0;
 
-	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos);
+	ret = ttm_eu_reserve_buffers(&release->ticket, &release->bos, !no_intr);
 	if (ret)
 		return ret;
 
@@ -199,6 +304,8 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev,
 
 		/* stash the release after the create command */
 		idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release);
+		if (idr_ret < 0)
+			return idr_ret;
 		bo = qxl_bo_ref(to_qxl_bo(entry->tv.bo));
 
 		(*release)->release_offset = create_rel->release_offset + 64;
@@ -239,6 +346,11 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 	}
 
 	idr_ret = qxl_release_alloc(qdev, type, release);
+	if (idr_ret < 0) {
+		if (rbo)
+			*rbo = NULL;
+		return idr_ret;
+	}
 
 	mutex_lock(&qdev->release_mutex);
 	if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) {
@@ -319,40 +431,44 @@ void qxl_release_unmap(struct qxl_device *qdev,
 
 void qxl_release_fence_buffer_objects(struct qxl_release *release)
 {
-	struct ttm_validate_buffer *entry;
 	struct ttm_buffer_object *bo;
 	struct ttm_bo_global *glob;
 	struct ttm_bo_device *bdev;
 	struct ttm_bo_driver *driver;
 	struct qxl_bo *qbo;
+	struct ttm_validate_buffer *entry;
+	struct qxl_device *qdev;
 
 	/* if only one object on the release its the release itself
 	   since these objects are pinned no need to reserve */
-	if (list_is_singular(&release->bos))
+	if (list_is_singular(&release->bos) || list_empty(&release->bos))
 		return;
 
 	bo = list_first_entry(&release->bos, struct ttm_validate_buffer, head)->bo;
 	bdev = bo->bdev;
+	qdev = container_of(bdev, struct qxl_device, mman.bdev);
+
+	/*
+	 * Since we never really allocated a context and we don't want to conflict,
+	 * set the highest bits. This will break if we really allow exporting of dma-bufs.
+	 */
+	fence_init(&release->base, &qxl_fence_ops, &qdev->release_lock,
+		   release->id | 0xf0000000, release->base.seqno);
+	trace_fence_emit(&release->base);
+
 	driver = bdev->driver;
 	glob = bo->glob;
 
 	spin_lock(&glob->lru_lock);
-	spin_lock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, &release->bos, head) {
 		bo = entry->bo;
 		qbo = to_qxl_bo(bo);
 
-		if (!entry->bo->sync_obj)
-			entry->bo->sync_obj = &qbo->fence;
-
-		qxl_fence_add_release_locked(&qbo->fence, release->id);
-
+		reservation_object_add_shared_fence(bo->resv, &release->base);
 		ttm_bo_add_to_lru(bo);
 		__ttm_bo_unreserve(bo);
-		entry->reserved = false;
 	}
-	spin_unlock(&bdev->fence_lock);
 	spin_unlock(&glob->lru_lock);
 	ww_acquire_fini(&release->ticket);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 71a1baeac14e..abe945a04fd4 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -188,11 +188,13 @@ static void qxl_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 {
 	struct qxl_bo *qbo;
-	static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+	static struct ttm_place placements = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
+	};
 
 	if (!qxl_ttm_bo_is_qxl_bo(bo)) {
-		placement->fpfn = 0;
-		placement->lpfn = 0;
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
 		placement->num_placement = 1;
@@ -355,92 +357,6 @@ static int qxl_bo_move(struct ttm_buffer_object *bo,
 	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
 }
 
-
-static int qxl_sync_obj_wait(void *sync_obj,
-			     bool lazy, bool interruptible)
-{
-	struct qxl_fence *qfence = (struct qxl_fence *)sync_obj;
-	int count = 0, sc = 0;
-	struct qxl_bo *bo = container_of(qfence, struct qxl_bo, fence);
-
-	if (qfence->num_active_releases == 0)
-		return 0;
-
-retry:
-	if (sc == 0) {
-		if (bo->type == QXL_GEM_DOMAIN_SURFACE)
-			qxl_update_surface(qfence->qdev, bo);
-	} else if (sc >= 1) {
-		qxl_io_notify_oom(qfence->qdev);
-	}
-
-	sc++;
-
-	for (count = 0; count < 10; count++) {
-		bool ret;
-		ret = qxl_queue_garbage_collect(qfence->qdev, true);
-		if (ret == false)
-			break;
-
-		if (qfence->num_active_releases == 0)
-			return 0;
-	}
-
-	if (qfence->num_active_releases) {
-		bool have_drawable_releases = false;
-		void **slot;
-		struct radix_tree_iter iter;
-		int release_id;
-
-		radix_tree_for_each_slot(slot, &qfence->tree, &iter, 0) {
-			struct qxl_release *release;
-
-			release_id = iter.index;
-			release = qxl_release_from_id_locked(qfence->qdev, release_id);
-			if (release == NULL)
-				continue;
-
-			if (release->type == QXL_RELEASE_DRAWABLE)
-				have_drawable_releases = true;
-		}
-
-		qxl_queue_garbage_collect(qfence->qdev, true);
-
-		if (have_drawable_releases || sc < 4) {
-			if (sc > 2)
-				/* back off */
-				usleep_range(500, 1000);
-			if (have_drawable_releases && sc > 300) {
-				WARN(1, "sync obj %d still has outstanding releases %d %d %d %ld %d\n", sc, bo->surface_id, bo->is_primary, bo->pin_count, (unsigned long)bo->gem_base.size, qfence->num_active_releases);
-				return -EBUSY;
-			}
-			goto retry;
-		}
-	}
-	return 0;
-}
-
-static int qxl_sync_obj_flush(void *sync_obj)
-{
-	return 0;
-}
-
-static void qxl_sync_obj_unref(void **sync_obj)
-{
-	*sync_obj = NULL;
-}
-
-static void *qxl_sync_obj_ref(void *sync_obj)
-{
-	return sync_obj;
-}
-
-static bool qxl_sync_obj_signaled(void *sync_obj)
-{
-	struct qxl_fence *qfence = (struct qxl_fence *)sync_obj;
-	return (qfence->num_active_releases == 0);
-}
-
 static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
 			       struct ttm_mem_reg *new_mem)
 {
@@ -467,16 +383,9 @@ static struct ttm_bo_driver qxl_bo_driver = {
 	.verify_access = &qxl_verify_access,
 	.io_mem_reserve = &qxl_ttm_io_mem_reserve,
 	.io_mem_free = &qxl_ttm_io_mem_free,
-	.sync_obj_signaled = &qxl_sync_obj_signaled,
-	.sync_obj_wait = &qxl_sync_obj_wait,
-	.sync_obj_flush = &qxl_sync_obj_flush,
-	.sync_obj_unref = &qxl_sync_obj_unref,
-	.sync_obj_ref = &qxl_sync_obj_ref,
 	.move_notify = &qxl_bo_move_notify,
 };
 
-
-
 int qxl_ttm_init(struct qxl_device *qdev)
 {
 	int r;
diff --git a/drivers/gpu/drm/r128/r128_cce.c b/drivers/gpu/drm/r128/r128_cce.c
index 59459fe4e8c5..2c45ac9c1dc3 100644
--- a/drivers/gpu/drm/r128/r128_cce.c
+++ b/drivers/gpu/drm/r128/r128_cce.c
@@ -452,7 +452,7 @@ static int r128_do_init_cce(struct drm_device *dev, drm_r128_init_t *init)
 	dev_priv->span_pitch_offset_c = (((dev_priv->depth_pitch / 8) << 21) |
 					 (dev_priv->span_offset >> 5));
 
-	dev_priv->sarea = drm_getsarea(dev);
+	dev_priv->sarea = drm_legacy_getsarea(dev);
 	if (!dev_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		dev->dev_private = (void *)dev_priv;
@@ -460,21 +460,21 @@ static int r128_do_init_cce(struct drm_device *dev, drm_r128_init_t *init)
 		return -EINVAL;
 	}
 
-	dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset);
+	dev_priv->mmio = drm_legacy_findmap(dev, init->mmio_offset);
 	if (!dev_priv->mmio) {
 		DRM_ERROR("could not find mmio region!\n");
 		dev->dev_private = (void *)dev_priv;
 		r128_do_cleanup_cce(dev);
 		return -EINVAL;
 	}
-	dev_priv->cce_ring = drm_core_findmap(dev, init->ring_offset);
+	dev_priv->cce_ring = drm_legacy_findmap(dev, init->ring_offset);
 	if (!dev_priv->cce_ring) {
 		DRM_ERROR("could not find cce ring region!\n");
 		dev->dev_private = (void *)dev_priv;
 		r128_do_cleanup_cce(dev);
 		return -EINVAL;
 	}
-	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
+	dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
 	if (!dev_priv->ring_rptr) {
 		DRM_ERROR("could not find ring read pointer!\n");
 		dev->dev_private = (void *)dev_priv;
@@ -482,7 +482,7 @@ static int r128_do_init_cce(struct drm_device *dev, drm_r128_init_t *init)
 		return -EINVAL;
 	}
 	dev->agp_buffer_token = init->buffers_offset;
-	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
+	dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
 	if (!dev->agp_buffer_map) {
 		DRM_ERROR("could not find dma buffer region!\n");
 		dev->dev_private = (void *)dev_priv;
@@ -492,7 +492,7 @@ static int r128_do_init_cce(struct drm_device *dev, drm_r128_init_t *init)
 
 	if (!dev_priv->is_pci) {
 		dev_priv->agp_textures =
-		    drm_core_findmap(dev, init->agp_textures_offset);
+		    drm_legacy_findmap(dev, init->agp_textures_offset);
 		if (!dev_priv->agp_textures) {
 			DRM_ERROR("could not find agp texture region!\n");
 			dev->dev_private = (void *)dev_priv;
@@ -507,9 +507,9 @@ static int r128_do_init_cce(struct drm_device *dev, drm_r128_init_t *init)
 
 #if __OS_HAS_AGP
 	if (!dev_priv->is_pci) {
-		drm_core_ioremap_wc(dev_priv->cce_ring, dev);
-		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
-		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
+		drm_legacy_ioremap_wc(dev_priv->cce_ring, dev);
+		drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
+		drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
 		if (!dev_priv->cce_ring->handle ||
 		    !dev_priv->ring_rptr->handle ||
 		    !dev->agp_buffer_map->handle) {
@@ -603,11 +603,11 @@ int r128_do_cleanup_cce(struct drm_device *dev)
 #if __OS_HAS_AGP
 		if (!dev_priv->is_pci) {
 			if (dev_priv->cce_ring != NULL)
-				drm_core_ioremapfree(dev_priv->cce_ring, dev);
+				drm_legacy_ioremapfree(dev_priv->cce_ring, dev);
 			if (dev_priv->ring_rptr != NULL)
-				drm_core_ioremapfree(dev_priv->ring_rptr, dev);
+				drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
 			if (dev->agp_buffer_map != NULL) {
-				drm_core_ioremapfree(dev->agp_buffer_map, dev);
+				drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
 				dev->agp_buffer_map = NULL;
 			}
 		} else
diff --git a/drivers/gpu/drm/r128/r128_drv.c b/drivers/gpu/drm/r128/r128_drv.c
index 5bd307cd8da1..4a59370eb580 100644
--- a/drivers/gpu/drm/r128/r128_drv.c
+++ b/drivers/gpu/drm/r128/r128_drv.c
@@ -62,6 +62,7 @@ static struct drm_driver driver = {
 	.load = r128_driver_load,
 	.preclose = r128_driver_preclose,
 	.lastclose = r128_driver_lastclose,
+	.set_busid = drm_pci_set_busid,
 	.get_vblank_counter = r128_get_vblank_counter,
 	.enable_vblank = r128_enable_vblank,
 	.disable_vblank = r128_disable_vblank,
diff --git a/drivers/gpu/drm/r128/r128_drv.h b/drivers/gpu/drm/r128/r128_drv.h
index 5bf3f5ff805d..723e5d6f10a4 100644
--- a/drivers/gpu/drm/r128/r128_drv.h
+++ b/drivers/gpu/drm/r128/r128_drv.h
@@ -35,6 +35,9 @@
 #ifndef __R128_DRV_H__
 #define __R128_DRV_H__
 
+#include <drm/ati_pcigart.h>
+#include <drm/drm_legacy.h>
+
 /* General customization:
  */
 #define DRIVER_AUTHOR		"Gareth Hughes, VA Linux Systems Inc."
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index f77b7135ee4c..7d7aed5357f0 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -60,7 +60,7 @@ radeon-y := radeon_drv.o
 
 # add UMS driver
 radeon-$(CONFIG_DRM_RADEON_UMS)+= radeon_cp.o radeon_state.o radeon_mem.o \
-	radeon_irq.o r300_cmdbuf.o r600_cp.o r600_blit.o
+	radeon_irq.o r300_cmdbuf.o r600_cp.o r600_blit.o drm_buffer.o
 
 # add KMS driver
 radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
@@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
-	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o
+	ci_dpm.o dce6_afmt.o radeon_vm.o radeon_ucode.o radeon_ib.o radeon_mn.o
 
 # add async DMA block
 radeon-y += \
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index fa9565957f9d..0d761f73a7fa 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3959,18 +3959,19 @@ bool cik_semaphore_ring_emit(struct radeon_device *rdev,
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the CP DMA engine (CIK+).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int cik_copy_cpdma(struct radeon_device *rdev,
-		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_gpu_pages,
-		   struct radeon_fence **fence)
+struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
+				    uint64_t src_offset, uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.blit_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes, control;
@@ -3980,7 +3981,7 @@ int cik_copy_cpdma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
@@ -3989,10 +3990,10 @@ int cik_copy_cpdma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -4014,17 +4015,17 @@ int cik_copy_cpdma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
 
 /*
@@ -8254,8 +8255,10 @@ restart_ih:
 	}
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
-	if (queue_reset)
-		schedule_work(&rdev->reset_work);
+	if (queue_reset) {
+		rdev->needs_reset = true;
+		wake_up_all(&rdev->fence_queue);
+	}
 	if (queue_thermal)
 		schedule_work(&rdev->pm.dpm.thermal.work);
 	rdev->ih.rptr = rptr;
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c
index 192278bc993c..c01a6100c318 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -537,18 +537,19 @@ void cik_sdma_fini(struct radeon_device *rdev)
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (CIK).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int cik_copy_dma(struct radeon_device *rdev,
-		 uint64_t src_offset, uint64_t dst_offset,
-		 unsigned num_gpu_pages,
-		 struct radeon_fence **fence)
+struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
+				  uint64_t src_offset, uint64_t dst_offset,
+				  unsigned num_gpu_pages,
+				  struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes;
@@ -558,7 +559,7 @@ int cik_copy_dma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
@@ -567,10 +568,10 @@ int cik_copy_dma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -589,17 +590,17 @@ int cik_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
 
 /**
diff --git a/drivers/gpu/drm/drm_buffer.c b/drivers/gpu/drm/radeon/drm_buffer.c
index 86a4a4a60afc..f4e0f3a3d7b1 100644
--- a/drivers/gpu/drm/drm_buffer.c
+++ b/drivers/gpu/drm/radeon/drm_buffer.c
@@ -33,7 +33,7 @@
  */
 
 #include <linux/export.h>
-#include <drm/drm_buffer.h>
+#include "drm_buffer.h"
 
 /**
  * Allocate the drm buffer object.
@@ -86,7 +86,6 @@ error_out:
 	kfree(*buf);
 	return -ENOMEM;
 }
-EXPORT_SYMBOL(drm_buffer_alloc);
 
 /**
  * Copy the user data to the begin of the buffer and reset the processing
@@ -123,7 +122,6 @@ int drm_buffer_copy_from_user(struct drm_buffer *buf,
 	buf->iterator = 0;
 	return 0;
 }
-EXPORT_SYMBOL(drm_buffer_copy_from_user);
 
 /**
  * Free the drm buffer object
@@ -141,7 +139,6 @@ void drm_buffer_free(struct drm_buffer *buf)
 		kfree(buf);
 	}
 }
-EXPORT_SYMBOL(drm_buffer_free);
 
 /**
  * Read an object from buffer that may be split to multiple parts. If object
@@ -178,4 +175,3 @@ void *drm_buffer_read_object(struct drm_buffer *buf,
 	drm_buffer_advance(buf, objsize);
 	return obj;
 }
-EXPORT_SYMBOL(drm_buffer_read_object);
diff --git a/drivers/gpu/drm/radeon/drm_buffer.h b/drivers/gpu/drm/radeon/drm_buffer.h
new file mode 100644
index 000000000000..c80d3a340b94
--- /dev/null
+++ b/drivers/gpu/drm/radeon/drm_buffer.h
@@ -0,0 +1,148 @@
+/**************************************************************************
+ *
+ * Copyright 2010 Pauli Nieminen.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ **************************************************************************/
+/*
+ * Multipart buffer for coping data which is larger than the page size.
+ *
+ * Authors:
+ * Pauli Nieminen <suokkos-at-gmail-dot-com>
+ */
+
+#ifndef _DRM_BUFFER_H_
+#define _DRM_BUFFER_H_
+
+#include <drm/drmP.h>
+
+struct drm_buffer {
+	int iterator;
+	int size;
+	char *data[];
+};
+
+
+/**
+ * Return the index of page that buffer is currently pointing at.
+ */
+static inline int drm_buffer_page(struct drm_buffer *buf)
+{
+	return buf->iterator / PAGE_SIZE;
+}
+/**
+ * Return the index of the current byte in the page
+ */
+static inline int drm_buffer_index(struct drm_buffer *buf)
+{
+	return buf->iterator & (PAGE_SIZE - 1);
+}
+/**
+ * Return number of bytes that is left to process
+ */
+static inline int drm_buffer_unprocessed(struct drm_buffer *buf)
+{
+	return buf->size - buf->iterator;
+}
+
+/**
+ * Advance the buffer iterator number of bytes that is given.
+ */
+static inline void drm_buffer_advance(struct drm_buffer *buf, int bytes)
+{
+	buf->iterator += bytes;
+}
+
+/**
+ * Allocate the drm buffer object.
+ *
+ *   buf: A pointer to a pointer where the object is stored.
+ *   size: The number of bytes to allocate.
+ */
+extern int drm_buffer_alloc(struct drm_buffer **buf, int size);
+
+/**
+ * Copy the user data to the begin of the buffer and reset the processing
+ * iterator.
+ *
+ *   user_data: A pointer the data that is copied to the buffer.
+ *   size: The Number of bytes to copy.
+ */
+extern int drm_buffer_copy_from_user(struct drm_buffer *buf,
+		void __user *user_data, int size);
+
+/**
+ * Free the drm buffer object
+ */
+extern void drm_buffer_free(struct drm_buffer *buf);
+
+/**
+ * Read an object from buffer that may be split to multiple parts. If object
+ * is not split function just returns the pointer to object in buffer. But in
+ * case of split object data is copied to given stack object that is suplied
+ * by caller.
+ *
+ * The processing location of the buffer is also advanced to the next byte
+ * after the object.
+ *
+ *   objsize: The size of the objet in bytes.
+ *   stack_obj: A pointer to a memory location where object can be copied.
+ */
+extern void *drm_buffer_read_object(struct drm_buffer *buf,
+		int objsize, void *stack_obj);
+
+/**
+ * Returns the pointer to the dword which is offset number of elements from the
+ * current processing location.
+ *
+ * Caller must make sure that dword is not split in the buffer. This
+ * requirement is easily met if all the sizes of objects in buffer are
+ * multiples of dword and PAGE_SIZE is multiple dword.
+ *
+ * Call to this function doesn't change the processing location.
+ *
+ *   offset: The index of the dword relative to the internat iterator.
+ */
+static inline void *drm_buffer_pointer_to_dword(struct drm_buffer *buffer,
+		int offset)
+{
+	int iter = buffer->iterator + offset * 4;
+	return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)];
+}
+/**
+ * Returns the pointer to the dword which is offset number of elements from
+ * the current processing location.
+ *
+ * Call to this function doesn't change the processing location.
+ *
+ *   offset: The index of the byte relative to the internat iterator.
+ */
+static inline void *drm_buffer_pointer_to_byte(struct drm_buffer *buffer,
+		int offset)
+{
+	int iter = buffer->iterator + offset;
+	return &buffer->data[iter / PAGE_SIZE][iter & (PAGE_SIZE - 1)];
+}
+
+#endif
diff --git a/drivers/gpu/drm/radeon/evergreen_dma.c b/drivers/gpu/drm/radeon/evergreen_dma.c
index afaba388c36d..946f37d0b469 100644
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@@ -104,12 +104,14 @@ void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int evergreen_copy_dma(struct radeon_device *rdev,
-		       uint64_t src_offset, uint64_t dst_offset,
-		       unsigned num_gpu_pages,
-		       struct radeon_fence **fence)
+struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
+					uint64_t src_offset,
+					uint64_t dst_offset,
+					unsigned num_gpu_pages,
+					struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
@@ -119,7 +121,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
@@ -128,10 +130,10 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -148,17 +150,17 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 4c5ec44ff328..c6b486f888d5 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -855,13 +855,14 @@ bool r100_semaphore_ring_emit(struct radeon_device *rdev,
 	return false;
 }
 
-int r100_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset,
-		   uint64_t dst_offset,
-		   unsigned num_gpu_pages,
-		   struct radeon_fence **fence)
+struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
+				    uint64_t src_offset,
+				    uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_fence *fence;
 	uint32_t cur_pages;
 	uint32_t stride_bytes = RADEON_GPU_PAGE_SIZE;
 	uint32_t pitch;
@@ -882,7 +883,7 @@ int r100_copy_blit(struct radeon_device *rdev,
 	r = radeon_ring_lock(rdev, ring, ndw);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d) asking for %u dw.\n", r, ndw);
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
 	}
 	while (num_gpu_pages > 0) {
 		cur_pages = num_gpu_pages;
@@ -922,11 +923,13 @@ int r100_copy_blit(struct radeon_device *rdev,
 			  RADEON_WAIT_2D_IDLECLEAN |
 			  RADEON_WAIT_HOST_IDLECLEAN |
 			  RADEON_WAIT_DMA_GUI_IDLE);
-	if (fence) {
-		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
+	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return ERR_PTR(r);
 	}
 	radeon_ring_unlock_commit(rdev, ring, false);
-	return r;
+	return fence;
 }
 
 static int r100_cp_wait_for_idle(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c
index 67780374a652..732d4938aab7 100644
--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@@ -80,13 +80,14 @@ static int r200_get_vtx_size_0(uint32_t vtx_fmt_0)
 	return vtx_size;
 }
 
-int r200_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset,
-		  uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		  struct radeon_fence **fence)
+struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
+				   uint64_t src_offset,
+				   uint64_t dst_offset,
+				   unsigned num_gpu_pages,
+				   struct reservation_object *resv)
 {
 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_fence *fence;
 	uint32_t size;
 	uint32_t cur_size;
 	int i, num_loops;
@@ -98,7 +99,7 @@ int r200_copy_dma(struct radeon_device *rdev,
 	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 64);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 	/* Must wait for 2D idle & clean before DMA or hangs might happen */
 	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
@@ -118,11 +119,13 @@ int r200_copy_dma(struct radeon_device *rdev,
 	}
 	radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0));
 	radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE);
-	if (fence) {
-		r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX);
+	r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+	if (r) {
+		radeon_ring_unlock_undo(rdev, ring);
+		return ERR_PTR(r);
 	}
 	radeon_ring_unlock_commit(rdev, ring, false);
-	return r;
+	return fence;
 }
 
 
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index 84b1d5367a11..9418e388b045 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -34,10 +34,10 @@
  */
 
 #include <drm/drmP.h>
-#include <drm/drm_buffer.h>
 #include <drm/radeon_drm.h>
 #include "radeon_drv.h"
 #include "r300_reg.h"
+#include "drm_buffer.h"
 
 #include <asm/unaligned.h>
 
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 3cfb50056f7a..25f367ac4637 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev)
 
 int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 {
+	unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0;
+	int r;
+
+	/* bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~(
+		 UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK));
+
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL,
+			 ~UPLL_BYPASS_CNTL);
+
+	if (!vclk || !dclk) {
+		/* keep the Bypass mode, put PLL to sleep */
+		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+		return 0;
+	}
+
+	if (rdev->clock.spll.reference_freq == 10000)
+		ref_div = 34;
+	else
+		ref_div = 4;
+
+	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
+					  ref_div + 1, 0xFFF, 2, 30, ~0,
+					  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
+
+	if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780)
+		fb_div >>= 1;
+	else
+		fb_div |= 1;
+
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
+        if (r)
+                return r;
+
+	/* assert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* For RS780 we have to choose ref clk */
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK,
+			 ~UPLL_REFCLK_SRC_SEL_MASK);
+
+	/* set the required fb, ref and post divder values */
+	WREG32_P(CG_UPLL_FUNC_CNTL,
+		 UPLL_FB_DIV(fb_div) |
+		 UPLL_REF_DIV(ref_div),
+		 ~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK));
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 UPLL_SW_HILEN(vclk_div >> 1) |
+		 UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
+		 UPLL_SW_HILEN2(dclk_div >> 1) |
+		 UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) |
+		 UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK,
+		 ~UPLL_SW_MASK);
+
+	/* give the PLL some time to settle */
+	mdelay(15);
+
+	/* deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* deassert BYPASS EN */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL);
+
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
+	if (r)
+		return r;
+
+	/* switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
 	return 0;
 }
 
@@ -992,6 +1080,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
 	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
@@ -1042,6 +1132,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev)
 	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
 	radeon_gart_table_vram_unpin(rdev);
 }
 
@@ -2792,12 +2884,13 @@ bool r600_semaphore_ring_emit(struct radeon_device *rdev,
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int r600_copy_cpdma(struct radeon_device *rdev,
-		    uint64_t src_offset, uint64_t dst_offset,
-		    unsigned num_gpu_pages,
-		    struct radeon_fence **fence)
+struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
+				     uint64_t src_offset, uint64_t dst_offset,
+				     unsigned num_gpu_pages,
+				     struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.blit_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes, tmp;
@@ -2807,7 +2900,7 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
@@ -2816,10 +2909,10 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
@@ -2846,17 +2939,17 @@ int r600_copy_cpdma(struct radeon_device *rdev,
 	radeon_ring_write(ring, (WAIT_UNTIL - PACKET3_SET_CONFIG_REG_OFFSET) >> 2);
 	radeon_ring_write(ring, WAIT_CP_DMA_IDLE_bit);
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
 
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
@@ -2907,6 +3000,18 @@ static int r600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	if (rdev->has_uvd) {
+		r = uvd_v1_0_resume(rdev);
+		if (!r) {
+			r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
+			if (r) {
+				dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
+			}
+		}
+		if (r)
+			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+	}
+
 	/* Enable IRQ */
 	if (!rdev->irq.installed) {
 		r = radeon_irq_kms_init(rdev);
@@ -2935,6 +3040,18 @@ static int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	if (rdev->has_uvd) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		if (ring->ring_size) {
+			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+					     RADEON_CP_PACKET2);
+			if (!r)
+				r = uvd_v1_0_init(rdev);
+			if (r)
+				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+		}
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -2994,6 +3111,10 @@ int r600_suspend(struct radeon_device *rdev)
 	radeon_pm_suspend(rdev);
 	r600_audio_fini(rdev);
 	r600_cp_stop(rdev);
+	if (rdev->has_uvd) {
+		uvd_v1_0_fini(rdev);
+		radeon_uvd_suspend(rdev);
+	}
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
@@ -3073,6 +3194,14 @@ int r600_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);
 
+	if (rdev->has_uvd) {
+		r = radeon_uvd_init(rdev);
+		if (!r) {
+			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+			r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
+		}
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3102,6 +3231,10 @@ void r600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r600_cp_fini(rdev);
 	r600_irq_fini(rdev);
+	if (rdev->has_uvd) {
+		uvd_v1_0_fini(rdev);
+		radeon_uvd_fini(rdev);
+	}
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index 8c9b7e26533c..09e3f39925fa 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -1949,15 +1949,15 @@ int r600_do_cleanup_cp(struct drm_device *dev)
 #if __OS_HAS_AGP
 	if (dev_priv->flags & RADEON_IS_AGP) {
 		if (dev_priv->cp_ring != NULL) {
-			drm_core_ioremapfree(dev_priv->cp_ring, dev);
+			drm_legacy_ioremapfree(dev_priv->cp_ring, dev);
 			dev_priv->cp_ring = NULL;
 		}
 		if (dev_priv->ring_rptr != NULL) {
-			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
+			drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
 			dev_priv->ring_rptr = NULL;
 		}
 		if (dev->agp_buffer_map != NULL) {
-			drm_core_ioremapfree(dev->agp_buffer_map, dev);
+			drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
 			dev->agp_buffer_map = NULL;
 		}
 	} else
@@ -1968,7 +1968,7 @@ int r600_do_cleanup_cp(struct drm_device *dev)
 			r600_page_table_cleanup(dev, &dev_priv->gart_info);
 
 		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB) {
-			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
+			drm_legacy_ioremapfree(&dev_priv->gart_info.mapping, dev);
 			dev_priv->gart_info.addr = NULL;
 		}
 	}
@@ -2052,27 +2052,27 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 	dev_priv->buffers_offset = init->buffers_offset;
 	dev_priv->gart_textures_offset = init->gart_textures_offset;
 
-	master_priv->sarea = drm_getsarea(dev);
+	master_priv->sarea = drm_legacy_getsarea(dev);
 	if (!master_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		r600_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
 
-	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
+	dev_priv->cp_ring = drm_legacy_findmap(dev, init->ring_offset);
 	if (!dev_priv->cp_ring) {
 		DRM_ERROR("could not find cp ring region!\n");
 		r600_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
-	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
+	dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
 	if (!dev_priv->ring_rptr) {
 		DRM_ERROR("could not find ring read pointer!\n");
 		r600_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
 	dev->agp_buffer_token = init->buffers_offset;
-	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
+	dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
 	if (!dev->agp_buffer_map) {
 		DRM_ERROR("could not find dma buffer region!\n");
 		r600_do_cleanup_cp(dev);
@@ -2081,7 +2081,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 
 	if (init->gart_textures_offset) {
 		dev_priv->gart_textures =
-		    drm_core_findmap(dev, init->gart_textures_offset);
+		    drm_legacy_findmap(dev, init->gart_textures_offset);
 		if (!dev_priv->gart_textures) {
 			DRM_ERROR("could not find GART texture region!\n");
 			r600_do_cleanup_cp(dev);
@@ -2092,9 +2092,9 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 #if __OS_HAS_AGP
 	/* XXX */
 	if (dev_priv->flags & RADEON_IS_AGP) {
-		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
-		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
-		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
+		drm_legacy_ioremap_wc(dev_priv->cp_ring, dev);
+		drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
+		drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
 		if (!dev_priv->cp_ring->handle ||
 		    !dev_priv->ring_rptr->handle ||
 		    !dev->agp_buffer_map->handle) {
@@ -2235,7 +2235,7 @@ int r600_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 		dev_priv->gart_info.mapping.size =
 			dev_priv->gart_info.table_size;
 
-		drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
+		drm_legacy_ioremap_wc(&dev_priv->gart_info.mapping, dev);
 		if (!dev_priv->gart_info.mapping.handle) {
 			DRM_ERROR("ioremap failed.\n");
 			r600_do_cleanup_cp(dev);
diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c
index 51fd98553eaf..fc54224ce87b 100644
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@@ -436,18 +436,19 @@ void r600_dma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (r6xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int r600_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		  struct radeon_fence **fence)
+struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
+				   uint64_t src_offset, uint64_t dst_offset,
+				   unsigned num_gpu_pages,
+				   struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
@@ -457,7 +458,7 @@ int r600_copy_dma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
@@ -466,10 +467,10 @@ int r600_copy_dma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -486,15 +487,15 @@ int r600_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 0c4a7d8d93e0..671b48032a3d 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -330,11 +330,12 @@
 #define	HDP_TILING_CONFIG				0x2F3C
 #define HDP_DEBUG1                                      0x2F34
 
+#define MC_CONFIG					0x2000
 #define MC_VM_AGP_TOP					0x2184
 #define MC_VM_AGP_BOT					0x2188
 #define	MC_VM_AGP_BASE					0x218C
 #define MC_VM_FB_LOCATION				0x2180
-#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
+#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL			0x2124
 #define 	ENABLE_L1_TLB					(1 << 0)
 #define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
 #define		ENABLE_L1_STRICT_ORDERING			(1 << 2)
@@ -354,12 +355,14 @@
 #define		EFFECTIVE_L1_QUEUE_SIZE(x)			(((x) & 7) << 15)
 #define		EFFECTIVE_L1_QUEUE_SIZE_MASK			0x00038000
 #define		EFFECTIVE_L1_QUEUE_SIZE_SHIFT			15
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
 #define MC_VM_L1_TLB_MCD_RD_B_CNTL			0x21A0
 #define MC_VM_L1_TLB_MCB_RD_GFX_CNTL			0x21FC
 #define MC_VM_L1_TLB_MCB_RD_HDP_CNTL			0x2204
 #define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL			0x2208
 #define MC_VM_L1_TLB_MCB_RD_SEM_CNTL			0x220C
 #define	MC_VM_L1_TLB_MCB_RD_SYS_CNTL			0x2200
+#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL			0x212c
 #define MC_VM_L1_TLB_MCD_WR_A_CNTL			0x21A4
 #define MC_VM_L1_TLB_MCD_WR_B_CNTL			0x21A8
 #define MC_VM_L1_TLB_MCB_WR_GFX_CNTL			0x2210
@@ -373,6 +376,8 @@
 #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2194
 #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x2198
 
+#define RS_DQ_RD_RET_CONF				0x2348
+
 #define	PA_CL_ENHANCE					0x8A14
 #define		CLIP_VTX_REORDER_ENA				(1 << 0)
 #define		NUM_CLIP_SEQ(x)					((x) << 1)
@@ -1483,6 +1488,7 @@
 #define UVD_CGC_GATE					0xf4a8
 #define UVD_LMI_CTRL2					0xf4f4
 #define UVD_MASTINT_EN					0xf500
+#define UVD_FW_START					0xf51C
 #define UVD_LMI_ADDR_EXT				0xf594
 #define UVD_LMI_CTRL					0xf598
 #define UVD_LMI_SWAP_CNTL				0xf5b4
@@ -1495,6 +1501,13 @@
 #define UVD_MPC_SET_MUX					0xf5f4
 #define UVD_MPC_SET_ALU					0xf5f8
 
+#define UVD_VCPU_CACHE_OFFSET0				0xf608
+#define UVD_VCPU_CACHE_SIZE0				0xf60c
+#define UVD_VCPU_CACHE_OFFSET1				0xf610
+#define UVD_VCPU_CACHE_SIZE1				0xf614
+#define UVD_VCPU_CACHE_OFFSET2				0xf618
+#define UVD_VCPU_CACHE_SIZE2				0xf61c
+
 #define UVD_VCPU_CNTL					0xf660
 #define UVD_SOFT_RESET					0xf680
 #define		RBC_SOFT_RESET					(1<<0)
@@ -1524,9 +1537,35 @@
 
 #define UVD_CONTEXT_ID					0xf6f4
 
+/* rs780 only */
+#define	GFX_MACRO_BYPASS_CNTL				0x30c0
+#define		SPLL_BYPASS_CNTL			(1 << 0)
+#define		UPLL_BYPASS_CNTL			(1 << 1)
+
+#define CG_UPLL_FUNC_CNTL				0x7e0
+#	define UPLL_RESET_MASK				0x00000001
+#	define UPLL_SLEEP_MASK				0x00000002
+#	define UPLL_BYPASS_EN_MASK			0x00000004
 #	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_FB_DIV(x)				((x) << 4)
+#	define UPLL_FB_DIV_MASK				0x0000FFF0
+#	define UPLL_REF_DIV(x)				((x) << 16)
+#	define UPLL_REF_DIV_MASK			0x003F0000
+#	define UPLL_REFCLK_SRC_SEL_MASK			0x20000000
 #	define UPLL_CTLACK_MASK				0x40000000
 #	define UPLL_CTLACK2_MASK			0x80000000
+#define CG_UPLL_FUNC_CNTL_2				0x7e4
+#	define UPLL_SW_HILEN(x)				((x) << 0)
+#	define UPLL_SW_LOLEN(x)				((x) << 4)
+#	define UPLL_SW_HILEN2(x)			((x) << 8)
+#	define UPLL_SW_LOLEN2(x)			((x) << 12)
+#	define UPLL_DIVEN_MASK				0x00010000
+#	define UPLL_DIVEN2_MASK				0x00020000
+#	define UPLL_SW_MASK				0x0003FFFF
+#	define VCLK_SRC_SEL(x)				((x) << 20)
+#	define VCLK_SRC_SEL_MASK			0x01F00000
+#	define DCLK_SRC_SEL(x)				((x) << 25)
+#	define DCLK_SRC_SEL_MASK			0x3E000000
 
 /*
  * PM4
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5f05b4c84338..82b0e11ade89 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -65,6 +65,8 @@
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/interval_tree.h>
+#include <linux/hashtable.h>
+#include <linux/fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
@@ -119,9 +121,6 @@ extern int radeon_bapm;
 #define RADEONFB_CONN_LIMIT			4
 #define RADEON_BIOS_NUM_SCRATCH			8
 
-/* fence seq are set to this number when signaled */
-#define RADEON_FENCE_SIGNALED_SEQ		0LL
-
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
 #define RADEON_RING_TYPE_GFX_INDEX		0
@@ -349,28 +348,32 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
  * Fences.
  */
 struct radeon_fence_driver {
+	struct radeon_device		*rdev;
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
 	/* sync_seq is protected by ring emission lock */
 	uint64_t			sync_seq[RADEON_NUM_RINGS];
 	atomic64_t			last_seq;
-	bool				initialized;
+	bool				initialized, delayed_irq;
+	struct delayed_work		lockup_work;
 };
 
 struct radeon_fence {
+	struct fence base;
+
 	struct radeon_device		*rdev;
-	struct kref			kref;
-	/* protected by radeon_fence.lock */
 	uint64_t			seq;
 	/* RB, DMA, etc. */
 	unsigned			ring;
+
+	wait_queue_t			fence_wake;
 };
 
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
-void radeon_fence_driver_force_completion(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
@@ -468,7 +471,7 @@ struct radeon_bo {
 	struct list_head		list;
 	/* Protected by tbo.reserved */
 	u32				initial_domain;
-	u32				placements[3];
+	struct ttm_place		placements[3];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
 	struct ttm_bo_kmap_obj		kmap;
@@ -488,6 +491,9 @@ struct radeon_bo {
 
 	struct ttm_bo_kmap_obj		dma_buf_vmap;
 	pid_t				pid;
+
+	struct radeon_mn		*mn;
+	struct interval_tree_node	mn_it;
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)
 
@@ -579,8 +585,11 @@ bool radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
 				  struct radeon_semaphore *semaphore);
 bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
 				struct radeon_semaphore *semaphore);
-void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
-			      struct radeon_fence *fence);
+void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
+				 struct radeon_fence *fence);
+void radeon_semaphore_sync_resv(struct radeon_semaphore *semaphore,
+				struct reservation_object *resv,
+				bool shared);
 int radeon_semaphore_sync_rings(struct radeon_device *rdev,
 				struct radeon_semaphore *semaphore,
 				int waiting_ring);
@@ -779,6 +788,7 @@ struct radeon_irq {
 int radeon_irq_kms_init(struct radeon_device *rdev);
 void radeon_irq_kms_fini(struct radeon_device *rdev);
 void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring);
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring);
 void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc);
 void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc);
@@ -1641,7 +1651,8 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 			      uint32_t handle, struct radeon_fence **fence);
 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
 			       uint32_t handle, struct radeon_fence **fence);
-void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
+				       uint32_t allowed_domains);
 void radeon_uvd_free_handles(struct radeon_device *rdev,
 			     struct drm_file *filp);
 int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
@@ -1730,6 +1741,11 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 			   struct radeon_ring *cpB);
 void radeon_test_syncing(struct radeon_device *rdev);
 
+/*
+ * MMU Notifier
+ */
+int radeon_mn_register(struct radeon_bo *bo, unsigned long addr);
+void radeon_mn_unregister(struct radeon_bo *bo);
 
 /*
  * Debugfs
@@ -1844,24 +1860,24 @@ struct radeon_asic {
 	} display;
 	/* copy functions for bo handling */
 	struct {
-		int (*blit)(struct radeon_device *rdev,
-			    uint64_t src_offset,
-			    uint64_t dst_offset,
-			    unsigned num_gpu_pages,
-			    struct radeon_fence **fence);
+		struct radeon_fence *(*blit)(struct radeon_device *rdev,
+					     uint64_t src_offset,
+					     uint64_t dst_offset,
+					     unsigned num_gpu_pages,
+					     struct reservation_object *resv);
 		u32 blit_ring_index;
-		int (*dma)(struct radeon_device *rdev,
-			   uint64_t src_offset,
-			   uint64_t dst_offset,
-			   unsigned num_gpu_pages,
-			   struct radeon_fence **fence);
+		struct radeon_fence *(*dma)(struct radeon_device *rdev,
+					    uint64_t src_offset,
+					    uint64_t dst_offset,
+					    unsigned num_gpu_pages,
+					    struct reservation_object *resv);
 		u32 dma_ring_index;
 		/* method used for bo copy */
-		int (*copy)(struct radeon_device *rdev,
-			    uint64_t src_offset,
-			    uint64_t dst_offset,
-			    unsigned num_gpu_pages,
-			    struct radeon_fence **fence);
+		struct radeon_fence *(*copy)(struct radeon_device *rdev,
+					     uint64_t src_offset,
+					     uint64_t dst_offset,
+					     unsigned num_gpu_pages,
+					     struct reservation_object *resv);
 		/* ring used for bo copies */
 		u32 copy_ring_index;
 	} copy;
@@ -2143,6 +2159,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
 int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 			    struct drm_file *filp);
+int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp);
 int radeon_gem_pin_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
 int radeon_gem_unpin_ioctl(struct drm_device *dev, void *data,
@@ -2299,6 +2317,7 @@ struct radeon_device {
 	struct radeon_mman		mman;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
 	wait_queue_head_t		fence_queue;
+	unsigned			fence_context;
 	struct mutex			ring_lock;
 	struct radeon_ring		ring[RADEON_NUM_RINGS];
 	bool				ib_pool_ready;
@@ -2317,7 +2336,7 @@ struct radeon_device {
 	bool				need_dma32;
 	bool				accel_working;
 	bool				fastfb_working; /* IGP feature*/
-	bool				needs_reset;
+	bool				needs_reset, in_reset;
 	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
 	const struct firmware *me_fw;	/* all family ME firmware */
 	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
@@ -2338,7 +2357,6 @@ struct radeon_device {
 	struct radeon_mec mec;
 	struct work_struct hotplug_work;
 	struct work_struct audio_work;
-	struct work_struct reset_work;
 	int num_crtc; /* number of crtcs */
 	struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
 	bool has_uvd;
@@ -2375,6 +2393,9 @@ struct radeon_device {
 	/* tracking pinned memory */
 	u64 vram_pin_size;
 	u64 gart_pin_size;
+
+	struct mutex	mn_lock;
+	DECLARE_HASHTABLE(mn_hash, 7);
 };
 
 bool radeon_is_px(struct drm_device *dev);
@@ -2430,7 +2451,17 @@ void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v);
 /*
  * Cast helper
  */
-#define to_radeon_fence(p) ((struct radeon_fence *)(p))
+extern const struct fence_ops radeon_fence_ops;
+
+static inline struct radeon_fence *to_radeon_fence(struct fence *f)
+{
+	struct radeon_fence *__f = container_of(f, struct radeon_fence, base);
+
+	if (__f->base.ops == &radeon_fence_ops)
+		return __f;
+
+	return NULL;
+}
 
 /*
  * Registers read & write functions.
@@ -2750,18 +2781,25 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
-#if DRM_DEBUG_CODE == 0
+
+/**
+ * radeon_ring_write - write a value to the ring
+ *
+ * @ring: radeon_ring structure holding ring information
+ * @v: dword (dw) value to write
+ *
+ * Write a value to the requested ring buffer (all asics).
+ */
 static inline void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
 {
+	if (ring->count_dw <= 0)
+		DRM_ERROR("radeon: writing more dwords to the ring than expected!\n");
+
 	ring->ring[ring->wptr++] = v;
 	ring->wptr &= ring->ptr_mask;
 	ring->count_dw--;
 	ring->ring_free_dw--;
 }
-#else
-/* With debugging this is just too big to inline */
-void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
-#endif
 
 /*
  * ASICs macro.
@@ -2800,9 +2838,9 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_hdmi_setmode(rdev, e, m) (rdev)->asic->display.hdmi_setmode((e), (m))
 #define radeon_fence_ring_emit(rdev, r, fence) (rdev)->asic->ring[(r)]->emit_fence((rdev), (fence))
 #define radeon_semaphore_ring_emit(rdev, r, cp, semaphore, emit_wait) (rdev)->asic->ring[(r)]->emit_semaphore((rdev), (cp), (semaphore), (emit_wait))
-#define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (f))
-#define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (f))
-#define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (f))
+#define radeon_copy_blit(rdev, s, d, np, resv) (rdev)->asic->copy.blit((rdev), (s), (d), (np), (resv))
+#define radeon_copy_dma(rdev, s, d, np, resv) (rdev)->asic->copy.dma((rdev), (s), (d), (np), (resv))
+#define radeon_copy(rdev, s, d, np, resv) (rdev)->asic->copy.copy((rdev), (s), (d), (np), (resv))
 #define radeon_copy_blit_ring_index(rdev) (rdev)->asic->copy.blit_ring_index
 #define radeon_copy_dma_ring_index(rdev) (rdev)->asic->copy.dma_ring_index
 #define radeon_copy_ring_index(rdev) (rdev)->asic->copy.copy_ring_index
@@ -2876,6 +2914,10 @@ extern void radeon_legacy_set_clock_gating(struct radeon_device *rdev, int enabl
 extern void radeon_atom_set_clock_gating(struct radeon_device *rdev, int enable);
 extern void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain);
 extern bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo);
+extern int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
+				     uint32_t flags);
+extern bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm);
+extern bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm);
 extern void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 base);
 extern void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 extern int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index eeeeabe09758..d91f965e8219 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = {
 	},
 };
 
+static struct radeon_asic_ring rv6xx_uvd_ring = {
+	.ib_execute = &uvd_v1_0_ib_execute,
+	.emit_fence = &uvd_v1_0_fence_emit,
+	.emit_semaphore = &uvd_v1_0_semaphore_emit,
+	.cs_parse = &radeon_uvd_cs_parse,
+	.ring_test = &uvd_v1_0_ring_test,
+	.ib_test = &uvd_v1_0_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &uvd_v1_0_get_rptr,
+	.get_wptr = &uvd_v1_0_get_wptr,
+	.set_wptr = &uvd_v1_0_set_wptr,
+};
+
 static struct radeon_asic rv6xx_asic = {
 	.init = &r600_init,
 	.fini = &r600_fini,
@@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = {
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
 		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = {
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
 		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@@ -2298,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev)
 	case CHIP_RS780:
 	case CHIP_RS880:
 		rdev->asic = &rs780_asic;
-		rdev->has_uvd = true;
+		/* 760G/780V/880V don't have UVD */
+		if ((rdev->pdev->device == 0x9616)||
+		    (rdev->pdev->device == 0x9611)||
+		    (rdev->pdev->device == 0x9613)||
+		    (rdev->pdev->device == 0x9711)||
+		    (rdev->pdev->device == 0x9713))
+			rdev->has_uvd = false;
+		else
+			rdev->has_uvd = true;
 		break;
 	case CHIP_RV770:
 	case CHIP_RV730:
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 275a5dc01780..ca01bb8ea217 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -81,11 +81,11 @@ bool r100_semaphore_ring_emit(struct radeon_device *rdev,
 int r100_cs_parse(struct radeon_cs_parser *p);
 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
-int r100_copy_blit(struct radeon_device *rdev,
-		   uint64_t src_offset,
-		   uint64_t dst_offset,
-		   unsigned num_gpu_pages,
-		   struct radeon_fence **fence);
+struct radeon_fence *r100_copy_blit(struct radeon_device *rdev,
+				    uint64_t src_offset,
+				    uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv);
 int r100_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size);
@@ -153,11 +153,11 @@ void r100_ring_hdp_flush(struct radeon_device *rdev,
 /*
  * r200,rv250,rs300,rv280
  */
-extern int r200_copy_dma(struct radeon_device *rdev,
-			 uint64_t src_offset,
-			 uint64_t dst_offset,
-			 unsigned num_gpu_pages,
-			 struct radeon_fence **fence);
+struct radeon_fence *r200_copy_dma(struct radeon_device *rdev,
+				   uint64_t src_offset,
+				   uint64_t dst_offset,
+				   unsigned num_gpu_pages,
+				   struct reservation_object *resv);
 void r200_set_safe_registers(struct radeon_device *rdev);
 
 /*
@@ -341,12 +341,14 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
-int r600_copy_cpdma(struct radeon_device *rdev,
-		    uint64_t src_offset, uint64_t dst_offset,
-		    unsigned num_gpu_pages, struct radeon_fence **fence);
-int r600_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages, struct radeon_fence **fence);
+struct radeon_fence *r600_copy_cpdma(struct radeon_device *rdev,
+				     uint64_t src_offset, uint64_t dst_offset,
+				     unsigned num_gpu_pages,
+				     struct reservation_object *resv);
+struct radeon_fence *r600_copy_dma(struct radeon_device *rdev,
+				   uint64_t src_offset, uint64_t dst_offset,
+				   unsigned num_gpu_pages,
+				   struct reservation_object *resv);
 void r600_hpd_init(struct radeon_device *rdev);
 void r600_hpd_fini(struct radeon_device *rdev);
 bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -462,10 +464,10 @@ bool rv770_page_flip_pending(struct radeon_device *rdev, int crtc);
 void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
 void r700_cp_stop(struct radeon_device *rdev);
 void r700_cp_fini(struct radeon_device *rdev);
-int rv770_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		   struct radeon_fence **fence);
+struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
+				    uint64_t src_offset, uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv);
 u32 rv770_get_xclk(struct radeon_device *rdev);
 int rv770_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
 int rv770_get_temp(struct radeon_device *rdev);
@@ -536,10 +538,10 @@ void evergreen_dma_fence_ring_emit(struct radeon_device *rdev,
 				   struct radeon_fence *fence);
 void evergreen_dma_ring_ib_execute(struct radeon_device *rdev,
 				   struct radeon_ib *ib);
-int evergreen_copy_dma(struct radeon_device *rdev,
-		       uint64_t src_offset, uint64_t dst_offset,
-		       unsigned num_gpu_pages,
-		       struct radeon_fence **fence);
+struct radeon_fence *evergreen_copy_dma(struct radeon_device *rdev,
+					uint64_t src_offset, uint64_t dst_offset,
+					unsigned num_gpu_pages,
+					struct reservation_object *resv);
 void evergreen_hdmi_enable(struct drm_encoder *encoder, bool enable);
 void evergreen_hdmi_setmode(struct drm_encoder *encoder, struct drm_display_mode *mode);
 int evergreen_get_temp(struct radeon_device *rdev);
@@ -701,10 +703,10 @@ int si_vm_init(struct radeon_device *rdev);
 void si_vm_fini(struct radeon_device *rdev);
 void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm);
 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib);
-int si_copy_dma(struct radeon_device *rdev,
-		uint64_t src_offset, uint64_t dst_offset,
-		unsigned num_gpu_pages,
-		struct radeon_fence **fence);
+struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
+				 uint64_t src_offset, uint64_t dst_offset,
+				 unsigned num_gpu_pages,
+				 struct reservation_object *resv);
 
 void si_dma_vm_copy_pages(struct radeon_device *rdev,
 			  struct radeon_ib *ib,
@@ -760,14 +762,14 @@ bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
 				  struct radeon_semaphore *semaphore,
 				  bool emit_wait);
 void cik_sdma_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int cik_copy_dma(struct radeon_device *rdev,
-		 uint64_t src_offset, uint64_t dst_offset,
-		 unsigned num_gpu_pages,
-		 struct radeon_fence **fence);
-int cik_copy_cpdma(struct radeon_device *rdev,
-		   uint64_t src_offset, uint64_t dst_offset,
-		   unsigned num_gpu_pages,
-		   struct radeon_fence **fence);
+struct radeon_fence *cik_copy_dma(struct radeon_device *rdev,
+				  uint64_t src_offset, uint64_t dst_offset,
+				  unsigned num_gpu_pages,
+				  struct reservation_object *resv);
+struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
+				    uint64_t src_offset, uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv);
 int cik_sdma_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
@@ -883,6 +885,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
                            struct radeon_ring *ring);
 void uvd_v1_0_set_wptr(struct radeon_device *rdev,
                        struct radeon_ring *ring);
+int uvd_v1_0_resume(struct radeon_device *rdev);
 
 int uvd_v1_0_init(struct radeon_device *rdev);
 void uvd_v1_0_fini(struct radeon_device *rdev);
@@ -890,6 +893,8 @@ int uvd_v1_0_start(struct radeon_device *rdev);
 void uvd_v1_0_stop(struct radeon_device *rdev);
 
 int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void uvd_v1_0_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence);
 int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
 			     struct radeon_ring *ring,
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 69f5695bdab9..1e8855060fc7 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -45,33 +45,29 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
 	for (i = 0; i < n; i++) {
 		switch (flag) {
 		case RADEON_BENCHMARK_COPY_DMA:
-			r = radeon_copy_dma(rdev, saddr, daddr,
-					    size / RADEON_GPU_PAGE_SIZE,
-					    &fence);
+			fence = radeon_copy_dma(rdev, saddr, daddr,
+						size / RADEON_GPU_PAGE_SIZE,
+						NULL);
 			break;
 		case RADEON_BENCHMARK_COPY_BLIT:
-			r = radeon_copy_blit(rdev, saddr, daddr,
-					     size / RADEON_GPU_PAGE_SIZE,
-					     &fence);
+			fence = radeon_copy_blit(rdev, saddr, daddr,
+						 size / RADEON_GPU_PAGE_SIZE,
+						 NULL);
 			break;
 		default:
 			DRM_ERROR("Unknown copy method\n");
-			r = -EINVAL;
+			return -EINVAL;
 		}
-		if (r)
-			goto exit_do_move;
+		if (IS_ERR(fence))
+			return PTR_ERR(fence);
+
 		r = radeon_fence_wait(fence, false);
-		if (r)
-			goto exit_do_move;
 		radeon_fence_unref(&fence);
+		if (r)
+			return r;
 	}
 	end_jiffies = jiffies;
-	r = jiffies_to_msecs(end_jiffies - start_jiffies);
-
-exit_do_move:
-	if (fence)
-		radeon_fence_unref(&fence);
-	return r;
+	return jiffies_to_msecs(end_jiffies - start_jiffies);
 }
 
 
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index bb0d5c3a8311..ea134a7d51a5 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -1298,27 +1298,27 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 	dev_priv->buffers_offset = init->buffers_offset;
 	dev_priv->gart_textures_offset = init->gart_textures_offset;
 
-	master_priv->sarea = drm_getsarea(dev);
+	master_priv->sarea = drm_legacy_getsarea(dev);
 	if (!master_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		radeon_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
 
-	dev_priv->cp_ring = drm_core_findmap(dev, init->ring_offset);
+	dev_priv->cp_ring = drm_legacy_findmap(dev, init->ring_offset);
 	if (!dev_priv->cp_ring) {
 		DRM_ERROR("could not find cp ring region!\n");
 		radeon_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
-	dev_priv->ring_rptr = drm_core_findmap(dev, init->ring_rptr_offset);
+	dev_priv->ring_rptr = drm_legacy_findmap(dev, init->ring_rptr_offset);
 	if (!dev_priv->ring_rptr) {
 		DRM_ERROR("could not find ring read pointer!\n");
 		radeon_do_cleanup_cp(dev);
 		return -EINVAL;
 	}
 	dev->agp_buffer_token = init->buffers_offset;
-	dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset);
+	dev->agp_buffer_map = drm_legacy_findmap(dev, init->buffers_offset);
 	if (!dev->agp_buffer_map) {
 		DRM_ERROR("could not find dma buffer region!\n");
 		radeon_do_cleanup_cp(dev);
@@ -1327,7 +1327,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 
 	if (init->gart_textures_offset) {
 		dev_priv->gart_textures =
-		    drm_core_findmap(dev, init->gart_textures_offset);
+		    drm_legacy_findmap(dev, init->gart_textures_offset);
 		if (!dev_priv->gart_textures) {
 			DRM_ERROR("could not find GART texture region!\n");
 			radeon_do_cleanup_cp(dev);
@@ -1337,9 +1337,9 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 
 #if __OS_HAS_AGP
 	if (dev_priv->flags & RADEON_IS_AGP) {
-		drm_core_ioremap_wc(dev_priv->cp_ring, dev);
-		drm_core_ioremap_wc(dev_priv->ring_rptr, dev);
-		drm_core_ioremap_wc(dev->agp_buffer_map, dev);
+		drm_legacy_ioremap_wc(dev_priv->cp_ring, dev);
+		drm_legacy_ioremap_wc(dev_priv->ring_rptr, dev);
+		drm_legacy_ioremap_wc(dev->agp_buffer_map, dev);
 		if (!dev_priv->cp_ring->handle ||
 		    !dev_priv->ring_rptr->handle ||
 		    !dev->agp_buffer_map->handle) {
@@ -1475,7 +1475,7 @@ static int radeon_do_init_cp(struct drm_device *dev, drm_radeon_init_t *init,
 			dev_priv->gart_info.mapping.size =
 			    dev_priv->gart_info.table_size;
 
-			drm_core_ioremap_wc(&dev_priv->gart_info.mapping, dev);
+			drm_legacy_ioremap_wc(&dev_priv->gart_info.mapping, dev);
 			dev_priv->gart_info.addr =
 			    dev_priv->gart_info.mapping.handle;
 
@@ -1569,15 +1569,15 @@ static int radeon_do_cleanup_cp(struct drm_device * dev)
 #if __OS_HAS_AGP
 	if (dev_priv->flags & RADEON_IS_AGP) {
 		if (dev_priv->cp_ring != NULL) {
-			drm_core_ioremapfree(dev_priv->cp_ring, dev);
+			drm_legacy_ioremapfree(dev_priv->cp_ring, dev);
 			dev_priv->cp_ring = NULL;
 		}
 		if (dev_priv->ring_rptr != NULL) {
-			drm_core_ioremapfree(dev_priv->ring_rptr, dev);
+			drm_legacy_ioremapfree(dev_priv->ring_rptr, dev);
 			dev_priv->ring_rptr = NULL;
 		}
 		if (dev->agp_buffer_map != NULL) {
-			drm_core_ioremapfree(dev->agp_buffer_map, dev);
+			drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
 			dev->agp_buffer_map = NULL;
 		}
 	} else
@@ -1597,7 +1597,7 @@ static int radeon_do_cleanup_cp(struct drm_device * dev)
 
 		if (dev_priv->gart_info.gart_table_location == DRM_ATI_GART_FB)
 		{
-			drm_core_ioremapfree(&dev_priv->gart_info.mapping, dev);
+			drm_legacy_ioremapfree(&dev_priv->gart_info.mapping, dev);
 			dev_priv->gart_info.addr = NULL;
 		}
 	}
@@ -2106,9 +2106,9 @@ int radeon_driver_load(struct drm_device *dev, unsigned long flags)
 	else
 		dev_priv->flags |= RADEON_IS_PCI;
 
-	ret = drm_addmap(dev, pci_resource_start(dev->pdev, 2),
-			 pci_resource_len(dev->pdev, 2), _DRM_REGISTERS,
-			 _DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio);
+	ret = drm_legacy_addmap(dev, pci_resource_start(dev->pdev, 2),
+				pci_resource_len(dev->pdev, 2), _DRM_REGISTERS,
+				_DRM_READ_ONLY | _DRM_DRIVER, &dev_priv->mmio);
 	if (ret != 0)
 		return ret;
 
@@ -2135,8 +2135,8 @@ int radeon_master_create(struct drm_device *dev, struct drm_master *master)
 
 	/* prebuild the SAREA */
 	sareapage = max_t(unsigned long, SAREA_MAX, PAGE_SIZE);
-	ret = drm_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK,
-			 &master_priv->sarea);
+	ret = drm_legacy_addmap(dev, 0, sareapage, _DRM_SHM, _DRM_CONTAINS_LOCK,
+				&master_priv->sarea);
 	if (ret) {
 		DRM_ERROR("SAREA setup failed\n");
 		kfree(master_priv);
@@ -2162,7 +2162,7 @@ void radeon_master_destroy(struct drm_device *dev, struct drm_master *master)
 
 	master_priv->sarea_priv = NULL;
 	if (master_priv->sarea)
-		drm_rmmap_locked(dev, master_priv->sarea);
+		drm_legacy_rmmap_locked(dev, master_priv->sarea);
 
 	kfree(master_priv);
 
@@ -2181,9 +2181,9 @@ int radeon_driver_firstopen(struct drm_device *dev)
 	dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
 
 	dev_priv->fb_aper_offset = pci_resource_start(dev->pdev, 0);
-	ret = drm_addmap(dev, dev_priv->fb_aper_offset,
-			 pci_resource_len(dev->pdev, 0), _DRM_FRAME_BUFFER,
-			 _DRM_WRITE_COMBINING, &map);
+	ret = drm_legacy_addmap(dev, dev_priv->fb_aper_offset,
+				pci_resource_len(dev->pdev, 0),
+				_DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING, &map);
 	if (ret != 0)
 		return ret;
 
@@ -2196,7 +2196,7 @@ int radeon_driver_unload(struct drm_device *dev)
 
 	DRM_DEBUG("\n");
 
-	drm_rmmap(dev, dev_priv->mmio);
+	drm_legacy_rmmap(dev, dev_priv->mmio);
 
 	kfree(dev_priv);
 
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 83f382e8e40e..f662de41ba49 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -78,7 +78,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	struct radeon_cs_chunk *chunk;
 	struct radeon_cs_buckets buckets;
 	unsigned i, j;
-	bool duplicate;
+	bool duplicate, need_mmap_lock = false;
+	int r;
 
 	if (p->chunk_relocs_idx == -1) {
 		return 0;
@@ -136,10 +137,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			   + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
-		   VRAM, also but everything into VRAM on AGP cards to avoid
-		   image corruptions */
+		   VRAM, also but everything into VRAM on AGP cards and older
+		   IGP chips to avoid image corruptions */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
+		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
+		     p->rdev->family == CHIP_RS780 ||
+		     p->rdev->family == CHIP_RS880)) {
+
 			/* TODO: is this still needed for NI+ ? */
 			p->relocs[i].prefered_domains =
 				RADEON_GEM_DOMAIN_VRAM;
@@ -165,7 +169,21 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].allowed_domains = domain;
 		}
 
+		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
+			uint32_t domain = p->relocs[i].prefered_domains;
+			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
+				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
+					  "allowed for userptr BOs\n");
+				return -EINVAL;
+			}
+			need_mmap_lock = true;
+			domain = RADEON_GEM_DOMAIN_GTT;
+			p->relocs[i].prefered_domains = domain;
+			p->relocs[i].allowed_domains = domain;
+		}
+
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
+		p->relocs[i].tv.shared = !r->write_domain;
 		p->relocs[i].handle = r->handle;
 
 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
@@ -177,8 +195,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->cs_flags & RADEON_CS_USE_VM)
 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
 					      &p->validated);
+	if (need_mmap_lock)
+		down_read(&current->mm->mmap_sem);
+
+	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 
-	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
+	if (need_mmap_lock)
+		up_read(&current->mm->mmap_sem);
+
+	return r;
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -229,11 +254,14 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
 	int i;
 
 	for (i = 0; i < p->nrelocs; i++) {
+		struct reservation_object *resv;
+
 		if (!p->relocs[i].robj)
 			continue;
 
-		radeon_semaphore_sync_to(p->ib.semaphore,
-					 p->relocs[i].robj->tbo.sync_obj);
+		resv = p->relocs[i].robj->tbo.resv;
+		radeon_semaphore_sync_resv(p->ib.semaphore, resv,
+					   p->relocs[i].tv.shared);
 	}
 }
 
@@ -403,7 +431,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
-					    parser->ib.fence);
+					    &parser->ib.fence->base);
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
@@ -538,7 +566,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
-	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
+	radeon_semaphore_sync_fence(parser->ib.semaphore, vm->fence);
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
@@ -629,6 +657,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		up_read(&rdev->exclusive_lock);
 		return -EBUSY;
 	}
+	if (rdev->in_reset) {
+		up_read(&rdev->exclusive_lock);
+		r = radeon_gpu_reset(rdev);
+		if (!r)
+			r = -EAGAIN;
+		return r;
+	}
 	/* initialize parser */
 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
 	parser.filp = filp;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 6a219bcee66d..e84a76e6656a 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1253,6 +1253,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		rdev->ring[i].idx = i;
 	}
+	rdev->fence_context = fence_context_alloc(RADEON_NUM_RINGS);
 
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n",
 		radeon_family_name[rdev->family], pdev->vendor, pdev->device,
@@ -1270,6 +1271,8 @@ int radeon_device_init(struct radeon_device *rdev,
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
+	mutex_init(&rdev->mn_lock);
+	hash_init(rdev->mn_hash);
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
@@ -1395,10 +1398,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	if (r)
 		return r;
 
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
-		DRM_ERROR("ib ring test failed (%d).\n", r);
-
 	r = radeon_gem_debugfs_init(rdev);
 	if (r) {
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@@ -1416,6 +1415,10 @@ int radeon_device_init(struct radeon_device *rdev,
 			return r;
 	}
 
+	r = radeon_ib_ring_tests(rdev);
+	if (r)
+		DRM_ERROR("ib ring test failed (%d).\n", r);
+
 	if ((radeon_testing & 1)) {
 		if (rdev->accel_working)
 			radeon_test_moves(rdev);
@@ -1486,7 +1489,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 	int i, r;
-	bool force_completion = false;
 
 	if (dev == NULL || dev->dev_private == NULL) {
 		return -ENODEV;
@@ -1530,12 +1532,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 		r = radeon_fence_wait_empty(rdev, i);
 		if (r) {
 			/* delay GPU reset to resume */
-			force_completion = true;
+			radeon_fence_driver_force_completion(rdev, i);
 		}
 	}
-	if (force_completion) {
-		radeon_fence_driver_force_completion(rdev);
-	}
 
 	radeon_save_bios_scratch_regs(rdev);
 
@@ -1675,8 +1674,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		return 0;
 	}
 
-	rdev->needs_reset = false;
-
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@@ -1693,7 +1690,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		}
 	}
 
-retry:
 	r = radeon_asic_reset(rdev);
 	if (!r) {
 		dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@@ -1702,26 +1698,12 @@ retry:
 
 	radeon_restore_bios_scratch_regs(rdev);
 
-	if (!r) {
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!r && ring_data[i]) {
 			radeon_ring_restore(rdev, &rdev->ring[i],
 					    ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = radeon_ib_ring_tests(rdev);
-		if (r) {
-			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				radeon_suspend(rdev);
-				goto retry;
-			}
-		}
-	} else {
-		radeon_fence_driver_force_completion(rdev);
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		} else {
+			radeon_fence_driver_force_completion(rdev, i);
 			kfree(ring_data[i]);
 		}
 	}
@@ -1753,19 +1735,32 @@ retry:
 	/* reset hpd state */
 	radeon_hpd_init(rdev);
 
+	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+
+	rdev->in_reset = true;
+	rdev->needs_reset = false;
+
+	downgrade_write(&rdev->exclusive_lock);
+
 	drm_helper_resume_force_mode(rdev->ddev);
 
 	/* set the power state here in case we are a PX system or headless */
 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
 		radeon_pm_compute_clocks(rdev);
 
-	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
-	if (r) {
+	if (!r) {
+		r = radeon_ib_ring_tests(rdev);
+		if (r && saved)
+			r = -EAGAIN;
+	} else {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(rdev->dev, "GPU reset failed\n");
 	}
 
-	up_write(&rdev->exclusive_lock);
+	rdev->needs_reset = r == -EAGAIN;
+	rdev->in_reset = false;
+
+	up_read(&rdev->exclusive_lock);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 3fdf87318069..4eb37976f879 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
 		r = radeon_fence_wait(work->fence, false);
 		if (r == -EDEADLK) {
 			up_read(&rdev->exclusive_lock);
-			r = radeon_gpu_reset(rdev);
+			do {
+				r = radeon_gpu_reset(rdev);
+			} while (r == -EAGAIN);
 			down_read(&rdev->exclusive_lock);
 		}
 		if (r)
@@ -474,11 +476,6 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 	obj = new_radeon_fb->obj;
 	new_rbo = gem_to_radeon_bo(obj);
 
-	spin_lock(&new_rbo->tbo.bdev->fence_lock);
-	if (new_rbo->tbo.sync_obj)
-		work->fence = radeon_fence_ref(new_rbo->tbo.sync_obj);
-	spin_unlock(&new_rbo->tbo.bdev->fence_lock);
-
 	/* pin the new buffer */
 	DRM_DEBUG_DRIVER("flip-ioctl() cur_rbo = %p, new_rbo = %p\n",
 			 work->old_rbo, new_rbo);
@@ -497,6 +494,7 @@ static int radeon_crtc_page_flip(struct drm_crtc *crtc,
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
 		goto cleanup;
 	}
+	work->fence = (struct radeon_fence *)fence_get(reservation_object_get_excl(new_rbo->tbo.resv));
 	radeon_bo_get_tiling_flags(new_rbo, &tiling_flags, NULL);
 	radeon_bo_unreserve(new_rbo);
 
@@ -580,7 +578,6 @@ cleanup:
 	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
 	radeon_fence_unref(&work->fence);
 	kfree(work);
-
 	return r;
 }
 
@@ -1917,7 +1914,7 @@ int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc, unsigned int fl
 
 	/* In vblank? */
 	if (in_vbl)
-		ret |= DRM_SCANOUTPOS_INVBL;
+		ret |= DRM_SCANOUTPOS_IN_VBLANK;
 
 	/* Is vpos outside nominal vblank area, but less than
 	 * 1/100 of a frame height away from start of vblank?
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 8df888908833..ec7e963d9bf7 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -114,6 +114,9 @@ int radeon_gem_object_open(struct drm_gem_object *obj,
 				struct drm_file *file_priv);
 void radeon_gem_object_close(struct drm_gem_object *obj,
 				struct drm_file *file_priv);
+struct dma_buf *radeon_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags);
 extern int radeon_get_crtc_scanoutpos(struct drm_device *dev, int crtc,
 				      unsigned int flags,
 				      int *vpos, int *hpos, ktime_t *stime,
@@ -325,6 +328,7 @@ static struct drm_driver driver_old = {
 	.preclose = radeon_driver_preclose,
 	.postclose = radeon_driver_postclose,
 	.lastclose = radeon_driver_lastclose,
+	.set_busid = drm_pci_set_busid,
 	.unload = radeon_driver_unload,
 	.suspend = radeon_suspend,
 	.resume = radeon_resume,
@@ -548,6 +552,7 @@ static struct drm_driver kms_driver = {
 	.preclose = radeon_driver_preclose_kms,
 	.postclose = radeon_driver_postclose_kms,
 	.lastclose = radeon_driver_lastclose_kms,
+	.set_busid = drm_pci_set_busid,
 	.unload = radeon_driver_unload_kms,
 	.get_vblank_counter = radeon_get_vblank_counter_kms,
 	.enable_vblank = radeon_enable_vblank_kms,
@@ -573,7 +578,7 @@ static struct drm_driver kms_driver = {
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
-	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_export = radeon_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_pin = radeon_gem_prime_pin,
 	.gem_prime_unpin = radeon_gem_prime_unpin,
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index dafd812e4571..46bd3938282c 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -33,7 +33,9 @@
 
 #include <linux/firmware.h>
 #include <linux/platform_device.h>
+#include <drm/drm_legacy.h>
 
+#include <drm/ati_pcigart.h>
 #include "radeon_family.h"
 
 /* General customization:
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 913787085dfa..af9f2d6bd7d0 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -98,6 +98,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 }
 
 /**
+ * radeon_fence_schedule_check - schedule lockup check
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index we should work with
+ *
+ * Queues a delayed work item to check for lockups.
+ */
+static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
+{
+	/*
+	 * Do not reset the timer here with mod_delayed_work,
+	 * this can livelock in an interaction with TTM delayed destroy.
+	 */
+	queue_delayed_work(system_power_efficient_wq,
+			   &rdev->fence_drv[ring].lockup_work,
+			   RADEON_FENCE_JIFFIES_TIMEOUT);
+}
+
+/**
  * radeon_fence_emit - emit a fence on the requested ring
  *
  * @rdev: radeon_device pointer
@@ -111,30 +130,70 @@ int radeon_fence_emit(struct radeon_device *rdev,
 		      struct radeon_fence **fence,
 		      int ring)
 {
+	u64 seq = ++rdev->fence_drv[ring].sync_seq[ring];
+
 	/* we are protected by the ring emission mutex */
 	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
 	if ((*fence) == NULL) {
 		return -ENOMEM;
 	}
-	kref_init(&((*fence)->kref));
 	(*fence)->rdev = rdev;
-	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
+	(*fence)->seq = seq;
 	(*fence)->ring = ring;
+	fence_init(&(*fence)->base, &radeon_fence_ops,
+		   &rdev->fence_queue.lock, rdev->fence_context + ring, seq);
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
+	radeon_fence_schedule_check(rdev, ring);
 	return 0;
 }
 
 /**
- * radeon_fence_process - process a fence
+ * radeon_fence_check_signaled - callback from fence_queue
+ *
+ * this function is called with fence_queue lock held, which is also used
+ * for the fence locking itself, so unlocked variants are used for
+ * fence_signal, and remove_wait_queue.
+ */
+static int radeon_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct radeon_fence *fence;
+	u64 seq;
+
+	fence = container_of(wait, struct radeon_fence, fence_wake);
+
+	/*
+	 * We cannot use radeon_fence_process here because we're already
+	 * in the waitqueue, in a call from wake_up_all.
+	 */
+	seq = atomic64_read(&fence->rdev->fence_drv[fence->ring].last_seq);
+	if (seq >= fence->seq) {
+		int ret = fence_signal_locked(&fence->base);
+
+		if (!ret)
+			FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		else
+			FENCE_TRACE(&fence->base, "was already signaled\n");
+
+		radeon_irq_kms_sw_irq_put(fence->rdev, fence->ring);
+		__remove_wait_queue(&fence->rdev->fence_queue, &fence->fence_wake);
+		fence_put(&fence->base);
+	} else
+		FENCE_TRACE(&fence->base, "pending\n");
+	return 0;
+}
+
+/**
+ * radeon_fence_activity - check for fence activity
  *
  * @rdev: radeon_device pointer
  * @ring: ring index the fence is associated with
  *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
+ * Checks the current fence value and calculates the last
+ * signalled fence value. Returns true if activity occured
+ * on the ring, and the fence_queue should be waken up.
  */
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq, last_seq, last_emitted;
 	unsigned count_loop = 0;
@@ -190,23 +249,77 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 		}
 	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
 
-	if (wake)
-		wake_up_all(&rdev->fence_queue);
+	if (seq < last_emitted)
+		radeon_fence_schedule_check(rdev, ring);
+
+	return wake;
 }
 
 /**
- * radeon_fence_destroy - destroy a fence
+ * radeon_fence_check_lockup - check for hardware lockup
  *
- * @kref: fence kref
+ * @work: delayed work item
  *
- * Frees the fence object (all asics).
+ * Checks for fence activity and if there is none probe
+ * the hardware if a lockup occured.
  */
-static void radeon_fence_destroy(struct kref *kref)
+static void radeon_fence_check_lockup(struct work_struct *work)
 {
-	struct radeon_fence *fence;
+	struct radeon_fence_driver *fence_drv;
+	struct radeon_device *rdev;
+	int ring;
+
+	fence_drv = container_of(work, struct radeon_fence_driver,
+				 lockup_work.work);
+	rdev = fence_drv->rdev;
+	ring = fence_drv - &rdev->fence_drv[0];
+
+	if (!down_read_trylock(&rdev->exclusive_lock)) {
+		/* just reschedule the check if a reset is going on */
+		radeon_fence_schedule_check(rdev, ring);
+		return;
+	}
+
+	if (fence_drv->delayed_irq && rdev->ddev->irq_enabled) {
+		unsigned long irqflags;
+
+		fence_drv->delayed_irq = false;
+		spin_lock_irqsave(&rdev->irq.lock, irqflags);
+		radeon_irq_set(rdev);
+		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
+	}
+
+	if (radeon_fence_activity(rdev, ring))
+		wake_up_all(&rdev->fence_queue);
 
-	fence = container_of(kref, struct radeon_fence, kref);
-	kfree(fence);
+	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+
+		/* good news we believe it's a lockup */
+		dev_warn(rdev->dev, "GPU lockup (current fence id "
+			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
+			 (uint64_t)atomic64_read(&fence_drv->last_seq),
+			 fence_drv->sync_seq[ring], ring);
+
+		/* remember that we need an reset */
+		rdev->needs_reset = true;
+		wake_up_all(&rdev->fence_queue);
+	}
+	up_read(&rdev->exclusive_lock);
+}
+
+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
+void radeon_fence_process(struct radeon_device *rdev, int ring)
+{
+	if (radeon_fence_activity(rdev, ring))
+		wake_up_all(&rdev->fence_queue);
 }
 
 /**
@@ -237,6 +350,75 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 	return false;
 }
 
+static bool radeon_fence_is_signaled(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+	unsigned ring = fence->ring;
+	u64 seq = fence->seq;
+
+	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+		return true;
+	}
+
+	if (down_read_trylock(&rdev->exclusive_lock)) {
+		radeon_fence_process(rdev, ring);
+		up_read(&rdev->exclusive_lock);
+
+		if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
+			return true;
+		}
+	}
+	return false;
+}
+
+/**
+ * radeon_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool radeon_fence_enable_signaling(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+
+	if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq)
+		return false;
+
+	if (down_read_trylock(&rdev->exclusive_lock)) {
+		radeon_irq_kms_sw_irq_get(rdev, fence->ring);
+
+		if (radeon_fence_activity(rdev, fence->ring))
+			wake_up_all_locked(&rdev->fence_queue);
+
+		/* did fence get signaled after we enabled the sw irq? */
+		if (atomic64_read(&rdev->fence_drv[fence->ring].last_seq) >= fence->seq) {
+			radeon_irq_kms_sw_irq_put(rdev, fence->ring);
+			up_read(&rdev->exclusive_lock);
+			return false;
+		}
+
+		up_read(&rdev->exclusive_lock);
+	} else {
+		/* we're probably in a lockup, lets not fiddle too much */
+		if (radeon_irq_kms_sw_irq_get_delayed(rdev, fence->ring))
+			rdev->fence_drv[fence->ring].delayed_irq = true;
+		radeon_fence_schedule_check(rdev, fence->ring);
+	}
+
+	fence->fence_wake.flags = 0;
+	fence->fence_wake.private = NULL;
+	fence->fence_wake.func = radeon_fence_check_signaled;
+	__add_wait_queue(&rdev->fence_queue, &fence->fence_wake);
+	fence_get(f);
+
+	FENCE_TRACE(&fence->base, "armed on ring %i!\n", fence->ring);
+	return true;
+}
+
 /**
  * radeon_fence_signaled - check if a fence has signaled
  *
@@ -247,14 +429,15 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
  */
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
-	if (!fence) {
+	if (!fence)
 		return true;
-	}
-	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
-		return true;
-	}
+
 	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
-		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+		int ret;
+
+		ret = fence_signal(&fence->base);
+		if (!ret)
+			FENCE_TRACE(&fence->base, "signaled from radeon_fence_signaled\n");
 		return true;
 	}
 	return false;
@@ -283,110 +466,70 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 }
 
 /**
- * radeon_fence_wait_seq - wait for a specific sequence numbers
+ * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
  *
  * @rdev: radeon device pointer
  * @target_seq: sequence number(s) we want to wait for
  * @intr: use interruptable sleep
+ * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
  *
  * Wait for the requested sequence number(s) to be written by any ring
  * (all asics).  Sequnce number array is indexed by ring id.
  * @intr selects whether to use interruptable (true) or non-interruptable
  * (false) sleep when waiting for the sequence number.  Helper function
  * for radeon_fence_wait_*().
- * Returns 0 if the sequence number has passed, error for all other cases.
+ * Returns remaining time if the sequence number has passed, 0 when
+ * the wait timeout, or an error for all other cases.
  * -EDEADLK is returned when a GPU lockup has been detected.
  */
-static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
-				 bool intr)
+static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
+					  u64 *target_seq, bool intr,
+					  long timeout)
 {
-	uint64_t last_seq[RADEON_NUM_RINGS];
-	bool signaled;
-	int i, r;
-
-	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+	long r;
+	int i;
 
-		/* Save current sequence values, used to check for GPU lockups */
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
+	if (radeon_fence_any_seq_signaled(rdev, target_seq))
+		return timeout;
 
-			last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
-			trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
-			radeon_irq_kms_sw_irq_get(rdev, i);
-		}
+	/* enable IRQs and tracing */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;
 
-		if (intr) {
-			r = wait_event_interruptible_timeout(rdev->fence_queue, (
-				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-		} else {
-			r = wait_event_timeout(rdev->fence_queue, (
-				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-		}
+		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+		radeon_irq_kms_sw_irq_get(rdev, i);
+	}
 
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
+	if (intr) {
+		r = wait_event_interruptible_timeout(rdev->fence_queue, (
+			radeon_fence_any_seq_signaled(rdev, target_seq)
+			 || rdev->needs_reset), timeout);
+	} else {
+		r = wait_event_timeout(rdev->fence_queue, (
+			radeon_fence_any_seq_signaled(rdev, target_seq)
+			 || rdev->needs_reset), timeout);
+	}
 
-			radeon_irq_kms_sw_irq_put(rdev, i);
-			trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
-		}
+	if (rdev->needs_reset)
+		r = -EDEADLK;
 
-		if (unlikely(r < 0))
-			return r;
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;
 
-		if (unlikely(!signaled)) {
-			if (rdev->needs_reset)
-				return -EDEADLK;
-
-			/* we were interrupted for some reason and fence
-			 * isn't signaled yet, resume waiting */
-			if (r)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
-					break;
-			}
-
-			if (i != RADEON_NUM_RINGS)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
-					break;
-			}
-
-			if (i < RADEON_NUM_RINGS) {
-				/* good news we believe it's a lockup */
-				dev_warn(rdev->dev, "GPU lockup (waiting for "
-					 "0x%016llx last fence id 0x%016llx on"
-					 " ring %d)\n",
-					 target_seq[i], last_seq[i], i);
-
-				/* remember that we need an reset */
-				rdev->needs_reset = true;
-				wake_up_all(&rdev->fence_queue);
-				return -EDEADLK;
-			}
-		}
+		radeon_irq_kms_sw_irq_put(rdev, i);
+		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
 	}
-	return 0;
+
+	return r;
 }
 
 /**
  * radeon_fence_wait - wait for a fence to signal
  *
  * @fence: radeon fence object
- * @intr: use interruptable sleep
+ * @intr: use interruptible sleep
  *
  * Wait for the requested fence to signal (all asics).
  * @intr selects whether to use interruptable (true) or non-interruptable
@@ -396,22 +539,17 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
-	int r;
-
-	if (fence == NULL) {
-		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return -EINVAL;
-	}
+	long r;
 
 	seq[fence->ring] = fence->seq;
-	if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
-		return 0;
-
-	r = radeon_fence_wait_seq(fence->rdev, seq, intr);
-	if (r)
+	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		return r;
+	}
 
-	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
+	r = fence_signal(&fence->base);
+	if (!r)
+		FENCE_TRACE(&fence->base, "signaled from fence_wait\n");
 	return 0;
 }
 
@@ -434,7 +572,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 {
 	uint64_t seq[RADEON_NUM_RINGS];
 	unsigned i, num_rings = 0;
-	int r;
+	long r;
 
 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		seq[i] = 0;
@@ -445,18 +583,14 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 
 		seq[i] = fences[i]->seq;
 		++num_rings;
-
-		/* test if something was allready signaled */
-		if (seq[i] == RADEON_FENCE_SIGNALED_SEQ)
-			return 0;
 	}
 
 	/* nothing to wait for ? */
 	if (num_rings == 0)
 		return -ENOENT;
 
-	r = radeon_fence_wait_seq(rdev, seq, intr);
-	if (r) {
+	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		return r;
 	}
 	return 0;
@@ -475,6 +609,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
+	long r;
 
 	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
@@ -482,7 +617,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 		   already the last emited fence */
 		return -ENOENT;
 	}
-	return radeon_fence_wait_seq(rdev, seq, false);
+	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0)
+		return r;
+	return 0;
 }
 
 /**
@@ -498,18 +636,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
-	int r;
+	long r;
 
 	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
 	if (!seq[ring])
 		return 0;
 
-	r = radeon_fence_wait_seq(rdev, seq, false);
-	if (r) {
+	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		if (r == -EDEADLK)
 			return -EDEADLK;
 
-		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
 			ring, r);
 	}
 	return 0;
@@ -525,7 +663,7 @@ int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
  */
 struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
 {
-	kref_get(&fence->kref);
+	fence_get(&fence->base);
 	return fence;
 }
 
@@ -542,7 +680,7 @@ void radeon_fence_unref(struct radeon_fence **fence)
 
 	*fence = NULL;
 	if (tmp) {
-		kref_put(&tmp->kref, radeon_fence_destroy);
+		fence_put(&tmp->base);
 	}
 }
 
@@ -711,6 +849,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 		rdev->fence_drv[ring].sync_seq[i] = 0;
 	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 	rdev->fence_drv[ring].initialized = false;
+	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
+			  radeon_fence_check_lockup);
+	rdev->fence_drv[ring].rdev = rdev;
 }
 
 /**
@@ -758,8 +899,9 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 		r = radeon_fence_wait_empty(rdev, ring);
 		if (r) {
 			/* no need to trigger GPU reset as we are unloading */
-			radeon_fence_driver_force_completion(rdev);
+			radeon_fence_driver_force_completion(rdev, ring);
 		}
+		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 		rdev->fence_drv[ring].initialized = false;
@@ -771,18 +913,16 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
  * radeon_fence_driver_force_completion - force all fence waiter to complete
  *
  * @rdev: radeon device pointer
+ * @ring: the ring to complete
  *
  * In case of GPU reset failure make sure no process keep waiting on fence
  * that will never complete.
  */
-void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
 {
-	int ring;
-
-	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
-		if (!rdev->fence_drv[ring].initialized)
-			continue;
+	if (rdev->fence_drv[ring].initialized) {
 		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 	}
 }
 
@@ -833,6 +973,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
 	down_read(&rdev->exclusive_lock);
 	seq_printf(m, "%d\n", rdev->needs_reset);
 	rdev->needs_reset = true;
+	wake_up_all(&rdev->fence_queue);
 	up_read(&rdev->exclusive_lock);
 
 	return 0;
@@ -852,3 +993,72 @@ int radeon_debugfs_fence_init(struct radeon_device *rdev)
 	return 0;
 #endif
 }
+
+static const char *radeon_fence_get_driver_name(struct fence *fence)
+{
+	return "radeon";
+}
+
+static const char *radeon_fence_get_timeline_name(struct fence *f)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	switch (fence->ring) {
+	case RADEON_RING_TYPE_GFX_INDEX: return "radeon.gfx";
+	case CAYMAN_RING_TYPE_CP1_INDEX: return "radeon.cp1";
+	case CAYMAN_RING_TYPE_CP2_INDEX: return "radeon.cp2";
+	case R600_RING_TYPE_DMA_INDEX: return "radeon.dma";
+	case CAYMAN_RING_TYPE_DMA1_INDEX: return "radeon.dma1";
+	case R600_RING_TYPE_UVD_INDEX: return "radeon.uvd";
+	case TN_RING_TYPE_VCE1_INDEX: return "radeon.vce1";
+	case TN_RING_TYPE_VCE2_INDEX: return "radeon.vce2";
+	default: WARN_ON_ONCE(1); return "radeon.unk";
+	}
+}
+
+static inline bool radeon_test_signaled(struct radeon_fence *fence)
+{
+	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
+}
+
+static signed long radeon_fence_default_wait(struct fence *f, bool intr,
+					     signed long t)
+{
+	struct radeon_fence *fence = to_radeon_fence(f);
+	struct radeon_device *rdev = fence->rdev;
+	bool signaled;
+
+	fence_enable_sw_signaling(&fence->base);
+
+	/*
+	 * This function has to return -EDEADLK, but cannot hold
+	 * exclusive_lock during the wait because some callers
+	 * may already hold it. This means checking needs_reset without
+	 * lock, and not fiddling with any gpu internals.
+	 *
+	 * The callback installed with fence_enable_sw_signaling will
+	 * run before our wait_event_*timeout call, so we will see
+	 * both the signaled fence and the changes to needs_reset.
+	 */
+
+	if (intr)
+		t = wait_event_interruptible_timeout(rdev->fence_queue,
+			((signaled = radeon_test_signaled(fence)) ||
+			 rdev->needs_reset), t);
+	else
+		t = wait_event_timeout(rdev->fence_queue,
+			((signaled = radeon_test_signaled(fence)) ||
+			 rdev->needs_reset), t);
+
+	if (t > 0 && !signaled)
+		return -EDEADLK;
+	return t;
+}
+
+const struct fence_ops radeon_fence_ops = {
+	.get_driver_name = radeon_fence_get_driver_name,
+	.get_timeline_name = radeon_fence_get_timeline_name,
+	.enable_signaling = radeon_fence_enable_signaling,
+	.signaled = radeon_fence_is_signaled,
+	.wait = radeon_fence_default_wait,
+	.release = NULL,
+};
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index bfd7e1b0ff3f..4b7c8ec36c2f 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -94,7 +94,7 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
 {
 	struct radeon_bo *robj;
 	uint32_t domain;
-	int r;
+	long r;
 
 	/* FIXME: reeimplement */
 	robj = gem_to_radeon_bo(gobj);
@@ -110,9 +110,12 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
 	}
 	if (domain == RADEON_GEM_DOMAIN_CPU) {
 		/* Asking for cpu access wait for object idle */
-		r = radeon_bo_wait(robj, NULL, false);
-		if (r) {
-			printk(KERN_ERR "Failed to wait for object !\n");
+		r = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
+		if (!r)
+			r = -EBUSY;
+
+		if (r < 0 && r != -EINTR) {
+			printk(KERN_ERR "Failed to wait for object: %li\n", r);
 			return r;
 		}
 	}
@@ -272,6 +275,94 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+int radeon_gem_userptr_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *filp)
+{
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_radeon_gem_userptr *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *bo;
+	uint32_t handle;
+	int r;
+
+	if (offset_in_page(args->addr | args->size))
+		return -EINVAL;
+
+	/* reject unknown flag values */
+	if (args->flags & ~(RADEON_GEM_USERPTR_READONLY |
+	    RADEON_GEM_USERPTR_ANONONLY | RADEON_GEM_USERPTR_VALIDATE |
+	    RADEON_GEM_USERPTR_REGISTER))
+		return -EINVAL;
+
+	if (args->flags & RADEON_GEM_USERPTR_READONLY) {
+		/* readonly pages not tested on older hardware */
+		if (rdev->family < CHIP_R600)
+			return -EINVAL;
+
+	} else if (!(args->flags & RADEON_GEM_USERPTR_ANONONLY) ||
+		   !(args->flags & RADEON_GEM_USERPTR_REGISTER)) {
+
+		/* if we want to write to it we must require anonymous
+		   memory and install a MMU notifier */
+		return -EACCES;
+	}
+
+	down_read(&rdev->exclusive_lock);
+
+	/* create a gem object to contain this object in */
+	r = radeon_gem_object_create(rdev, args->size, 0,
+				     RADEON_GEM_DOMAIN_CPU, 0,
+				     false, &gobj);
+	if (r)
+		goto handle_lockup;
+
+	bo = gem_to_radeon_bo(gobj);
+	r = radeon_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
+	if (r)
+		goto release_object;
+
+	if (args->flags & RADEON_GEM_USERPTR_REGISTER) {
+		r = radeon_mn_register(bo, args->addr);
+		if (r)
+			goto release_object;
+	}
+
+	if (args->flags & RADEON_GEM_USERPTR_VALIDATE) {
+		down_read(&current->mm->mmap_sem);
+		r = radeon_bo_reserve(bo, true);
+		if (r) {
+			up_read(&current->mm->mmap_sem);
+			goto release_object;
+		}
+
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_GTT);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+		radeon_bo_unreserve(bo);
+		up_read(&current->mm->mmap_sem);
+		if (r)
+			goto release_object;
+	}
+
+	r = drm_gem_handle_create(filp, gobj, &handle);
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(gobj);
+	if (r)
+		goto handle_lockup;
+
+	args->handle = handle;
+	up_read(&rdev->exclusive_lock);
+	return 0;
+
+release_object:
+	drm_gem_object_unreference_unlocked(gobj);
+
+handle_lockup:
+	up_read(&rdev->exclusive_lock);
+	r = radeon_gem_handle_lockup(rdev, r);
+
+	return r;
+}
+
 int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp)
 {
@@ -315,6 +406,10 @@ int radeon_mode_dumb_mmap(struct drm_file *filp,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
+	if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) {
+		drm_gem_object_unreference_unlocked(gobj);
+		return -EPERM;
+	}
 	*offset_p = radeon_bo_mmap_offset(robj);
 	drm_gem_object_unreference_unlocked(gobj);
 	return 0;
@@ -357,15 +452,22 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 	struct drm_radeon_gem_wait_idle *args = data;
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
-	int r;
+	int r = 0;
 	uint32_t cur_placement = 0;
+	long ret;
 
 	gobj = drm_gem_object_lookup(dev, filp, args->handle);
 	if (gobj == NULL) {
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
-	r = radeon_bo_wait(robj, &cur_placement, false);
+
+	ret = reservation_object_wait_timeout_rcu(robj->tbo.resv, true, true, 30 * HZ);
+	if (ret == 0)
+		r = -EBUSY;
+	else if (ret < 0)
+		r = ret;
+
 	/* Flush HDP cache via MMIO if necessary */
 	if (rdev->asic->mmio_hdp_flush &&
 	    radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM)
@@ -532,6 +634,11 @@ int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	}
 	robj = gem_to_radeon_bo(gobj);
+
+	r = -EPERM;
+	if (radeon_ttm_tt_has_userptr(robj->tbo.ttm))
+		goto out;
+
 	r = radeon_bo_reserve(robj, false);
 	if (unlikely(r))
 		goto out;
diff --git a/drivers/gpu/drm/radeon/radeon_ib.c b/drivers/gpu/drm/radeon/radeon_ib.c
index 5bf2c0a05827..3f39fcca4d07 100644
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@@ -145,7 +145,7 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 	if (ib->vm) {
 		struct radeon_fence *vm_id_fence;
 		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
-        	radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
+		radeon_semaphore_sync_fence(ib->semaphore, vm_id_fence);
 	}
 
 	/* sync with other rings */
@@ -269,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)
 
 		r = radeon_ib_test(rdev, i, ring);
 		if (r) {
+			radeon_fence_driver_force_completion(rdev, i);
 			ring->ready = false;
 			rdev->needs_reset = false;
 
diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c
index 16807afab362..7784911d78ef 100644
--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@@ -88,23 +88,6 @@ static void radeon_hotplug_work_func(struct work_struct *work)
 }
 
 /**
- * radeon_irq_reset_work_func - execute gpu reset
- *
- * @work: work struct
- *
- * Execute scheduled gpu reset (cayman+).
- * This function is called when the irq handler
- * thinks we need a gpu reset.
- */
-static void radeon_irq_reset_work_func(struct work_struct *work)
-{
-	struct radeon_device *rdev = container_of(work, struct radeon_device,
-						  reset_work);
-
-	radeon_gpu_reset(rdev);
-}
-
-/**
  * radeon_driver_irq_preinstall_kms - drm irq preinstall callback
  *
  * @dev: drm dev pointer
@@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev)
 
 	INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
 	INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
-	INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);
 
 	rdev->irq.installed = true;
 	r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
@@ -342,6 +324,21 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
 }
 
 /**
+ * radeon_irq_kms_sw_irq_get_delayed - enable software interrupt
+ *
+ * @rdev: radeon device pointer
+ * @ring: ring whose interrupt you want to enable
+ *
+ * Enables the software interrupt for a specific ring (all asics).
+ * The software interrupt is generally used to signal a fence on
+ * a particular ring.
+ */
+bool radeon_irq_kms_sw_irq_get_delayed(struct radeon_device *rdev, int ring)
+{
+	return atomic_inc_return(&rdev->irq.ring_int[ring]) == 1;
+}
+
+/**
  * radeon_irq_kms_sw_irq_put - disable software interrupt
  *
  * @rdev: radeon device pointer
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index eb7164d07985..8309b11e674d 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -885,5 +885,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_USERPTR, radeon_gem_userptr_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = ARRAY_SIZE(radeon_ioctls_kms);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
new file mode 100644
index 000000000000..a69bd441dd2d
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <linux/mmu_notifier.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+
+struct radeon_mn {
+	/* constant after initialisation */
+	struct radeon_device	*rdev;
+	struct mm_struct	*mm;
+	struct mmu_notifier	mn;
+
+	/* only used on destruction */
+	struct work_struct	work;
+
+	/* protected by rdev->mn_lock */
+	struct hlist_node	node;
+
+	/* objects protected by lock */
+	struct mutex		lock;
+	struct rb_root		objects;
+};
+
+/**
+ * radeon_mn_destroy - destroy the rmn
+ *
+ * @work: previously sheduled work item
+ *
+ * Lazy destroys the notifier from a work item
+ */
+static void radeon_mn_destroy(struct work_struct *work)
+{
+	struct radeon_mn *rmn = container_of(work, struct radeon_mn, work);
+	struct radeon_device *rdev = rmn->rdev;
+	struct radeon_bo *bo, *next;
+
+	mutex_lock(&rdev->mn_lock);
+	mutex_lock(&rmn->lock);
+	hash_del(&rmn->node);
+	rbtree_postorder_for_each_entry_safe(bo, next, &rmn->objects, mn_it.rb) {
+		interval_tree_remove(&bo->mn_it, &rmn->objects);
+		bo->mn = NULL;
+	}
+	mutex_unlock(&rmn->lock);
+	mutex_unlock(&rdev->mn_lock);
+	mmu_notifier_unregister(&rmn->mn, rmn->mm);
+	kfree(rmn);
+}
+
+/**
+ * radeon_mn_release - callback to notify about mm destruction
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ *
+ * Shedule a work item to lazy destroy our notifier.
+ */
+static void radeon_mn_release(struct mmu_notifier *mn,
+			      struct mm_struct *mm)
+{
+	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+	INIT_WORK(&rmn->work, radeon_mn_destroy);
+	schedule_work(&rmn->work);
+}
+
+/**
+ * radeon_mn_invalidate_range_start - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mn: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We block for all BOs between start and end to be idle and
+ * unmap them by move them into system domain again.
+ */
+static void radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long start,
+					     unsigned long end)
+{
+	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
+	struct interval_tree_node *it;
+
+	/* notification is exclusive, but interval is inclusive */
+	end -= 1;
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, start, end);
+	while (it) {
+		struct radeon_bo *bo;
+		struct fence *fence;
+		int r;
+
+		bo = container_of(it, struct radeon_bo, mn_it);
+		it = interval_tree_iter_next(it, start, end);
+
+		r = radeon_bo_reserve(bo, true);
+		if (r) {
+			DRM_ERROR("(%d) failed to reserve user bo\n", r);
+			continue;
+		}
+
+		fence = reservation_object_get_excl(bo->tbo.resv);
+		if (fence) {
+			r = radeon_fence_wait((struct radeon_fence *)fence, false);
+			if (r)
+				DRM_ERROR("(%d) failed to wait for user bo\n", r);
+		}
+
+		radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_CPU);
+		r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
+		if (r)
+			DRM_ERROR("(%d) failed to validate user bo\n", r);
+
+		radeon_bo_unreserve(bo);
+	}
+	
+	mutex_unlock(&rmn->lock);
+}
+
+static const struct mmu_notifier_ops radeon_mn_ops = {
+	.release = radeon_mn_release,
+	.invalidate_range_start = radeon_mn_invalidate_range_start,
+};
+
+/**
+ * radeon_mn_get - create notifier context
+ *
+ * @rdev: radeon device pointer
+ *
+ * Creates a notifier context for current->mm.
+ */
+static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev)
+{
+	struct mm_struct *mm = current->mm;
+	struct radeon_mn *rmn;
+	int r;
+
+	down_write(&mm->mmap_sem);
+	mutex_lock(&rdev->mn_lock);
+
+	hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm)
+		if (rmn->mm == mm)
+			goto release_locks;
+
+	rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
+	if (!rmn) {
+		rmn = ERR_PTR(-ENOMEM);
+		goto release_locks;
+	}
+
+	rmn->rdev = rdev;
+	rmn->mm = mm;
+	rmn->mn.ops = &radeon_mn_ops;
+	mutex_init(&rmn->lock);
+	rmn->objects = RB_ROOT;
+	
+	r = __mmu_notifier_register(&rmn->mn, mm);
+	if (r)
+		goto free_rmn;
+
+	hash_add(rdev->mn_hash, &rmn->node, (unsigned long)mm);
+
+release_locks:
+	mutex_unlock(&rdev->mn_lock);
+	up_write(&mm->mmap_sem);
+
+	return rmn;
+
+free_rmn:
+	mutex_unlock(&rdev->mn_lock);
+	up_write(&mm->mmap_sem);
+	kfree(rmn);
+
+	return ERR_PTR(r);
+}
+
+/**
+ * radeon_mn_register - register a BO for notifier updates
+ *
+ * @bo: radeon buffer object
+ * @addr: userptr addr we should monitor
+ *
+ * Registers an MMU notifier for the given BO at the specified address.
+ * Returns 0 on success, -ERRNO if anything goes wrong.
+ */
+int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
+{
+	unsigned long end = addr + radeon_bo_size(bo) - 1;
+	struct radeon_device *rdev = bo->rdev;
+	struct radeon_mn *rmn;
+	struct interval_tree_node *it;
+
+	rmn = radeon_mn_get(rdev);
+	if (IS_ERR(rmn))
+		return PTR_ERR(rmn);
+
+	mutex_lock(&rmn->lock);
+
+	it = interval_tree_iter_first(&rmn->objects, addr, end);
+	if (it) {
+		mutex_unlock(&rmn->lock);
+		return -EEXIST;
+	}
+
+	bo->mn = rmn;
+	bo->mn_it.start = addr;
+	bo->mn_it.last = end;
+	interval_tree_insert(&bo->mn_it, &rmn->objects);
+
+	mutex_unlock(&rmn->lock);
+
+	return 0;
+}
+
+/**
+ * radeon_mn_unregister - unregister a BO for notifier updates
+ *
+ * @bo: radeon buffer object
+ *
+ * Remove any registration of MMU notifier updates from the buffer object.
+ */
+void radeon_mn_unregister(struct radeon_bo *bo)
+{
+	struct radeon_device *rdev = bo->rdev;
+	struct radeon_mn *rmn;
+
+	mutex_lock(&rdev->mn_lock);
+	rmn = bo->mn;
+	if (rmn == NULL) {
+		mutex_unlock(&rdev->mn_lock);
+		return;
+	}
+
+	mutex_lock(&rmn->lock);
+	interval_tree_remove(&bo->mn_it, &rmn->objects);
+	bo->mn = NULL;
+	mutex_unlock(&rmn->lock);
+	mutex_unlock(&rdev->mn_lock);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 480c87d8edc5..8abee5fa93bd 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -75,6 +75,7 @@ static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 	bo = container_of(tbo, struct radeon_bo, tbo);
 
 	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
+	radeon_mn_unregister(bo);
 
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
@@ -96,48 +97,71 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 {
 	u32 c = 0, i;
 
-	rbo->placement.fpfn = 0;
-	rbo->placement.lpfn = 0;
 	rbo->placement.placement = rbo->placements;
 	rbo->placement.busy_placement = rbo->placements;
 	if (domain & RADEON_GEM_DOMAIN_VRAM)
-		rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-					TTM_PL_FLAG_VRAM;
+		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+					     TTM_PL_FLAG_UNCACHED |
+					     TTM_PL_FLAG_VRAM;
+
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT;
+			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+				TTM_PL_FLAG_TT;
+
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 			   (rbo->rdev->flags & RADEON_IS_AGP)) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
 		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
+						     TTM_PL_FLAG_TT;
 		}
 	}
+
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM;
+			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+				TTM_PL_FLAG_SYSTEM;
+
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 		    rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
+						     TTM_PL_FLAG_SYSTEM;
 		}
 	}
 	if (!c)
-		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
+					     TTM_PL_FLAG_SYSTEM;
+
 	rbo->placement.num_placement = c;
 	rbo->placement.num_busy_placement = c;
 
+	for (i = 0; i < c; ++i) {
+		rbo->placements[i].fpfn = 0;
+		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
+		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM))
+			rbo->placements[i].lpfn =
+				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		else
+			rbo->placements[i].lpfn = 0;
+	}
+
 	/*
 	 * Use two-ended allocation depending on the buffer size to
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
-	if (rbo->tbo.mem.size > 512 * 1024) {
+	if (!((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
+	      (rbo->placements[i].flags & TTM_PL_FLAG_VRAM)) &&
+	    rbo->tbo.mem.size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
-			rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN;
+			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
 	}
 }
@@ -264,6 +288,9 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 {
 	int r, i;
 
+	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
+		return -EPERM;
+
 	if (bo->pin_count) {
 		bo->pin_count++;
 		if (gpu_addr)
@@ -283,21 +310,19 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 		return 0;
 	}
 	radeon_ttm_placement_from_domain(bo, domain);
-	if (domain == RADEON_GEM_DOMAIN_VRAM) {
+	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
-		bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
-	}
-	if (max_offset) {
-		u64 lpfn = max_offset >> PAGE_SHIFT;
-
-		if (!bo->placement.lpfn)
-			bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT;
+		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
+			bo->placements[i].lpfn =
+				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		else
+			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
 
-		if (lpfn < bo->placement.lpfn)
-			bo->placement.lpfn = lpfn;
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
 	}
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
@@ -329,8 +354,10 @@ int radeon_bo_unpin(struct radeon_bo *bo)
 	bo->pin_count--;
 	if (bo->pin_count)
 		return 0;
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+	for (i = 0; i < bo->placement.num_placement; i++) {
+		bo->placements[i].lpfn = 0;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+	}
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
@@ -459,7 +486,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 	u64 bytes_moved = 0, initial_bytes_moved;
 	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
 
-	r = ttm_eu_reserve_buffers(ticket, head);
+	r = ttm_eu_reserve_buffers(ticket, head, true);
 	if (unlikely(r != 0)) {
 		return r;
 	}
@@ -468,6 +495,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 		bo = lobj->robj;
 		if (!bo->pin_count) {
 			u32 domain = lobj->prefered_domains;
+			u32 allowed = lobj->allowed_domains;
 			u32 current_domain =
 				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
 
@@ -479,7 +507,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 			 * into account. We don't want to disallow buffer moves
 			 * completely.
 			 */
-			if ((lobj->allowed_domains & current_domain) != 0 &&
+			if ((allowed & current_domain) != 0 &&
 			    (domain & current_domain) == 0 && /* will be moved */
 			    bytes_moved > bytes_moved_threshold) {
 				/* don't move it */
@@ -489,7 +517,7 @@ int radeon_bo_list_validate(struct radeon_device *rdev,
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
-				radeon_uvd_force_into_uvd_segment(bo);
+				radeon_uvd_force_into_uvd_segment(bo, allowed);
 
 			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
@@ -731,7 +759,7 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 	/* hurrah the memory is not visible ! */
 	radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_VRAM);
-	rbo->placement.lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
+	rbo->placements[0].lpfn = rdev->mc.visible_vram_size >> PAGE_SHIFT;
 	r = ttm_bo_validate(bo, &rbo->placement, false, false);
 	if (unlikely(r == -ENOMEM)) {
 		radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_GTT);
@@ -755,12 +783,10 @@ int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
 	if (unlikely(r != 0))
 		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
+
+	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index 164898b0010c..32522cc940a1 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1556,7 +1556,7 @@ static bool radeon_pm_in_vbl(struct radeon_device *rdev)
 		if (rdev->pm.active_crtcs & (1 << crtc)) {
 			vbl_status = radeon_get_crtc_scanoutpos(rdev->ddev, crtc, 0, &vpos, &hpos, NULL, NULL);
 			if ((vbl_status & DRM_SCANOUTPOS_VALID) &&
-			    !(vbl_status & DRM_SCANOUTPOS_INVBL))
+			    !(vbl_status & DRM_SCANOUTPOS_IN_VBLANK))
 				in_vbl = false;
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index 0b16f2cbcf17..d5414d42e44b 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -111,3 +111,13 @@ struct reservation_object *radeon_gem_prime_res_obj(struct drm_gem_object *obj)
 
 	return bo->tbo.resv;
 }
+
+struct dma_buf *radeon_gem_prime_export(struct drm_device *dev,
+					struct drm_gem_object *gobj,
+					int flags)
+{
+	struct radeon_bo *bo = gem_to_radeon_bo(gobj);
+	if (radeon_ttm_tt_has_userptr(bo->tbo.ttm))
+		return ERR_PTR(-EPERM);
+	return drm_gem_prime_export(dev, gobj, flags);
+}
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index d65607902537..6f2a9bd6bb54 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -45,27 +45,6 @@
 static int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring);
 
 /**
- * radeon_ring_write - write a value to the ring
- *
- * @ring: radeon_ring structure holding ring information
- * @v: dword (dw) value to write
- *
- * Write a value to the requested ring buffer (all asics).
- */
-void radeon_ring_write(struct radeon_ring *ring, uint32_t v)
-{
-#if DRM_DEBUG_CODE
-	if (ring->count_dw <= 0) {
-		DRM_ERROR("radeon: writing more dwords to the ring than expected!\n");
-	}
-#endif
-	ring->ring[ring->wptr++] = v;
-	ring->wptr &= ring->ptr_mask;
-	ring->count_dw--;
-	ring->ring_free_dw--;
-}
-
-/**
  * radeon_ring_supports_scratch_reg - check if the ring supports
  * writing to scratch registers
  *
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
index abd6753a570a..4d4b0773638a 100644
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -96,15 +96,15 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ridx,
 }
 
 /**
- * radeon_semaphore_sync_to - use the semaphore to sync to a fence
+ * radeon_semaphore_sync_fence - use the semaphore to sync to a fence
  *
  * @semaphore: semaphore object to add fence to
  * @fence: fence to sync to
  *
  * Sync to the fence using this semaphore object
  */
-void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
-			      struct radeon_fence *fence)
+void radeon_semaphore_sync_fence(struct radeon_semaphore *semaphore,
+				 struct radeon_fence *fence)
 {
         struct radeon_fence *other;
 
@@ -116,6 +116,38 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore,
 }
 
 /**
+ * radeon_semaphore_sync_to - use the semaphore to sync to a reservation object
+ *
+ * @sema: semaphore object to add fence from reservation object to
+ * @resv: reservation object with embedded fence
+ * @shared: true if we should onyl sync to the exclusive fence
+ *
+ * Sync to the fence using this semaphore object
+ */
+void radeon_semaphore_sync_resv(struct radeon_semaphore *sema,
+				struct reservation_object *resv,
+				bool shared)
+{
+	struct reservation_object_list *flist;
+	struct fence *f;
+	unsigned i;
+
+	/* always sync to the exclusive fence */
+	f = reservation_object_get_excl(resv);
+	radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
+
+	flist = reservation_object_get_list(resv);
+	if (shared || !flist)
+		return;
+
+	for (i = 0; i < flist->shared_count; ++i) {
+		f = rcu_dereference_protected(flist->shared[i],
+					      reservation_object_held(resv));
+		radeon_semaphore_sync_fence(sema, (struct radeon_fence*)f);
+	}
+}
+
+/**
  * radeon_semaphore_sync_rings - sync ring to all registered fences
  *
  * @rdev: radeon_device pointer
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 23bb64fd775f..535403e0c8a2 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -30,9 +30,9 @@
  */
 
 #include <drm/drmP.h>
-#include <drm/drm_buffer.h>
 #include <drm/radeon_drm.h>
 #include "radeon_drv.h"
+#include "drm_buffer.h"
 
 /* ================================================================
  * Helper functions for client state checking and fixup
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index 17bc3dced9f1..ce943e1a5e51 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -116,11 +116,16 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 		radeon_bo_kunmap(gtt_obj[i]);
 
 		if (ring == R600_RING_TYPE_DMA_INDEX)
-			r = radeon_copy_dma(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+			fence = radeon_copy_dma(rdev, gtt_addr, vram_addr,
+						size / RADEON_GPU_PAGE_SIZE,
+						NULL);
 		else
-			r = radeon_copy_blit(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
-		if (r) {
+			fence = radeon_copy_blit(rdev, gtt_addr, vram_addr,
+						 size / RADEON_GPU_PAGE_SIZE,
+						 NULL);
+		if (IS_ERR(fence)) {
 			DRM_ERROR("Failed GTT->VRAM copy %d\n", i);
+			r = PTR_ERR(fence);
 			goto out_lclean_unpin;
 		}
 
@@ -162,11 +167,16 @@ static void radeon_do_test_moves(struct radeon_device *rdev, int flag)
 		radeon_bo_kunmap(vram_obj);
 
 		if (ring == R600_RING_TYPE_DMA_INDEX)
-			r = radeon_copy_dma(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
+			fence = radeon_copy_dma(rdev, vram_addr, gtt_addr,
+						size / RADEON_GPU_PAGE_SIZE,
+						NULL);
 		else
-			r = radeon_copy_blit(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence);
-		if (r) {
+			fence = radeon_copy_blit(rdev, vram_addr, gtt_addr,
+						 size / RADEON_GPU_PAGE_SIZE,
+						 NULL);
+		if (IS_ERR(fence)) {
 			DRM_ERROR("Failed VRAM->GTT copy %d\n", i);
+			r = PTR_ERR(fence);
 			goto out_lclean_unpin;
 		}
 
@@ -222,7 +232,7 @@ out_lclean:
 			radeon_bo_unreserve(gtt_obj[i]);
 			radeon_bo_unref(&gtt_obj[i]);
 		}
-		if (fence)
+		if (fence && !IS_ERR(fence))
 			radeon_fence_unref(&fence);
 		break;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 72afe82a95c9..eca2ce60d440 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -39,6 +39,8 @@
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/swiotlb.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "radeon_reg.h"
 #include "radeon.h"
@@ -176,12 +178,15 @@ static int radeon_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 static void radeon_evict_flags(struct ttm_buffer_object *bo,
 				struct ttm_placement *placement)
 {
+	static struct ttm_place placements = {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM
+	};
+
 	struct radeon_bo *rbo;
-	static u32 placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
 
 	if (!radeon_ttm_bo_is_radeon_bo(bo)) {
-		placement->fpfn = 0;
-		placement->lpfn = 0;
 		placement->placement = &placements;
 		placement->busy_placement = &placements;
 		placement->num_placement = 1;
@@ -228,6 +233,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 	struct radeon_device *rdev;
 	uint64_t old_start, new_start;
 	struct radeon_fence *fence;
+	unsigned num_pages;
 	int r, ridx;
 
 	rdev = radeon_get_rdev(bo->bdev);
@@ -264,13 +270,12 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 
 	BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0);
 
-	/* sync other rings */
-	fence = bo->sync_obj;
-	r = radeon_copy(rdev, old_start, new_start,
-			new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */
-			&fence);
-	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, (void *)fence,
+	num_pages = new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
+	fence = radeon_copy(rdev, old_start, new_start, num_pages, bo->resv);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
 				      evict, no_wait_gpu, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
@@ -284,20 +289,20 @@ static int radeon_move_vram_ram(struct ttm_buffer_object *bo,
 	struct radeon_device *rdev;
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct ttm_mem_reg tmp_mem;
-	u32 placements;
+	struct ttm_place placements;
 	struct ttm_placement placement;
 	int r;
 
 	rdev = radeon_get_rdev(bo->bdev);
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	placement.fpfn = 0;
-	placement.lpfn = 0;
 	placement.num_placement = 1;
 	placement.placement = &placements;
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
-	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+	placements.fpfn = 0;
+	placements.lpfn = 0;
+	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
@@ -332,19 +337,19 @@ static int radeon_move_ram_vram(struct ttm_buffer_object *bo,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct ttm_mem_reg tmp_mem;
 	struct ttm_placement placement;
-	u32 placements;
+	struct ttm_place placements;
 	int r;
 
 	rdev = radeon_get_rdev(bo->bdev);
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
-	placement.fpfn = 0;
-	placement.lpfn = 0;
 	placement.num_placement = 1;
 	placement.placement = &placements;
 	placement.num_busy_placement = 1;
 	placement.busy_placement = &placements;
-	placements = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
+	placements.fpfn = 0;
+	placements.lpfn = 0;
+	placements.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT;
 	r = ttm_bo_mem_space(bo, &placement, &tmp_mem,
 			     interruptible, no_wait_gpu);
 	if (unlikely(r)) {
@@ -483,39 +488,108 @@ static void radeon_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
-static int radeon_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
-{
-	return radeon_fence_wait((struct radeon_fence *)sync_obj, interruptible);
-}
+/*
+ * TTM backend functions.
+ */
+struct radeon_ttm_tt {
+	struct ttm_dma_tt		ttm;
+	struct radeon_device		*rdev;
+	u64				offset;
+
+	uint64_t			userptr;
+	struct mm_struct		*usermm;
+	uint32_t			userflags;
+};
 
-static int radeon_sync_obj_flush(void *sync_obj)
+/* prepare the sg table with the user pages */
+static int radeon_ttm_tt_pin_userptr(struct ttm_tt *ttm)
 {
+	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+	unsigned pinned = 0, nents;
+	int r;
+
+	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+	if (current->mm != gtt->usermm)
+		return -EPERM;
+
+	if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) {
+		/* check that we only pin down anonymous memory
+		   to prevent problems with writeback */
+		unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+		struct vm_area_struct *vma;
+		vma = find_vma(gtt->usermm, gtt->userptr);
+		if (!vma || vma->vm_file || vma->vm_end < end)
+			return -EPERM;
+	}
+
+	do {
+		unsigned num_pages = ttm->num_pages - pinned;
+		uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
+		struct page **pages = ttm->pages + pinned;
+
+		r = get_user_pages(current, current->mm, userptr, num_pages,
+				   write, 0, pages, NULL);
+		if (r < 0)
+			goto release_pages;
+
+		pinned += r;
+
+	} while (pinned < ttm->num_pages);
+
+	r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
+				      ttm->num_pages << PAGE_SHIFT,
+				      GFP_KERNEL);
+	if (r)
+		goto release_sg;
+
+	r = -ENOMEM;
+	nents = dma_map_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+	if (nents != ttm->sg->nents)
+		goto release_sg;
+
+	drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
+					 gtt->ttm.dma_address, ttm->num_pages);
+
 	return 0;
-}
 
-static void radeon_sync_obj_unref(void **sync_obj)
-{
-	radeon_fence_unref((struct radeon_fence **)sync_obj);
-}
+release_sg:
+	kfree(ttm->sg);
 
-static void *radeon_sync_obj_ref(void *sync_obj)
-{
-	return radeon_fence_ref((struct radeon_fence *)sync_obj);
+release_pages:
+	release_pages(ttm->pages, pinned, 0);
+	return r;
 }
 
-static bool radeon_sync_obj_signaled(void *sync_obj)
+static void radeon_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
 {
-	return radeon_fence_signaled((struct radeon_fence *)sync_obj);
-}
+	struct radeon_device *rdev = radeon_get_rdev(ttm->bdev);
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+	struct scatterlist *sg;
+	int i;
 
-/*
- * TTM backend functions.
- */
-struct radeon_ttm_tt {
-	struct ttm_dma_tt		ttm;
-	struct radeon_device		*rdev;
-	u64				offset;
-};
+	int write = !(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+	enum dma_data_direction direction = write ?
+		DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
+	/* free the sg table and pages again */
+	dma_unmap_sg(rdev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
+
+	for_each_sg(ttm->sg->sgl, sg, ttm->sg->nents, i) {
+		struct page *page = sg_page(sg);
+
+		if (!(gtt->userflags & RADEON_GEM_USERPTR_READONLY))
+			set_page_dirty(page);
+
+		mark_page_accessed(page);
+		page_cache_release(page);
+	}
+
+	sg_free_table(ttm->sg);
+}
 
 static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 				   struct ttm_mem_reg *bo_mem)
@@ -525,6 +599,11 @@ static int radeon_ttm_backend_bind(struct ttm_tt *ttm,
 		RADEON_GART_PAGE_WRITE;
 	int r;
 
+	if (gtt->userptr) {
+		radeon_ttm_tt_pin_userptr(ttm);
+		flags &= ~RADEON_GART_PAGE_WRITE;
+	}
+
 	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
 	if (!ttm->num_pages) {
 		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
@@ -547,6 +626,10 @@ static int radeon_ttm_backend_unbind(struct ttm_tt *ttm)
 	struct radeon_ttm_tt *gtt = (void *)ttm;
 
 	radeon_gart_unbind(gtt->rdev, gtt->offset, ttm->num_pages);
+
+	if (gtt->userptr)
+		radeon_ttm_tt_unpin_userptr(ttm);
+
 	return 0;
 }
 
@@ -603,6 +686,16 @@ static int radeon_ttm_tt_populate(struct ttm_tt *ttm)
 	if (ttm->state != tt_unpopulated)
 		return 0;
 
+	if (gtt->userptr) {
+		ttm->sg = kcalloc(1, sizeof(struct sg_table), GFP_KERNEL);
+		if (!ttm->sg)
+			return -ENOMEM;
+
+		ttm->page_flags |= TTM_PAGE_FLAG_SG;
+		ttm->state = tt_unbound;
+		return 0;
+	}
+
 	if (slave && ttm->sg) {
 		drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
 						 gtt->ttm.dma_address, ttm->num_pages);
@@ -652,6 +745,12 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	unsigned i;
 	bool slave = !!(ttm->page_flags & TTM_PAGE_FLAG_SG);
 
+	if (gtt->userptr) {
+		kfree(ttm->sg);
+		ttm->page_flags &= ~TTM_PAGE_FLAG_SG;
+		return;
+	}
+
 	if (slave)
 		return;
 
@@ -680,6 +779,40 @@ static void radeon_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	ttm_pool_unpopulate(ttm);
 }
 
+int radeon_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
+			      uint32_t flags)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return -EINVAL;
+
+	gtt->userptr = addr;
+	gtt->usermm = current->mm;
+	gtt->userflags = flags;
+	return 0;
+}
+
+bool radeon_ttm_tt_has_userptr(struct ttm_tt *ttm)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return false;
+
+	return !!gtt->userptr;
+}
+
+bool radeon_ttm_tt_is_readonly(struct ttm_tt *ttm)
+{
+	struct radeon_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt == NULL)
+		return false;
+
+	return !!(gtt->userflags & RADEON_GEM_USERPTR_READONLY);
+}
+
 static struct ttm_bo_driver radeon_bo_driver = {
 	.ttm_tt_create = &radeon_ttm_tt_create,
 	.ttm_tt_populate = &radeon_ttm_tt_populate,
@@ -689,11 +822,6 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.evict_flags = &radeon_evict_flags,
 	.move = &radeon_bo_move,
 	.verify_access = &radeon_verify_access,
-	.sync_obj_signaled = &radeon_sync_obj_signaled,
-	.sync_obj_wait = &radeon_sync_obj_wait,
-	.sync_obj_flush = &radeon_sync_obj_flush,
-	.sync_obj_unref = &radeon_sync_obj_unref,
-	.sync_obj_ref = &radeon_sync_obj_ref,
 	.move_notify = &radeon_bo_move_notify,
 	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 	.io_mem_reserve = &radeon_ttm_io_mem_reserve,
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
index 341848a14376..ba4f38916026 100644
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -40,12 +40,18 @@
 #define UVD_IDLE_TIMEOUT_MS	1000
 
 /* Firmware Names */
+#define FIRMWARE_R600		"radeon/R600_uvd.bin"
+#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
+#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
 #define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
 #define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
 #define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
 #define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
 #define FIRMWARE_BONAIRE	"radeon/BONAIRE_uvd.bin"
 
+MODULE_FIRMWARE(FIRMWARE_R600);
+MODULE_FIRMWARE(FIRMWARE_RS780);
+MODULE_FIRMWARE(FIRMWARE_RV770);
 MODULE_FIRMWARE(FIRMWARE_RV710);
 MODULE_FIRMWARE(FIRMWARE_CYPRESS);
 MODULE_FIRMWARE(FIRMWARE_SUMO);
@@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev)
 	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);
 
 	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+	case CHIP_RV620:
+	case CHIP_RV635:
+		fw_name = FIRMWARE_R600;
+		break;
+
+	case CHIP_RS780:
+	case CHIP_RS880:
+		fw_name = FIRMWARE_RS780;
+		break;
+
+	case CHIP_RV770:
+		fw_name = FIRMWARE_RV770;
+		break;
+
 	case CHIP_RV710:
 	case CHIP_RV730:
 	case CHIP_RV740:
@@ -115,7 +138,8 @@ int radeon_uvd_init(struct radeon_device *rdev)
 	}
 
 	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 8) +
-		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
+		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE +
+		  RADEON_GPU_PAGE_SIZE;
 	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
 			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &rdev->uvd.vcpu_bo);
 	if (r) {
@@ -231,10 +255,30 @@ int radeon_uvd_resume(struct radeon_device *rdev)
 	return 0;
 }
 
-void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo,
+				       uint32_t allowed_domains)
 {
-	rbo->placement.fpfn = 0 >> PAGE_SHIFT;
-	rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+	int i;
+
+	for (i = 0; i < rbo->placement.num_placement; ++i) {
+		rbo->placements[i].fpfn = 0 >> PAGE_SHIFT;
+		rbo->placements[i].lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+	}
+
+	/* If it must be in VRAM it must be in the first segment as well */
+	if (allowed_domains == RADEON_GEM_DOMAIN_VRAM)
+		return;
+
+	/* abort if we already have more than one placement */
+	if (rbo->placement.num_placement > 1)
+		return;
+
+	/* add another 256MB segment */
+	rbo->placements[1] = rbo->placements[0];
+	rbo->placements[1].fpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
+	rbo->placements[1].lpfn += (256 * 1024 * 1024) >> PAGE_SHIFT;
+	rbo->placement.num_placement++;
+	rbo->placement.num_busy_placement++;
 }
 
 void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
@@ -356,6 +400,7 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 {
 	int32_t *msg, msg_type, handle;
 	unsigned img_size = 0;
+	struct fence *f;
 	void *ptr;
 
 	int i, r;
@@ -365,8 +410,9 @@ static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *bo,
 		return -EINVAL;
 	}
 
-	if (bo->tbo.sync_obj) {
-		r = radeon_fence_wait(bo->tbo.sync_obj, false);
+	f = reservation_object_get_excl(bo->tbo.resv);
+	if (f) {
+		r = radeon_fence_wait((struct radeon_fence *)f, false);
 		if (r) {
 			DRM_ERROR("Failed waiting for UVD message (%d)!\n", r);
 			return r;
@@ -604,38 +650,16 @@ int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
 }
 
 static int radeon_uvd_send_msg(struct radeon_device *rdev,
-			       int ring, struct radeon_bo *bo,
+			       int ring, uint64_t addr,
 			       struct radeon_fence **fence)
 {
-	struct ttm_validate_buffer tv;
-	struct ww_acquire_ctx ticket;
-	struct list_head head;
 	struct radeon_ib ib;
-	uint64_t addr;
 	int i, r;
 
-	memset(&tv, 0, sizeof(tv));
-	tv.bo = &bo->tbo;
-
-	INIT_LIST_HEAD(&head);
-	list_add(&tv.head, &head);
-
-	r = ttm_eu_reserve_buffers(&ticket, &head);
-	if (r)
-		return r;
-
-	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
-	radeon_uvd_force_into_uvd_segment(bo);
-
-	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	if (r) 
-		goto err;
-
 	r = radeon_ib_get(rdev, ring, &ib, NULL, 64);
 	if (r)
-		goto err;
+		return r;
 
-	addr = radeon_bo_gpu_offset(bo);
 	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
 	ib.ptr[1] = addr;
 	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
@@ -647,19 +671,11 @@ static int radeon_uvd_send_msg(struct radeon_device *rdev,
 	ib.length_dw = 16;
 
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
-	if (r)
-		goto err;
-	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
 
 	if (fence)
 		*fence = radeon_fence_ref(ib.fence);
 
 	radeon_ib_free(rdev, &ib);
-	radeon_bo_unref(&bo);
-	return 0;
-
-err:
-	ttm_eu_backoff_reservation(&ticket, &head);
 	return r;
 }
 
@@ -669,27 +685,18 @@ err:
 int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 			      uint32_t handle, struct radeon_fence **fence)
 {
-	struct radeon_bo *bo;
-	uint32_t *msg;
-	int r, i;
+	/* we use the last page of the vcpu bo for the UVD message */
+	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
+		RADEON_GPU_PAGE_SIZE;
 
-	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo);
-	if (r)
-		return r;
+	uint32_t *msg = rdev->uvd.cpu_addr + offs;
+	uint64_t addr = rdev->uvd.gpu_addr + offs;
 
-	r = radeon_bo_reserve(bo, false);
-	if (r) {
-		radeon_bo_unref(&bo);
-		return r;
-	}
+	int r, i;
 
-	r = radeon_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		radeon_bo_unreserve(bo);
-		radeon_bo_unref(&bo);
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
+	if (r)
 		return r;
-	}
 
 	/* stitch together an UVD create msg */
 	msg[0] = cpu_to_le32(0x00000de4);
@@ -706,36 +713,26 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
 	for (i = 11; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	radeon_bo_kunmap(bo);
-	radeon_bo_unreserve(bo);
-
-	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
+	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+	return r;
 }
 
 int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
 			       uint32_t handle, struct radeon_fence **fence)
 {
-	struct radeon_bo *bo;
-	uint32_t *msg;
-	int r, i;
+	/* we use the last page of the vcpu bo for the UVD message */
+	uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) -
+		RADEON_GPU_PAGE_SIZE;
 
-	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
-			     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &bo);
-	if (r)
-		return r;
+	uint32_t *msg = rdev->uvd.cpu_addr + offs;
+	uint64_t addr = rdev->uvd.gpu_addr + offs;
 
-	r = radeon_bo_reserve(bo, false);
-	if (r) {
-		radeon_bo_unref(&bo);
-		return r;
-	}
+	int r, i;
 
-	r = radeon_bo_kmap(bo, (void **)&msg);
-	if (r) {
-		radeon_bo_unreserve(bo);
-		radeon_bo_unref(&bo);
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, true);
+	if (r)
 		return r;
-	}
 
 	/* stitch together an UVD destroy msg */
 	msg[0] = cpu_to_le32(0x00000de4);
@@ -745,10 +742,9 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
 	for (i = 4; i < 1024; ++i)
 		msg[i] = cpu_to_le32(0x0);
 
-	radeon_bo_kunmap(bo);
-	radeon_bo_unreserve(bo);
-
-	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+	r = radeon_uvd_send_msg(rdev, ring, addr, fence);
+	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+	return r;
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index 088ffdc2f577..ce870959dff8 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -143,6 +143,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
 	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].tv.bo = &vm->page_directory->tbo;
+	list[0].tv.shared = false;
 	list[0].tiling_flags = 0;
 	list[0].handle = 0;
 	list_add(&list[0].tv.head, head);
@@ -156,6 +157,7 @@ struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
 		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].tv.bo = &list[idx].robj->tbo;
+		list[idx].tv.shared = false;
 		list[idx].tiling_flags = 0;
 		list[idx].handle = 0;
 		list_add(&list[idx++].tv.head, head);
@@ -395,11 +397,12 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 
         memset(&tv, 0, sizeof(tv));
         tv.bo = &bo->tbo;
+	tv.shared = false;
 
         INIT_LIST_HEAD(&head);
         list_add(&tv.head, &head);
 
-        r = ttm_eu_reserve_buffers(&ticket, &head);
+        r = ttm_eu_reserve_buffers(&ticket, &head, true);
         if (r)
 		return r;
 
@@ -424,7 +427,7 @@ static int radeon_vm_clear_bo(struct radeon_device *rdev,
 	if (r)
                 goto error;
 
-	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
+	ttm_eu_fence_buffer_objects(&ticket, &head, &ib.fence->base);
 	radeon_ib_free(rdev, &ib);
 
 	return 0;
@@ -694,8 +697,9 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
 
 	if (ib.length_dw != 0) {
 		radeon_asic_vm_pad_ib(rdev, &ib);
-		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
-		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
+
+		radeon_semaphore_sync_resv(ib.semaphore, pd->tbo.resv, false);
+		radeon_semaphore_sync_fence(ib.semaphore, vm->last_id_use);
 		WARN_ON(ib.length_dw > ndw);
 		r = radeon_ib_schedule(rdev, &ib, NULL, false);
 		if (r) {
@@ -821,7 +825,7 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 		unsigned nptes;
 		uint64_t pte;
 
-		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
+		radeon_semaphore_sync_resv(ib->semaphore, pt->tbo.resv, false);
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
@@ -892,6 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
 	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
 	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
+	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
+		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;
+
 	if (mem) {
 		addr = mem->start << PAGE_SHIFT;
 		if (mem->mem_type != TTM_PL_SYSTEM) {
@@ -960,7 +967,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	radeon_asic_vm_pad_ib(rdev, &ib);
 	WARN_ON(ib.length_dw > ndw);
 
-	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
+	radeon_semaphore_sync_fence(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
 	if (r) {
 		radeon_ib_free(rdev, &ib);
diff --git a/drivers/gpu/drm/radeon/rv770_dma.c b/drivers/gpu/drm/radeon/rv770_dma.c
index 74426ac2bb5c..c112764adfdf 100644
--- a/drivers/gpu/drm/radeon/rv770_dma.c
+++ b/drivers/gpu/drm/radeon/rv770_dma.c
@@ -33,18 +33,19 @@
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (r7xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int rv770_copy_dma(struct radeon_device *rdev,
-		  uint64_t src_offset, uint64_t dst_offset,
-		  unsigned num_gpu_pages,
-		  struct radeon_fence **fence)
+struct radeon_fence *rv770_copy_dma(struct radeon_device *rdev,
+				    uint64_t src_offset, uint64_t dst_offset,
+				    unsigned num_gpu_pages,
+				    struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_dw, cur_size_in_dw;
@@ -54,7 +55,7 @@ int rv770_copy_dma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
@@ -63,10 +64,10 @@ int rv770_copy_dma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -83,15 +84,15 @@ int rv770_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_dw * 4;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
diff --git a/drivers/gpu/drm/radeon/si_dma.c b/drivers/gpu/drm/radeon/si_dma.c
index 7c22baaf94db..9b0dfbc913f3 100644
--- a/drivers/gpu/drm/radeon/si_dma.c
+++ b/drivers/gpu/drm/radeon/si_dma.c
@@ -218,18 +218,19 @@ void si_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
  * @src_offset: src GPU address
  * @dst_offset: dst GPU address
  * @num_gpu_pages: number of GPU pages to xfer
- * @fence: radeon fence object
+ * @resv: reservation object to sync to
  *
  * Copy GPU paging using the DMA engine (SI).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
-int si_copy_dma(struct radeon_device *rdev,
-		uint64_t src_offset, uint64_t dst_offset,
-		unsigned num_gpu_pages,
-		struct radeon_fence **fence)
+struct radeon_fence *si_copy_dma(struct radeon_device *rdev,
+				 uint64_t src_offset, uint64_t dst_offset,
+				 unsigned num_gpu_pages,
+				 struct reservation_object *resv)
 {
 	struct radeon_semaphore *sem = NULL;
+	struct radeon_fence *fence;
 	int ring_index = rdev->asic->copy.dma_ring_index;
 	struct radeon_ring *ring = &rdev->ring[ring_index];
 	u32 size_in_bytes, cur_size_in_bytes;
@@ -239,7 +240,7 @@ int si_copy_dma(struct radeon_device *rdev,
 	r = radeon_semaphore_create(rdev, &sem);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
@@ -248,10 +249,10 @@ int si_copy_dma(struct radeon_device *rdev,
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
-	radeon_semaphore_sync_to(sem, *fence);
+	radeon_semaphore_sync_resv(sem, resv, false);
 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
 
 	for (i = 0; i < num_loops; i++) {
@@ -268,16 +269,16 @@ int si_copy_dma(struct radeon_device *rdev,
 		dst_offset += cur_size_in_bytes;
 	}
 
-	r = radeon_fence_emit(rdev, fence, ring->idx);
+	r = radeon_fence_emit(rdev, &fence, ring->idx);
 	if (r) {
 		radeon_ring_unlock_undo(rdev, ring);
 		radeon_semaphore_free(rdev, &sem, NULL);
-		return r;
+		return ERR_PTR(r);
 	}
 
 	radeon_ring_unlock_commit(rdev, ring, false);
-	radeon_semaphore_free(rdev, &sem, *fence);
+	radeon_semaphore_free(rdev, &sem, fence);
 
-	return r;
+	return fence;
 }
 
diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c
index cda391347286..e72b3cb59358 100644
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@@ -22,6 +22,7 @@
  * Authors: Christian König <christian.koenig@amd.com>
  */
 
+#include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
@@ -70,6 +71,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev,
 }
 
 /**
+ * uvd_v1_0_fence_emit - emit an fence & trap command
+ *
+ * @rdev: radeon_device pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void uvd_v1_0_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 2);
+	return;
+}
+
+/**
+ * uvd_v1_0_resume - memory controller programming
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+int uvd_v1_0_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t size;
+	int r;
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	/* programm the VCPU memory controller bits 0-27 */
+	addr = (rdev->uvd.gpu_addr >> 3) + 16;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr));
+
+	return 0;
+}
+
+/**
  * uvd_v1_0_init - start and test UVD block
  *
  * @rdev: radeon_device pointer
@@ -130,8 +207,32 @@ done:
 	/* lower clocks again */
 	radeon_set_uvd_clocks(rdev, 0, 0);
 
-	if (!r)
+	if (!r) {
+		switch (rdev->family) {
+		case CHIP_RV610:
+		case CHIP_RV630:
+		case CHIP_RV620:
+			/* 64byte granularity workaround */
+			WREG32(MC_CONFIG, 0);
+			WREG32(MC_CONFIG, 1 << 4);
+			WREG32(RS_DQ_RD_RET_CONF, 0x3f);
+			WREG32(MC_CONFIG, 0x1f);
+
+			/* fall through */
+		case CHIP_RV670:
+		case CHIP_RV635:
+
+			/* write clean workaround */
+			WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10);
+			break;
+
+		default:
+			/* TODO: Do we need more? */
+			break;
+		}
+
 		DRM_INFO("UVD initialized successfully.\n");
+	}
 
 	return r;
 }
@@ -218,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev)
 	/* enable UMC */
 	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
 
+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
 	/* boot up the VCPU */
 	WREG32(UVD_SOFT_RESET, 0);
 	mdelay(10);
 
-	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
-
 	for (i = 0; i < 10; ++i) {
 		uint32_t status;
 		for (j = 0; j < 100; ++j) {
diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
index 8bfdadd56598..89193519f8a1 100644
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev)
 	uint32_t chip_id, size;
 	int r;
 
+	/* RV770 uses V1.0 MC */
+	if (rdev->family == CHIP_RV770)
+		return uvd_v1_0_resume(rdev);
+
 	r = radeon_uvd_resume(rdev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index fda64b7b73e8..672d2fcba009 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -158,6 +158,7 @@ static struct drm_driver rcar_du_driver = {
 	.unload			= rcar_du_unload,
 	.preclose		= rcar_du_preclose,
 	.lastclose		= rcar_du_lastclose,
+	.set_busid		= drm_platform_set_busid,
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= rcar_du_enable_vblank,
 	.disable_vblank		= rcar_du_disable_vblank,
diff --git a/drivers/gpu/drm/savage/savage_bci.c b/drivers/gpu/drm/savage/savage_bci.c
index c97cdc9ab239..d47dff95fe52 100644
--- a/drivers/gpu/drm/savage/savage_bci.c
+++ b/drivers/gpu/drm/savage/savage_bci.c
@@ -556,7 +556,7 @@ int savage_driver_load(struct drm_device *dev, unsigned long chipset)
 /*
  * Initialize mappings. On Savage4 and SavageIX the alignment
  * and size of the aperture is not suitable for automatic MTRR setup
- * in drm_addmap. Therefore we add them manually before the maps are
+ * in drm_legacy_addmap. Therefore we add them manually before the maps are
  * initialized, and tear them down on last close.
  */
 int savage_driver_firstopen(struct drm_device *dev)
@@ -624,19 +624,20 @@ int savage_driver_firstopen(struct drm_device *dev)
 		/* Automatic MTRR setup will do the right thing. */
 	}
 
-	ret = drm_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE, _DRM_REGISTERS,
-			 _DRM_READ_ONLY, &dev_priv->mmio);
+	ret = drm_legacy_addmap(dev, mmio_base, SAVAGE_MMIO_SIZE,
+				_DRM_REGISTERS, _DRM_READ_ONLY,
+				&dev_priv->mmio);
 	if (ret)
 		return ret;
 
-	ret = drm_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER,
-			 _DRM_WRITE_COMBINING, &dev_priv->fb);
+	ret = drm_legacy_addmap(dev, fb_base, fb_size, _DRM_FRAME_BUFFER,
+				_DRM_WRITE_COMBINING, &dev_priv->fb);
 	if (ret)
 		return ret;
 
-	ret = drm_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE,
-			 _DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
-			 &dev_priv->aperture);
+	ret = drm_legacy_addmap(dev, aperture_base, SAVAGE_APERTURE_SIZE,
+				_DRM_FRAME_BUFFER, _DRM_WRITE_COMBINING,
+				&dev_priv->aperture);
 	return ret;
 }
 
@@ -698,14 +699,14 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 	dev_priv->texture_offset = init->texture_offset;
 	dev_priv->texture_size = init->texture_size;
 
-	dev_priv->sarea = drm_getsarea(dev);
+	dev_priv->sarea = drm_legacy_getsarea(dev);
 	if (!dev_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		savage_do_cleanup_bci(dev);
 		return -EINVAL;
 	}
 	if (init->status_offset != 0) {
-		dev_priv->status = drm_core_findmap(dev, init->status_offset);
+		dev_priv->status = drm_legacy_findmap(dev, init->status_offset);
 		if (!dev_priv->status) {
 			DRM_ERROR("could not find shadow status region!\n");
 			savage_do_cleanup_bci(dev);
@@ -716,14 +717,14 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 	}
 	if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) {
 		dev->agp_buffer_token = init->buffers_offset;
-		dev->agp_buffer_map = drm_core_findmap(dev,
+		dev->agp_buffer_map = drm_legacy_findmap(dev,
 						       init->buffers_offset);
 		if (!dev->agp_buffer_map) {
 			DRM_ERROR("could not find DMA buffer region!\n");
 			savage_do_cleanup_bci(dev);
 			return -EINVAL;
 		}
-		drm_core_ioremap(dev->agp_buffer_map, dev);
+		drm_legacy_ioremap(dev->agp_buffer_map, dev);
 		if (!dev->agp_buffer_map->handle) {
 			DRM_ERROR("failed to ioremap DMA buffer region!\n");
 			savage_do_cleanup_bci(dev);
@@ -732,7 +733,7 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 	}
 	if (init->agp_textures_offset) {
 		dev_priv->agp_textures =
-		    drm_core_findmap(dev, init->agp_textures_offset);
+		    drm_legacy_findmap(dev, init->agp_textures_offset);
 		if (!dev_priv->agp_textures) {
 			DRM_ERROR("could not find agp texture region!\n");
 			savage_do_cleanup_bci(dev);
@@ -755,7 +756,7 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 			savage_do_cleanup_bci(dev);
 			return -EINVAL;
 		}
-		dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset);
+		dev_priv->cmd_dma = drm_legacy_findmap(dev, init->cmd_dma_offset);
 		if (!dev_priv->cmd_dma) {
 			DRM_ERROR("could not find command DMA region!\n");
 			savage_do_cleanup_bci(dev);
@@ -768,7 +769,7 @@ static int savage_do_init_bci(struct drm_device * dev, drm_savage_init_t * init)
 				savage_do_cleanup_bci(dev);
 				return -EINVAL;
 			}
-			drm_core_ioremap(dev_priv->cmd_dma, dev);
+			drm_legacy_ioremap(dev_priv->cmd_dma, dev);
 			if (!dev_priv->cmd_dma->handle) {
 				DRM_ERROR("failed to ioremap command "
 					  "DMA region!\n");
@@ -894,11 +895,11 @@ static int savage_do_cleanup_bci(struct drm_device * dev)
 	} else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle &&
 		   dev_priv->cmd_dma->type == _DRM_AGP &&
 		   dev_priv->dma_type == SAVAGE_DMA_AGP)
-		drm_core_ioremapfree(dev_priv->cmd_dma, dev);
+		drm_legacy_ioremapfree(dev_priv->cmd_dma, dev);
 
 	if (dev_priv->dma_type == SAVAGE_DMA_AGP &&
 	    dev->agp_buffer_map && dev->agp_buffer_map->handle) {
-		drm_core_ioremapfree(dev->agp_buffer_map, dev);
+		drm_legacy_ioremapfree(dev->agp_buffer_map, dev);
 		/* make sure the next instance (which may be running
 		 * in PCI mode) doesn't try to use an old
 		 * agp_buffer_map. */
@@ -1050,7 +1051,7 @@ void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv)
 		return;
 
 	if (file_priv->master && file_priv->master->lock.hw_lock) {
-		drm_idlelock_take(&file_priv->master->lock);
+		drm_legacy_idlelock_take(&file_priv->master->lock);
 		release_idlelock = 1;
 	}
 
@@ -1069,7 +1070,7 @@ void savage_reclaim_buffers(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	if (release_idlelock)
-		drm_idlelock_release(&file_priv->master->lock);
+		drm_legacy_idlelock_release(&file_priv->master->lock);
 }
 
 const struct drm_ioctl_desc savage_ioctls[] = {
diff --git a/drivers/gpu/drm/savage/savage_drv.c b/drivers/gpu/drm/savage/savage_drv.c
index 3c030216e888..1b09d2182037 100644
--- a/drivers/gpu/drm/savage/savage_drv.c
+++ b/drivers/gpu/drm/savage/savage_drv.c
@@ -57,6 +57,7 @@ static struct drm_driver driver = {
 	.preclose = savage_reclaim_buffers,
 	.lastclose = savage_driver_lastclose,
 	.unload = savage_driver_unload,
+	.set_busid = drm_pci_set_busid,
 	.ioctls = savage_ioctls,
 	.dma_ioctl = savage_bci_buffers,
 	.fops = &savage_driver_fops,
diff --git a/drivers/gpu/drm/savage/savage_drv.h b/drivers/gpu/drm/savage/savage_drv.h
index 335f8fcf1041..37b699571ad0 100644
--- a/drivers/gpu/drm/savage/savage_drv.h
+++ b/drivers/gpu/drm/savage/savage_drv.h
@@ -26,6 +26,8 @@
 #ifndef __SAVAGE_DRV_H__
 #define __SAVAGE_DRV_H__
 
+#include <drm/drm_legacy.h>
+
 #define DRIVER_AUTHOR	"Felix Kuehling"
 
 #define DRIVER_NAME	"savage"
diff --git a/drivers/gpu/drm/shmobile/shmob_drm_drv.c b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
index ff4ba483b602..873d12f851bf 100644
--- a/drivers/gpu/drm/shmobile/shmob_drm_drv.c
+++ b/drivers/gpu/drm/shmobile/shmob_drm_drv.c
@@ -267,6 +267,7 @@ static struct drm_driver shmob_drm_driver = {
 	.load			= shmob_drm_load,
 	.unload			= shmob_drm_unload,
 	.preclose		= shmob_drm_preclose,
+	.set_busid		= drm_platform_set_busid,
 	.irq_handler		= shmob_drm_irq,
 	.get_vblank_counter	= drm_vblank_count,
 	.enable_vblank		= shmob_drm_enable_vblank,
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index 756f787b7143..54858e6fedaf 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -108,6 +108,7 @@ static struct drm_driver driver = {
 	.open = sis_driver_open,
 	.preclose = sis_reclaim_buffers_locked,
 	.postclose = sis_driver_postclose,
+	.set_busid = drm_pci_set_busid,
 	.dma_quiescent = sis_idle,
 	.lastclose = sis_lastclose,
 	.ioctls = sis_ioctls,
diff --git a/drivers/gpu/drm/sis/sis_drv.h b/drivers/gpu/drm/sis/sis_drv.h
index c31c0253054d..16f972b2a76a 100644
--- a/drivers/gpu/drm/sis/sis_drv.h
+++ b/drivers/gpu/drm/sis/sis_drv.h
@@ -28,6 +28,8 @@
 #ifndef _SIS_DRV_H_
 #define _SIS_DRV_H_
 
+#include <drm/drm_legacy.h>
+
 /* General customization:
  */
 
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 77f288e4a0a6..93ad8a5704d1 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -319,12 +319,12 @@ void sis_reclaim_buffers_locked(struct drm_device *dev,
 	if (!(file->minor->master && file->master->lock.hw_lock))
 		return;
 
-	drm_idlelock_take(&file->master->lock);
+	drm_legacy_idlelock_take(&file->master->lock);
 
 	mutex_lock(&dev->struct_mutex);
 	if (list_empty(&file_priv->obj_list)) {
 		mutex_unlock(&dev->struct_mutex);
-		drm_idlelock_release(&file->master->lock);
+		drm_legacy_idlelock_release(&file->master->lock);
 
 		return;
 	}
@@ -345,7 +345,7 @@ void sis_reclaim_buffers_locked(struct drm_device *dev,
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-	drm_idlelock_release(&file->master->lock);
+	drm_legacy_idlelock_release(&file->master->lock);
 
 	return;
 }
diff --git a/drivers/gpu/drm/sti/sti_vtac.c b/drivers/gpu/drm/sti/sti_vtac.c
index 82a51d488434..97bcdac23ae1 100644
--- a/drivers/gpu/drm/sti/sti_vtac.c
+++ b/drivers/gpu/drm/sti/sti_vtac.c
@@ -56,8 +56,16 @@ struct sti_vtac_mode {
 	u32 phyts_per_pixel;
 };
 
-static const struct sti_vtac_mode vtac_mode_main = {0x2, 0x2, VTAC_5_PPP};
-static const struct sti_vtac_mode vtac_mode_aux = {0x1, 0x0, VTAC_17_PPP};
+static const struct sti_vtac_mode vtac_mode_main = {
+	.vid_in_width = 0x2,
+	.phyts_width = 0x2,
+	.phyts_per_pixel = VTAC_5_PPP,
+};
+static const struct sti_vtac_mode vtac_mode_aux = {
+	.vid_in_width = 0x1,
+	.phyts_width = 0x0,
+	.phyts_per_pixel = VTAC_17_PPP,
+};
 
 /**
  * VTAC structure
diff --git a/drivers/gpu/drm/tdfx/tdfx_drv.c b/drivers/gpu/drm/tdfx/tdfx_drv.c
index 3492ca5c46d3..df533ff999a4 100644
--- a/drivers/gpu/drm/tdfx/tdfx_drv.c
+++ b/drivers/gpu/drm/tdfx/tdfx_drv.c
@@ -55,6 +55,7 @@ static const struct file_operations tdfx_driver_fops = {
 };
 
 static struct drm_driver driver = {
+	.set_busid = drm_pci_set_busid,
 	.fops = &tdfx_driver_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 6be623b4a86f..aea4b7663934 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -502,6 +502,7 @@ static struct drm_driver tilcdc_driver = {
 	.unload             = tilcdc_unload,
 	.preclose           = tilcdc_preclose,
 	.lastclose          = tilcdc_lastclose,
+	.set_busid          = drm_platform_set_busid,
 	.irq_handler        = tilcdc_irq,
 	.irq_preinstall     = tilcdc_irq_preinstall,
 	.irq_postinstall    = tilcdc_irq_postinstall,
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 3da89d5dab60..a11969acfea5 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -40,6 +40,7 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/atomic.h>
+#include <linux/reservation.h>
 
 #define TTM_ASSERT_LOCKED(param)
 #define TTM_DEBUG(fmt, arg...)
@@ -53,12 +54,13 @@ static struct attribute ttm_bo_count = {
 	.mode = S_IRUGO
 };
 
-static inline int ttm_mem_type_from_flags(uint32_t flags, uint32_t *mem_type)
+static inline int ttm_mem_type_from_place(const struct ttm_place *place,
+					  uint32_t *mem_type)
 {
 	int i;
 
 	for (i = 0; i <= TTM_PL_PRIV5; i++)
-		if (flags & (1 << i)) {
+		if (place->flags & (1 << i)) {
 			*mem_type = i;
 			return 0;
 		}
@@ -89,12 +91,12 @@ static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 	       bo, bo->mem.num_pages, bo->mem.size >> 10,
 	       bo->mem.size >> 20);
 	for (i = 0; i < placement->num_placement; i++) {
-		ret = ttm_mem_type_from_flags(placement->placement[i],
+		ret = ttm_mem_type_from_place(&placement->placement[i],
 						&mem_type);
 		if (ret)
 			return;
 		pr_err("  placement[%d]=0x%08X (%d)\n",
-		       i, placement->placement[i], mem_type);
+		       i, placement->placement[i].flags, mem_type);
 		ttm_mem_type_debug(bo->bdev, mem_type);
 	}
 }
@@ -141,7 +143,6 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(atomic_read(&bo->list_kref.refcount));
 	BUG_ON(atomic_read(&bo->kref.refcount));
 	BUG_ON(atomic_read(&bo->cpu_writers));
-	BUG_ON(bo->sync_obj != NULL);
 	BUG_ON(bo->mem.mm_node != NULL);
 	BUG_ON(!list_empty(&bo->lru));
 	BUG_ON(!list_empty(&bo->ddestroy));
@@ -402,36 +403,48 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 	ww_mutex_unlock (&bo->resv->lock);
 }
 
+static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo)
+{
+	struct reservation_object_list *fobj;
+	struct fence *fence;
+	int i;
+
+	fobj = reservation_object_get_list(bo->resv);
+	fence = reservation_object_get_excl(bo->resv);
+	if (fence && !fence->ops->signaled)
+		fence_enable_sw_signaling(fence);
+
+	for (i = 0; fobj && i < fobj->shared_count; ++i) {
+		fence = rcu_dereference_protected(fobj->shared[i],
+					reservation_object_held(bo->resv));
+
+		if (!fence->ops->signaled)
+			fence_enable_sw_signaling(fence);
+	}
+}
+
 static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
-	struct ttm_bo_driver *driver = bdev->driver;
-	void *sync_obj = NULL;
 	int put_count;
 	int ret;
 
 	spin_lock(&glob->lru_lock);
 	ret = __ttm_bo_reserve(bo, false, true, false, NULL);
 
-	spin_lock(&bdev->fence_lock);
-	(void) ttm_bo_wait(bo, false, false, true);
-	if (!ret && !bo->sync_obj) {
-		spin_unlock(&bdev->fence_lock);
-		put_count = ttm_bo_del_from_lru(bo);
-
-		spin_unlock(&glob->lru_lock);
-		ttm_bo_cleanup_memtype_use(bo);
+	if (!ret) {
+		if (!ttm_bo_wait(bo, false, false, true)) {
+			put_count = ttm_bo_del_from_lru(bo);
 
-		ttm_bo_list_ref_sub(bo, put_count, true);
+			spin_unlock(&glob->lru_lock);
+			ttm_bo_cleanup_memtype_use(bo);
 
-		return;
-	}
-	if (bo->sync_obj)
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-	spin_unlock(&bdev->fence_lock);
+			ttm_bo_list_ref_sub(bo, put_count, true);
 
-	if (!ret) {
+			return;
+		} else
+			ttm_bo_flush_all_fences(bo);
 
 		/*
 		 * Make NO_EVICT bos immediately available to
@@ -450,10 +463,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 	list_add_tail(&bo->ddestroy, &bdev->ddestroy);
 	spin_unlock(&glob->lru_lock);
 
-	if (sync_obj) {
-		driver->sync_obj_flush(sync_obj);
-		driver->sync_obj_unref(&sync_obj);
-	}
 	schedule_delayed_work(&bdev->wq,
 			      ((HZ / 100) < 1) ? 1 : HZ / 100);
 }
@@ -474,44 +483,26 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 					  bool interruptible,
 					  bool no_wait_gpu)
 {
-	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_bo_global *glob = bo->glob;
 	int put_count;
 	int ret;
 
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, true);
 
 	if (ret && !no_wait_gpu) {
-		void *sync_obj;
-
-		/*
-		 * Take a reference to the fence and unreserve,
-		 * at this point the buffer should be dead, so
-		 * no new sync objects can be attached.
-		 */
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-		spin_unlock(&bdev->fence_lock);
-
-		__ttm_bo_unreserve(bo);
+		long lret;
+		ww_mutex_unlock(&bo->resv->lock);
 		spin_unlock(&glob->lru_lock);
 
-		ret = driver->sync_obj_wait(sync_obj, false, interruptible);
-		driver->sync_obj_unref(&sync_obj);
-		if (ret)
-			return ret;
+		lret = reservation_object_wait_timeout_rcu(bo->resv,
+							   true,
+							   interruptible,
+							   30 * HZ);
 
-		/*
-		 * remove sync_obj with ttm_bo_wait, the wait should be
-		 * finished, and no new wait object should have been added.
-		 */
-		spin_lock(&bdev->fence_lock);
-		ret = ttm_bo_wait(bo, false, false, true);
-		WARN_ON(ret);
-		spin_unlock(&bdev->fence_lock);
-		if (ret)
-			return ret;
+		if (lret < 0)
+			return lret;
+		else if (lret == 0)
+			return -EBUSY;
 
 		spin_lock(&glob->lru_lock);
 		ret = __ttm_bo_reserve(bo, false, true, false, NULL);
@@ -528,8 +519,14 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 			spin_unlock(&glob->lru_lock);
 			return 0;
 		}
-	} else
-		spin_unlock(&bdev->fence_lock);
+
+		/*
+		 * remove sync_obj with ttm_bo_wait, the wait should be
+		 * finished, and no new wait object should have been added.
+		 */
+		ret = ttm_bo_wait(bo, false, false, true);
+		WARN_ON(ret);
+	}
 
 	if (ret || unlikely(list_empty(&bo->ddestroy))) {
 		__ttm_bo_unreserve(bo);
@@ -667,9 +664,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
 
 	if (unlikely(ret != 0)) {
 		if (ret != -ERESTARTSYS) {
@@ -685,8 +680,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	evict_mem.bus.io_reserved_vm = false;
 	evict_mem.bus.io_reserved_count = 0;
 
-	placement.fpfn = 0;
-	placement.lpfn = 0;
 	placement.num_placement = 0;
 	placement.num_busy_placement = 0;
 	bdev->driver->evict_flags(bo, &placement);
@@ -774,7 +767,7 @@ EXPORT_SYMBOL(ttm_bo_mem_put);
  */
 static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 					uint32_t mem_type,
-					struct ttm_placement *placement,
+					const struct ttm_place *place,
 					struct ttm_mem_reg *mem,
 					bool interruptible,
 					bool no_wait_gpu)
@@ -784,7 +777,7 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 	int ret;
 
 	do {
-		ret = (*man->func->get_node)(man, bo, placement, 0, mem);
+		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret != 0))
 			return ret;
 		if (mem->mm_node)
@@ -827,18 +820,18 @@ static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
 
 static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man,
 				 uint32_t mem_type,
-				 uint32_t proposed_placement,
+				 const struct ttm_place *place,
 				 uint32_t *masked_placement)
 {
 	uint32_t cur_flags = ttm_bo_type_flags(mem_type);
 
-	if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0)
+	if ((cur_flags & place->flags & TTM_PL_MASK_MEM) == 0)
 		return false;
 
-	if ((proposed_placement & man->available_caching) == 0)
+	if ((place->flags & man->available_caching) == 0)
 		return false;
 
-	cur_flags |= (proposed_placement & man->available_caching);
+	cur_flags |= (place->flags & man->available_caching);
 
 	*masked_placement = cur_flags;
 	return true;
@@ -869,15 +862,14 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
-		ret = ttm_mem_type_from_flags(placement->placement[i],
-						&mem_type);
+		const struct ttm_place *place = &placement->placement[i];
+
+		ret = ttm_mem_type_from_place(place, &mem_type);
 		if (ret)
 			return ret;
 		man = &bdev->man[mem_type];
 
-		type_ok = ttm_bo_mt_compatible(man,
-						mem_type,
-						placement->placement[i],
+		type_ok = ttm_bo_mt_compatible(man, mem_type, place,
 						&cur_flags);
 
 		if (!type_ok)
@@ -889,7 +881,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		 * Use the access and other non-mapping-related flag bits from
 		 * the memory placement flags to the current flags
 		 */
-		ttm_flag_masked(&cur_flags, placement->placement[i],
+		ttm_flag_masked(&cur_flags, place->flags,
 				~TTM_PL_MASK_MEMTYPE);
 
 		if (mem_type == TTM_PL_SYSTEM)
@@ -897,8 +889,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 
 		if (man->has_type && man->use_type) {
 			type_found = true;
-			ret = (*man->func->get_node)(man, bo, placement,
-						     cur_flags, mem);
+			ret = (*man->func->get_node)(man, bo, place, mem);
 			if (unlikely(ret))
 				return ret;
 		}
@@ -916,17 +907,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		return -EINVAL;
 
 	for (i = 0; i < placement->num_busy_placement; ++i) {
-		ret = ttm_mem_type_from_flags(placement->busy_placement[i],
-						&mem_type);
+		const struct ttm_place *place = &placement->busy_placement[i];
+
+		ret = ttm_mem_type_from_place(place, &mem_type);
 		if (ret)
 			return ret;
 		man = &bdev->man[mem_type];
 		if (!man->has_type)
 			continue;
-		if (!ttm_bo_mt_compatible(man,
-						mem_type,
-						placement->busy_placement[i],
-						&cur_flags))
+		if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags))
 			continue;
 
 		cur_flags = ttm_bo_select_caching(man, bo->mem.placement,
@@ -935,7 +924,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		 * Use the access and other non-mapping-related flag bits from
 		 * the memory placement flags to the current flags
 		 */
-		ttm_flag_masked(&cur_flags, placement->busy_placement[i],
+		ttm_flag_masked(&cur_flags, place->flags,
 				~TTM_PL_MASK_MEMTYPE);
 
 		if (mem_type == TTM_PL_SYSTEM) {
@@ -945,7 +934,7 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 			return 0;
 		}
 
-		ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem,
+		ret = ttm_bo_mem_force_space(bo, mem_type, place, mem,
 						interruptible, no_wait_gpu);
 		if (ret == 0 && mem->mm_node) {
 			mem->placement = cur_flags;
@@ -966,7 +955,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 	struct ttm_mem_reg mem;
-	struct ttm_bo_device *bdev = bo->bdev;
 
 	lockdep_assert_held(&bo->resv->lock.base);
 
@@ -975,9 +963,7 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 	 * Have the driver move function wait for idle when necessary,
 	 * instead of doing it here.
 	 */
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
-	spin_unlock(&bdev->fence_lock);
 	if (ret)
 		return ret;
 	mem.num_pages = bo->num_pages;
@@ -1006,20 +992,27 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 {
 	int i;
 
-	if (mem->mm_node && placement->lpfn != 0 &&
-	    (mem->start < placement->fpfn ||
-	     mem->start + mem->num_pages > placement->lpfn))
-		return false;
-
 	for (i = 0; i < placement->num_placement; i++) {
-		*new_flags = placement->placement[i];
+		const struct ttm_place *heap = &placement->placement[i];
+		if (mem->mm_node && heap->lpfn != 0 &&
+		    (mem->start < heap->fpfn ||
+		     mem->start + mem->num_pages > heap->lpfn))
+			continue;
+
+		*new_flags = heap->flags;
 		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
 		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
 			return true;
 	}
 
 	for (i = 0; i < placement->num_busy_placement; i++) {
-		*new_flags = placement->busy_placement[i];
+		const struct ttm_place *heap = &placement->busy_placement[i];
+		if (mem->mm_node && heap->lpfn != 0 &&
+		    (mem->start < heap->fpfn ||
+		     mem->start + mem->num_pages > heap->lpfn))
+			continue;
+
+		*new_flags = heap->flags;
 		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
 		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
 			return true;
@@ -1037,11 +1030,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 	uint32_t new_flags;
 
 	lockdep_assert_held(&bo->resv->lock.base);
-	/* Check that range is valid */
-	if (placement->lpfn || placement->fpfn)
-		if (placement->fpfn > placement->lpfn ||
-			(placement->lpfn - placement->fpfn) < bo->num_pages)
-			return -EINVAL;
 	/*
 	 * Check whether we need to move buffer.
 	 */
@@ -1070,15 +1058,6 @@ int ttm_bo_validate(struct ttm_buffer_object *bo,
 }
 EXPORT_SYMBOL(ttm_bo_validate);
 
-int ttm_bo_check_placement(struct ttm_buffer_object *bo,
-				struct ttm_placement *placement)
-{
-	BUG_ON((placement->fpfn || placement->lpfn) &&
-	       (bo->mem.num_pages > (placement->lpfn - placement->fpfn)));
-
-	return 0;
-}
-
 int ttm_bo_init(struct ttm_bo_device *bdev,
 		struct ttm_buffer_object *bo,
 		unsigned long size,
@@ -1147,15 +1126,12 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	atomic_inc(&bo->glob->bo_count);
 	drm_vma_node_reset(&bo->vma_node);
 
-	ret = ttm_bo_check_placement(bo, placement);
-
 	/*
 	 * For ttm_bo_type_device buffers, allocate
 	 * address space from the device.
 	 */
-	if (likely(!ret) &&
-	    (bo->type == ttm_bo_type_device ||
-	     bo->type == ttm_bo_type_sg))
+	if (bo->type == ttm_bo_type_device ||
+	    bo->type == ttm_bo_type_sg)
 		ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node,
 					 bo->mem.num_pages);
 
@@ -1477,7 +1453,6 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev,
 	bdev->glob = glob;
 	bdev->need_dma32 = need_dma32;
 	bdev->val_seq = 0;
-	spin_lock_init(&bdev->fence_lock);
 	mutex_lock(&glob->device_list_mutex);
 	list_add_tail(&bdev->device_list, &glob->device_list);
 	mutex_unlock(&glob->device_list_mutex);
@@ -1530,65 +1505,56 @@ void ttm_bo_unmap_virtual(struct ttm_buffer_object *bo)
 
 EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 
-
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool lazy, bool interruptible, bool no_wait)
 {
-	struct ttm_bo_driver *driver = bo->bdev->driver;
-	struct ttm_bo_device *bdev = bo->bdev;
-	void *sync_obj;
-	int ret = 0;
-
-	if (likely(bo->sync_obj == NULL))
-		return 0;
+	struct reservation_object_list *fobj;
+	struct reservation_object *resv;
+	struct fence *excl;
+	long timeout = 15 * HZ;
+	int i;
 
-	while (bo->sync_obj) {
+	resv = bo->resv;
+	fobj = reservation_object_get_list(resv);
+	excl = reservation_object_get_excl(resv);
+	if (excl) {
+		if (!fence_is_signaled(excl)) {
+			if (no_wait)
+				return -EBUSY;
 
-		if (driver->sync_obj_signaled(bo->sync_obj)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
-			clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
-			continue;
+			timeout = fence_wait_timeout(excl,
+						     interruptible, timeout);
 		}
+	}
 
-		if (no_wait)
-			return -EBUSY;
+	for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
+		struct fence *fence;
+		fence = rcu_dereference_protected(fobj->shared[i],
+						reservation_object_held(resv));
 
-		sync_obj = driver->sync_obj_ref(bo->sync_obj);
-		spin_unlock(&bdev->fence_lock);
-		ret = driver->sync_obj_wait(sync_obj,
-					    lazy, interruptible);
-		if (unlikely(ret != 0)) {
-			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bdev->fence_lock);
-			return ret;
-		}
-		spin_lock(&bdev->fence_lock);
-		if (likely(bo->sync_obj == sync_obj)) {
-			void *tmp_obj = bo->sync_obj;
-			bo->sync_obj = NULL;
-			clear_bit(TTM_BO_PRIV_FLAG_MOVING,
-				  &bo->priv_flags);
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&sync_obj);
-			driver->sync_obj_unref(&tmp_obj);
-			spin_lock(&bdev->fence_lock);
-		} else {
-			spin_unlock(&bdev->fence_lock);
-			driver->sync_obj_unref(&sync_obj);
-			spin_lock(&bdev->fence_lock);
+		if (!fence_is_signaled(fence)) {
+			if (no_wait)
+				return -EBUSY;
+
+			timeout = fence_wait_timeout(fence,
+						     interruptible, timeout);
 		}
 	}
+
+	if (timeout < 0)
+		return timeout;
+
+	if (timeout == 0)
+		return -EBUSY;
+
+	reservation_object_add_excl_fence(resv, NULL);
+	clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_wait);
 
 int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 {
-	struct ttm_bo_device *bdev = bo->bdev;
 	int ret = 0;
 
 	/*
@@ -1598,9 +1564,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
 	ret = ttm_bo_reserve(bo, true, no_wait, false, NULL);
 	if (unlikely(ret != 0))
 		return ret;
-	spin_lock(&bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, true, no_wait);
-	spin_unlock(&bdev->fence_lock);
 	if (likely(ret == 0))
 		atomic_inc(&bo->cpu_writers);
 	ttm_bo_unreserve(bo);
@@ -1657,9 +1621,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	 * Wait for GPU, then move to system cached.
 	 */
 
-	spin_lock(&bo->bdev->fence_lock);
 	ret = ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bo->bdev->fence_lock);
 
 	if (unlikely(ret != 0))
 		goto out;
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 9e103a4875c8..964387fc5c8f 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -49,8 +49,7 @@ struct ttm_range_manager {
 
 static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 			       struct ttm_buffer_object *bo,
-			       struct ttm_placement *placement,
-			       uint32_t flags,
+			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
 	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
@@ -60,7 +59,7 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	unsigned long lpfn;
 	int ret;
 
-	lpfn = placement->lpfn;
+	lpfn = place->lpfn;
 	if (!lpfn)
 		lpfn = man->size;
 
@@ -68,13 +67,13 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	if (!node)
 		return -ENOMEM;
 
-	if (flags & TTM_PL_FLAG_TOPDOWN)
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		aflags = DRM_MM_CREATE_TOP;
 
 	spin_lock(&rman->lock);
 	ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
 					  mem->page_alignment, 0,
-					  placement->fpfn, lpfn,
+					  place->fpfn, lpfn,
 					  DRM_MM_SEARCH_BEST,
 					  aflags);
 	spin_unlock(&rman->lock);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index 30e5d90cb7bc..824af90cbe31 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/module.h>
+#include <linux/reservation.h>
 
 void ttm_bo_free_old_node(struct ttm_buffer_object *bo)
 {
@@ -444,8 +445,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 				      struct ttm_buffer_object **new_obj)
 {
 	struct ttm_buffer_object *fbo;
-	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_bo_driver *driver = bdev->driver;
 	int ret;
 
 	fbo = kmalloc(sizeof(*fbo), GFP_KERNEL);
@@ -466,12 +465,6 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
 
-	spin_lock(&bdev->fence_lock);
-	if (bo->sync_obj)
-		fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
-	else
-		fbo->sync_obj = NULL;
-	spin_unlock(&bdev->fence_lock);
 	kref_init(&fbo->list_kref);
 	kref_init(&fbo->kref);
 	fbo->destroy = &ttm_transfered_destroy;
@@ -644,30 +637,20 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map)
 EXPORT_SYMBOL(ttm_bo_kunmap);
 
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
-			      void *sync_obj,
+			      struct fence *fence,
 			      bool evict,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_bo_driver *driver = bdev->driver;
 	struct ttm_mem_type_manager *man = &bdev->man[new_mem->mem_type];
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int ret;
 	struct ttm_buffer_object *ghost_obj;
-	void *tmp_obj = NULL;
 
-	spin_lock(&bdev->fence_lock);
-	if (bo->sync_obj) {
-		tmp_obj = bo->sync_obj;
-		bo->sync_obj = NULL;
-	}
-	bo->sync_obj = driver->sync_obj_ref(sync_obj);
+	reservation_object_add_excl_fence(bo->resv, fence);
 	if (evict) {
 		ret = ttm_bo_wait(bo, false, false, false);
-		spin_unlock(&bdev->fence_lock);
-		if (tmp_obj)
-			driver->sync_obj_unref(&tmp_obj);
 		if (ret)
 			return ret;
 
@@ -688,14 +671,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 */
 
 		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
-		spin_unlock(&bdev->fence_lock);
-		if (tmp_obj)
-			driver->sync_obj_unref(&tmp_obj);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
 			return ret;
 
+		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
 		/**
 		 * If we're not moving to fixed memory, the TTM object
 		 * needs to stay alive. Otherwhise hang it on the ghost
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 0ce48e5a9cb4..d05437f219e9 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -45,10 +45,8 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 				struct vm_area_struct *vma,
 				struct vm_fault *vmf)
 {
-	struct ttm_bo_device *bdev = bo->bdev;
 	int ret = 0;
 
-	spin_lock(&bdev->fence_lock);
 	if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
 		goto out_unlock;
 
@@ -82,7 +80,6 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 			VM_FAULT_NOPAGE;
 
 out_unlock:
-	spin_unlock(&bdev->fence_lock);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index e8dac8758528..8ce508e76208 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -32,20 +32,12 @@
 #include <linux/sched.h>
 #include <linux/module.h>
 
-static void ttm_eu_backoff_reservation_locked(struct list_head *list)
+static void ttm_eu_backoff_reservation_reverse(struct list_head *list,
+					      struct ttm_validate_buffer *entry)
 {
-	struct ttm_validate_buffer *entry;
-
-	list_for_each_entry(entry, list, head) {
+	list_for_each_entry_continue_reverse(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
-		if (!entry->reserved)
-			continue;
 
-		entry->reserved = false;
-		if (entry->removed) {
-			ttm_bo_add_to_lru(bo);
-			entry->removed = false;
-		}
 		__ttm_bo_unreserve(bo);
 	}
 }
@@ -56,27 +48,9 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list)
 
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
-		if (!entry->reserved)
-			continue;
-
-		if (!entry->removed) {
-			entry->put_count = ttm_bo_del_from_lru(bo);
-			entry->removed = true;
-		}
-	}
-}
-
-static void ttm_eu_list_ref_sub(struct list_head *list)
-{
-	struct ttm_validate_buffer *entry;
-
-	list_for_each_entry(entry, list, head) {
-		struct ttm_buffer_object *bo = entry->bo;
+		unsigned put_count = ttm_bo_del_from_lru(bo);
 
-		if (entry->put_count) {
-			ttm_bo_list_ref_sub(bo, entry->put_count, true);
-			entry->put_count = 0;
-		}
+		ttm_bo_list_ref_sub(bo, put_count, true);
 	}
 }
 
@@ -91,11 +65,18 @@ void ttm_eu_backoff_reservation(struct ww_acquire_ctx *ticket,
 
 	entry = list_first_entry(list, struct ttm_validate_buffer, head);
 	glob = entry->bo->glob;
+
 	spin_lock(&glob->lru_lock);
-	ttm_eu_backoff_reservation_locked(list);
+	list_for_each_entry(entry, list, head) {
+		struct ttm_buffer_object *bo = entry->bo;
+
+		ttm_bo_add_to_lru(bo);
+		__ttm_bo_unreserve(bo);
+	}
+	spin_unlock(&glob->lru_lock);
+
 	if (ticket)
 		ww_acquire_fini(ticket);
-	spin_unlock(&glob->lru_lock);
 }
 EXPORT_SYMBOL(ttm_eu_backoff_reservation);
 
@@ -112,7 +93,7 @@ EXPORT_SYMBOL(ttm_eu_backoff_reservation);
  */
 
 int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
-			   struct list_head *list)
+			   struct list_head *list, bool intr)
 {
 	struct ttm_bo_global *glob;
 	struct ttm_validate_buffer *entry;
@@ -121,60 +102,64 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 	if (list_empty(list))
 		return 0;
 
-	list_for_each_entry(entry, list, head) {
-		entry->reserved = false;
-		entry->put_count = 0;
-		entry->removed = false;
-	}
-
 	entry = list_first_entry(list, struct ttm_validate_buffer, head);
 	glob = entry->bo->glob;
 
 	if (ticket)
 		ww_acquire_init(ticket, &reservation_ww_class);
-retry:
+
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
 
-		/* already slowpath reserved? */
-		if (entry->reserved)
-			continue;
-
-		ret = __ttm_bo_reserve(bo, true, (ticket == NULL), true,
+		ret = __ttm_bo_reserve(bo, intr, (ticket == NULL), true,
 				       ticket);
+		if (!ret && unlikely(atomic_read(&bo->cpu_writers) > 0)) {
+			__ttm_bo_unreserve(bo);
+
+			ret = -EBUSY;
+		}
 
-		if (ret == -EDEADLK) {
-			/* uh oh, we lost out, drop every reservation and try
-			 * to only reserve this buffer, then start over if
-			 * this succeeds.
-			 */
-			BUG_ON(ticket == NULL);
-			spin_lock(&glob->lru_lock);
-			ttm_eu_backoff_reservation_locked(list);
-			spin_unlock(&glob->lru_lock);
-			ttm_eu_list_ref_sub(list);
+		if (!ret) {
+			if (!entry->shared)
+				continue;
+
+			ret = reservation_object_reserve_shared(bo->resv);
+			if (!ret)
+				continue;
+		}
+
+		/* uh oh, we lost out, drop every reservation and try
+		 * to only reserve this buffer, then start over if
+		 * this succeeds.
+		 */
+		ttm_eu_backoff_reservation_reverse(list, entry);
+
+		if (ret == -EDEADLK && intr) {
 			ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
 							       ticket);
-			if (unlikely(ret != 0)) {
-				if (ret == -EINTR)
-					ret = -ERESTARTSYS;
-				goto err_fini;
-			}
+		} else if (ret == -EDEADLK) {
+			ww_mutex_lock_slow(&bo->resv->lock, ticket);
+			ret = 0;
+		}
 
-			entry->reserved = true;
-			if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
-				ret = -EBUSY;
-				goto err;
-			}
-			goto retry;
-		} else if (ret)
-			goto err;
+		if (!ret && entry->shared)
+			ret = reservation_object_reserve_shared(bo->resv);
 
-		entry->reserved = true;
-		if (unlikely(atomic_read(&bo->cpu_writers) > 0)) {
-			ret = -EBUSY;
-			goto err;
+		if (unlikely(ret != 0)) {
+			if (ret == -EINTR)
+				ret = -ERESTARTSYS;
+			if (ticket) {
+				ww_acquire_done(ticket);
+				ww_acquire_fini(ticket);
+			}
+			return ret;
 		}
+
+		/* move this item to the front of the list,
+		 * forces correct iteration of the loop without keeping track
+		 */
+		list_del(&entry->head);
+		list_add(&entry->head, list);
 	}
 
 	if (ticket)
@@ -182,25 +167,12 @@ retry:
 	spin_lock(&glob->lru_lock);
 	ttm_eu_del_from_lru_locked(list);
 	spin_unlock(&glob->lru_lock);
-	ttm_eu_list_ref_sub(list);
 	return 0;
-
-err:
-	spin_lock(&glob->lru_lock);
-	ttm_eu_backoff_reservation_locked(list);
-	spin_unlock(&glob->lru_lock);
-	ttm_eu_list_ref_sub(list);
-err_fini:
-	if (ticket) {
-		ww_acquire_done(ticket);
-		ww_acquire_fini(ticket);
-	}
-	return ret;
 }
 EXPORT_SYMBOL(ttm_eu_reserve_buffers);
 
 void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
-				 struct list_head *list, void *sync_obj)
+				 struct list_head *list, struct fence *fence)
 {
 	struct ttm_validate_buffer *entry;
 	struct ttm_buffer_object *bo;
@@ -217,24 +189,18 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 	glob = bo->glob;
 
 	spin_lock(&glob->lru_lock);
-	spin_lock(&bdev->fence_lock);
 
 	list_for_each_entry(entry, list, head) {
 		bo = entry->bo;
-		entry->old_sync_obj = bo->sync_obj;
-		bo->sync_obj = driver->sync_obj_ref(sync_obj);
+		if (entry->shared)
+			reservation_object_add_shared_fence(bo->resv, fence);
+		else
+			reservation_object_add_excl_fence(bo->resv, fence);
 		ttm_bo_add_to_lru(bo);
 		__ttm_bo_unreserve(bo);
-		entry->reserved = false;
 	}
-	spin_unlock(&bdev->fence_lock);
 	spin_unlock(&glob->lru_lock);
 	if (ticket)
 		ww_acquire_fini(ticket);
-
-	list_for_each_entry(entry, list, head) {
-		if (entry->old_sync_obj)
-			driver->sync_obj_unref(&entry->old_sync_obj);
-	}
 }
 EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig
index f02528686cd5..613ab0622d6e 100644
--- a/drivers/gpu/drm/udl/Kconfig
+++ b/drivers/gpu/drm/udl/Kconfig
@@ -1,8 +1,9 @@
 config DRM_UDL
 	tristate "DisplayLink"
 	depends on DRM
+	depends on USB_SUPPORT
 	depends on USB_ARCH_HAS_HCD
-	select DRM_USB
+	select USB
 	select FB_SYS_FILLRECT
 	select FB_SYS_COPYAREA
 	select FB_SYS_IMAGEBLIT
diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c
index e026a9e2942a..0110d95522f3 100644
--- a/drivers/gpu/drm/udl/udl_connector.c
+++ b/drivers/gpu/drm/udl/udl_connector.c
@@ -34,8 +34,8 @@ static u8 *udl_get_edid(struct udl_device *udl)
 		goto error;
 
 	for (i = 0; i < EDID_LENGTH; i++) {
-		ret = usb_control_msg(udl->ddev->usbdev,
-				      usb_rcvctrlpipe(udl->ddev->usbdev, 0), (0x02),
+		ret = usb_control_msg(udl->udev,
+				      usb_rcvctrlpipe(udl->udev, 0), (0x02),
 				      (0x80 | (0x02 << 5)), i << 8, 0xA1, rbuf, 2,
 				      HZ);
 		if (ret < 1) {
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index 3ddd6cd98ac1..8607e9e513db 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -7,48 +7,13 @@
  */
 
 #include <linux/module.h>
-#include <drm/drm_usb.h>
+#include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include "udl_drv.h"
 
-static struct drm_driver driver;
-
-/*
- * There are many DisplayLink-based graphics products, all with unique PIDs.
- * So we match on DisplayLink's VID + Vendor-Defined Interface Class (0xff)
- * We also require a match on SubClass (0x00) and Protocol (0x00),
- * which is compatible with all known USB 2.0 era graphics chips and firmware,
- * but allows DisplayLink to increment those for any future incompatible chips
- */
-static struct usb_device_id id_table[] = {
-	{.idVendor = 0x17e9, .bInterfaceClass = 0xff,
-	 .bInterfaceSubClass = 0x00,
-	 .bInterfaceProtocol = 0x00,
-	 .match_flags = USB_DEVICE_ID_MATCH_VENDOR |
-			USB_DEVICE_ID_MATCH_INT_CLASS |
-			USB_DEVICE_ID_MATCH_INT_SUBCLASS |
-			USB_DEVICE_ID_MATCH_INT_PROTOCOL,},
-	{},
-};
-MODULE_DEVICE_TABLE(usb, id_table);
-
-MODULE_LICENSE("GPL");
-
-static int udl_usb_probe(struct usb_interface *interface,
-			 const struct usb_device_id *id)
+static int udl_driver_set_busid(struct drm_device *d, struct drm_master *m)
 {
-	return drm_get_usb_dev(interface, id, &driver);
-}
-
-static void udl_usb_disconnect(struct usb_interface *interface)
-{
-	struct drm_device *dev = usb_get_intfdata(interface);
-
-	drm_kms_helper_poll_disable(dev);
-	drm_connector_unplug_all(dev);
-	udl_fbdev_unplug(dev);
-	udl_drop_usb(dev);
-	drm_unplug_dev(dev);
+	return 0;
 }
 
 static const struct vm_operations_struct udl_gem_vm_ops = {
@@ -75,6 +40,7 @@ static struct drm_driver driver = {
 	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
 	.load = udl_driver_load,
 	.unload = udl_driver_unload,
+	.set_busid = udl_driver_set_busid,
 
 	/* gem hooks */
 	.gem_free_object = udl_gem_free_object,
@@ -96,6 +62,61 @@ static struct drm_driver driver = {
 	.patchlevel = DRIVER_PATCHLEVEL,
 };
 
+static int udl_usb_probe(struct usb_interface *interface,
+			 const struct usb_device_id *id)
+{
+	struct usb_device *udev = interface_to_usbdev(interface);
+	struct drm_device *dev;
+	int r;
+
+	dev = drm_dev_alloc(&driver, &interface->dev);
+	if (!dev)
+		return -ENOMEM;
+
+	r = drm_dev_register(dev, (unsigned long)udev);
+	if (r)
+		goto err_free;
+
+	usb_set_intfdata(interface, dev);
+	DRM_INFO("Initialized udl on minor %d\n", dev->primary->index);
+
+	return 0;
+
+err_free:
+	drm_dev_unref(dev);
+	return r;
+}
+
+static void udl_usb_disconnect(struct usb_interface *interface)
+{
+	struct drm_device *dev = usb_get_intfdata(interface);
+
+	drm_kms_helper_poll_disable(dev);
+	drm_connector_unplug_all(dev);
+	udl_fbdev_unplug(dev);
+	udl_drop_usb(dev);
+	drm_unplug_dev(dev);
+}
+
+/*
+ * There are many DisplayLink-based graphics products, all with unique PIDs.
+ * So we match on DisplayLink's VID + Vendor-Defined Interface Class (0xff)
+ * We also require a match on SubClass (0x00) and Protocol (0x00),
+ * which is compatible with all known USB 2.0 era graphics chips and firmware,
+ * but allows DisplayLink to increment those for any future incompatible chips
+ */
+static struct usb_device_id id_table[] = {
+	{.idVendor = 0x17e9, .bInterfaceClass = 0xff,
+	 .bInterfaceSubClass = 0x00,
+	 .bInterfaceProtocol = 0x00,
+	 .match_flags = USB_DEVICE_ID_MATCH_VENDOR |
+			USB_DEVICE_ID_MATCH_INT_CLASS |
+			USB_DEVICE_ID_MATCH_INT_SUBCLASS |
+			USB_DEVICE_ID_MATCH_INT_PROTOCOL,},
+	{},
+};
+MODULE_DEVICE_TABLE(usb, id_table);
+
 static struct usb_driver udl_driver = {
 	.name = "udl",
 	.probe = udl_usb_probe,
@@ -105,13 +126,14 @@ static struct usb_driver udl_driver = {
 
 static int __init udl_init(void)
 {
-	return drm_usb_init(&driver, &udl_driver);
+	return usb_register(&udl_driver);
 }
 
 static void __exit udl_exit(void)
 {
-	drm_usb_exit(&driver, &udl_driver);
+	usb_deregister(&udl_driver);
 }
 
 module_init(udl_init);
 module_exit(udl_exit);
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 1fbf7b357f16..51e10ee77f39 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -47,6 +47,7 @@ struct udl_fbdev;
 struct udl_device {
 	struct device *dev;
 	struct drm_device *ddev;
+	struct usb_device *udev;
 
 	int sku_pixel_limit;
 
diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c
index 42795674bc07..33dbfb2c4748 100644
--- a/drivers/gpu/drm/udl/udl_main.c
+++ b/drivers/gpu/drm/udl/udl_main.c
@@ -202,7 +202,7 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size)
 		}
 		unode->urb = urb;
 
-		buf = usb_alloc_coherent(udl->ddev->usbdev, MAX_TRANSFER, GFP_KERNEL,
+		buf = usb_alloc_coherent(udl->udev, MAX_TRANSFER, GFP_KERNEL,
 					 &urb->transfer_dma);
 		if (!buf) {
 			kfree(unode);
@@ -211,7 +211,7 @@ static int udl_alloc_urb_list(struct drm_device *dev, int count, size_t size)
 		}
 
 		/* urb->transfer_buffer_length set to actual before submit */
-		usb_fill_bulk_urb(urb, udl->ddev->usbdev, usb_sndbulkpipe(udl->ddev->usbdev, 1),
+		usb_fill_bulk_urb(urb, udl->udev, usb_sndbulkpipe(udl->udev, 1),
 			buf, size, udl_urb_completion, unode);
 		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 
@@ -282,6 +282,7 @@ int udl_submit_urb(struct drm_device *dev, struct urb *urb, size_t len)
 
 int udl_driver_load(struct drm_device *dev, unsigned long flags)
 {
+	struct usb_device *udev = (void*)flags;
 	struct udl_device *udl;
 	int ret = -ENOMEM;
 
@@ -290,10 +291,11 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags)
 	if (!udl)
 		return -ENOMEM;
 
+	udl->udev = udev;
 	udl->ddev = dev;
 	dev->dev_private = udl;
 
-	if (!udl_parse_vendor_descriptor(dev, dev->usbdev)) {
+	if (!udl_parse_vendor_descriptor(dev, udl->udev)) {
 		ret = -ENODEV;
 		DRM_ERROR("firmware not recognized. Assume incompatible device\n");
 		goto err;
diff --git a/drivers/gpu/drm/via/via_dma.c b/drivers/gpu/drm/via/via_dma.c
index 6fc0648dd37f..d17d8f245c1a 100644
--- a/drivers/gpu/drm/via/via_dma.c
+++ b/drivers/gpu/drm/via/via_dma.c
@@ -161,7 +161,7 @@ int via_dma_cleanup(struct drm_device *dev)
 		if (dev_priv->ring.virtual_start) {
 			via_cmdbuf_reset(dev_priv);
 
-			drm_core_ioremapfree(&dev_priv->ring.map, dev);
+			drm_legacy_ioremapfree(&dev_priv->ring.map, dev);
 			dev_priv->ring.virtual_start = NULL;
 		}
 
@@ -200,7 +200,7 @@ static int via_initialize(struct drm_device *dev,
 	dev_priv->ring.map.flags = 0;
 	dev_priv->ring.map.mtrr = 0;
 
-	drm_core_ioremap(&dev_priv->ring.map, dev);
+	drm_legacy_ioremap(&dev_priv->ring.map, dev);
 
 	if (dev_priv->ring.map.handle == NULL) {
 		via_dma_cleanup(dev);
diff --git a/drivers/gpu/drm/via/via_drv.c b/drivers/gpu/drm/via/via_drv.c
index 50abc2adfaee..c16ffa63ded6 100644
--- a/drivers/gpu/drm/via/via_drv.c
+++ b/drivers/gpu/drm/via/via_drv.c
@@ -79,6 +79,7 @@ static struct drm_driver driver = {
 	.open = via_driver_open,
 	.preclose = via_reclaim_buffers_locked,
 	.postclose = via_driver_postclose,
+	.set_busid = drm_pci_set_busid,
 	.context_dtor = via_final_context,
 	.get_vblank_counter = via_get_vblank_counter,
 	.enable_vblank = via_enable_vblank,
diff --git a/drivers/gpu/drm/via/via_drv.h b/drivers/gpu/drm/via/via_drv.h
index ad0273256beb..ef8c500b4a00 100644
--- a/drivers/gpu/drm/via/via_drv.h
+++ b/drivers/gpu/drm/via/via_drv.h
@@ -25,6 +25,8 @@
 #define _VIA_DRV_H_
 
 #include <drm/drm_mm.h>
+#include <drm/drm_legacy.h>
+
 #define DRIVER_AUTHOR	"Various"
 
 #define DRIVER_NAME		"via"
diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c
index d0ab3fb32acd..0b3522dba6e8 100644
--- a/drivers/gpu/drm/via/via_map.c
+++ b/drivers/gpu/drm/via/via_map.c
@@ -31,7 +31,7 @@ static int via_do_init_map(struct drm_device *dev, drm_via_init_t *init)
 
 	DRM_DEBUG("\n");
 
-	dev_priv->sarea = drm_getsarea(dev);
+	dev_priv->sarea = drm_legacy_getsarea(dev);
 	if (!dev_priv->sarea) {
 		DRM_ERROR("could not find sarea!\n");
 		dev->dev_private = (void *)dev_priv;
@@ -39,14 +39,14 @@ static int via_do_init_map(struct drm_device *dev, drm_via_init_t *init)
 		return -EINVAL;
 	}
 
-	dev_priv->fb = drm_core_findmap(dev, init->fb_offset);
+	dev_priv->fb = drm_legacy_findmap(dev, init->fb_offset);
 	if (!dev_priv->fb) {
 		DRM_ERROR("could not find framebuffer!\n");
 		dev->dev_private = (void *)dev_priv;
 		via_do_cleanup_map(dev);
 		return -EINVAL;
 	}
-	dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset);
+	dev_priv->mmio = drm_legacy_findmap(dev, init->mmio_offset);
 	if (!dev_priv->mmio) {
 		DRM_ERROR("could not find mmio region!\n");
 		dev->dev_private = (void *)dev_priv;
diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index d70b1e1544bf..4f20742e7788 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c
@@ -211,12 +211,12 @@ void via_reclaim_buffers_locked(struct drm_device *dev,
 	if (!(file->minor->master && file->master->lock.hw_lock))
 		return;
 
-	drm_idlelock_take(&file->master->lock);
+	drm_legacy_idlelock_take(&file->master->lock);
 
 	mutex_lock(&dev->struct_mutex);
 	if (list_empty(&file_priv->obj_list)) {
 		mutex_unlock(&dev->struct_mutex);
-		drm_idlelock_release(&file->master->lock);
+		drm_legacy_idlelock_release(&file->master->lock);
 
 		return;
 	}
@@ -231,7 +231,7 @@ void via_reclaim_buffers_locked(struct drm_device *dev,
 	}
 	mutex_unlock(&dev->struct_mutex);
 
-	drm_idlelock_release(&file->master->lock);
+	drm_legacy_idlelock_release(&file->master->lock);
 
 	return;
 }
diff --git a/drivers/gpu/drm/via/via_verifier.c b/drivers/gpu/drm/via/via_verifier.c
index 9dbc92bd1512..0677bbf4ec7e 100644
--- a/drivers/gpu/drm/via/via_verifier.c
+++ b/drivers/gpu/drm/via/via_verifier.c
@@ -31,6 +31,7 @@
 #include "via_3d_reg.h"
 #include <drm/drmP.h>
 #include <drm/via_drm.h>
+#include <drm/drm_legacy.h>
 #include "via_verifier.h"
 #include "via_drv.h"
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 6327cfc36805..cff2bf9db9d2 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -30,66 +30,101 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_page_alloc.h>
 
-static uint32_t vram_placement_flags = TTM_PL_FLAG_VRAM |
-	TTM_PL_FLAG_CACHED;
-
-static uint32_t vram_ne_placement_flags = TTM_PL_FLAG_VRAM |
-	TTM_PL_FLAG_CACHED |
-	TTM_PL_FLAG_NO_EVICT;
+static struct ttm_place vram_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
 
-static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM |
-	TTM_PL_FLAG_CACHED;
+static struct ttm_place vram_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
 
-static uint32_t sys_ne_placement_flags = TTM_PL_FLAG_SYSTEM |
-	TTM_PL_FLAG_CACHED |
-	TTM_PL_FLAG_NO_EVICT;
+static struct ttm_place sys_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+};
 
-static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR |
-	TTM_PL_FLAG_CACHED;
+static struct ttm_place sys_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
 
-static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR |
-	TTM_PL_FLAG_CACHED |
-	TTM_PL_FLAG_NO_EVICT;
+static struct ttm_place gmr_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
 
-static uint32_t mob_placement_flags = VMW_PL_FLAG_MOB |
-	TTM_PL_FLAG_CACHED;
+static struct ttm_place gmr_ne_placement_flags = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
 
-struct ttm_placement vmw_vram_placement = {
+static struct ttm_place mob_placement_flags = {
 	.fpfn = 0,
 	.lpfn = 0,
+	.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+};
+
+struct ttm_placement vmw_vram_placement = {
 	.num_placement = 1,
 	.placement = &vram_placement_flags,
 	.num_busy_placement = 1,
 	.busy_placement = &vram_placement_flags
 };
 
-static uint32_t vram_gmr_placement_flags[] = {
-	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
-	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+static struct ttm_place vram_gmr_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}
 };
 
-static uint32_t gmr_vram_placement_flags[] = {
-	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
-	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+static struct ttm_place gmr_vram_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}
 };
 
 struct ttm_placement vmw_vram_gmr_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 2,
 	.placement = vram_gmr_placement_flags,
 	.num_busy_placement = 1,
 	.busy_placement = &gmr_placement_flags
 };
 
-static uint32_t vram_gmr_ne_placement_flags[] = {
-	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT,
-	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+static struct ttm_place vram_gmr_ne_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED |
+			 TTM_PL_FLAG_NO_EVICT
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED |
+			 TTM_PL_FLAG_NO_EVICT
+	}
 };
 
 struct ttm_placement vmw_vram_gmr_ne_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 2,
 	.placement = vram_gmr_ne_placement_flags,
 	.num_busy_placement = 1,
@@ -97,8 +132,6 @@ struct ttm_placement vmw_vram_gmr_ne_placement = {
 };
 
 struct ttm_placement vmw_vram_sys_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.placement = &vram_placement_flags,
 	.num_busy_placement = 1,
@@ -106,8 +139,6 @@ struct ttm_placement vmw_vram_sys_placement = {
 };
 
 struct ttm_placement vmw_vram_ne_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.placement = &vram_ne_placement_flags,
 	.num_busy_placement = 1,
@@ -115,8 +146,6 @@ struct ttm_placement vmw_vram_ne_placement = {
 };
 
 struct ttm_placement vmw_sys_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.placement = &sys_placement_flags,
 	.num_busy_placement = 1,
@@ -124,24 +153,33 @@ struct ttm_placement vmw_sys_placement = {
 };
 
 struct ttm_placement vmw_sys_ne_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.placement = &sys_ne_placement_flags,
 	.num_busy_placement = 1,
 	.busy_placement = &sys_ne_placement_flags
 };
 
-static uint32_t evictable_placement_flags[] = {
-	TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
-	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
-	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
-	VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+static struct ttm_place evictable_placement_flags[] = {
+	{
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+	}, {
+		.fpfn = 0,
+		.lpfn = 0,
+		.flags = VMW_PL_FLAG_MOB | TTM_PL_FLAG_CACHED
+	}
 };
 
 struct ttm_placement vmw_evictable_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 4,
 	.placement = evictable_placement_flags,
 	.num_busy_placement = 1,
@@ -149,8 +187,6 @@ struct ttm_placement vmw_evictable_placement = {
 };
 
 struct ttm_placement vmw_srf_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.num_busy_placement = 2,
 	.placement = &gmr_placement_flags,
@@ -158,8 +194,6 @@ struct ttm_placement vmw_srf_placement = {
 };
 
 struct ttm_placement vmw_mob_placement = {
-	.fpfn = 0,
-	.lpfn = 0,
 	.num_placement = 1,
 	.num_busy_placement = 1,
 	.placement = &mob_placement_flags,
@@ -768,44 +802,6 @@ static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 }
 
 /**
- * FIXME: We're using the old vmware polling method to sync.
- * Do this with fences instead.
- */
-
-static void *vmw_sync_obj_ref(void *sync_obj)
-{
-
-	return (void *)
-		vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
-}
-
-static void vmw_sync_obj_unref(void **sync_obj)
-{
-	vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
-}
-
-static int vmw_sync_obj_flush(void *sync_obj)
-{
-	vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
-	return 0;
-}
-
-static bool vmw_sync_obj_signaled(void *sync_obj)
-{
-	return	vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
-				       DRM_VMW_FENCE_FLAG_EXEC);
-
-}
-
-static int vmw_sync_obj_wait(void *sync_obj, bool lazy, bool interruptible)
-{
-	return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
-				  DRM_VMW_FENCE_FLAG_EXEC,
-				  lazy, interruptible,
-				  VMW_FENCE_WAIT_TIMEOUT);
-}
-
-/**
  * vmw_move_notify - TTM move_notify_callback
  *
  * @bo:             The TTM buffer object about to move.
@@ -829,11 +825,7 @@ static void vmw_move_notify(struct ttm_buffer_object *bo,
  */
 static void vmw_swap_notify(struct ttm_buffer_object *bo)
 {
-	struct ttm_bo_device *bdev = bo->bdev;
-
-	spin_lock(&bdev->fence_lock);
 	ttm_bo_wait(bo, false, false, false);
-	spin_unlock(&bdev->fence_lock);
 }
 
 
@@ -846,11 +838,6 @@ struct ttm_bo_driver vmw_bo_driver = {
 	.evict_flags = vmw_evict_flags,
 	.move = NULL,
 	.verify_access = vmw_verify_access,
-	.sync_obj_signaled = vmw_sync_obj_signaled,
-	.sync_obj_wait = vmw_sync_obj_wait,
-	.sync_obj_flush = vmw_sync_obj_flush,
-	.sync_obj_unref = vmw_sync_obj_unref,
-	.sync_obj_ref = vmw_sync_obj_ref,
 	.move_notify = vmw_move_notify,
 	.swap_notify = vmw_swap_notify,
 	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index ed1d51006ab1..914b375763dc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -198,13 +198,19 @@ int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	struct ttm_placement placement;
+	struct ttm_place place;
 	int ret = 0;
 
 	if (pin)
-		placement = vmw_vram_ne_placement;
+		place = vmw_vram_ne_placement.placement[0];
 	else
-		placement = vmw_vram_placement;
-	placement.lpfn = bo->num_pages;
+		place = vmw_vram_placement.placement[0];
+	place.lpfn = bo->num_pages;
+
+	placement.num_placement = 1;
+	placement.placement = &place;
+	placement.num_busy_placement = 1;
+	placement.busy_placement = &place;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -293,21 +299,23 @@ void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
  */
 void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
 {
-	uint32_t pl_flags;
+	struct ttm_place pl;
 	struct ttm_placement placement;
 	uint32_t old_mem_type = bo->mem.mem_type;
 	int ret;
 
 	lockdep_assert_held(&bo->resv->lock.base);
 
-	pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB
+	pl.fpfn = 0;
+	pl.lpfn = 0;
+	pl.flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | VMW_PL_FLAG_MOB
 		| TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED;
 	if (pin)
-		pl_flags |= TTM_PL_FLAG_NO_EVICT;
+		pl.flags |= TTM_PL_FLAG_NO_EVICT;
 
 	memset(&placement, 0, sizeof(placement));
 	placement.num_placement = 1;
-	placement.placement = &pl_flags;
+	placement.placement = &pl;
 
 	ret = ttm_bo_validate(bo, &placement, false, true);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 18b54acacfbb..7197af157313 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -1418,6 +1418,7 @@ static struct drm_driver driver = {
 	.open = vmw_driver_open,
 	.preclose = vmw_preclose,
 	.postclose = vmw_postclose,
+	.set_busid = drm_pci_set_busid,
 
 	.dumb_create = vmw_dumb_create,
 	.dumb_map_offset = vmw_dumb_map_offset,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 99f731757c4b..4ee799b43d5d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -342,7 +342,6 @@ struct vmw_sw_context{
 	uint32_t *cmd_bounce;
 	uint32_t cmd_bounce_size;
 	struct list_head resource_list;
-	uint32_t fence_flags;
 	struct ttm_buffer_object *cur_query_bo;
 	struct list_head res_relocations;
 	uint32_t *buf_start;
@@ -704,6 +703,7 @@ extern void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes);
 extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
 extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
 			       uint32_t *seqno);
+extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 36b871686d3c..596cd6dafd33 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -346,13 +346,11 @@ static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
 		++sw_context->cur_val_buf;
 		val_buf = &vval_buf->base;
 		val_buf->bo = ttm_bo_reference(bo);
-		val_buf->reserved = false;
+		val_buf->shared = false;
 		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
 		vval_buf->validate_as_mob = validate_as_mob;
 	}
 
-	sw_context->fence_flags |= DRM_VMW_FENCE_FLAG_EXEC;
-
 	if (p_val_node)
 		*p_val_node = val_node;
 
@@ -2337,13 +2335,9 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv,
 
 	if (p_handle != NULL)
 		ret = vmw_user_fence_create(file_priv, dev_priv->fman,
-					    sequence,
-					    DRM_VMW_FENCE_FLAG_EXEC,
-					    p_fence, p_handle);
+					    sequence, p_fence, p_handle);
 	else
-		ret = vmw_fence_create(dev_priv->fman, sequence,
-				       DRM_VMW_FENCE_FLAG_EXEC,
-				       p_fence);
+		ret = vmw_fence_create(dev_priv->fman, sequence, p_fence);
 
 	if (unlikely(ret != 0 && !synced)) {
 		(void) vmw_fallback_wait(dev_priv, false, false,
@@ -2395,7 +2389,7 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
 		BUG_ON(fence == NULL);
 
 		fence_rep.handle = fence_handle;
-		fence_rep.seqno = fence->seqno;
+		fence_rep.seqno = fence->base.seqno;
 		vmw_update_seqno(dev_priv, &dev_priv->fifo);
 		fence_rep.passed_seqno = dev_priv->last_read_seqno;
 	}
@@ -2416,8 +2410,7 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
 		ttm_ref_object_base_unref(vmw_fp->tfile,
 					  fence_handle, TTM_REF_USAGE);
 		DRM_ERROR("Fence copy error. Syncing.\n");
-		(void) vmw_fence_obj_wait(fence, fence->signal_mask,
-					  false, false,
+		(void) vmw_fence_obj_wait(fence, false, false,
 					  VMW_FENCE_WAIT_TIMEOUT);
 	}
 }
@@ -2469,7 +2462,6 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	sw_context->fp = vmw_fpriv(file_priv);
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
-	sw_context->fence_flags = 0;
 	INIT_LIST_HEAD(&sw_context->resource_list);
 	sw_context->cur_query_bo = dev_priv->pinned_bo;
 	sw_context->last_query_ctx = NULL;
@@ -2495,7 +2487,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	if (unlikely(ret != 0))
 		goto out_err_nores;
 
-	ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes);
+	ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes, true);
 	if (unlikely(ret != 0))
 		goto out_err;
 
@@ -2678,15 +2670,14 @@ void __vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
 	INIT_LIST_HEAD(&validate_list);
 
 	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+	pinned_val.shared = false;
 	list_add_tail(&pinned_val.head, &validate_list);
 
 	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+	query_val.shared = false;
 	list_add_tail(&query_val.head, &validate_list);
 
-	do {
-		ret = ttm_eu_reserve_buffers(&ticket, &validate_list);
-	} while (ret == -ERESTARTSYS);
-
+	ret = ttm_eu_reserve_buffers(&ticket, &validate_list, false);
 	if (unlikely(ret != 0)) {
 		vmw_execbuf_unpin_panic(dev_priv);
 		goto out_no_reserve;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index b031b48dbb3c..0a474f391fad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -374,10 +374,16 @@ static int vmw_fb_create_bo(struct vmw_private *vmw_priv,
 			    size_t size, struct vmw_dma_buffer **out)
 {
 	struct vmw_dma_buffer *vmw_bo;
-	struct ttm_placement ne_placement = vmw_vram_ne_placement;
+	struct ttm_place ne_place = vmw_vram_ne_placement.placement[0];
+	struct ttm_placement ne_placement;
 	int ret;
 
-	ne_placement.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	ne_placement.num_placement = 1;
+	ne_placement.placement = &ne_place;
+	ne_placement.num_busy_placement = 1;
+	ne_placement.busy_placement = &ne_place;
+
+	ne_place.lpfn = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 
 	(void) ttm_write_lock(&vmw_priv->reservation_sem, false);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 436b013b4231..197164fd7803 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -35,7 +35,7 @@ struct vmw_fence_manager {
 	struct vmw_private *dev_priv;
 	spinlock_t lock;
 	struct list_head fence_list;
-	struct work_struct work;
+	struct work_struct work, ping_work;
 	u32 user_fence_size;
 	u32 fence_size;
 	u32 event_fence_action_size;
@@ -46,6 +46,7 @@ struct vmw_fence_manager {
 	bool goal_irq_on; /* Protected by @goal_irq_mutex */
 	bool seqno_valid; /* Protected by @lock, and may not be set to true
 			     without the @goal_irq_mutex held. */
+	unsigned ctx;
 };
 
 struct vmw_user_fence {
@@ -80,6 +81,12 @@ struct vmw_event_fence_action {
 	uint32_t *tv_usec;
 };
 
+static struct vmw_fence_manager *
+fman_from_fence(struct vmw_fence_obj *fence)
+{
+	return container_of(fence->base.lock, struct vmw_fence_manager, lock);
+}
+
 /**
  * Note on fencing subsystem usage of irqs:
  * Typically the vmw_fences_update function is called
@@ -102,25 +109,143 @@ struct vmw_event_fence_action {
  * objects with actions attached to them.
  */
 
-static void vmw_fence_obj_destroy_locked(struct kref *kref)
+static void vmw_fence_obj_destroy(struct fence *f)
 {
 	struct vmw_fence_obj *fence =
-		container_of(kref, struct vmw_fence_obj, kref);
+		container_of(f, struct vmw_fence_obj, base);
 
-	struct vmw_fence_manager *fman = fence->fman;
-	unsigned int num_fences;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
+	unsigned long irq_flags;
 
+	spin_lock_irqsave(&fman->lock, irq_flags);
 	list_del_init(&fence->head);
-	num_fences = --fman->num_fence_objects;
-	spin_unlock_irq(&fman->lock);
-	if (fence->destroy)
-		fence->destroy(fence);
-	else
-		kfree(fence);
+	--fman->num_fence_objects;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	fence->destroy(fence);
+}
 
-	spin_lock_irq(&fman->lock);
+static const char *vmw_fence_get_driver_name(struct fence *f)
+{
+	return "vmwgfx";
+}
+
+static const char *vmw_fence_get_timeline_name(struct fence *f)
+{
+	return "svga";
+}
+
+static void vmw_fence_ping_func(struct work_struct *work)
+{
+	struct vmw_fence_manager *fman =
+		container_of(work, struct vmw_fence_manager, ping_work);
+
+	vmw_fifo_ping_host(fman->dev_priv, SVGA_SYNC_GENERIC);
+}
+
+static bool vmw_fence_enable_signaling(struct fence *f)
+{
+	struct vmw_fence_obj *fence =
+		container_of(f, struct vmw_fence_obj, base);
+
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
+	struct vmw_private *dev_priv = fman->dev_priv;
+
+	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	u32 seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
+		return false;
+
+	if (mutex_trylock(&dev_priv->hw_mutex)) {
+		vmw_fifo_ping_host_locked(dev_priv, SVGA_SYNC_GENERIC);
+		mutex_unlock(&dev_priv->hw_mutex);
+	} else
+		schedule_work(&fman->ping_work);
+
+	return true;
+}
+
+struct vmwgfx_wait_cb {
+	struct fence_cb base;
+	struct task_struct *task;
+};
+
+static void
+vmwgfx_wait_cb(struct fence *fence, struct fence_cb *cb)
+{
+	struct vmwgfx_wait_cb *wait =
+		container_of(cb, struct vmwgfx_wait_cb, base);
+
+	wake_up_process(wait->task);
+}
+
+static void __vmw_fences_update(struct vmw_fence_manager *fman);
+
+static long vmw_fence_wait(struct fence *f, bool intr, signed long timeout)
+{
+	struct vmw_fence_obj *fence =
+		container_of(f, struct vmw_fence_obj, base);
+
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
+	struct vmw_private *dev_priv = fman->dev_priv;
+	struct vmwgfx_wait_cb cb;
+	long ret = timeout;
+	unsigned long irq_flags;
+
+	if (likely(vmw_fence_obj_signaled(fence)))
+		return timeout;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	vmw_seqno_waiter_add(dev_priv);
+
+	spin_lock_irqsave(f->lock, irq_flags);
+
+	if (intr && signal_pending(current)) {
+		ret = -ERESTARTSYS;
+		goto out;
+	}
+
+	cb.base.func = vmwgfx_wait_cb;
+	cb.task = current;
+	list_add(&cb.base.node, &f->cb_list);
+
+	while (ret > 0) {
+		__vmw_fences_update(fman);
+		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &f->flags))
+			break;
+
+		if (intr)
+			__set_current_state(TASK_INTERRUPTIBLE);
+		else
+			__set_current_state(TASK_UNINTERRUPTIBLE);
+		spin_unlock_irqrestore(f->lock, irq_flags);
+
+		ret = schedule_timeout(ret);
+
+		spin_lock_irqsave(f->lock, irq_flags);
+		if (ret > 0 && intr && signal_pending(current))
+			ret = -ERESTARTSYS;
+	}
+
+	if (!list_empty(&cb.base.node))
+		list_del(&cb.base.node);
+	__set_current_state(TASK_RUNNING);
+
+out:
+	spin_unlock_irqrestore(f->lock, irq_flags);
+
+	vmw_seqno_waiter_remove(dev_priv);
+
+	return ret;
 }
 
+static struct fence_ops vmw_fence_ops = {
+	.get_driver_name = vmw_fence_get_driver_name,
+	.get_timeline_name = vmw_fence_get_timeline_name,
+	.enable_signaling = vmw_fence_enable_signaling,
+	.wait = vmw_fence_wait,
+	.release = vmw_fence_obj_destroy,
+};
+
 
 /**
  * Execute signal actions on fences recently signaled.
@@ -180,12 +305,14 @@ struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 	INIT_LIST_HEAD(&fman->fence_list);
 	INIT_LIST_HEAD(&fman->cleanup_list);
 	INIT_WORK(&fman->work, &vmw_fence_work_func);
+	INIT_WORK(&fman->ping_work, &vmw_fence_ping_func);
 	fman->fifo_down = true;
 	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
 	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
 	fman->event_fence_action_size =
 		ttm_round_pot(sizeof(struct vmw_event_fence_action));
 	mutex_init(&fman->goal_irq_mutex);
+	fman->ctx = fence_context_alloc(1);
 
 	return fman;
 }
@@ -196,6 +323,7 @@ void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
 	bool lists_empty;
 
 	(void) cancel_work_sync(&fman->work);
+	(void) cancel_work_sync(&fman->ping_work);
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
 	lists_empty = list_empty(&fman->fence_list) &&
@@ -207,23 +335,16 @@ void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
 }
 
 static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
-			      struct vmw_fence_obj *fence,
-			      u32 seqno,
-			      uint32_t mask,
+			      struct vmw_fence_obj *fence, u32 seqno,
 			      void (*destroy) (struct vmw_fence_obj *fence))
 {
 	unsigned long irq_flags;
-	unsigned int num_fences;
 	int ret = 0;
 
-	fence->seqno = seqno;
+	fence_init(&fence->base, &vmw_fence_ops, &fman->lock,
+		   fman->ctx, seqno);
 	INIT_LIST_HEAD(&fence->seq_passed_actions);
-	fence->fman = fman;
-	fence->signaled = 0;
-	fence->signal_mask = mask;
-	kref_init(&fence->kref);
 	fence->destroy = destroy;
-	init_waitqueue_head(&fence->queue);
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
 	if (unlikely(fman->fifo_down)) {
@@ -231,7 +352,7 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
 		goto out_unlock;
 	}
 	list_add_tail(&fence->head, &fman->fence_list);
-	num_fences = ++fman->num_fence_objects;
+	++fman->num_fence_objects;
 
 out_unlock:
 	spin_unlock_irqrestore(&fman->lock, irq_flags);
@@ -239,38 +360,6 @@ out_unlock:
 
 }
 
-struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
-{
-	if (unlikely(fence == NULL))
-		return NULL;
-
-	kref_get(&fence->kref);
-	return fence;
-}
-
-/**
- * vmw_fence_obj_unreference
- *
- * Note that this function may not be entered with disabled irqs since
- * it may re-enable them in the destroy function.
- *
- */
-void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
-{
-	struct vmw_fence_obj *fence = *fence_p;
-	struct vmw_fence_manager *fman;
-
-	if (unlikely(fence == NULL))
-		return;
-
-	fman = fence->fman;
-	*fence_p = NULL;
-	spin_lock_irq(&fman->lock);
-	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
-	kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
-	spin_unlock_irq(&fman->lock);
-}
-
 static void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
 				struct list_head *list)
 {
@@ -326,7 +415,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
 	list_for_each_entry(fence, &fman->fence_list, head) {
 		if (!list_empty(&fence->seq_passed_actions)) {
 			fman->seqno_valid = true;
-			iowrite32(fence->seqno,
+			iowrite32(fence->base.seqno,
 				  fifo_mem + SVGA_FIFO_FENCE_GOAL);
 			break;
 		}
@@ -353,27 +442,27 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
  */
 static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
 {
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	u32 goal_seqno;
 	__le32 __iomem *fifo_mem;
 
-	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC)
+	if (fence_is_signaled_locked(&fence->base))
 		return false;
 
-	fifo_mem = fence->fman->dev_priv->mmio_virt;
+	fifo_mem = fman->dev_priv->mmio_virt;
 	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
-	if (likely(fence->fman->seqno_valid &&
-		   goal_seqno - fence->seqno < VMW_FENCE_WRAP))
+	if (likely(fman->seqno_valid &&
+		   goal_seqno - fence->base.seqno < VMW_FENCE_WRAP))
 		return false;
 
-	iowrite32(fence->seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
-	fence->fman->seqno_valid = true;
+	iowrite32(fence->base.seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	fman->seqno_valid = true;
 
 	return true;
 }
 
-void vmw_fences_update(struct vmw_fence_manager *fman)
+static void __vmw_fences_update(struct vmw_fence_manager *fman)
 {
-	unsigned long flags;
 	struct vmw_fence_obj *fence, *next_fence;
 	struct list_head action_list;
 	bool needs_rerun;
@@ -382,32 +471,25 @@ void vmw_fences_update(struct vmw_fence_manager *fman)
 
 	seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 rerun:
-	spin_lock_irqsave(&fman->lock, flags);
 	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
-		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
+		if (seqno - fence->base.seqno < VMW_FENCE_WRAP) {
 			list_del_init(&fence->head);
-			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			fence_signal_locked(&fence->base);
 			INIT_LIST_HEAD(&action_list);
 			list_splice_init(&fence->seq_passed_actions,
 					 &action_list);
 			vmw_fences_perform_actions(fman, &action_list);
-			wake_up_all(&fence->queue);
 		} else
 			break;
 	}
 
-	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
-
-	if (!list_empty(&fman->cleanup_list))
-		(void) schedule_work(&fman->work);
-	spin_unlock_irqrestore(&fman->lock, flags);
-
 	/*
 	 * Rerun if the fence goal seqno was updated, and the
 	 * hardware might have raced with that update, so that
 	 * we missed a fence_goal irq.
 	 */
 
+	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
 	if (unlikely(needs_rerun)) {
 		new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 		if (new_seqno != seqno) {
@@ -415,79 +497,58 @@ rerun:
 			goto rerun;
 		}
 	}
+
+	if (!list_empty(&fman->cleanup_list))
+		(void) schedule_work(&fman->work);
 }
 
-bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
-			    uint32_t flags)
+void vmw_fences_update(struct vmw_fence_manager *fman)
 {
-	struct vmw_fence_manager *fman = fence->fman;
 	unsigned long irq_flags;
-	uint32_t signaled;
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
-	signaled = fence->signaled;
+	__vmw_fences_update(fman);
 	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
 
-	flags &= fence->signal_mask;
-	if ((signaled & flags) == flags)
-		return 1;
+bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence)
+{
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
 
-	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0)
-		vmw_fences_update(fman);
+	if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+		return 1;
 
-	spin_lock_irqsave(&fman->lock, irq_flags);
-	signaled = fence->signaled;
-	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	vmw_fences_update(fman);
 
-	return ((signaled & flags) == flags);
+	return fence_is_signaled(&fence->base);
 }
 
-int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
-		       uint32_t flags, bool lazy,
+int vmw_fence_obj_wait(struct vmw_fence_obj *fence, bool lazy,
 		       bool interruptible, unsigned long timeout)
 {
-	struct vmw_private *dev_priv = fence->fman->dev_priv;
-	long ret;
+	long ret = fence_wait_timeout(&fence->base, interruptible, timeout);
 
-	if (likely(vmw_fence_obj_signaled(fence, flags)))
+	if (likely(ret > 0))
 		return 0;
-
-	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
-	vmw_seqno_waiter_add(dev_priv);
-
-	if (interruptible)
-		ret = wait_event_interruptible_timeout
-			(fence->queue,
-			 vmw_fence_obj_signaled(fence, flags),
-			 timeout);
+	else if (ret == 0)
+		return -EBUSY;
 	else
-		ret = wait_event_timeout
-			(fence->queue,
-			 vmw_fence_obj_signaled(fence, flags),
-			 timeout);
-
-	vmw_seqno_waiter_remove(dev_priv);
-
-	if (unlikely(ret == 0))
-		ret = -EBUSY;
-	else if (likely(ret > 0))
-		ret = 0;
-
-	return ret;
+		return ret;
 }
 
 void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
 {
-	struct vmw_private *dev_priv = fence->fman->dev_priv;
+	struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;
 
 	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 }
 
 static void vmw_fence_destroy(struct vmw_fence_obj *fence)
 {
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
+
+	fence_free(&fence->base);
 
-	kfree(fence);
 	/*
 	 * Free kernel space accounting.
 	 */
@@ -497,7 +558,6 @@ static void vmw_fence_destroy(struct vmw_fence_obj *fence)
 
 int vmw_fence_create(struct vmw_fence_manager *fman,
 		     uint32_t seqno,
-		     uint32_t mask,
 		     struct vmw_fence_obj **p_fence)
 {
 	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
@@ -515,7 +575,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman,
 		goto out_no_object;
 	}
 
-	ret = vmw_fence_obj_init(fman, fence, seqno, mask,
+	ret = vmw_fence_obj_init(fman, fence, seqno,
 				 vmw_fence_destroy);
 	if (unlikely(ret != 0))
 		goto out_err_init;
@@ -535,7 +595,7 @@ static void vmw_user_fence_destroy(struct vmw_fence_obj *fence)
 {
 	struct vmw_user_fence *ufence =
 		container_of(fence, struct vmw_user_fence, fence);
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
 
 	ttm_base_object_kfree(ufence, base);
 	/*
@@ -559,7 +619,6 @@ static void vmw_user_fence_base_release(struct ttm_base_object **p_base)
 int vmw_user_fence_create(struct drm_file *file_priv,
 			  struct vmw_fence_manager *fman,
 			  uint32_t seqno,
-			  uint32_t mask,
 			  struct vmw_fence_obj **p_fence,
 			  uint32_t *p_handle)
 {
@@ -586,7 +645,7 @@ int vmw_user_fence_create(struct drm_file *file_priv,
 	}
 
 	ret = vmw_fence_obj_init(fman, &ufence->fence, seqno,
-				 mask, vmw_user_fence_destroy);
+				 vmw_user_fence_destroy);
 	if (unlikely(ret != 0)) {
 		kfree(ufence);
 		goto out_no_object;
@@ -629,7 +688,6 @@ out_no_object:
 
 void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
 {
-	unsigned long irq_flags;
 	struct list_head action_list;
 	int ret;
 
@@ -638,35 +696,32 @@ void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
 	 * restart when we've released the fman->lock.
 	 */
 
-	spin_lock_irqsave(&fman->lock, irq_flags);
+	spin_lock_irq(&fman->lock);
 	fman->fifo_down = true;
 	while (!list_empty(&fman->fence_list)) {
 		struct vmw_fence_obj *fence =
 			list_entry(fman->fence_list.prev, struct vmw_fence_obj,
 				   head);
-		kref_get(&fence->kref);
+		fence_get(&fence->base);
 		spin_unlock_irq(&fman->lock);
 
-		ret = vmw_fence_obj_wait(fence, fence->signal_mask,
-					 false, false,
+		ret = vmw_fence_obj_wait(fence, false, false,
 					 VMW_FENCE_WAIT_TIMEOUT);
 
 		if (unlikely(ret != 0)) {
 			list_del_init(&fence->head);
-			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			fence_signal(&fence->base);
 			INIT_LIST_HEAD(&action_list);
 			list_splice_init(&fence->seq_passed_actions,
 					 &action_list);
 			vmw_fences_perform_actions(fman, &action_list);
-			wake_up_all(&fence->queue);
 		}
 
-		spin_lock_irq(&fman->lock);
-
 		BUG_ON(!list_empty(&fence->head));
-		kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+		fence_put(&fence->base);
+		spin_lock_irq(&fman->lock);
 	}
-	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	spin_unlock_irq(&fman->lock);
 }
 
 void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
@@ -716,14 +771,14 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
 
 	timeout = jiffies;
 	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) {
-		ret = ((vmw_fence_obj_signaled(fence, arg->flags)) ?
+		ret = ((vmw_fence_obj_signaled(fence)) ?
 		       0 : -EBUSY);
 		goto out;
 	}
 
 	timeout = (unsigned long)arg->kernel_cookie - timeout;
 
-	ret = vmw_fence_obj_wait(fence, arg->flags, arg->lazy, true, timeout);
+	ret = vmw_fence_obj_wait(fence, arg->lazy, true, timeout);
 
 out:
 	ttm_base_object_unref(&base);
@@ -758,12 +813,12 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
 	}
 
 	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
-	fman = fence->fman;
+	fman = fman_from_fence(fence);
 
-	arg->signaled = vmw_fence_obj_signaled(fence, arg->flags);
-	spin_lock_irq(&fman->lock);
+	arg->signaled = vmw_fence_obj_signaled(fence);
 
-	arg->signaled_flags = fence->signaled;
+	arg->signaled_flags = arg->flags;
+	spin_lock_irq(&fman->lock);
 	arg->passed_seqno = dev_priv->last_read_seqno;
 	spin_unlock_irq(&fman->lock);
 
@@ -876,7 +931,7 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
 {
 	struct vmw_event_fence_action *eaction =
 		container_of(action, struct vmw_event_fence_action, action);
-	struct vmw_fence_manager *fman = eaction->fence->fman;
+	struct vmw_fence_manager *fman = fman_from_fence(eaction->fence);
 	unsigned long irq_flags;
 
 	spin_lock_irqsave(&fman->lock, irq_flags);
@@ -900,7 +955,7 @@ static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
 static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
 			      struct vmw_fence_action *action)
 {
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	unsigned long irq_flags;
 	bool run_update = false;
 
@@ -908,7 +963,7 @@ static void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
 	spin_lock_irqsave(&fman->lock, irq_flags);
 
 	fman->pending_actions[action->type]++;
-	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC) {
+	if (fence_is_signaled_locked(&fence->base)) {
 		struct list_head action_list;
 
 		INIT_LIST_HEAD(&action_list);
@@ -960,7 +1015,7 @@ int vmw_event_fence_action_queue(struct drm_file *file_priv,
 				 bool interruptible)
 {
 	struct vmw_event_fence_action *eaction;
-	struct vmw_fence_manager *fman = fence->fman;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 	unsigned long irq_flags;
 
@@ -1000,7 +1055,8 @@ static int vmw_event_fence_action_create(struct drm_file *file_priv,
 				  bool interruptible)
 {
 	struct vmw_event_fence_pending *event;
-	struct drm_device *dev = fence->fman->dev_priv->dev;
+	struct vmw_fence_manager *fman = fman_from_fence(fence);
+	struct drm_device *dev = fman->dev_priv->dev;
 	unsigned long irq_flags;
 	int ret;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
index faf2e7873860..26a4add39208 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -27,6 +27,8 @@
 
 #ifndef _VMWGFX_FENCE_H_
 
+#include <linux/fence.h>
+
 #define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
 
 struct vmw_private;
@@ -50,16 +52,11 @@ struct vmw_fence_action {
 };
 
 struct vmw_fence_obj {
-	struct kref kref;
-	u32 seqno;
+	struct fence base;
 
-	struct vmw_fence_manager *fman;
 	struct list_head head;
-	uint32_t signaled;
-	uint32_t signal_mask;
 	struct list_head seq_passed_actions;
 	void (*destroy)(struct vmw_fence_obj *fence);
-	wait_queue_head_t queue;
 };
 
 extern struct vmw_fence_manager *
@@ -67,17 +64,29 @@ vmw_fence_manager_init(struct vmw_private *dev_priv);
 
 extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
 
-extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
+static inline void
+vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
+{
+	struct vmw_fence_obj *fence = *fence_p;
+
+	*fence_p = NULL;
+	if (fence)
+		fence_put(&fence->base);
+}
 
-extern struct vmw_fence_obj *
-vmw_fence_obj_reference(struct vmw_fence_obj *fence);
+static inline struct vmw_fence_obj *
+vmw_fence_obj_reference(struct vmw_fence_obj *fence)
+{
+	if (fence)
+		fence_get(&fence->base);
+	return fence;
+}
 
 extern void vmw_fences_update(struct vmw_fence_manager *fman);
 
-extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
-				   uint32_t flags);
+extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence);
 
-extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
+extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
 			      bool lazy,
 			      bool interruptible, unsigned long timeout);
 
@@ -85,13 +94,11 @@ extern void vmw_fence_obj_flush(struct vmw_fence_obj *fence);
 
 extern int vmw_fence_create(struct vmw_fence_manager *fman,
 			    uint32_t seqno,
-			    uint32_t mask,
 			    struct vmw_fence_obj **p_fence);
 
 extern int vmw_user_fence_create(struct drm_file *file_priv,
 				 struct vmw_fence_manager *fman,
 				 uint32_t sequence,
-				 uint32_t mask,
 				 struct vmw_fence_obj **p_fence,
 				 uint32_t *p_handle);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 6eae14d2a3f7..09e10aefcd8e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -160,16 +160,21 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	return vmw_fifo_send_fence(dev_priv, &dummy);
 }
 
-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+void vmw_fifo_ping_host_locked(struct vmw_private *dev_priv, uint32_t reason)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
 
-	mutex_lock(&dev_priv->hw_mutex);
-
 	if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) {
 		iowrite32(1, fifo_mem + SVGA_FIFO_BUSY);
 		vmw_write(dev_priv, SVGA_REG_SYNC, reason);
 	}
+}
+
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+
+	vmw_fifo_ping_host_locked(dev_priv, reason);
 
 	mutex_unlock(&dev_priv->hw_mutex);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
index 26f8bdde3529..170b61be1e4e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
@@ -46,8 +46,7 @@ struct vmwgfx_gmrid_man {
 
 static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
 				  struct ttm_buffer_object *bo,
-				  struct ttm_placement *placement,
-				  uint32_t flags,
+				  const struct ttm_place *place,
 				  struct ttm_mem_reg *mem)
 {
 	struct vmwgfx_gmrid_man *gman =
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index a432c0db257c..26584316cb78 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -133,6 +133,7 @@ static void vmw_resource_release(struct kref *kref)
 			struct ttm_validate_buffer val_buf;
 
 			val_buf.bo = bo;
+			val_buf.shared = false;
 			res->func->unbind(res, false, &val_buf);
 		}
 		res->backup_dirty = false;
@@ -567,13 +568,18 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
 	int ret;
 
 	if (flags & drm_vmw_synccpu_allow_cs) {
-		struct ttm_bo_device *bdev = bo->bdev;
+		bool nonblock = !!(flags & drm_vmw_synccpu_dontblock);
+		long lret;
 
-		spin_lock(&bdev->fence_lock);
-		ret = ttm_bo_wait(bo, false, true,
-				  !!(flags & drm_vmw_synccpu_dontblock));
-		spin_unlock(&bdev->fence_lock);
-		return ret;
+		if (nonblock)
+			return reservation_object_test_signaled_rcu(bo->resv, true) ? 0 : -EBUSY;
+
+		lret = reservation_object_wait_timeout_rcu(bo->resv, true, true, MAX_SCHEDULE_TIMEOUT);
+		if (!lret)
+			return -EBUSY;
+		else if (lret < 0)
+			return lret;
+		return 0;
 	}
 
 	ret = ttm_bo_synccpu_write_grab
@@ -1214,8 +1220,9 @@ vmw_resource_check_buffer(struct vmw_resource *res,
 
 	INIT_LIST_HEAD(&val_list);
 	val_buf->bo = ttm_bo_reference(&res->backup->base);
+	val_buf->shared = false;
 	list_add_tail(&val_buf->head, &val_list);
-	ret = ttm_eu_reserve_buffers(NULL, &val_list);
+	ret = ttm_eu_reserve_buffers(NULL, &val_list, interruptible);
 	if (unlikely(ret != 0))
 		goto out_no_reserve;
 
@@ -1307,6 +1314,7 @@ int vmw_resource_do_evict(struct vmw_resource *res, bool interruptible)
 	BUG_ON(!func->may_evict);
 
 	val_buf.bo = NULL;
+	val_buf.shared = false;
 	ret = vmw_resource_check_buffer(res, interruptible, &val_buf);
 	if (unlikely(ret != 0))
 		return ret;
@@ -1352,6 +1360,7 @@ int vmw_resource_validate(struct vmw_resource *res)
 		return 0;
 
 	val_buf.bo = NULL;
+	val_buf.shared = false;
 	if (res->backup)
 		val_buf.bo = &res->backup->base;
 	do {
@@ -1419,25 +1428,16 @@ void vmw_fence_single_bo(struct ttm_buffer_object *bo,
 			 struct vmw_fence_obj *fence)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
-	struct ttm_bo_driver *driver = bdev->driver;
-	struct vmw_fence_obj *old_fence_obj;
+
 	struct vmw_private *dev_priv =
 		container_of(bdev, struct vmw_private, bdev);
 
-	if (fence == NULL)
+	if (fence == NULL) {
 		vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
-	else
-		driver->sync_obj_ref(fence);
-
-	spin_lock(&bdev->fence_lock);
-
-	old_fence_obj = bo->sync_obj;
-	bo->sync_obj = fence;
-
-	spin_unlock(&bdev->fence_lock);
-
-	if (old_fence_obj)
-		vmw_fence_obj_unreference(&old_fence_obj);
+		reservation_object_add_excl_fence(bo->resv, &fence->base);
+		fence_put(&fence->base);
+	} else
+		reservation_object_add_excl_fence(bo->resv, &fence->base);
 }
 
 /**
@@ -1475,10 +1475,10 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
 
 	if (mem->mem_type != VMW_PL_MOB) {
 		struct vmw_resource *res, *n;
-		struct ttm_bo_device *bdev = bo->bdev;
 		struct ttm_validate_buffer val_buf;
 
 		val_buf.bo = bo;
+		val_buf.shared = false;
 
 		list_for_each_entry_safe(res, n, &dma_buf->res_list, mob_head) {
 
@@ -1491,9 +1491,7 @@ void vmw_resource_move_notify(struct ttm_buffer_object *bo,
 			list_del_init(&res->mob_head);
 		}
 
-		spin_lock(&bdev->fence_lock);
 		(void) ttm_bo_wait(bo, false, false, false);
-		spin_unlock(&bdev->fence_lock);
 	}
 }