187 files changed, 18194 insertions, 5484 deletions
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 2774ac1086d3..66cd0b8096ca 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -923,6 +923,9 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
 {
 	int ret = -EINVAL;
 
+	if (intel_private.base.do_idle_maps)
+		return -ENODEV;
+
 	if (intel_private.clear_fake_agp) {
 		int start = intel_private.base.stolen_size / PAGE_SIZE;
 		int end = intel_private.base.gtt_mappable_entries;
@@ -985,6 +988,9 @@ static int intel_fake_agp_remove_entries(struct agp_memory *mem,
 	if (mem->page_count == 0)
 		return 0;
 
+	if (intel_private.base.do_idle_maps)
+		return -ENODEV;
+
 	intel_gtt_clear_range(pg_start, mem->page_count);
 
 	if (intel_private.base.needs_dmar) {
@@ -1177,6 +1183,25 @@ static void gen6_cleanup(void)
 {
 }
 
+/* Certain Gen5 chipsets require require idling the GPU before
+ * unmapping anything from the GTT when VT-d is enabled.
+ */
+extern int intel_iommu_gfx_mapped;
+static inline int needs_idle_maps(void)
+{
+	const unsigned short gpu_devid = intel_private.pcidev->device;
+
+	/* Query intel_iommu to see if we need the workaround. Presumably that
+	 * was loaded first.
+	 */
+	if ((gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_HB ||
+	     gpu_devid == PCI_DEVICE_ID_INTEL_IRONLAKE_M_IG) &&
+	     intel_iommu_gfx_mapped)
+		return 1;
+
+	return 0;
+}
+
 static int i9xx_setup(void)
 {
 	u32 reg_addr;
@@ -1211,6 +1236,9 @@ static int i9xx_setup(void)
 		intel_private.gtt_bus_addr = reg_addr + gtt_offset;
 	}
 
+	if (needs_idle_maps());
+		intel_private.base.do_idle_maps = 1;
+
 	intel_i9xx_setup_flush();
 
 	return 0;
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index b493663c7ba7..785127cb281b 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -158,3 +158,7 @@ config DRM_SAVAGE
 	help
 	  Choose this option if you have a Savage3D/4/SuperSavage/Pro/Twister
 	  chipset. If M is selected the module will be called savage.
+
+source "drivers/gpu/drm/exynos/Kconfig"
+
+source "drivers/gpu/drm/vmwgfx/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 89cf05a72d1c..c0496f660707 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -35,4 +35,5 @@ obj-$(CONFIG_DRM_SAVAGE)+= savage/
 obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
 obj-$(CONFIG_DRM_VIA)	+=via/
 obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
+obj-$(CONFIG_DRM_EXYNOS) +=exynos/
 obj-y			+= i2c/
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f88a9b2c977b..f2366440b738 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -372,11 +372,13 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 		encoder_funcs = encoder->helper_private;
 		if (!(ret = encoder_funcs->mode_fixup(encoder, mode,
 						      adjusted_mode))) {
+			DRM_DEBUG_KMS("Encoder fixup failed\n");
 			goto done;
 		}
 	}
 
 	if (!(ret = crtc_funcs->mode_fixup(crtc, mode, adjusted_mode))) {
+		DRM_DEBUG_KMS("CRTC fixup failed\n");
 		goto done;
 	}
 	DRM_DEBUG_KMS("[CRTC:%d]\n", crtc->base.id);
diff --git a/drivers/gpu/drm/drm_debugfs.c b/drivers/gpu/drm/drm_debugfs.c
index 9d2668a50872..b9dc2629ea9a 100644
--- a/drivers/gpu/drm/drm_debugfs.c
+++ b/drivers/gpu/drm/drm_debugfs.c
@@ -107,11 +107,8 @@ int drm_debugfs_create_files(struct drm_info_list *files, int count,
 		ent = debugfs_create_file(files[i].name, S_IFREG | S_IRUGO,
 					  root, tmp, &drm_debugfs_fops);
 		if (!ent) {
-			char name[64];
-			strncpy(name, root->d_name.name,
-						min(root->d_name.len, 64U));
 			DRM_ERROR("Cannot create /sys/kernel/debug/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->d_name.name, files[i].name);
 			kfree(tmp);
 			ret = -1;
 			goto fail;
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 93a112d45c1a..7a87e0878f30 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -438,6 +438,8 @@ long drm_ioctl(struct file *filp,
 					goto err_i1;
 				}
 			}
+			if (asize > usize)
+				memset(kdata + usize, 0, asize - usize);
 		}
 
 		if (cmd & IOC_IN) {
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 7425e5c9bd75..fe39c3570538 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1319,6 +1319,7 @@ add_detailed_modes(struct drm_connector *connector, struct edid *edid,
 #define HDMI_IDENTIFIER 0x000C03
 #define AUDIO_BLOCK	0x01
 #define VENDOR_BLOCK    0x03
+#define SPEAKER_BLOCK	0x04
 #define EDID_BASIC_AUDIO	(1 << 6)
 
 /**
@@ -1347,6 +1348,176 @@ u8 *drm_find_cea_extension(struct edid *edid)
 }
 EXPORT_SYMBOL(drm_find_cea_extension);
 
+static void
+parse_hdmi_vsdb(struct drm_connector *connector, uint8_t *db)
+{
+	connector->eld[5] |= (db[6] >> 7) << 1;  /* Supports_AI */
+
+	connector->dvi_dual = db[6] & 1;
+	connector->max_tmds_clock = db[7] * 5;
+
+	connector->latency_present[0] = db[8] >> 7;
+	connector->latency_present[1] = (db[8] >> 6) & 1;
+	connector->video_latency[0] = db[9];
+	connector->audio_latency[0] = db[10];
+	connector->video_latency[1] = db[11];
+	connector->audio_latency[1] = db[12];
+
+	DRM_LOG_KMS("HDMI: DVI dual %d, "
+		    "max TMDS clock %d, "
+		    "latency present %d %d, "
+		    "video latency %d %d, "
+		    "audio latency %d %d\n",
+		    connector->dvi_dual,
+		    connector->max_tmds_clock,
+	      (int) connector->latency_present[0],
+	      (int) connector->latency_present[1],
+		    connector->video_latency[0],
+		    connector->video_latency[1],
+		    connector->audio_latency[0],
+		    connector->audio_latency[1]);
+}
+
+static void
+monitor_name(struct detailed_timing *t, void *data)
+{
+	if (t->data.other_data.type == EDID_DETAIL_MONITOR_NAME)
+		*(u8 **)data = t->data.other_data.data.str.str;
+}
+
+/**
+ * drm_edid_to_eld - build ELD from EDID
+ * @connector: connector corresponding to the HDMI/DP sink
+ * @edid: EDID to parse
+ *
+ * Fill the ELD (EDID-Like Data) buffer for passing to the audio driver.
+ * Some ELD fields are left to the graphics driver caller:
+ * - Conn_Type
+ * - HDCP
+ * - Port_ID
+ */
+void drm_edid_to_eld(struct drm_connector *connector, struct edid *edid)
+{
+	uint8_t *eld = connector->eld;
+	u8 *cea;
+	u8 *name;
+	u8 *db;
+	int sad_count = 0;
+	int mnl;
+	int dbl;
+
+	memset(eld, 0, sizeof(connector->eld));
+
+	cea = drm_find_cea_extension(edid);
+	if (!cea) {
+		DRM_DEBUG_KMS("ELD: no CEA Extension found\n");
+		return;
+	}
+
+	name = NULL;
+	drm_for_each_detailed_block((u8 *)edid, monitor_name, &name);
+	for (mnl = 0; name && mnl < 13; mnl++) {
+		if (name[mnl] == 0x0a)
+			break;
+		eld[20 + mnl] = name[mnl];
+	}
+	eld[4] = (cea[1] << 5) | mnl;
+	DRM_DEBUG_KMS("ELD monitor %s\n", eld + 20);
+
+	eld[0] = 2 << 3;		/* ELD version: 2 */
+
+	eld[16] = edid->mfg_id[0];
+	eld[17] = edid->mfg_id[1];
+	eld[18] = edid->prod_code[0];
+	eld[19] = edid->prod_code[1];
+
+	for (db = cea + 4; db < cea + cea[2]; db += dbl + 1) {
+		dbl = db[0] & 0x1f;
+
+		switch ((db[0] & 0xe0) >> 5) {
+		case AUDIO_BLOCK:	/* Audio Data Block, contains SADs */
+			sad_count = dbl / 3;
+			memcpy(eld + 20 + mnl, &db[1], dbl);
+			break;
+		case SPEAKER_BLOCK:	/* Speaker Allocation Data Block */
+			eld[7] = db[1];
+			break;
+		case VENDOR_BLOCK:
+			/* HDMI Vendor-Specific Data Block */
+			if (db[1] == 0x03 && db[2] == 0x0c && db[3] == 0)
+				parse_hdmi_vsdb(connector, db);
+			break;
+		default:
+			break;
+		}
+	}
+	eld[5] |= sad_count << 4;
+	eld[2] = (20 + mnl + sad_count * 3 + 3) / 4;
+
+	DRM_DEBUG_KMS("ELD size %d, SAD count %d\n", (int)eld[2], sad_count);
+}
+EXPORT_SYMBOL(drm_edid_to_eld);
+
+/**
+ * drm_av_sync_delay - HDMI/DP sink audio-video sync delay in millisecond
+ * @connector: connector associated with the HDMI/DP sink
+ * @mode: the display mode
+ */
+int drm_av_sync_delay(struct drm_connector *connector,
+		      struct drm_display_mode *mode)
+{
+	int i = !!(mode->flags & DRM_MODE_FLAG_INTERLACE);
+	int a, v;
+
+	if (!connector->latency_present[0])
+		return 0;
+	if (!connector->latency_present[1])
+		i = 0;
+
+	a = connector->audio_latency[i];
+	v = connector->video_latency[i];
+
+	/*
+	 * HDMI/DP sink doesn't support audio or video?
+	 */
+	if (a == 255 || v == 255)
+		return 0;
+
+	/*
+	 * Convert raw EDID values to millisecond.
+	 * Treat unknown latency as 0ms.
+	 */
+	if (a)
+		a = min(2 * (a - 1), 500);
+	if (v)
+		v = min(2 * (v - 1), 500);
+
+	return max(v - a, 0);
+}
+EXPORT_SYMBOL(drm_av_sync_delay);
+
+/**
+ * drm_select_eld - select one ELD from multiple HDMI/DP sinks
+ * @encoder: the encoder just changed display mode
+ * @mode: the adjusted display mode
+ *
+ * It's possible for one encoder to be associated with multiple HDMI/DP sinks.
+ * The policy is now hard coded to simply use the first HDMI/DP sink's ELD.
+ */
+struct drm_connector *drm_select_eld(struct drm_encoder *encoder,
+				     struct drm_display_mode *mode)
+{
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		if (connector->encoder == encoder && connector->eld[0])
+			return connector;
+
+	return NULL;
+}
+EXPORT_SYMBOL(drm_select_eld);
+
 /**
  * drm_detect_hdmi_monitor - detect whether monitor is hdmi.
  * @edid: monitor EDID information
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 186d62eb063b..396e60ce8114 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -285,6 +285,94 @@ again:
 }
 EXPORT_SYMBOL(drm_gem_handle_create);
 
+
+/**
+ * drm_gem_free_mmap_offset - release a fake mmap offset for an object
+ * @obj: obj in question
+ *
+ * This routine frees fake offsets allocated by drm_gem_create_mmap_offset().
+ */
+void
+drm_gem_free_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list = &obj->map_list;
+
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+	drm_mm_put_block(list->file_offset_node);
+	kfree(list->map);
+	list->map = NULL;
+}
+EXPORT_SYMBOL(drm_gem_free_mmap_offset);
+
+/**
+ * drm_gem_create_mmap_offset - create a fake mmap offset for an object
+ * @obj: obj in question
+ *
+ * GEM memory mapping works by handing back to userspace a fake mmap offset
+ * it can use in a subsequent mmap(2) call.  The DRM core code then looks
+ * up the object based on the offset and sets up the various memory mapping
+ * structures.
+ *
+ * This routine allocates and attaches a fake offset for @obj.
+ */
+int
+drm_gem_create_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_map_list *list;
+	struct drm_local_map *map;
+	int ret = 0;
+
+	/* Set the object up for mmap'ing */
+	list = &obj->map_list;
+	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
+	if (!list->map)
+		return -ENOMEM;
+
+	map = list->map;
+	map->type = _DRM_GEM;
+	map->size = obj->size;
+	map->handle = obj;
+
+	/* Get a DRM GEM mmap offset allocated... */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+			obj->size / PAGE_SIZE, 0, 0);
+
+	if (!list->file_offset_node) {
+		DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
+		ret = -ENOSPC;
+		goto out_free_list;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+			obj->size / PAGE_SIZE, 0);
+	if (!list->file_offset_node) {
+		ret = -ENOMEM;
+		goto out_free_list;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
+	if (ret) {
+		DRM_ERROR("failed to add to map hash\n");
+		goto out_free_mm;
+	}
+
+	return 0;
+
+out_free_mm:
+	drm_mm_put_block(list->file_offset_node);
+out_free_list:
+	kfree(list->map);
+	list->map = NULL;
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_gem_create_mmap_offset);
+
 /** Returns a reference to the object named by the handle. */
 struct drm_gem_object *
 drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp,
diff --git a/drivers/gpu/drm/drm_proc.c b/drivers/gpu/drm/drm_proc.c
index 9e5b07efebb7..0f3c4e3cafc3 100644
--- a/drivers/gpu/drm/drm_proc.c
+++ b/drivers/gpu/drm/drm_proc.c
@@ -95,7 +95,6 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 	struct drm_device *dev = minor->dev;
 	struct proc_dir_entry *ent;
 	struct drm_info_node *tmp;
-	char name[64];
 	int i, ret;
 
 	for (i = 0; i < count; i++) {
@@ -118,7 +117,7 @@ int drm_proc_create_files(struct drm_info_list *files, int count,
 				       &drm_proc_fops, tmp);
 		if (!ent) {
 			DRM_ERROR("Cannot create /proc/dri/%s/%s\n",
-				  name, files[i].name);
+				  root->name, files[i].name);
 			list_del(&tmp->list);
 			kfree(tmp);
 			ret = -1;
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
new file mode 100644
index 000000000000..847466aab435
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -0,0 +1,20 @@
+config DRM_EXYNOS
+	tristate "DRM Support for Samsung SoC EXYNOS Series"
+	depends on DRM && PLAT_SAMSUNG
+	default	n
+	select DRM_KMS_HELPER
+	select FB_CFB_FILLRECT
+	select FB_CFB_COPYAREA
+	select FB_CFB_IMAGEBLIT
+	select VT_HW_CONSOLE_BINDING if FRAMEBUFFER_CONSOLE
+	help
+	  Choose this option if you have a Samsung SoC EXYNOS chipset.
+	  If M is selected the module will be called exynosdrm.
+
+config DRM_EXYNOS_FIMD
+	tristate "Exynos DRM FIMD"
+	depends on DRM_EXYNOS
+	default n
+	help
+	  Choose this option if you want to use Exynos FIMD for DRM.
+	  If M is selected, the module will be called exynos_drm_fimd
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
new file mode 100644
index 000000000000..0496d3ff2683
--- /dev/null
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y := -Iinclude/drm -Idrivers/gpu/drm/exynos
+exynosdrm-y := exynos_drm_drv.o exynos_drm_encoder.o exynos_drm_connector.o \
+		exynos_drm_crtc.o exynos_drm_fbdev.o exynos_drm_fb.o \
+		exynos_drm_buf.o exynos_drm_gem.o exynos_drm_core.o
+
+obj-$(CONFIG_DRM_EXYNOS) += exynosdrm.o
+obj-$(CONFIG_DRM_EXYNOS_FIMD) += exynos_drm_fimd.o
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.c b/drivers/gpu/drm/exynos/exynos_drm_buf.c
new file mode 100644
index 000000000000..6f8afea94fc9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.c
@@ -0,0 +1,110 @@
+/* exynos_drm_buf.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_buf.h"
+
+static DEFINE_MUTEX(exynos_drm_buf_lock);
+
+static int lowlevel_buffer_allocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry->vaddr = dma_alloc_writecombine(dev->dev, entry->size,
+			(dma_addr_t *)&entry->paddr, GFP_KERNEL);
+	if (!entry->paddr) {
+		DRM_ERROR("failed to allocate buffer.\n");
+		return -ENOMEM;
+	}
+
+	DRM_DEBUG_KMS("allocated : vaddr(0x%x), paddr(0x%x), size(0x%x)\n",
+			(unsigned int)entry->vaddr, entry->paddr, entry->size);
+
+	return 0;
+}
+
+static void lowlevel_buffer_deallocate(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (entry->paddr && entry->vaddr && entry->size)
+		dma_free_writecombine(dev->dev, entry->size, entry->vaddr,
+				entry->paddr);
+	else
+		DRM_DEBUG_KMS("entry data is null.\n");
+}
+
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size)
+{
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry) {
+		DRM_ERROR("failed to allocate exynos_drm_buf_entry.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	entry->size = size;
+
+	/*
+	 * allocate memory region with size and set the memory information
+	 * to vaddr and paddr of a entry object.
+	 */
+	if (lowlevel_buffer_allocate(dev, entry) < 0) {
+		kfree(entry);
+		entry = NULL;
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return entry;
+}
+
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry)
+{
+	DRM_DEBUG_KMS("%s.\n", __FILE__);
+
+	if (!entry) {
+		DRM_DEBUG_KMS("entry is null.\n");
+		return;
+	}
+
+	lowlevel_buffer_deallocate(dev, entry);
+
+	kfree(entry);
+	entry = NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Buffer Management Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_buf.h b/drivers/gpu/drm/exynos/exynos_drm_buf.h
new file mode 100644
index 000000000000..045d59eab01a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_buf.h
@@ -0,0 +1,53 @@
+/* exynos_drm_buf.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_BUF_H_
+#define _EXYNOS_DRM_BUF_H_
+
+/*
+ * exynos drm buffer entry structure.
+ *
+ * @paddr: physical address of allocated memory.
+ * @vaddr: kernel virtual address of allocated memory.
+ * @size: size of allocated memory.
+ */
+struct exynos_drm_buf_entry {
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+	unsigned int size;
+};
+
+/* allocate physical memory. */
+struct exynos_drm_buf_entry *exynos_drm_buf_create(struct drm_device *dev,
+		unsigned int size);
+
+/* get physical memory information of a drm framebuffer. */
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb);
+
+/* remove allocated physical memory. */
+void exynos_drm_buf_destroy(struct drm_device *dev,
+		struct exynos_drm_buf_entry *entry);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.c b/drivers/gpu/drm/exynos/exynos_drm_connector.c
new file mode 100644
index 000000000000..985d9e768728
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+
+#define MAX_EDID 256
+#define to_exynos_connector(x)	container_of(x, struct exynos_drm_connector,\
+				drm_connector)
+
+struct exynos_drm_connector {
+	struct drm_connector	drm_connector;
+};
+
+/* convert exynos_video_timings to drm_display_mode */
+static inline void
+convert_to_display_mode(struct drm_display_mode *mode,
+			struct fb_videomode *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode->clock = timing->pixclock / 1000;
+
+	mode->hdisplay = timing->xres;
+	mode->hsync_start = mode->hdisplay + timing->left_margin;
+	mode->hsync_end = mode->hsync_start + timing->hsync_len;
+	mode->htotal = mode->hsync_end + timing->right_margin;
+
+	mode->vdisplay = timing->yres;
+	mode->vsync_start = mode->vdisplay + timing->upper_margin;
+	mode->vsync_end = mode->vsync_start + timing->vsync_len;
+	mode->vtotal = mode->vsync_end + timing->lower_margin;
+}
+
+/* convert drm_display_mode to exynos_video_timings */
+static inline void
+convert_to_video_timing(struct fb_videomode *timing,
+			struct drm_display_mode *mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	memset(timing, 0, sizeof(*timing));
+
+	timing->pixclock = mode->clock * 1000;
+	timing->refresh = mode->vrefresh;
+
+	timing->xres = mode->hdisplay;
+	timing->left_margin = mode->hsync_start - mode->hdisplay;
+	timing->hsync_len = mode->hsync_end - mode->hsync_start;
+	timing->right_margin = mode->htotal - mode->hsync_end;
+
+	timing->yres = mode->vdisplay;
+	timing->upper_margin = mode->vsync_start - mode->vdisplay;
+	timing->vsync_len = mode->vsync_end - mode->vsync_start;
+	timing->lower_margin = mode->vtotal - mode->vsync_end;
+
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		timing->vmode = FB_VMODE_INTERLACED;
+	else
+		timing->vmode = FB_VMODE_NONINTERLACED;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+		timing->vmode |= FB_VMODE_DOUBLE;
+}
+
+static int exynos_drm_connector_get_modes(struct drm_connector *connector)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	unsigned int count;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!display) {
+		DRM_DEBUG_KMS("display is null.\n");
+		return 0;
+	}
+
+	/*
+	 * if get_edid() exists then get_edid() callback of hdmi side
+	 * is called to get edid data through i2c interface else
+	 * get timing from the FIMD driver(display controller).
+	 *
+	 * P.S. in case of lcd panel, count is always 1 if success
+	 * because lcd panel has only one mode.
+	 */
+	if (display->get_edid) {
+		int ret;
+		void *edid;
+
+		edid = kzalloc(MAX_EDID, GFP_KERNEL);
+		if (!edid) {
+			DRM_ERROR("failed to allocate edid\n");
+			return 0;
+		}
+
+		ret = display->get_edid(manager->dev, connector,
+						edid, MAX_EDID);
+		if (ret < 0) {
+			DRM_ERROR("failed to get edid data.\n");
+			kfree(edid);
+			edid = NULL;
+			return 0;
+		}
+
+		drm_mode_connector_update_edid_property(connector, edid);
+		count = drm_add_edid_modes(connector, edid);
+
+		kfree(connector->display_info.raw_edid);
+		connector->display_info.raw_edid = edid;
+	} else {
+		struct drm_display_mode *mode = drm_mode_create(connector->dev);
+		struct fb_videomode *timing;
+
+		if (display->get_timing)
+			timing = display->get_timing(manager->dev);
+		else {
+			drm_mode_destroy(connector->dev, mode);
+			return 0;
+		}
+
+		convert_to_display_mode(mode, timing);
+
+		mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+		drm_mode_set_name(mode);
+		drm_mode_probed_add(connector, mode);
+
+		count = 1;
+	}
+
+	return count;
+}
+
+static int exynos_drm_connector_mode_valid(struct drm_connector *connector,
+					    struct drm_display_mode *mode)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	struct fb_videomode timing;
+	int ret = MODE_BAD;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	convert_to_video_timing(&timing, mode);
+
+	if (display && display->check_timing)
+		if (!display->check_timing(manager->dev, (void *)&timing))
+			ret = MODE_OK;
+
+	return ret;
+}
+
+struct drm_encoder *exynos_drm_best_encoder(struct drm_connector *connector)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return connector->encoder;
+}
+
+static struct drm_connector_helper_funcs exynos_connector_helper_funcs = {
+	.get_modes	= exynos_drm_connector_get_modes,
+	.mode_valid	= exynos_drm_connector_mode_valid,
+	.best_encoder	= exynos_drm_best_encoder,
+};
+
+/* get detection status of display device. */
+static enum drm_connector_status
+exynos_drm_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct exynos_drm_manager *manager =
+				exynos_drm_get_manager(connector->encoder);
+	struct exynos_drm_display *display = manager->display;
+	enum drm_connector_status status = connector_status_disconnected;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (display && display->is_connected) {
+		if (display->is_connected(manager->dev))
+			status = connector_status_connected;
+		else
+			status = connector_status_disconnected;
+	}
+
+	return status;
+}
+
+static void exynos_drm_connector_destroy(struct drm_connector *connector)
+{
+	struct exynos_drm_connector *exynos_connector =
+		to_exynos_connector(connector);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_sysfs_connector_remove(connector);
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+}
+
+static struct drm_connector_funcs exynos_connector_funcs = {
+	.dpms		= drm_helper_connector_dpms,
+	.fill_modes	= drm_helper_probe_single_connector_modes,
+	.detect		= exynos_drm_connector_detect,
+	.destroy	= exynos_drm_connector_destroy,
+};
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder)
+{
+	struct exynos_drm_connector *exynos_connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct drm_connector *connector;
+	int type;
+	int err;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_connector = kzalloc(sizeof(*exynos_connector), GFP_KERNEL);
+	if (!exynos_connector) {
+		DRM_ERROR("failed to allocate connector\n");
+		return NULL;
+	}
+
+	connector = &exynos_connector->drm_connector;
+
+	switch (manager->display->type) {
+	case EXYNOS_DISPLAY_TYPE_HDMI:
+		type = DRM_MODE_CONNECTOR_HDMIA;
+		break;
+	default:
+		type = DRM_MODE_CONNECTOR_Unknown;
+		break;
+	}
+
+	drm_connector_init(dev, connector, &exynos_connector_funcs, type);
+	drm_connector_helper_add(connector, &exynos_connector_helper_funcs);
+
+	err = drm_sysfs_connector_add(connector);
+	if (err)
+		goto err_connector;
+
+	connector->encoder = encoder;
+	err = drm_mode_connector_attach_encoder(connector, encoder);
+	if (err) {
+		DRM_ERROR("failed to attach a connector to a encoder\n");
+		goto err_sysfs;
+	}
+
+	DRM_DEBUG_KMS("connector has been created\n");
+
+	return connector;
+
+err_sysfs:
+	drm_sysfs_connector_remove(connector);
+err_connector:
+	drm_connector_cleanup(connector);
+	kfree(exynos_connector);
+	return NULL;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Connector Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_connector.h b/drivers/gpu/drm/exynos/exynos_drm_connector.h
new file mode 100644
index 000000000000..1c7b2b5b579c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_connector.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CONNECTOR_H_
+#define _EXYNOS_DRM_CONNECTOR_H_
+
+struct drm_connector *exynos_drm_connector_create(struct drm_device *dev,
+						   struct drm_encoder *encoder);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
new file mode 100644
index 000000000000..661a03571d0c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -0,0 +1,272 @@
+/* exynos_drm_core.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_connector.h"
+#include "exynos_drm_fbdev.h"
+
+static DEFINE_MUTEX(exynos_drm_mutex);
+static LIST_HEAD(exynos_drm_subdrv_list);
+static struct drm_device *drm_dev;
+
+static int exynos_drm_subdrv_probe(struct drm_device *dev,
+					struct exynos_drm_subdrv *subdrv)
+{
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->probe) {
+		int ret;
+
+		/*
+		 * this probe callback would be called by sub driver
+		 * after setting of all resources to this sub driver,
+		 * such as clock, irq and register map are done or by load()
+		 * of exynos drm driver.
+		 *
+		 * P.S. note that this driver is considered for modularization.
+		 */
+		ret = subdrv->probe(dev, subdrv->manager.dev);
+		if (ret)
+			return ret;
+	}
+
+	/* create and initialize a encoder for this sub driver. */
+	encoder = exynos_drm_encoder_create(dev, &subdrv->manager,
+			(1 << MAX_CRTC) - 1);
+	if (!encoder) {
+		DRM_ERROR("failed to create encoder\n");
+		return -EFAULT;
+	}
+
+	/*
+	 * create and initialize a connector for this sub driver and
+	 * attach the encoder created above to the connector.
+	 */
+	connector = exynos_drm_connector_create(dev, encoder);
+	if (!connector) {
+		DRM_ERROR("failed to create connector\n");
+		encoder->funcs->destroy(encoder);
+		return -EFAULT;
+	}
+
+	subdrv->encoder = encoder;
+	subdrv->connector = connector;
+
+	return 0;
+}
+
+static void exynos_drm_subdrv_remove(struct drm_device *dev,
+				      struct exynos_drm_subdrv *subdrv)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (subdrv->remove)
+		subdrv->remove(dev);
+
+	if (subdrv->encoder) {
+		struct drm_encoder *encoder = subdrv->encoder;
+		encoder->funcs->destroy(encoder);
+		subdrv->encoder = NULL;
+	}
+
+	if (subdrv->connector) {
+		struct drm_connector *connector = subdrv->connector;
+		connector->funcs->destroy(connector);
+		subdrv->connector = NULL;
+	}
+}
+
+int exynos_drm_device_register(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv, *n;
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev)
+		return -EINVAL;
+
+	if (drm_dev) {
+		DRM_ERROR("Already drm device were registered\n");
+		return -EBUSY;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry_safe(subdrv, n, &exynos_drm_subdrv_list, list) {
+		err = exynos_drm_subdrv_probe(dev, subdrv);
+		if (err) {
+			DRM_DEBUG("exynos drm subdrv probe failed.\n");
+			list_del(&subdrv->list);
+		}
+	}
+
+	drm_dev = dev;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_register);
+
+int exynos_drm_device_unregister(struct drm_device *dev)
+{
+	struct exynos_drm_subdrv *subdrv;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!dev || dev != drm_dev) {
+		WARN(1, "Unexpected drm device unregister!\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	list_for_each_entry(subdrv, &exynos_drm_subdrv_list, list)
+		exynos_drm_subdrv_remove(dev, subdrv);
+
+	drm_dev = NULL;
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_device_unregister);
+
+static int exynos_drm_mode_group_reinit(struct drm_device *dev)
+{
+	struct drm_mode_group *group = &dev->primary->mode_group;
+	uint32_t *id_list = group->id_list;
+	int ret;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	ret = drm_mode_group_init_legacy_group(dev, group);
+	if (ret < 0)
+		return ret;
+
+	kfree(id_list);
+	return 0;
+}
+
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *subdrv)
+{
+	int err;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv)
+		return -EINVAL;
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		err = exynos_drm_subdrv_probe(drm_dev, subdrv);
+		if (err) {
+			DRM_ERROR("failed to probe exynos drm subdrv\n");
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		/*
+		 * if any specific driver such as fimd or hdmi driver called
+		 * exynos_drm_subdrv_register() later than drm_load(),
+		 * the fb helper should be re-initialized and re-configured.
+		 */
+		err = exynos_drm_fbdev_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize exynos drm fbdev\n");
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+
+		err = exynos_drm_mode_group_reinit(drm_dev);
+		if (err) {
+			DRM_ERROR("failed to reinitialize mode group\n");
+			exynos_drm_fbdev_fini(drm_dev);
+			exynos_drm_subdrv_remove(drm_dev, subdrv);
+			mutex_unlock(&exynos_drm_mutex);
+			return err;
+		}
+	}
+
+	subdrv->drm_dev = drm_dev;
+
+	list_add_tail(&subdrv->list, &exynos_drm_subdrv_list);
+	mutex_unlock(&exynos_drm_mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_register);
+
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *subdrv)
+{
+	int ret = -EFAULT;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	if (!subdrv) {
+		DRM_DEBUG("Unexpected exynos drm subdrv unregister!\n");
+		return ret;
+	}
+
+	mutex_lock(&exynos_drm_mutex);
+	if (drm_dev) {
+		exynos_drm_subdrv_remove(drm_dev, subdrv);
+		list_del(&subdrv->list);
+
+		/*
+		 * fb helper should be updated once a sub driver is released
+		 * to re-configure crtc and connector and also to re-setup
+		 * drm framebuffer.
+		 */
+		ret = exynos_drm_fbdev_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed fb helper reinit.\n");
+			goto fail;
+		}
+
+		ret = exynos_drm_mode_group_reinit(drm_dev);
+		if (ret < 0) {
+			DRM_ERROR("failed drm mode group reinit.\n");
+			goto fail;
+		}
+	}
+
+fail:
+	mutex_unlock(&exynos_drm_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(exynos_drm_subdrv_unregister);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Core Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.c b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
new file mode 100644
index 000000000000..9337e5e2dbb6
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.c
@@ -0,0 +1,381 @@
+/* exynos_drm_crtc.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_encoder.h"
+#include "exynos_drm_buf.h"
+
+#define to_exynos_crtc(x)	container_of(x, struct exynos_drm_crtc,\
+				drm_crtc)
+
+/*
+ * Exynos specific crtc postion structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displyed
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed
+ *	- the unit is screen coordinates.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_w: width of hardware screen.
+ * @crtc_h: height of hardware screen.
+ */
+struct exynos_drm_crtc_pos {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_w;
+	unsigned int crtc_h;
+};
+
+/*
+ * Exynos specific crtc structure.
+ *
+ * @drm_crtc: crtc object.
+ * @overlay: contain information common to display controller and hdmi and
+ *	contents of this overlay object would be copied to sub driver size.
+ * @pipe: a crtc index created at load() with a new crtc object creation
+ *	and the crtc object would be set to private->crtc array
+ *	to get a crtc object corresponding to this pipe from private->crtc
+ *	array when irq interrupt occured. the reason of using this pipe is that
+ *	drm framework doesn't support multiple irq yet.
+ *	we can refer to the crtc to current hardware interrupt occured through
+ *	this pipe value.
+ */
+struct exynos_drm_crtc {
+	struct drm_crtc			drm_crtc;
+	struct exynos_drm_overlay	overlay;
+	unsigned int			pipe;
+};
+
+static void exynos_drm_crtc_apply(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_overlay *overlay = &exynos_crtc->overlay;
+
+	exynos_drm_fn_encoder(crtc, overlay,
+			exynos_drm_encoder_crtc_mode_set);
+	exynos_drm_fn_encoder(crtc, NULL, exynos_drm_encoder_crtc_commit);
+}
+
+static int exynos_drm_overlay_update(struct exynos_drm_overlay *overlay,
+				       struct drm_framebuffer *fb,
+				       struct drm_display_mode *mode,
+				       struct exynos_drm_crtc_pos *pos)
+{
+	struct exynos_drm_buf_entry *entry;
+	unsigned int actual_w;
+	unsigned int actual_h;
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	overlay->paddr = entry->paddr;
+	overlay->vaddr = entry->vaddr;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)overlay->vaddr,
+			(unsigned long)overlay->paddr);
+
+	actual_w = min((mode->hdisplay - pos->crtc_x), pos->crtc_w);
+	actual_h = min((mode->vdisplay - pos->crtc_y), pos->crtc_h);
+
+	/* set drm framebuffer data. */
+	overlay->fb_x = pos->fb_x;
+	overlay->fb_y = pos->fb_y;
+	overlay->fb_width = fb->width;
+	overlay->fb_height = fb->height;
+	overlay->bpp = fb->bits_per_pixel;
+	overlay->pitch = fb->pitch;
+
+	/* set overlay range to be displayed. */
+	overlay->crtc_x = pos->crtc_x;
+	overlay->crtc_y = pos->crtc_y;
+	overlay->crtc_width = actual_w;
+	overlay->crtc_height = actual_h;
+
+	/* set drm mode data. */
+	overlay->mode_width = mode->hdisplay;
+	overlay->mode_height = mode->vdisplay;
+	overlay->refresh = mode->vrefresh;
+	overlay->scan_flag = mode->flags;
+
+	DRM_DEBUG_KMS("overlay : offset_x/y(%d,%d), width/height(%d,%d)",
+			overlay->crtc_x, overlay->crtc_y,
+			overlay->crtc_width, overlay->crtc_height);
+
+	return 0;
+}
+
+static int exynos_drm_crtc_update(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_overlay *overlay;
+	struct exynos_drm_crtc_pos pos;
+	struct drm_display_mode *mode = &crtc->mode;
+	struct drm_framebuffer *fb = crtc->fb;
+
+	if (!mode || !fb)
+		return -EINVAL;
+
+	exynos_crtc = to_exynos_crtc(crtc);
+	overlay = &exynos_crtc->overlay;
+
+	memset(&pos, 0, sizeof(struct exynos_drm_crtc_pos));
+
+	/* it means the offset of framebuffer to be displayed. */
+	pos.fb_x = crtc->x;
+	pos.fb_y = crtc->y;
+
+	/* OSD position to be displayed. */
+	pos.crtc_x = 0;
+	pos.crtc_y = 0;
+	pos.crtc_w = fb->width - crtc->x;
+	pos.crtc_h = fb->height - crtc->y;
+
+	return exynos_drm_overlay_update(overlay, crtc->fb, mode, &pos);
+}
+
+static void exynos_drm_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+}
+
+static void exynos_drm_crtc_prepare(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_crtc_commit(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static bool
+exynos_drm_crtc_mode_fixup(struct drm_crtc *crtc,
+			    struct drm_display_mode *mode,
+			    struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL */
+	return true;
+}
+
+static int
+exynos_drm_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
+			  struct drm_display_mode *adjusted_mode, int x, int y,
+			  struct drm_framebuffer *old_fb)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	return exynos_drm_crtc_update(crtc);
+}
+
+static int exynos_drm_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+					  struct drm_framebuffer *old_fb)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	ret = exynos_drm_crtc_update(crtc);
+	if (ret)
+		return ret;
+
+	exynos_drm_crtc_apply(crtc);
+
+	return ret;
+}
+
+static void exynos_drm_crtc_load_lut(struct drm_crtc *crtc)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+	/* drm framework doesn't check NULL */
+}
+
+static struct drm_crtc_helper_funcs exynos_crtc_helper_funcs = {
+	.dpms		= exynos_drm_crtc_dpms,
+	.prepare	= exynos_drm_crtc_prepare,
+	.commit		= exynos_drm_crtc_commit,
+	.mode_fixup	= exynos_drm_crtc_mode_fixup,
+	.mode_set	= exynos_drm_crtc_mode_set,
+	.mode_set_base	= exynos_drm_crtc_mode_set_base,
+	.load_lut	= exynos_drm_crtc_load_lut,
+};
+
+static int exynos_drm_crtc_page_flip(struct drm_crtc *crtc,
+				      struct drm_framebuffer *fb,
+				      struct drm_pending_vblank_event *event)
+{
+	struct drm_device *dev = crtc->dev;
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct drm_framebuffer *old_fb = crtc->fb;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (event) {
+		/*
+		 * the pipe from user always is 0 so we can set pipe number
+		 * of current owner to event.
+		 */
+		event->pipe = exynos_crtc->pipe;
+
+		list_add_tail(&event->base.link,
+				&dev_priv->pageflip_event_list);
+
+		ret = drm_vblank_get(dev, exynos_crtc->pipe);
+		if (ret) {
+			DRM_DEBUG("failed to acquire vblank counter\n");
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		crtc->fb = fb;
+		ret = exynos_drm_crtc_update(crtc);
+		if (ret) {
+			crtc->fb = old_fb;
+			drm_vblank_put(dev, exynos_crtc->pipe);
+			list_del(&event->base.link);
+
+			goto out;
+		}
+
+		/*
+		 * the values related to a buffer of the drm framebuffer
+		 * to be applied should be set at here. because these values
+		 * first, are set to shadow registers and then to
+		 * real registers at vsync front porch period.
+		 */
+		exynos_drm_crtc_apply(crtc);
+	}
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static void exynos_drm_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+	struct exynos_drm_private *private = crtc->dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	private->crtc[exynos_crtc->pipe] = NULL;
+
+	drm_crtc_cleanup(crtc);
+	kfree(exynos_crtc);
+}
+
+static struct drm_crtc_funcs exynos_crtc_funcs = {
+	.set_config	= drm_crtc_helper_set_config,
+	.page_flip	= exynos_drm_crtc_page_flip,
+	.destroy	= exynos_drm_crtc_destroy,
+};
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc)
+{
+	struct exynos_drm_crtc *exynos_crtc = to_exynos_crtc(crtc);
+
+	return &exynos_crtc->overlay;
+}
+
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr)
+{
+	struct exynos_drm_crtc *exynos_crtc;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_crtc *crtc;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_crtc = kzalloc(sizeof(*exynos_crtc), GFP_KERNEL);
+	if (!exynos_crtc) {
+		DRM_ERROR("failed to allocate exynos crtc\n");
+		return -ENOMEM;
+	}
+
+	exynos_crtc->pipe = nr;
+	crtc = &exynos_crtc->drm_crtc;
+
+	private->crtc[nr] = crtc;
+
+	drm_crtc_init(dev, crtc, &exynos_crtc_funcs);
+	drm_crtc_helper_add(crtc, &exynos_crtc_helper_funcs);
+
+	return 0;
+}
+
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_enable_vblank);
+
+	return 0;
+}
+
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_fn_encoder(private->crtc[crtc], &crtc,
+			exynos_drm_disable_vblank);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM CRTC Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_crtc.h b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
new file mode 100644
index 000000000000..c584042d6d2c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_crtc.h
@@ -0,0 +1,38 @@
+/* exynos_drm_crtc.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_CRTC_H_
+#define _EXYNOS_DRM_CRTC_H_
+
+struct exynos_drm_overlay *get_exynos_drm_overlay(struct drm_device *dev,
+		struct drm_crtc *crtc);
+int exynos_drm_crtc_create(struct drm_device *dev, unsigned int nr);
+int exynos_drm_crtc_enable_vblank(struct drm_device *dev, int crtc);
+void exynos_drm_crtc_disable_vblank(struct drm_device *dev, int crtc);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
new file mode 100644
index 000000000000..83810cbe3c17
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_gem.h"
+
+#define DRIVER_NAME	"exynos-drm"
+#define DRIVER_DESC	"Samsung SoC DRM"
+#define DRIVER_DATE	"20110530"
+#define DRIVER_MAJOR	1
+#define DRIVER_MINOR	0
+
+static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
+{
+	struct exynos_drm_private *private;
+	int ret;
+	int nr;
+
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	private = kzalloc(sizeof(struct exynos_drm_private), GFP_KERNEL);
+	if (!private) {
+		DRM_ERROR("failed to allocate private\n");
+		return -ENOMEM;
+	}
+
+	INIT_LIST_HEAD(&private->pageflip_event_list);
+	dev->dev_private = (void *)private;
+
+	drm_mode_config_init(dev);
+
+	exynos_drm_mode_config_init(dev);
+
+	/*
+	 * EXYNOS4 is enough to have two CRTCs and each crtc would be used
+	 * without dependency of hardware.
+	 */
+	for (nr = 0; nr < MAX_CRTC; nr++) {
+		ret = exynos_drm_crtc_create(dev, nr);
+		if (ret)
+			goto err_crtc;
+	}
+
+	ret = drm_vblank_init(dev, MAX_CRTC);
+	if (ret)
+		goto err_crtc;
+
+	/*
+	 * probe sub drivers such as display controller and hdmi driver,
+	 * that were registered at probe() of platform driver
+	 * to the sub driver and create encoder and connector for them.
+	 */
+	ret = exynos_drm_device_register(dev);
+	if (ret)
+		goto err_vblank;
+
+	/*
+	 * create and configure fb helper and also exynos specific
+	 * fbdev object.
+	 */
+	ret = exynos_drm_fbdev_init(dev);
+	if (ret) {
+		DRM_ERROR("failed to initialize drm fbdev\n");
+		goto err_drm_device;
+	}
+
+	return 0;
+
+err_drm_device:
+	exynos_drm_device_unregister(dev);
+err_vblank:
+	drm_vblank_cleanup(dev);
+err_crtc:
+	drm_mode_config_cleanup(dev);
+	kfree(private);
+
+	return ret;
+}
+
+static int exynos_drm_unload(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_fini(dev);
+	exynos_drm_device_unregister(dev);
+	drm_vblank_cleanup(dev);
+	drm_mode_config_cleanup(dev);
+	kfree(dev->dev_private);
+
+	dev->dev_private = NULL;
+
+	return 0;
+}
+
+static void exynos_drm_preclose(struct drm_device *dev,
+					struct drm_file *file_priv)
+{
+	struct exynos_drm_private *dev_priv = dev->dev_private;
+
+	/*
+	 * drm framework frees all events at release time,
+	 * so private event list should be cleared.
+	 */
+	if (!list_empty(&dev_priv->pageflip_event_list))
+		INIT_LIST_HEAD(&dev_priv->pageflip_event_list);
+}
+
+static void exynos_drm_lastclose(struct drm_device *dev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_fbdev_restore_mode(dev);
+}
+
+static struct vm_operations_struct exynos_drm_gem_vm_ops = {
+	.fault = exynos_drm_gem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static struct drm_ioctl_desc exynos_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_CREATE, exynos_drm_gem_create_ioctl,
+			DRM_UNLOCKED | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MAP_OFFSET,
+			exynos_drm_gem_map_offset_ioctl, DRM_UNLOCKED |
+			DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(EXYNOS_GEM_MMAP,
+			exynos_drm_gem_mmap_ioctl, DRM_UNLOCKED | DRM_AUTH),
+};
+
+static struct drm_driver exynos_drm_driver = {
+	.driver_features	= DRIVER_HAVE_IRQ | DRIVER_BUS_PLATFORM |
+				  DRIVER_MODESET | DRIVER_GEM,
+	.load			= exynos_drm_load,
+	.unload			= exynos_drm_unload,
+	.preclose		= exynos_drm_preclose,
+	.lastclose		= exynos_drm_lastclose,
+	.get_vblank_counter	= drm_vblank_count,
+	.enable_vblank		= exynos_drm_crtc_enable_vblank,
+	.disable_vblank		= exynos_drm_crtc_disable_vblank,
+	.gem_init_object	= exynos_drm_gem_init_object,
+	.gem_free_object	= exynos_drm_gem_free_object,
+	.gem_vm_ops		= &exynos_drm_gem_vm_ops,
+	.dumb_create		= exynos_drm_gem_dumb_create,
+	.dumb_map_offset	= exynos_drm_gem_dumb_map_offset,
+	.dumb_destroy		= exynos_drm_gem_dumb_destroy,
+	.ioctls			= exynos_ioctls,
+	.fops = {
+		.owner		= THIS_MODULE,
+		.open		= drm_open,
+		.mmap		= exynos_drm_gem_mmap,
+		.poll		= drm_poll,
+		.read		= drm_read,
+		.unlocked_ioctl	= drm_ioctl,
+		.release	= drm_release,
+	},
+	.name	= DRIVER_NAME,
+	.desc	= DRIVER_DESC,
+	.date	= DRIVER_DATE,
+	.major	= DRIVER_MAJOR,
+	.minor	= DRIVER_MINOR,
+};
+
+static int exynos_drm_platform_probe(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	exynos_drm_driver.num_ioctls = DRM_ARRAY_SIZE(exynos_ioctls);
+
+	return drm_platform_init(&exynos_drm_driver, pdev);
+}
+
+static int exynos_drm_platform_remove(struct platform_device *pdev)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	drm_platform_exit(&exynos_drm_driver, pdev);
+
+	return 0;
+}
+
+static struct platform_driver exynos_drm_platform_driver = {
+	.probe		= exynos_drm_platform_probe,
+	.remove		= __devexit_p(exynos_drm_platform_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= DRIVER_NAME,
+	},
+};
+
+static int __init exynos_drm_init(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	return platform_driver_register(&exynos_drm_platform_driver);
+}
+
+static void __exit exynos_drm_exit(void)
+{
+	DRM_DEBUG_DRIVER("%s\n", __FILE__);
+
+	platform_driver_unregister(&exynos_drm_platform_driver);
+}
+
+module_init(exynos_drm_init);
+module_exit(exynos_drm_exit);
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
new file mode 100644
index 000000000000..c03683f2ae72
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -0,0 +1,254 @@
+/* exynos_drm_drv.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_DRV_H_
+#define _EXYNOS_DRM_DRV_H_
+
+#include "drm.h"
+
+#define MAX_CRTC	2
+
+struct drm_device;
+struct exynos_drm_overlay;
+struct drm_connector;
+
+/* this enumerates display type. */
+enum exynos_drm_output_type {
+	EXYNOS_DISPLAY_TYPE_NONE,
+	/* RGB or CPU Interface. */
+	EXYNOS_DISPLAY_TYPE_LCD,
+	/* HDMI Interface. */
+	EXYNOS_DISPLAY_TYPE_HDMI,
+};
+
+/*
+ * Exynos drm overlay ops structure.
+ *
+ * @mode_set: copy drm overlay info to hw specific overlay info.
+ * @commit: apply hardware specific overlay data to registers.
+ * @disable: disable hardware specific overlay.
+ */
+struct exynos_drm_overlay_ops {
+	void (*mode_set)(struct device *subdrv_dev,
+			 struct exynos_drm_overlay *overlay);
+	void (*commit)(struct device *subdrv_dev);
+	void (*disable)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common overlay structure.
+ *
+ * @fb_x: offset x on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_y: offset y on a framebuffer to be displayed.
+ *	- the unit is screen coordinates.
+ * @fb_width: width of a framebuffer.
+ * @fb_height: height of a framebuffer.
+ * @crtc_x: offset x on hardware screen.
+ * @crtc_y: offset y on hardware screen.
+ * @crtc_width: window width to be displayed (hardware screen).
+ * @crtc_height: window height to be displayed (hardware screen).
+ * @mode_width: width of screen mode.
+ * @mode_height: height of screen mode.
+ * @refresh: refresh rate.
+ * @scan_flag: interlace or progressive way.
+ *	(it could be DRM_MODE_FLAG_*)
+ * @bpp: pixel size.(in bit)
+ * @paddr: bus(accessed by dma) physical memory address to this overlay
+ *		and this is physically continuous.
+ * @vaddr: virtual memory addresss to this overlay.
+ * @default_win: a window to be enabled.
+ * @color_key: color key on or off.
+ * @index_color: if using color key feature then this value would be used
+ *			as index color.
+ * @local_path: in case of lcd type, local path mode on or off.
+ * @transparency: transparency on or off.
+ * @activated: activated or not.
+ *
+ * this structure is common to exynos SoC and its contents would be copied
+ * to hardware specific overlay info.
+ */
+struct exynos_drm_overlay {
+	unsigned int fb_x;
+	unsigned int fb_y;
+	unsigned int fb_width;
+	unsigned int fb_height;
+	unsigned int crtc_x;
+	unsigned int crtc_y;
+	unsigned int crtc_width;
+	unsigned int crtc_height;
+	unsigned int mode_width;
+	unsigned int mode_height;
+	unsigned int refresh;
+	unsigned int scan_flag;
+	unsigned int bpp;
+	unsigned int pitch;
+	dma_addr_t paddr;
+	void __iomem *vaddr;
+
+	bool default_win;
+	bool color_key;
+	unsigned int index_color;
+	bool local_path;
+	bool transparency;
+	bool activated;
+};
+
+/*
+ * Exynos DRM Display Structure.
+ *	- this structure is common to analog tv, digital tv and lcd panel.
+ *
+ * @type: one of EXYNOS_DISPLAY_TYPE_LCD and HDMI.
+ * @is_connected: check for that display is connected or not.
+ * @get_edid: get edid modes from display driver.
+ * @get_timing: get timing object from display driver.
+ * @check_timing: check if timing is valid or not.
+ * @power_on: display device on or off.
+ */
+struct exynos_drm_display {
+	enum exynos_drm_output_type type;
+	bool (*is_connected)(struct device *dev);
+	int (*get_edid)(struct device *dev, struct drm_connector *connector,
+				u8 *edid, int len);
+	void *(*get_timing)(struct device *dev);
+	int (*check_timing)(struct device *dev, void *timing);
+	int (*power_on)(struct device *dev, int mode);
+};
+
+/*
+ * Exynos drm manager ops
+ *
+ * @mode_set: convert drm_display_mode to hw specific display mode and
+ *	      would be called by encoder->mode_set().
+ * @commit: set current hw specific display mode to hw.
+ * @enable_vblank: specific driver callback for enabling vblank interrupt.
+ * @disable_vblank: specific driver callback for disabling vblank interrupt.
+ */
+struct exynos_drm_manager_ops {
+	void (*mode_set)(struct device *subdrv_dev, void *mode);
+	void (*commit)(struct device *subdrv_dev);
+	int (*enable_vblank)(struct device *subdrv_dev);
+	void (*disable_vblank)(struct device *subdrv_dev);
+};
+
+/*
+ * Exynos drm common manager structure.
+ *
+ * @dev: pointer to device object for subdrv device driver.
+ *	sub drivers such as display controller or hdmi driver,
+ *	have their own device object.
+ * @ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware global registers.
+ * @overlay_ops: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control hardware overlay reigsters.
+ * @display: pointer to callbacks for exynos drm specific framebuffer.
+ *	these callbacks should be set by specific drivers such fimd
+ *	or hdmi driver and are used to control display devices such as
+ *	analog tv, digital tv and lcd panel and also get timing data for them.
+ */
+struct exynos_drm_manager {
+	struct device *dev;
+	int pipe;
+	struct exynos_drm_manager_ops *ops;
+	struct exynos_drm_overlay_ops *overlay_ops;
+	struct exynos_drm_display *display;
+};
+
+/*
+ * Exynos drm private structure.
+ */
+struct exynos_drm_private {
+	struct drm_fb_helper *fb_helper;
+
+	/* list head for new event to be added. */
+	struct list_head pageflip_event_list;
+
+	/*
+	 * created crtc object would be contained at this array and
+	 * this array is used to be aware of which crtc did it request vblank.
+	 */
+	struct drm_crtc *crtc[MAX_CRTC];
+};
+
+/*
+ * Exynos drm sub driver structure.
+ *
+ * @list: sub driver has its own list object to register to exynos drm driver.
+ * @drm_dev: pointer to drm_device and this pointer would be set
+ *	when sub driver calls exynos_drm_subdrv_register().
+ * @probe: this callback would be called by exynos drm driver after
+ *	subdrv is registered to it.
+ * @remove: this callback is used to release resources created
+ *	by probe callback.
+ * @manager: subdrv has its own manager to control a hardware appropriately
+ *	and we can access a hardware drawing on this manager.
+ * @encoder: encoder object owned by this sub driver.
+ * @connector: connector object owned by this sub driver.
+ */
+struct exynos_drm_subdrv {
+	struct list_head list;
+	struct drm_device *drm_dev;
+
+	int (*probe)(struct drm_device *drm_dev, struct device *dev);
+	void (*remove)(struct drm_device *dev);
+
+	struct exynos_drm_manager manager;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+};
+
+/*
+ * this function calls a probe callback registered to sub driver list and
+ * create its own encoder and connector and then set drm_device object
+ * to global one.
+ */
+int exynos_drm_device_register(struct drm_device *dev);
+/*
+ * this function calls a remove callback registered to sub driver list and
+ * destroy its own encoder and connetor.
+ */
+int exynos_drm_device_unregister(struct drm_device *dev);
+
+/*
+ * this function would be called by sub drivers such as display controller
+ * or hdmi driver to register this sub driver object to exynos drm driver
+ * and when a sub driver is registered to exynos drm driver a probe callback
+ * of the sub driver is called and creates its own encoder and connector
+ * and then fb helper and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_register(struct exynos_drm_subdrv *drm_subdrv);
+
+/*
+ * this function removes subdrv list from exynos drm driver and fb helper
+ * and drm mode group would be re-initialized.
+ */
+int exynos_drm_subdrv_unregister(struct exynos_drm_subdrv *drm_subdrv);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.c b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
new file mode 100644
index 000000000000..7cf6fa86a67e
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.c
@@ -0,0 +1,271 @@
+/* exynos_drm_encoder.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
+#include "exynos_drm_encoder.h"
+
+#define to_exynos_encoder(x)	container_of(x, struct exynos_drm_encoder,\
+				drm_encoder)
+
+/*
+ * exynos specific encoder structure.
+ *
+ * @drm_encoder: encoder object.
+ * @manager: specific encoder has its own manager to control a hardware
+ *	appropriately and we can access a hardware drawing on this manager.
+ */
+struct exynos_drm_encoder {
+	struct drm_encoder		drm_encoder;
+	struct exynos_drm_manager	*manager;
+};
+
+static void exynos_drm_encoder_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+
+	DRM_DEBUG_KMS("%s, encoder dpms: %d\n", __FILE__, mode);
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			struct exynos_drm_display *display = manager->display;
+
+			if (display && display->power_on)
+				display->power_on(manager->dev, mode);
+		}
+	}
+}
+
+static bool
+exynos_drm_encoder_mode_fixup(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+
+	return true;
+}
+
+static void exynos_drm_encoder_mode_set(struct drm_encoder *encoder,
+					 struct drm_display_mode *mode,
+					 struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_connector *connector;
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = get_exynos_drm_overlay(dev,
+						encoder->crtc);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode = adjusted_mode;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			if (manager_ops && manager_ops->mode_set)
+				manager_ops->mode_set(manager->dev, mode);
+
+			if (overlay_ops && overlay_ops->mode_set)
+				overlay_ops->mode_set(manager->dev, overlay);
+		}
+	}
+}
+
+static void exynos_drm_encoder_prepare(struct drm_encoder *encoder)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* drm framework doesn't check NULL. */
+}
+
+static void exynos_drm_encoder_commit(struct drm_encoder *encoder)
+{
+	struct exynos_drm_manager *manager = exynos_drm_get_manager(encoder);
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (manager_ops && manager_ops->commit)
+		manager_ops->commit(manager->dev);
+
+	if (overlay_ops && overlay_ops->commit)
+		overlay_ops->commit(manager->dev);
+}
+
+static struct drm_crtc *
+exynos_drm_encoder_get_crtc(struct drm_encoder *encoder)
+{
+	return encoder->crtc;
+}
+
+static struct drm_encoder_helper_funcs exynos_encoder_helper_funcs = {
+	.dpms		= exynos_drm_encoder_dpms,
+	.mode_fixup	= exynos_drm_encoder_mode_fixup,
+	.mode_set	= exynos_drm_encoder_mode_set,
+	.prepare	= exynos_drm_encoder_prepare,
+	.commit		= exynos_drm_encoder_commit,
+	.get_crtc	= exynos_drm_encoder_get_crtc,
+};
+
+static void exynos_drm_encoder_destroy(struct drm_encoder *encoder)
+{
+	struct exynos_drm_encoder *exynos_encoder =
+		to_exynos_encoder(encoder);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_encoder->manager->pipe = -1;
+
+	drm_encoder_cleanup(encoder);
+	encoder->dev->mode_config.num_encoder--;
+	kfree(exynos_encoder);
+}
+
+static struct drm_encoder_funcs exynos_encoder_funcs = {
+	.destroy = exynos_drm_encoder_destroy,
+};
+
+struct drm_encoder *
+exynos_drm_encoder_create(struct drm_device *dev,
+			   struct exynos_drm_manager *manager,
+			   unsigned int possible_crtcs)
+{
+	struct drm_encoder *encoder;
+	struct exynos_drm_encoder *exynos_encoder;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!manager || !possible_crtcs)
+		return NULL;
+
+	if (!manager->dev)
+		return NULL;
+
+	exynos_encoder = kzalloc(sizeof(*exynos_encoder), GFP_KERNEL);
+	if (!exynos_encoder) {
+		DRM_ERROR("failed to allocate encoder\n");
+		return NULL;
+	}
+
+	exynos_encoder->manager = manager;
+	encoder = &exynos_encoder->drm_encoder;
+	encoder->possible_crtcs = possible_crtcs;
+
+	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
+
+	drm_encoder_init(dev, encoder, &exynos_encoder_funcs,
+			DRM_MODE_ENCODER_TMDS);
+
+	drm_encoder_helper_add(encoder, &exynos_encoder_helper_funcs);
+
+	DRM_DEBUG_KMS("encoder has been created\n");
+
+	return encoder;
+}
+
+struct exynos_drm_manager *exynos_drm_get_manager(struct drm_encoder *encoder)
+{
+	return to_exynos_encoder(encoder)->manager;
+}
+
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *))
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_encoder *encoder;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->crtc != crtc)
+			continue;
+
+		fn(encoder, data);
+	}
+}
+
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->enable_vblank)
+		manager_ops->enable_vblank(manager->dev);
+}
+
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_manager_ops *manager_ops = manager->ops;
+	int crtc = *(int *)data;
+
+	if (manager->pipe == -1)
+		manager->pipe = crtc;
+
+	if (manager_ops->disable_vblank)
+		manager_ops->disable_vblank(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+
+	overlay_ops->commit(manager->dev);
+}
+
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data)
+{
+	struct exynos_drm_manager *manager =
+		to_exynos_encoder(encoder)->manager;
+	struct exynos_drm_overlay_ops *overlay_ops = manager->overlay_ops;
+	struct exynos_drm_overlay *overlay = data;
+
+	overlay_ops->mode_set(manager->dev, overlay);
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM Encoder Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_encoder.h b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
new file mode 100644
index 000000000000..5ecd645d06a9
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_encoder.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_ENCODER_H_
+#define _EXYNOS_DRM_ENCODER_H_
+
+struct exynos_drm_manager;
+
+struct drm_encoder *exynos_drm_encoder_create(struct drm_device *dev,
+					       struct exynos_drm_manager *mgr,
+					       unsigned int possible_crtcs);
+struct exynos_drm_manager *
+exynos_drm_get_manager(struct drm_encoder *encoder);
+void exynos_drm_fn_encoder(struct drm_crtc *crtc, void *data,
+			    void (*fn)(struct drm_encoder *, void *));
+void exynos_drm_enable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_disable_vblank(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_commit(struct drm_encoder *encoder, void *data);
+void exynos_drm_encoder_crtc_mode_set(struct drm_encoder *encoder, void *data);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c
new file mode 100644
index 000000000000..48d29cfd5240
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c
@@ -0,0 +1,265 @@
+/* exynos_drm_fb.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+#include "exynos_drm_gem.h"
+
+#define to_exynos_fb(x)	container_of(x, struct exynos_drm_fb, fb)
+
+/*
+ * exynos specific framebuffer structure.
+ *
+ * @fb: drm framebuffer obejct.
+ * @exynos_gem_obj: exynos specific gem object containing a gem object.
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing only the information to physically continuous memory
+ *	region allocated at default framebuffer creation.
+ */
+struct exynos_drm_fb {
+	struct drm_framebuffer		fb;
+	struct exynos_drm_gem_obj	*exynos_gem_obj;
+	struct exynos_drm_buf_entry	*entry;
+};
+
+static void exynos_drm_fb_destroy(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	drm_framebuffer_cleanup(fb);
+
+	/*
+	 * default framebuffer has no gem object so
+	 * a buffer of the default framebuffer should be released at here.
+	 */
+	if (!exynos_fb->exynos_gem_obj && exynos_fb->entry)
+		exynos_drm_buf_destroy(fb->dev, exynos_fb->entry);
+
+	kfree(exynos_fb);
+	exynos_fb = NULL;
+}
+
+static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb,
+					struct drm_file *file_priv,
+					unsigned int *handle)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return drm_gem_handle_create(file_priv,
+			&exynos_fb->exynos_gem_obj->base, handle);
+}
+
+static int exynos_drm_fb_dirty(struct drm_framebuffer *fb,
+				struct drm_file *file_priv, unsigned flags,
+				unsigned color, struct drm_clip_rect *clips,
+				unsigned num_clips)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO */
+
+	return 0;
+}
+
+static struct drm_framebuffer_funcs exynos_drm_fb_funcs = {
+	.destroy	= exynos_drm_fb_destroy,
+	.create_handle	= exynos_drm_fb_create_handle,
+	.dirty		= exynos_drm_fb_dirty,
+};
+
+static struct drm_framebuffer *
+exynos_drm_fb_init(struct drm_file *file_priv, struct drm_device *dev,
+		    struct drm_mode_fb_cmd *mode_cmd)
+{
+	struct exynos_drm_fb *exynos_fb;
+	struct drm_framebuffer *fb;
+	struct exynos_drm_gem_obj *exynos_gem_obj = NULL;
+	struct drm_gem_object *obj;
+	unsigned int size;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mode_cmd->pitch = max(mode_cmd->pitch,
+			mode_cmd->width * (mode_cmd->bpp >> 3));
+
+	DRM_LOG_KMS("drm fb create(%dx%d)\n",
+			mode_cmd->width, mode_cmd->height);
+
+	exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL);
+	if (!exynos_fb) {
+		DRM_ERROR("failed to allocate exynos drm framebuffer.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	fb = &exynos_fb->fb;
+	ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs);
+	if (ret) {
+		DRM_ERROR("failed to initialize framebuffer.\n");
+		goto err_init;
+	}
+
+	DRM_LOG_KMS("create: fb id: %d\n", fb->base.id);
+
+	size = mode_cmd->pitch * mode_cmd->height;
+
+	/*
+	 * mode_cmd->handle could be NULL at booting time or
+	 * with user request. if NULL, a new buffer or a gem object
+	 * would be allocated.
+	 */
+	if (!mode_cmd->handle) {
+		if (!file_priv) {
+			struct exynos_drm_buf_entry *entry;
+
+			/*
+			 * in case that file_priv is NULL, it allocates
+			 * only buffer and this buffer would be used
+			 * for default framebuffer.
+			 */
+			entry = exynos_drm_buf_create(dev, size);
+			if (IS_ERR(entry)) {
+				ret = PTR_ERR(entry);
+				goto err_buffer;
+			}
+
+			exynos_fb->entry = entry;
+
+			DRM_LOG_KMS("default fb: paddr = 0x%lx, size = 0x%x\n",
+					(unsigned long)entry->paddr, size);
+
+			goto out;
+		} else {
+			exynos_gem_obj = exynos_drm_gem_create(file_priv, dev,
+							size,
+							&mode_cmd->handle);
+			if (IS_ERR(exynos_gem_obj)) {
+				ret = PTR_ERR(exynos_gem_obj);
+				goto err_buffer;
+			}
+		}
+	} else {
+		obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handle);
+		if (!obj) {
+			DRM_ERROR("failed to lookup gem object.\n");
+			goto err_buffer;
+		}
+
+		exynos_gem_obj = to_exynos_gem_obj(obj);
+
+		drm_gem_object_unreference_unlocked(obj);
+	}
+
+	/*
+	 * if got a exynos_gem_obj from either a handle or
+	 * a new creation then exynos_fb->exynos_gem_obj is NULL
+	 * so that default framebuffer has no its own gem object,
+	 * only its own buffer object.
+	 */
+	exynos_fb->entry = exynos_gem_obj->entry;
+
+	DRM_LOG_KMS("paddr = 0x%lx, size = 0x%x, gem object = 0x%x\n",
+			(unsigned long)exynos_fb->entry->paddr, size,
+			(unsigned int)&exynos_gem_obj->base);
+
+out:
+	exynos_fb->exynos_gem_obj = exynos_gem_obj;
+
+	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+
+	return fb;
+
+err_buffer:
+	drm_framebuffer_cleanup(fb);
+
+err_init:
+	kfree(exynos_fb);
+
+	return ERR_PTR(ret);
+}
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *file_priv,
+					      struct drm_mode_fb_cmd *mode_cmd)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return exynos_drm_fb_init(file_priv, dev, mode_cmd);
+}
+
+struct exynos_drm_buf_entry *exynos_drm_fb_get_buf(struct drm_framebuffer *fb)
+{
+	struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb);
+	struct exynos_drm_buf_entry *entry;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	entry = exynos_fb->entry;
+	if (!entry)
+		return NULL;
+
+	DRM_DEBUG_KMS("vaddr = 0x%lx, paddr = 0x%lx\n",
+			(unsigned long)entry->vaddr,
+			(unsigned long)entry->paddr);
+
+	return entry;
+}
+
+static struct drm_mode_config_funcs exynos_drm_mode_config_funcs = {
+	.fb_create = exynos_drm_fb_create,
+};
+
+void exynos_drm_mode_config_init(struct drm_device *dev)
+{
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+
+	/*
+	 * set max width and height as default value(4096x4096).
+	 * this value would be used to check framebuffer size limitation
+	 * at drm_mode_addfb().
+	 */
+	dev->mode_config.max_width = 4096;
+	dev->mode_config.max_height = 4096;
+
+	dev->mode_config.funcs = &exynos_drm_mode_config_funcs;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FB Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.h b/drivers/gpu/drm/exynos/exynos_drm_fb.h
new file mode 100644
index 000000000000..eb35931d302c
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fb.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FB_H_
+#define _EXYNOS_DRM_FB_H
+
+struct drm_framebuffer *exynos_drm_fb_create(struct drm_device *dev,
+					      struct drm_file *filp,
+					      struct drm_mode_fb_cmd *mode_cmd);
+
+void exynos_drm_mode_config_init(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
new file mode 100644
index 000000000000..1f4b3d1a7713
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -0,0 +1,456 @@
+/* exynos_drm_fbdev.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm_crtc.h"
+#include "drm_fb_helper.h"
+#include "drm_crtc_helper.h"
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_buf.h"
+
+#define MAX_CONNECTOR		4
+#define PREFERRED_BPP		32
+
+#define to_exynos_fbdev(x)	container_of(x, struct exynos_drm_fbdev,\
+				drm_fb_helper)
+
+struct exynos_drm_fbdev {
+	struct drm_fb_helper	drm_fb_helper;
+	struct drm_framebuffer	*fb;
+};
+
+static int exynos_drm_fbdev_set_par(struct fb_info *info)
+{
+	struct fb_var_screeninfo *var = &info->var;
+
+	switch (var->bits_per_pixel) {
+	case 32:
+	case 24:
+	case 18:
+	case 16:
+	case 12:
+		info->fix.visual = FB_VISUAL_TRUECOLOR;
+		break;
+	case 1:
+		info->fix.visual = FB_VISUAL_MONO01;
+		break;
+	default:
+		info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+		break;
+	}
+
+	info->fix.line_length = (var->xres_virtual * var->bits_per_pixel) / 8;
+
+	return drm_fb_helper_set_par(info);
+}
+
+
+static struct fb_ops exynos_drm_fb_ops = {
+	.owner		= THIS_MODULE,
+	.fb_fillrect	= cfb_fillrect,
+	.fb_copyarea	= cfb_copyarea,
+	.fb_imageblit	= cfb_imageblit,
+	.fb_check_var	= drm_fb_helper_check_var,
+	.fb_set_par	= exynos_drm_fbdev_set_par,
+	.fb_blank	= drm_fb_helper_blank,
+	.fb_pan_display	= drm_fb_helper_pan_display,
+	.fb_setcmap	= drm_fb_helper_setcmap,
+};
+
+static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
+				     struct drm_framebuffer *fb,
+				     unsigned int fb_width,
+				     unsigned int fb_height)
+{
+	struct fb_info *fbi = helper->fbdev;
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fb = to_exynos_fbdev(helper);
+	struct exynos_drm_buf_entry *entry;
+	unsigned int size = fb_width * fb_height * (fb->bits_per_pixel >> 3);
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_fb->fb = fb;
+
+	drm_fb_helper_fill_fix(fbi, fb->pitch, fb->depth);
+	drm_fb_helper_fill_var(fbi, helper, fb_width, fb_height);
+
+	entry = exynos_drm_fb_get_buf(fb);
+	if (!entry) {
+		DRM_LOG_KMS("entry is null.\n");
+		return -EFAULT;
+	}
+
+	offset = fbi->var.xoffset * (fb->bits_per_pixel >> 3);
+	offset += fbi->var.yoffset * fb->pitch;
+
+	dev->mode_config.fb_base = entry->paddr;
+	fbi->screen_base = entry->vaddr + offset;
+	fbi->fix.smem_start = entry->paddr + offset;
+	fbi->screen_size = size;
+	fbi->fix.smem_len = size;
+
+	return 0;
+}
+
+static int exynos_drm_fbdev_create(struct drm_fb_helper *helper,
+				    struct drm_fb_helper_surface_size *sizes)
+{
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_device *dev = helper->dev;
+	struct fb_info *fbi;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+	struct platform_device *pdev = dev->platformdev;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("surface width(%d), height(%d) and bpp(%d\n",
+			sizes->surface_width, sizes->surface_height,
+			sizes->surface_bpp);
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	mutex_lock(&dev->struct_mutex);
+
+	fbi = framebuffer_alloc(0, &pdev->dev);
+	if (!fbi) {
+		DRM_ERROR("failed to allocate fb info.\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR_OR_NULL(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to create drm framebuffer.\n");
+		ret = PTR_ERR(exynos_fbdev->fb);
+		goto out;
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	helper->fbdev = fbi;
+
+	fbi->par = helper;
+	fbi->flags = FBINFO_FLAG_DEFAULT;
+	fbi->fbops = &exynos_drm_fb_ops;
+
+	ret = fb_alloc_cmap(&fbi->cmap, 256, 0);
+	if (ret) {
+		DRM_ERROR("failed to allocate cmap.\n");
+		goto out;
+	}
+
+	ret = exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+	if (ret < 0)
+		fb_dealloc_cmap(&fbi->cmap);
+
+/*
+ * if failed, all resources allocated above would be released by
+ * drm_mode_config_cleanup() when drm_load() had been called prior
+ * to any specific driver such as fimd or hdmi driver.
+ */
+out:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
+static bool
+exynos_drm_fbdev_is_samefb(struct drm_framebuffer *fb,
+			    struct drm_fb_helper_surface_size *sizes)
+{
+	if (fb->width != sizes->surface_width)
+		return false;
+	if (fb->height != sizes->surface_height)
+		return false;
+	if (fb->bits_per_pixel != sizes->surface_bpp)
+		return false;
+	if (fb->depth != sizes->surface_depth)
+		return false;
+
+	return true;
+}
+
+static int exynos_drm_fbdev_recreate(struct drm_fb_helper *helper,
+				      struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_device *dev = helper->dev;
+	struct exynos_drm_fbdev *exynos_fbdev = to_exynos_fbdev(helper);
+	struct drm_framebuffer *fb = exynos_fbdev->fb;
+	struct drm_mode_fb_cmd mode_cmd = { 0 };
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (helper->fb != fb) {
+		DRM_ERROR("drm framebuffer is different\n");
+		return -EINVAL;
+	}
+
+	if (exynos_drm_fbdev_is_samefb(fb, sizes))
+		return 0;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.bpp = sizes->surface_bpp;
+	mode_cmd.depth = sizes->surface_depth;
+
+	if (fb->funcs->destroy)
+		fb->funcs->destroy(fb);
+
+	exynos_fbdev->fb = exynos_drm_fb_create(dev, NULL, &mode_cmd);
+	if (IS_ERR(exynos_fbdev->fb)) {
+		DRM_ERROR("failed to allocate fb.\n");
+		return PTR_ERR(exynos_fbdev->fb);
+	}
+
+	helper->fb = exynos_fbdev->fb;
+	return exynos_drm_fbdev_update(helper, helper->fb, sizes->fb_width,
+			sizes->fb_height);
+}
+
+static int exynos_drm_fbdev_probe(struct drm_fb_helper *helper,
+				   struct drm_fb_helper_surface_size *sizes)
+{
+	int ret = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!helper->fb) {
+		ret = exynos_drm_fbdev_create(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to create fbdev.\n");
+			return ret;
+		}
+
+		/*
+		 * fb_helper expects a value more than 1 if succeed
+		 * because register_framebuffer() should be called.
+		 */
+		ret = 1;
+	} else {
+		ret = exynos_drm_fbdev_recreate(helper, sizes);
+		if (ret < 0) {
+			DRM_ERROR("failed to reconfigure fbdev\n");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static struct drm_fb_helper_funcs exynos_drm_fb_helper_funcs = {
+	.fb_probe =	exynos_drm_fbdev_probe,
+};
+
+int exynos_drm_fbdev_init(struct drm_device *dev)
+{
+	struct exynos_drm_fbdev *fbdev;
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *helper;
+	unsigned int num_crtc;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
+		return 0;
+
+	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
+	if (!fbdev) {
+		DRM_ERROR("failed to allocate drm fbdev.\n");
+		return -ENOMEM;
+	}
+
+	private->fb_helper = helper = &fbdev->drm_fb_helper;
+	helper->funcs = &exynos_drm_fb_helper_funcs;
+
+	num_crtc = dev->mode_config.num_crtc;
+
+	ret = drm_fb_helper_init(dev, helper, num_crtc, MAX_CONNECTOR);
+	if (ret < 0) {
+		DRM_ERROR("failed to initialize drm fb helper.\n");
+		goto err_init;
+	}
+
+	ret = drm_fb_helper_single_add_all_connectors(helper);
+	if (ret < 0) {
+		DRM_ERROR("failed to register drm_fb_helper_connector.\n");
+		goto err_setup;
+
+	}
+
+	ret = drm_fb_helper_initial_config(helper, PREFERRED_BPP);
+	if (ret < 0) {
+		DRM_ERROR("failed to set up hw configuration.\n");
+		goto err_setup;
+	}
+
+	return 0;
+
+err_setup:
+	drm_fb_helper_fini(helper);
+
+err_init:
+	private->fb_helper = NULL;
+	kfree(fbdev);
+
+	return ret;
+}
+
+static void exynos_drm_fbdev_destroy(struct drm_device *dev,
+				      struct drm_fb_helper *fb_helper)
+{
+	struct drm_framebuffer *fb;
+
+	/* release drm framebuffer and real buffer */
+	if (fb_helper->fb && fb_helper->fb->funcs) {
+		fb = fb_helper->fb;
+		if (fb && fb->funcs->destroy)
+			fb->funcs->destroy(fb);
+	}
+
+	/* release linux framebuffer */
+	if (fb_helper->fbdev) {
+		struct fb_info *info;
+		int ret;
+
+		info = fb_helper->fbdev;
+		ret = unregister_framebuffer(info);
+		if (ret < 0)
+			DRM_DEBUG_KMS("failed unregister_framebuffer()\n");
+
+		if (info->cmap.len)
+			fb_dealloc_cmap(&info->cmap);
+
+		framebuffer_release(info);
+	}
+
+	drm_fb_helper_fini(fb_helper);
+}
+
+void exynos_drm_fbdev_fini(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct exynos_drm_fbdev *fbdev;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	fbdev = to_exynos_fbdev(private->fb_helper);
+
+	exynos_drm_fbdev_destroy(dev, private->fb_helper);
+	kfree(fbdev);
+	private->fb_helper = NULL;
+}
+
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+
+	if (!private || !private->fb_helper)
+		return;
+
+	drm_fb_helper_restore_fbdev_mode(private->fb_helper);
+}
+
+int exynos_drm_fbdev_reinit(struct drm_device *dev)
+{
+	struct exynos_drm_private *private = dev->dev_private;
+	struct drm_fb_helper *fb_helper;
+	int ret;
+
+	if (!private)
+		return -EINVAL;
+
+	/*
+	 * if all sub drivers were unloaded then num_connector is 0
+	 * so at this time, the framebuffers also should be destroyed.
+	 */
+	if (!dev->mode_config.num_connector) {
+		exynos_drm_fbdev_fini(dev);
+		return 0;
+	}
+
+	fb_helper = private->fb_helper;
+
+	if (fb_helper) {
+		drm_fb_helper_fini(fb_helper);
+
+		ret = drm_fb_helper_init(dev, fb_helper,
+				dev->mode_config.num_crtc, MAX_CONNECTOR);
+		if (ret < 0) {
+			DRM_ERROR("failed to initialize drm fb helper\n");
+			return ret;
+		}
+
+		ret = drm_fb_helper_single_add_all_connectors(fb_helper);
+		if (ret < 0) {
+			DRM_ERROR("failed to add fb helper to connectors\n");
+			goto err;
+		}
+
+		ret = drm_fb_helper_initial_config(fb_helper, PREFERRED_BPP);
+		if (ret < 0) {
+			DRM_ERROR("failed to set up hw configuration.\n");
+			goto err;
+		}
+	} else {
+		/*
+		 * if drm_load() failed whem drm load() was called prior
+		 * to specific drivers, fb_helper must be NULL and so
+		 * this fuction should be called again to re-initialize and
+		 * re-configure the fb helper. it means that this function
+		 * has been called by the specific drivers.
+		 */
+		ret = exynos_drm_fbdev_init(dev);
+	}
+
+	return ret;
+
+err:
+	/*
+	 * if drm_load() failed when drm load() was called prior
+	 * to specific drivers, the fb_helper must be NULL and so check it.
+	 */
+	if (fb_helper)
+		drm_fb_helper_fini(fb_helper);
+
+	return ret;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Seung-Woo Kim <sw0312.kim@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM FBDEV Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.h b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
new file mode 100644
index 000000000000..ccfce8a1a451
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ *
+ * Authors:
+ *	Inki Dae <inki.dae@samsung.com>
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Seung-Woo Kim <sw0312.kim@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_FBDEV_H_
+#define _EXYNOS_DRM_FBDEV_H_
+
+int exynos_drm_fbdev_init(struct drm_device *dev);
+int exynos_drm_fbdev_reinit(struct drm_device *dev);
+void exynos_drm_fbdev_fini(struct drm_device *dev);
+void exynos_drm_fbdev_restore_mode(struct drm_device *dev);
+
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
new file mode 100644
index 000000000000..4659c88cdd9b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -0,0 +1,811 @@
+/* exynos_drm_fimd.c
+ *
+ * Copyright (C) 2011 Samsung Electronics Co.Ltd
+ * Authors:
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *	Inki Dae <inki.dae@samsung.com>
+ *
+ * This program is free software; you can redistribute  it and/or modify it
+ * under  the terms of  the GNU General  Public License as published by the
+ * Free Software Foundation;  either version 2 of the  License, or (at your
+ * option) any later version.
+ *
+ */
+#include "drmP.h"
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+
+#include <drm/exynos_drm.h>
+#include <plat/regs-fb-v4.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_fbdev.h"
+#include "exynos_drm_crtc.h"
+
+/*
+ * FIMD is stand for Fully Interactive Mobile Display and
+ * as a display controller, it transfers contents drawn on memory
+ * to a LCD Panel through Display Interfaces such as RGB or
+ * CPU Interface.
+ */
+
+/* position control register for hardware window 0, 2 ~ 4.*/
+#define VIDOSD_A(win)		(VIDOSD_BASE + 0x00 + (win) * 16)
+#define VIDOSD_B(win)		(VIDOSD_BASE + 0x04 + (win) * 16)
+/* size control register for hardware window 0. */
+#define VIDOSD_C_SIZE_W0	(VIDOSD_BASE + 0x08)
+/* alpha control register for hardware window 1 ~ 4. */
+#define VIDOSD_C(win)		(VIDOSD_BASE + 0x18 + (win) * 16)
+/* size control register for hardware window 1 ~ 4. */
+#define VIDOSD_D(win)		(VIDOSD_BASE + 0x0C + (win) * 16)
+
+#define VIDWx_BUF_START(win, buf)	(VIDW_BUF_START(buf) + (win) * 8)
+#define VIDWx_BUF_END(win, buf)		(VIDW_BUF_END(buf) + (win) * 8)
+#define VIDWx_BUF_SIZE(win, buf)	(VIDW_BUF_SIZE(buf) + (win) * 4)
+
+/* color key control register for hardware window 1 ~ 4. */
+#define WKEYCON0_BASE(x)		((WKEYCON0 + 0x140) + (x * 8))
+/* color key value register for hardware window 1 ~ 4. */
+#define WKEYCON1_BASE(x)		((WKEYCON1 + 0x140) + (x * 8))
+
+/* FIMD has totally five hardware windows. */
+#define WINDOWS_NR	5
+
+#define get_fimd_context(dev)	platform_get_drvdata(to_platform_device(dev))
+
+struct fimd_win_data {
+	unsigned int		offset_x;
+	unsigned int		offset_y;
+	unsigned int		ovl_width;
+	unsigned int		ovl_height;
+	unsigned int		fb_width;
+	unsigned int		fb_height;
+	unsigned int		bpp;
+	dma_addr_t		paddr;
+	void __iomem		*vaddr;
+	unsigned int		buf_offsize;
+	unsigned int		line_size;	/* bytes */
+};
+
+struct fimd_context {
+	struct exynos_drm_subdrv	subdrv;
+	int				irq;
+	struct drm_crtc			*crtc;
+	struct clk			*bus_clk;
+	struct clk			*lcd_clk;
+	struct resource			*regs_res;
+	void __iomem			*regs;
+	struct fimd_win_data		win_data[WINDOWS_NR];
+	unsigned int			clkdiv;
+	unsigned int			default_win;
+	unsigned long			irq_flags;
+	u32				vidcon0;
+	u32				vidcon1;
+
+	struct fb_videomode		*timing;
+};
+
+static bool fimd_display_is_connected(struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return true;
+}
+
+static void *fimd_get_timing(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return ctx->timing;
+}
+
+static int fimd_check_timing(struct device *dev, void *timing)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static int fimd_display_power_on(struct device *dev, int mode)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+
+	return 0;
+}
+
+static struct exynos_drm_display fimd_display = {
+	.type = EXYNOS_DISPLAY_TYPE_LCD,
+	.is_connected = fimd_display_is_connected,
+	.get_timing = fimd_get_timing,
+	.check_timing = fimd_check_timing,
+	.power_on = fimd_display_power_on,
+};
+
+static void fimd_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fb_videomode *timing = ctx->timing;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* setup polarity values from machine code. */
+	writel(ctx->vidcon1, ctx->regs + VIDCON1);
+
+	/* setup vertical timing values. */
+	val = VIDTCON0_VBPD(timing->upper_margin - 1) |
+	       VIDTCON0_VFPD(timing->lower_margin - 1) |
+	       VIDTCON0_VSPW(timing->vsync_len - 1);
+	writel(val, ctx->regs + VIDTCON0);
+
+	/* setup horizontal timing values.  */
+	val = VIDTCON1_HBPD(timing->left_margin - 1) |
+	       VIDTCON1_HFPD(timing->right_margin - 1) |
+	       VIDTCON1_HSPW(timing->hsync_len - 1);
+	writel(val, ctx->regs + VIDTCON1);
+
+	/* setup horizontal and vertical display size. */
+	val = VIDTCON2_LINEVAL(timing->yres - 1) |
+	       VIDTCON2_HOZVAL(timing->xres - 1);
+	writel(val, ctx->regs + VIDTCON2);
+
+	/* setup clock source, clock divider, enable dma. */
+	val = ctx->vidcon0;
+	val &= ~(VIDCON0_CLKVAL_F_MASK | VIDCON0_CLKDIR);
+
+	if (ctx->clkdiv > 1)
+		val |= VIDCON0_CLKVAL_F(ctx->clkdiv - 1) | VIDCON0_CLKDIR;
+	else
+		val &= ~VIDCON0_CLKDIR;	/* 1:1 clock */
+
+	/*
+	 * fields of register with prefix '_F' would be updated
+	 * at vsync(same as dma start)
+	 */
+	val |= VIDCON0_ENVID | VIDCON0_ENVID_F;
+	writel(val, ctx->regs + VIDCON0);
+}
+
+static int fimd_enable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!test_and_set_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val |= VIDINTCON0_INT_ENABLE;
+		val |= VIDINTCON0_INT_FRAME;
+
+		val &= ~VIDINTCON0_FRAMESEL0_MASK;
+		val |= VIDINTCON0_FRAMESEL0_VSYNC;
+		val &= ~VIDINTCON0_FRAMESEL1_MASK;
+		val |= VIDINTCON0_FRAMESEL1_NONE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+
+	return 0;
+}
+
+static void fimd_disable_vblank(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (test_and_clear_bit(0, &ctx->irq_flags)) {
+		val = readl(ctx->regs + VIDINTCON0);
+
+		val &= ~VIDINTCON0_INT_FRAME;
+		val &= ~VIDINTCON0_INT_ENABLE;
+
+		writel(val, ctx->regs + VIDINTCON0);
+	}
+}
+
+static struct exynos_drm_manager_ops fimd_manager_ops = {
+	.commit = fimd_commit,
+	.enable_vblank = fimd_enable_vblank,
+	.disable_vblank = fimd_disable_vblank,
+};
+
+static void fimd_win_mode_set(struct device *dev,
+			      struct exynos_drm_overlay *overlay)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	unsigned long offset;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!overlay) {
+		dev_err(dev, "overlay is NULL\n");
+		return;
+	}
+
+	offset = overlay->fb_x * (overlay->bpp >> 3);
+	offset += overlay->fb_y * overlay->pitch;
+
+	DRM_DEBUG_KMS("offset = 0x%lx, pitch = %x\n", offset, overlay->pitch);
+
+	win_data = &ctx->win_data[ctx->default_win];
+
+	win_data->offset_x = overlay->crtc_x;
+	win_data->offset_y = overlay->crtc_y;
+	win_data->ovl_width = overlay->crtc_width;
+	win_data->ovl_height = overlay->crtc_height;
+	win_data->fb_width = overlay->fb_width;
+	win_data->fb_height = overlay->fb_height;
+	win_data->paddr = overlay->paddr + offset;
+	win_data->vaddr = overlay->vaddr + offset;
+	win_data->bpp = overlay->bpp;
+	win_data->buf_offsize = (overlay->fb_width - overlay->crtc_width) *
+				(overlay->bpp >> 3);
+	win_data->line_size = overlay->crtc_width * (overlay->bpp >> 3);
+
+	DRM_DEBUG_KMS("offset_x = %d, offset_y = %d\n",
+			win_data->offset_x, win_data->offset_y);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+	DRM_DEBUG_KMS("paddr = 0x%lx, vaddr = 0x%lx\n",
+			(unsigned long)win_data->paddr,
+			(unsigned long)win_data->vaddr);
+	DRM_DEBUG_KMS("fb_width = %d, crtc_width = %d\n",
+			overlay->fb_width, overlay->crtc_width);
+}
+
+static void fimd_win_set_pixfmt(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data = &ctx->win_data[win];
+	unsigned long val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	val = WINCONx_ENWIN;
+
+	switch (win_data->bpp) {
+	case 1:
+		val |= WINCON0_BPPMODE_1BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_4WORD;
+		break;
+	case 2:
+		val |= WINCON0_BPPMODE_2BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 4:
+		val |= WINCON0_BPPMODE_4BPP;
+		val |= WINCONx_BITSWP;
+		val |= WINCONx_BURSTLEN_8WORD;
+		break;
+	case 8:
+		val |= WINCON0_BPPMODE_8BPP_PALETTE;
+		val |= WINCONx_BURSTLEN_8WORD;
+		val |= WINCONx_BYTSWP;
+		break;
+	case 16:
+		val |= WINCON0_BPPMODE_16BPP_565;
+		val |= WINCONx_HAWSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 24:
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	case 32:
+		val |= WINCON1_BPPMODE_28BPP_A4888
+			| WINCON1_BLD_PIX | WINCON1_ALPHA_SEL;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	default:
+		DRM_DEBUG_KMS("invalid pixel size so using unpacked 24bpp.\n");
+
+		val |= WINCON0_BPPMODE_24BPP_888;
+		val |= WINCONx_WSWP;
+		val |= WINCONx_BURSTLEN_16WORD;
+		break;
+	}
+
+	DRM_DEBUG_KMS("bpp = %d\n", win_data->bpp);
+
+	writel(val, ctx->regs + WINCON(win));
+}
+
+static void fimd_win_set_colkey(struct device *dev, unsigned int win)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	unsigned int keycon0 = 0, keycon1 = 0;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	keycon0 = ~(WxKEYCON0_KEYBL_EN | WxKEYCON0_KEYEN_F |
+			WxKEYCON0_DIRCON) | WxKEYCON0_COMPKEY(0);
+
+	keycon1 = WxKEYCON1_COLVAL(0xffffffff);
+
+	writel(keycon0, ctx->regs + WKEYCON0_BASE(win));
+	writel(keycon1, ctx->regs + WKEYCON1_BASE(win));
+}
+
+static void fimd_win_commit(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	unsigned long val, alpha, size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/*
+	 * SHADOWCON register is used for enabling timing.
+	 *
+	 * for example, once only width value of a register is set,
+	 * if the dma is started then fimd hardware could malfunction so
+	 * with protect window setting, the register fields with prefix '_F'
+	 * wouldn't be updated at vsync also but updated once unprotect window
+	 * is set.
+	 */
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* buffer start address */
+	val = win_data->paddr;
+	writel(val, ctx->regs + VIDWx_BUF_START(win, 0));
+
+	/* buffer end address */
+	size = win_data->fb_width * win_data->ovl_height * (win_data->bpp >> 3);
+	val = win_data->paddr + size;
+	writel(val, ctx->regs + VIDWx_BUF_END(win, 0));
+
+	DRM_DEBUG_KMS("start addr = 0x%lx, end addr = 0x%lx, size = 0x%lx\n",
+			(unsigned long)win_data->paddr, val, size);
+	DRM_DEBUG_KMS("ovl_width = %d, ovl_height = %d\n",
+			win_data->ovl_width, win_data->ovl_height);
+
+	/* buffer size */
+	val = VIDW_BUF_SIZE_OFFSET(win_data->buf_offsize) |
+		VIDW_BUF_SIZE_PAGEWIDTH(win_data->line_size);
+	writel(val, ctx->regs + VIDWx_BUF_SIZE(win, 0));
+
+	/* OSD position */
+	val = VIDOSDxA_TOPLEFT_X(win_data->offset_x) |
+		VIDOSDxA_TOPLEFT_Y(win_data->offset_y);
+	writel(val, ctx->regs + VIDOSD_A(win));
+
+	val = VIDOSDxB_BOTRIGHT_X(win_data->offset_x +
+					win_data->ovl_width - 1) |
+		VIDOSDxB_BOTRIGHT_Y(win_data->offset_y +
+					win_data->ovl_height - 1);
+	writel(val, ctx->regs + VIDOSD_B(win));
+
+	DRM_DEBUG_KMS("osd pos: tx = %d, ty = %d, bx = %d, by = %d\n",
+			win_data->offset_x, win_data->offset_y,
+			win_data->offset_x + win_data->ovl_width - 1,
+			win_data->offset_y + win_data->ovl_height - 1);
+
+	/* hardware window 0 doesn't support alpha channel. */
+	if (win != 0) {
+		/* OSD alpha */
+		alpha = VIDISD14C_ALPHA1_R(0xf) |
+			VIDISD14C_ALPHA1_G(0xf) |
+			VIDISD14C_ALPHA1_B(0xf);
+
+		writel(alpha, ctx->regs + VIDOSD_C(win));
+	}
+
+	/* OSD size */
+	if (win != 3 && win != 4) {
+		u32 offset = VIDOSD_D(win);
+		if (win == 0)
+			offset = VIDOSD_C_SIZE_W0;
+		val = win_data->ovl_width * win_data->ovl_height;
+		writel(val, ctx->regs + offset);
+
+		DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val);
+	}
+
+	fimd_win_set_pixfmt(dev, win);
+
+	/* hardware window 0 doesn't support color key. */
+	if (win != 0)
+		fimd_win_set_colkey(dev, win);
+
+	/* Enable DMA channel and unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static void fimd_win_disable(struct device *dev)
+{
+	struct fimd_context *ctx = get_fimd_context(dev);
+	struct fimd_win_data *win_data;
+	int win = ctx->default_win;
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (win < 0 || win > WINDOWS_NR)
+		return;
+
+	win_data = &ctx->win_data[win];
+
+	/* protect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val |= SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+
+	/* wincon */
+	val = readl(ctx->regs + WINCON(win));
+	val &= ~WINCONx_ENWIN;
+	writel(val, ctx->regs + WINCON(win));
+
+	/* unprotect windows */
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_CHx_ENABLE(win);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static struct exynos_drm_overlay_ops fimd_overlay_ops = {
+	.mode_set = fimd_win_mode_set,
+	.commit = fimd_win_commit,
+	.disable = fimd_win_disable,
+};
+
+static void fimd_finish_pageflip(struct drm_device *drm_dev, int crtc)
+{
+	struct exynos_drm_private *dev_priv = drm_dev->dev_private;
+	struct drm_pending_vblank_event *e, *t;
+	struct timeval now;
+	unsigned long flags;
+	bool is_checked = false;
+
+	spin_lock_irqsave(&drm_dev->event_lock, flags);
+
+	list_for_each_entry_safe(e, t, &dev_priv->pageflip_event_list,
+			base.link) {
+		/* if event's pipe isn't same as crtc then ignore it. */
+		if (crtc != e->pipe)
+			continue;
+
+		is_checked = true;
+
+		do_gettimeofday(&now);
+		e->event.sequence = 0;
+		e->event.tv_sec = now.tv_sec;
+		e->event.tv_usec = now.tv_usec;
+
+		list_move_tail(&e->base.link, &e->base.file_priv->event_list);
+		wake_up_interruptible(&e->base.file_priv->event_wait);
+	}
+
+	if (is_checked)
+		drm_vblank_put(drm_dev, crtc);
+
+	spin_unlock_irqrestore(&drm_dev->event_lock, flags);
+}
+
+static irqreturn_t fimd_irq_handler(int irq, void *dev_id)
+{
+	struct fimd_context *ctx = (struct fimd_context *)dev_id;
+	struct exynos_drm_subdrv *subdrv = &ctx->subdrv;
+	struct drm_device *drm_dev = subdrv->drm_dev;
+	struct exynos_drm_manager *manager = &subdrv->manager;
+	u32 val;
+
+	val = readl(ctx->regs + VIDINTCON1);
+
+	if (val & VIDINTCON1_INT_FRAME)
+		/* VSYNC interrupt */
+		writel(VIDINTCON1_INT_FRAME, ctx->regs + VIDINTCON1);
+
+	drm_handle_vblank(drm_dev, manager->pipe);
+	fimd_finish_pageflip(drm_dev, manager->pipe);
+
+	return IRQ_HANDLED;
+}
+
+static int fimd_subdrv_probe(struct drm_device *drm_dev, struct device *dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * enable drm irq mode.
+	 * - with irq_enabled = 1, we can use the vblank feature.
+	 *
+	 * P.S. note that we wouldn't use drm irq handler but
+	 *	just specific driver own one instead because
+	 *	drm framework supports only one irq handler.
+	 */
+	drm_dev->irq_enabled = 1;
+
+	/*
+	 * with vblank_disable_allowed = 1, vblank interrupt will be disabled
+	 * by drm timer once a current process gives up ownership of
+	 * vblank event.(drm_vblank_put function was called)
+	 */
+	drm_dev->vblank_disable_allowed = 1;
+
+	return 0;
+}
+
+static void fimd_subdrv_remove(struct drm_device *drm_dev)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* TODO. */
+}
+
+static int fimd_calc_clkdiv(struct fimd_context *ctx,
+			    struct fb_videomode *timing)
+{
+	unsigned long clk = clk_get_rate(ctx->lcd_clk);
+	u32 retrace;
+	u32 clkdiv;
+	u32 best_framerate = 0;
+	u32 framerate;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	retrace = timing->left_margin + timing->hsync_len +
+				timing->right_margin + timing->xres;
+	retrace *= timing->upper_margin + timing->vsync_len +
+				timing->lower_margin + timing->yres;
+
+	/* default framerate is 60Hz */
+	if (!timing->refresh)
+		timing->refresh = 60;
+
+	clk /= retrace;
+
+	for (clkdiv = 1; clkdiv < 0x100; clkdiv++) {
+		int tmp;
+
+		/* get best framerate */
+		framerate = clk / clkdiv;
+		tmp = timing->refresh - framerate;
+		if (tmp < 0) {
+			best_framerate = framerate;
+			continue;
+		} else {
+			if (!best_framerate)
+				best_framerate = framerate;
+			else if (tmp < (best_framerate - framerate))
+				best_framerate = framerate;
+			break;
+		}
+	}
+
+	return clkdiv;
+}
+
+static void fimd_clear_win(struct fimd_context *ctx, int win)
+{
+	u32 val;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	writel(0, ctx->regs + WINCON(win));
+	writel(0, ctx->regs + VIDOSD_A(win));
+	writel(0, ctx->regs + VIDOSD_B(win));
+	writel(0, ctx->regs + VIDOSD_C(win));
+
+	if (win == 1 || win == 2)
+		writel(0, ctx->regs + VIDOSD_D(win));
+
+	val = readl(ctx->regs + SHADOWCON);
+	val &= ~SHADOWCON_WINx_PROTECT(win);
+	writel(val, ctx->regs + SHADOWCON);
+}
+
+static int __devinit fimd_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct fimd_context *ctx;
+	struct exynos_drm_subdrv *subdrv;
+	struct exynos_drm_fimd_pdata *pdata;
+	struct fb_videomode *timing;
+	struct resource *res;
+	int win;
+	int ret = -EINVAL;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata) {
+		dev_err(dev, "no platform data specified\n");
+		return -EINVAL;
+	}
+
+	timing = &pdata->timing;
+	if (!timing) {
+		dev_err(dev, "timing is null.\n");
+		return -EINVAL;
+	}
+
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->bus_clk = clk_get(dev, "fimd");
+	if (IS_ERR(ctx->bus_clk)) {
+		dev_err(dev, "failed to get bus clock\n");
+		ret = PTR_ERR(ctx->bus_clk);
+		goto err_clk_get;
+	}
+
+	clk_enable(ctx->bus_clk);
+
+	ctx->lcd_clk = clk_get(dev, "sclk_fimd");
+	if (IS_ERR(ctx->lcd_clk)) {
+		dev_err(dev, "failed to get lcd clock\n");
+		ret = PTR_ERR(ctx->lcd_clk);
+		goto err_bus_clk;
+	}
+
+	clk_enable(ctx->lcd_clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(dev, "failed to find registers\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs_res = request_mem_region(res->start, resource_size(res),
+					   dev_name(dev));
+	if (!ctx->regs_res) {
+		dev_err(dev, "failed to claim register region\n");
+		ret = -ENOENT;
+		goto err_clk;
+	}
+
+	ctx->regs = ioremap(res->start, resource_size(res));
+	if (!ctx->regs) {
+		dev_err(dev, "failed to map registers\n");
+		ret = -ENXIO;
+		goto err_req_region_io;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+	if (!res) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_region_irq;
+	}
+
+	ctx->irq = res->start;
+
+	for (win = 0; win < WINDOWS_NR; win++)
+		fimd_clear_win(ctx, win);
+
+	ret = request_irq(ctx->irq, fimd_irq_handler, 0, "drm_fimd", ctx);
+	if (ret < 0) {
+		dev_err(dev, "irq request failed.\n");
+		goto err_req_irq;
+	}
+
+	ctx->clkdiv = fimd_calc_clkdiv(ctx, timing);
+	ctx->vidcon0 = pdata->vidcon0;
+	ctx->vidcon1 = pdata->vidcon1;
+	ctx->default_win = pdata->default_win;
+	ctx->timing = timing;
+
+	timing->pixclock = clk_get_rate(ctx->lcd_clk) / ctx->clkdiv;
+
+	DRM_DEBUG_KMS("pixel clock = %d, clkdiv = %d\n",
+			timing->pixclock, ctx->clkdiv);
+
+	subdrv = &ctx->subdrv;
+
+	subdrv->probe = fimd_subdrv_probe;
+	subdrv->remove = fimd_subdrv_remove;
+	subdrv->manager.pipe = -1;
+	subdrv->manager.ops = &fimd_manager_ops;
+	subdrv->manager.overlay_ops = &fimd_overlay_ops;
+	subdrv->manager.display = &fimd_display;
+	subdrv->manager.dev = dev;
+
+	platform_set_drvdata(pdev, ctx);
+	exynos_drm_subdrv_register(subdrv);
+
+	return 0;
+
+err_req_irq:
+err_req_region_irq:
+	iounmap(ctx->regs);
+
+err_req_region_io:
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+
+err_clk:
+	clk_disable(ctx->lcd_clk);
+	clk_put(ctx->lcd_clk);
+
+err_bus_clk:
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->bus_clk);
+
+err_clk_get:
+	kfree(ctx);
+	return ret;
+}
+
+static int __devexit fimd_remove(struct platform_device *pdev)
+{
+	struct fimd_context *ctx = platform_get_drvdata(pdev);
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	exynos_drm_subdrv_unregister(&ctx->subdrv);
+
+	clk_disable(ctx->lcd_clk);
+	clk_disable(ctx->bus_clk);
+	clk_put(ctx->lcd_clk);
+	clk_put(ctx->bus_clk);
+
+	iounmap(ctx->regs);
+	release_resource(ctx->regs_res);
+	kfree(ctx->regs_res);
+	free_irq(ctx->irq, ctx);
+
+	kfree(ctx);
+
+	return 0;
+}
+
+static struct platform_driver fimd_driver = {
+	.probe		= fimd_probe,
+	.remove		= __devexit_p(fimd_remove),
+	.driver		= {
+		.name	= "exynos4-fb",
+		.owner	= THIS_MODULE,
+	},
+};
+
+static int __init fimd_init(void)
+{
+	return platform_driver_register(&fimd_driver);
+}
+
+static void __exit fimd_exit(void)
+{
+	platform_driver_unregister(&fimd_driver);
+}
+
+module_init(fimd_init);
+module_exit(fimd_exit);
+
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung DRM FIMD Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
new file mode 100644
index 000000000000..a8e7a88906ed
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -0,0 +1,415 @@
+/* exynos_drm_gem.c
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Author: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+
+#include <drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_buf.h"
+
+static unsigned int convert_to_vm_err_msg(int msg)
+{
+	unsigned int out_msg;
+
+	switch (msg) {
+	case 0:
+	case -ERESTARTSYS:
+	case -EINTR:
+		out_msg = VM_FAULT_NOPAGE;
+		break;
+
+	case -ENOMEM:
+		out_msg = VM_FAULT_OOM;
+		break;
+
+	default:
+		out_msg = VM_FAULT_SIGBUS;
+		break;
+	}
+
+	return out_msg;
+}
+
+static unsigned int get_gem_mmap_offset(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return (unsigned int)obj->map_list.hash.key << PAGE_SHIFT;
+}
+
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct exynos_drm_buf_entry *entry;
+	struct drm_gem_object *obj;
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	size = roundup(size, PAGE_SIZE);
+
+	exynos_gem_obj = kzalloc(sizeof(*exynos_gem_obj), GFP_KERNEL);
+	if (!exynos_gem_obj) {
+		DRM_ERROR("failed to allocate exynos gem object.\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	/* allocate the new buffer object and memory region. */
+	entry = exynos_drm_buf_create(dev, size);
+	if (!entry) {
+		kfree(exynos_gem_obj);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	exynos_gem_obj->entry = entry;
+
+	obj = &exynos_gem_obj->base;
+
+	ret = drm_gem_object_init(dev, obj, size);
+	if (ret < 0) {
+		DRM_ERROR("failed to initailize gem object.\n");
+		goto err_obj_init;
+	}
+
+	DRM_DEBUG_KMS("created file object = 0x%x\n", (unsigned int)obj->filp);
+
+	ret = drm_gem_create_mmap_offset(obj);
+	if (ret < 0) {
+		DRM_ERROR("failed to allocate mmap offset.\n");
+		goto err_create_mmap_offset;
+	}
+
+	/*
+	 * allocate a id of idr table where the obj is registered
+	 * and handle has the id what user can see.
+	 */
+	ret = drm_gem_handle_create(file_priv, obj, handle);
+	if (ret)
+		goto err_handle_create;
+
+	DRM_DEBUG_KMS("gem handle = 0x%x\n", *handle);
+
+	/* drop reference from allocate - handle holds it now. */
+	drm_gem_object_unreference_unlocked(obj);
+
+	return exynos_gem_obj;
+
+err_handle_create:
+	drm_gem_free_mmap_offset(obj);
+
+err_create_mmap_offset:
+	drm_gem_object_release(obj);
+
+err_obj_init:
+	exynos_drm_buf_destroy(dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+
+	return ERR_PTR(ret);
+}
+
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_create *args = data;
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s : size = 0x%x\n", __FILE__, args->size);
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+			&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_map_off *args = data;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle = 0x%x, offset = 0x%lx\n",
+			args->handle, (unsigned long)args->offset);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	return exynos_drm_gem_dumb_map_offset(file_priv, dev, args->handle,
+			&args->offset);
+}
+
+static int exynos_drm_gem_mmap_buffer(struct file *filp,
+		struct vm_area_struct *vma)
+{
+	struct drm_gem_object *obj = filp->private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct exynos_drm_buf_entry *entry;
+	unsigned long pfn, vm_size;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	vma->vm_flags |= (VM_IO | VM_RESERVED);
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vma->vm_file = filp;
+
+	vm_size = vma->vm_end - vma->vm_start;
+	/*
+	 * a entry contains information to physically continuous memory
+	 * allocated by user request or at framebuffer creation.
+	 */
+	entry = exynos_gem_obj->entry;
+
+	/* check if user-requested size is valid. */
+	if (vm_size > entry->size)
+		return -EINVAL;
+
+	/*
+	 * get page frame number to physical memory to be mapped
+	 * to user space.
+	 */
+	pfn = exynos_gem_obj->entry->paddr >> PAGE_SHIFT;
+
+	DRM_DEBUG_KMS("pfn = 0x%lx\n", pfn);
+
+	if (remap_pfn_range(vma, vma->vm_start, pfn, vm_size,
+				vma->vm_page_prot)) {
+		DRM_ERROR("failed to remap pfn range.\n");
+		return -EAGAIN;
+	}
+
+	return 0;
+}
+
+static const struct file_operations exynos_drm_gem_fops = {
+	.mmap = exynos_drm_gem_mmap_buffer,
+};
+
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv)
+{
+	struct drm_exynos_gem_mmap *args = data;
+	struct drm_gem_object *obj;
+	unsigned int addr;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	if (!(dev->driver->driver_features & DRIVER_GEM)) {
+		DRM_ERROR("does not support GEM.\n");
+		return -ENODEV;
+	}
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		return -EINVAL;
+	}
+
+	obj->filp->f_op = &exynos_drm_gem_fops;
+	obj->filp->private_data = obj;
+
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(obj->filp, 0, args->size,
+			PROT_READ | PROT_WRITE, MAP_SHARED, 0);
+	up_write(&current->mm->mmap_sem);
+
+	drm_gem_object_unreference_unlocked(obj);
+
+	if (IS_ERR((void *)addr))
+		return PTR_ERR((void *)addr);
+
+	args->mapped = addr;
+
+	DRM_DEBUG_KMS("mapped = 0x%lx\n", (unsigned long)args->mapped);
+
+	return 0;
+}
+
+int exynos_drm_gem_init_object(struct drm_gem_object *obj)
+{
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	return 0;
+}
+
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	DRM_DEBUG_KMS("handle count = %d\n",
+			atomic_read(&gem_obj->handle_count));
+
+	if (gem_obj->map_list.map)
+		drm_gem_free_mmap_offset(gem_obj);
+
+	/* release file pointer to gem object. */
+	drm_gem_object_release(gem_obj);
+
+	exynos_gem_obj = to_exynos_gem_obj(gem_obj);
+
+	exynos_drm_buf_destroy(gem_obj->dev, exynos_gem_obj->entry);
+
+	kfree(exynos_gem_obj);
+}
+
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * alocate memory to be used for framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_CREATE_DUMB command.
+	 */
+
+	args->pitch = args->width * args->bpp >> 3;
+	args->size = args->pitch * args->height;
+
+	exynos_gem_obj = exynos_drm_gem_create(file_priv, dev, args->size,
+							&args->handle);
+	if (IS_ERR(exynos_gem_obj))
+		return PTR_ERR(exynos_gem_obj);
+
+	return 0;
+}
+
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset)
+{
+	struct exynos_drm_gem_obj *exynos_gem_obj;
+	struct drm_gem_object *obj;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	mutex_lock(&dev->struct_mutex);
+
+	/*
+	 * get offset of memory allocated for drm framebuffer.
+	 * - this callback would be called by user application
+	 *	with DRM_IOCTL_MODE_MAP_DUMB command.
+	 */
+
+	obj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (!obj) {
+		DRM_ERROR("failed to lookup gem object.\n");
+		mutex_unlock(&dev->struct_mutex);
+		return -EINVAL;
+	}
+
+	exynos_gem_obj = to_exynos_gem_obj(obj);
+
+	*offset = get_gem_mmap_offset(&exynos_gem_obj->base);
+
+	drm_gem_object_unreference(obj);
+
+	DRM_DEBUG_KMS("offset = 0x%lx\n", (unsigned long)*offset);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct exynos_drm_gem_obj *exynos_gem_obj = to_exynos_gem_obj(obj);
+	struct drm_device *dev = obj->dev;
+	unsigned long pfn;
+	pgoff_t page_offset;
+	int ret;
+
+	page_offset = ((unsigned long)vmf->virtual_address -
+			vma->vm_start) >> PAGE_SHIFT;
+
+	mutex_lock(&dev->struct_mutex);
+
+	pfn = (exynos_gem_obj->entry->paddr >> PAGE_SHIFT) + page_offset;
+
+	ret = vm_insert_mixed(vma, (unsigned long)vmf->virtual_address, pfn);
+
+	mutex_unlock(&dev->struct_mutex);
+
+	return convert_to_vm_err_msg(ret);
+}
+
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/* set vm_area_struct. */
+	ret = drm_gem_mmap(filp, vma);
+	if (ret < 0) {
+		DRM_ERROR("failed to mmap.\n");
+		return ret;
+	}
+
+	vma->vm_flags &= ~VM_PFNMAP;
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	return ret;
+}
+
+
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle)
+{
+	int ret;
+
+	DRM_DEBUG_KMS("%s\n", __FILE__);
+
+	/*
+	 * obj->refcount and obj->handle_count are decreased and
+	 * if both them are 0 then exynos_drm_gem_free_object()
+	 * would be called by callback to release resources.
+	 */
+	ret = drm_gem_handle_delete(file_priv, handle);
+	if (ret < 0) {
+		DRM_ERROR("failed to delete drm_gem_handle.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+MODULE_AUTHOR("Inki Dae <inki.dae@samsung.com>");
+MODULE_DESCRIPTION("Samsung SoC DRM GEM Module");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
new file mode 100644
index 000000000000..e5fc0148277b
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -0,0 +1,107 @@
+/* exynos_drm_gem.h
+ *
+ * Copyright (c) 2011 Samsung Electronics Co., Ltd.
+ * Authoer: Inki Dae <inki.dae@samsung.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _EXYNOS_DRM_GEM_H_
+#define _EXYNOS_DRM_GEM_H_
+
+#define to_exynos_gem_obj(x)	container_of(x,\
+			struct exynos_drm_gem_obj, base)
+
+/*
+ * exynos drm buffer structure.
+ *
+ * @base: a gem object.
+ *	- a new handle to this gem object would be created
+ *	by drm_gem_handle_create().
+ * @entry: pointer to exynos drm buffer entry object.
+ *	- containing the information to physically
+ *	continuous memory region allocated by user request
+ *	or at framebuffer creation.
+ *
+ * P.S. this object would be transfered to user as kms_bo.handle so
+ *	user can access the buffer through kms_bo.handle.
+ */
+struct exynos_drm_gem_obj {
+	struct drm_gem_object base;
+	struct exynos_drm_buf_entry *entry;
+};
+
+/* create a new buffer and get a new gem handle. */
+struct exynos_drm_gem_obj *exynos_drm_gem_create(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int size,
+		unsigned int *handle);
+
+/*
+ * request gem object creation and buffer allocation as the size
+ * that it is calculated with framebuffer information such as width,
+ * height and bpp.
+ */
+int exynos_drm_gem_create_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* get buffer offset to map to user space. */
+int exynos_drm_gem_map_offset_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* unmap a buffer from user space. */
+int exynos_drm_gem_munmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* initialize gem object. */
+int exynos_drm_gem_init_object(struct drm_gem_object *obj);
+
+/* free gem object. */
+void exynos_drm_gem_free_object(struct drm_gem_object *gem_obj);
+
+/* create memory region for drm framebuffer. */
+int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
+		struct drm_device *dev, struct drm_mode_create_dumb *args);
+
+/* map memory region for drm framebuffer to user space. */
+int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
+		struct drm_device *dev, uint32_t handle, uint64_t *offset);
+
+/* page fault handler and mmap fault address(virtual) to physical memory. */
+int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+/*
+ * mmap the physically continuous memory that a gem object contains
+ * to user space.
+ */
+int exynos_drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		struct drm_file *file_priv);
+
+/* set vm_flags and we can change the vm attribute to other one at here. */
+int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/*
+ * destroy memory region allocated.
+ *	- a gem handle and physical memory region pointed by a gem object
+ *	would be released by drm_gem_handle_delete().
+ */
+int exynos_drm_gem_dumb_destroy(struct drm_file *file_priv,
+		struct drm_device *dev, unsigned int handle);
+
+#endif
diff --git a/drivers/gpu/drm/i915/dvo_ch7017.c b/drivers/gpu/drm/i915/dvo_ch7017.c
index d3e8c540f778..1ca799a1e1fc 100644
--- a/drivers/gpu/drm/i915/dvo_ch7017.c
+++ b/drivers/gpu/drm/i915/dvo_ch7017.c
@@ -227,7 +227,7 @@ static bool ch7017_init(struct intel_dvo_device *dvo,
 	default:
 		DRM_DEBUG_KMS("ch701x not detected, got %d: from %s "
 			      "slave %d.\n",
-			      val, adapter->name,dvo->slave_addr);
+			      val, adapter->name, dvo->slave_addr);
 		goto fail;
 	}
 
diff --git a/drivers/gpu/drm/i915/dvo_ch7xxx.c b/drivers/gpu/drm/i915/dvo_ch7xxx.c
index 7eaa94e4ff06..4a036600e806 100644
--- a/drivers/gpu/drm/i915/dvo_ch7xxx.c
+++ b/drivers/gpu/drm/i915/dvo_ch7xxx.c
@@ -111,7 +111,7 @@ static char *ch7xxx_get_id(uint8_t vid)
 /** Reads an 8 bit register */
 static bool ch7xxx_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
 {
-	struct ch7xxx_priv *ch7xxx= dvo->dev_priv;
+	struct ch7xxx_priv *ch7xxx = dvo->dev_priv;
 	struct i2c_adapter *adapter = dvo->i2c_bus;
 	u8 out_buf[2];
 	u8 in_buf[2];
@@ -303,7 +303,7 @@ static void ch7xxx_dump_regs(struct intel_dvo_device *dvo)
 
 	for (i = 0; i < CH7xxx_NUM_REGS; i++) {
 		uint8_t val;
-		if ((i % 8) == 0 )
+		if ((i % 8) == 0)
 			DRM_LOG_KMS("\n %02X: ", i);
 		ch7xxx_readb(dvo, i, &val);
 		DRM_LOG_KMS("%02X ", val);
diff --git a/drivers/gpu/drm/i915/dvo_ivch.c b/drivers/gpu/drm/i915/dvo_ivch.c
index a12ed9414cc7..04f2893d5e3c 100644
--- a/drivers/gpu/drm/i915/dvo_ivch.c
+++ b/drivers/gpu/drm/i915/dvo_ivch.c
@@ -344,8 +344,8 @@ static void ivch_mode_set(struct intel_dvo_device *dvo,
 			   (adjusted_mode->hdisplay - 1)) >> 2;
 		y_ratio = (((mode->vdisplay - 1) << 16) /
 			   (adjusted_mode->vdisplay - 1)) >> 2;
-		ivch_write (dvo, VR42, x_ratio);
-		ivch_write (dvo, VR41, y_ratio);
+		ivch_write(dvo, VR42, x_ratio);
+		ivch_write(dvo, VR41, y_ratio);
 	} else {
 		vr01 &= ~VR01_PANEL_FIT_ENABLE;
 		vr40 &= ~VR40_CLOCK_GATING_ENABLE;
@@ -410,7 +410,7 @@ static void ivch_destroy(struct intel_dvo_device *dvo)
 	}
 }
 
-struct intel_dvo_dev_ops ivch_ops= {
+struct intel_dvo_dev_ops ivch_ops = {
 	.init = ivch_init,
 	.dpms = ivch_dpms,
 	.mode_valid = ivch_mode_valid,
diff --git a/drivers/gpu/drm/i915/dvo_sil164.c b/drivers/gpu/drm/i915/dvo_sil164.c
index e4b4091df942..a0b13a6f619d 100644
--- a/drivers/gpu/drm/i915/dvo_sil164.c
+++ b/drivers/gpu/drm/i915/dvo_sil164.c
@@ -104,7 +104,7 @@ static bool sil164_readb(struct intel_dvo_device *dvo, int addr, uint8_t *ch)
 
 static bool sil164_writeb(struct intel_dvo_device *dvo, int addr, uint8_t ch)
 {
-	struct sil164_priv *sil= dvo->dev_priv;
+	struct sil164_priv *sil = dvo->dev_priv;
 	struct i2c_adapter *adapter = dvo->i2c_bus;
 	uint8_t out_buf[2];
 	struct i2c_msg msg = {
diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c
index 8ab2855bb544..aa2cd3ec54aa 100644
--- a/drivers/gpu/drm/i915/dvo_tfp410.c
+++ b/drivers/gpu/drm/i915/dvo_tfp410.c
@@ -56,7 +56,7 @@
 #define TFP410_CTL_2_MDI	(1<<0)
 
 #define TFP410_CTL_3		0x0A
-#define TFP410_CTL_3_DK_MASK 	(0x7<<5)
+#define TFP410_CTL_3_DK_MASK	(0x7<<5)
 #define TFP410_CTL_3_DK		(1<<5)
 #define TFP410_CTL_3_DKEN	(1<<4)
 #define TFP410_CTL_3_CTL_MASK	(0x7<<1)
@@ -225,12 +225,12 @@ static void tfp410_mode_set(struct intel_dvo_device *dvo,
 			    struct drm_display_mode *mode,
 			    struct drm_display_mode *adjusted_mode)
 {
-    /* As long as the basics are set up, since we don't have clock dependencies
-     * in the mode setup, we can just leave the registers alone and everything
-     * will work fine.
-     */
-    /* don't do much */
-    return;
+	/* As long as the basics are set up, since we don't have clock dependencies
+	* in the mode setup, we can just leave the registers alone and everything
+	* will work fine.
+	*/
+	/* don't do much */
+	return;
 }
 
 /* set the tfp410 power state */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 3c395a59da35..8e95d66800b0 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -98,12 +98,12 @@ static const char *get_pin_flag(struct drm_i915_gem_object *obj)
 
 static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
 {
-    switch (obj->tiling_mode) {
-    default:
-    case I915_TILING_NONE: return " ";
-    case I915_TILING_X: return "X";
-    case I915_TILING_Y: return "Y";
-    }
+	switch (obj->tiling_mode) {
+	default:
+	case I915_TILING_NONE: return " ";
+	case I915_TILING_X: return "X";
+	case I915_TILING_Y: return "Y";
+	}
 }
 
 static const char *cache_level_str(int type)
@@ -217,7 +217,7 @@ static int i915_gem_object_list_info(struct seq_file *m, void *data)
 			++mappable_count; \
 		} \
 	} \
-} while(0)
+} while (0)
 
 static int i915_gem_object_info(struct seq_file *m, void* data)
 {
@@ -1293,12 +1293,12 @@ i915_wedged_read(struct file *filp,
 	char buf[80];
 	int len;
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "wedged :  %d\n",
 		       atomic_read(&dev_priv->mm.wedged));
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1314,7 +1314,7 @@ i915_wedged_write(struct file *filp,
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
@@ -1357,11 +1357,11 @@ i915_max_freq_read(struct file *filp,
 	char buf[80];
 	int len;
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "max freq: %d\n", dev_priv->max_delay * 50);
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1378,7 +1378,7 @@ i915_max_freq_write(struct file *filp,
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
@@ -1432,12 +1432,12 @@ i915_cache_sharing_read(struct file *filp,
 	snpcr = I915_READ(GEN6_MBCUNIT_SNPCR);
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 
-	len = snprintf(buf, sizeof (buf),
+	len = snprintf(buf, sizeof(buf),
 		       "%d\n", (snpcr & GEN6_MBC_SNPCR_MASK) >>
 		       GEN6_MBC_SNPCR_SHIFT);
 
-	if (len > sizeof (buf))
-		len = sizeof (buf);
+	if (len > sizeof(buf))
+		len = sizeof(buf);
 
 	return simple_read_from_buffer(ubuf, max, ppos, buf, len);
 }
@@ -1455,7 +1455,7 @@ i915_cache_sharing_write(struct file *filp,
 	int val = 1;
 
 	if (cnt > 0) {
-		if (cnt > sizeof (buf) - 1)
+		if (cnt > sizeof(buf) - 1)
 			return -EINVAL;
 
 		if (copy_from_user(buf, ubuf, cnt))
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8a3942c4f099..2eac955dee18 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -884,7 +884,7 @@ static int i915_get_bridge_dev(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0,0));
+	dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
 	if (!dev_priv->bridge_dev) {
 		DRM_ERROR("bridge device not found\n");
 		return -1;
@@ -1730,10 +1730,10 @@ static DEFINE_SPINLOCK(mchdev_lock);
  */
 unsigned long i915_read_mch_val(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	unsigned long chipset_val, graphics_val, ret = 0;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev)
 		goto out_unlock;
 	dev_priv = i915_mch_dev;
@@ -1744,9 +1744,9 @@ unsigned long i915_read_mch_val(void)
 	ret = chipset_val + graphics_val;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_read_mch_val);
 
@@ -1757,10 +1757,10 @@ EXPORT_SYMBOL_GPL(i915_read_mch_val);
  */
 bool i915_gpu_raise(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1771,9 +1771,9 @@ bool i915_gpu_raise(void)
 		dev_priv->max_delay--;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_raise);
 
@@ -1785,10 +1785,10 @@ EXPORT_SYMBOL_GPL(i915_gpu_raise);
  */
 bool i915_gpu_lower(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1799,9 +1799,9 @@ bool i915_gpu_lower(void)
 		dev_priv->max_delay++;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_lower);
 
@@ -1812,10 +1812,10 @@ EXPORT_SYMBOL_GPL(i915_gpu_lower);
  */
 bool i915_gpu_busy(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = false;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev)
 		goto out_unlock;
 	dev_priv = i915_mch_dev;
@@ -1823,9 +1823,9 @@ bool i915_gpu_busy(void)
 	ret = dev_priv->busy;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_busy);
 
@@ -1837,10 +1837,10 @@ EXPORT_SYMBOL_GPL(i915_gpu_busy);
  */
 bool i915_gpu_turbo_disable(void)
 {
-  	struct drm_i915_private *dev_priv;
+	struct drm_i915_private *dev_priv;
 	bool ret = true;
 
-  	spin_lock(&mchdev_lock);
+	spin_lock(&mchdev_lock);
 	if (!i915_mch_dev) {
 		ret = false;
 		goto out_unlock;
@@ -1853,9 +1853,9 @@ bool i915_gpu_turbo_disable(void)
 		ret = false;
 
 out_unlock:
-  	spin_unlock(&mchdev_lock);
+	spin_unlock(&mchdev_lock);
 
-  	return ret;
+	return ret;
 }
 EXPORT_SYMBOL_GPL(i915_gpu_turbo_disable);
 
@@ -1948,7 +1948,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	agp_size = dev_priv->mm.gtt->gtt_mappable_entries << PAGE_SHIFT;
 
-        dev_priv->mm.gtt_mapping =
+	dev_priv->mm.gtt_mapping =
 		io_mapping_create_wc(dev->agp->base, agp_size);
 	if (dev_priv->mm.gtt_mapping == NULL) {
 		ret = -EIO;
@@ -2035,7 +2035,9 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 	spin_lock_init(&dev_priv->error_lock);
 	spin_lock_init(&dev_priv->rps_lock);
 
-	if (IS_MOBILE(dev) || !IS_GEN2(dev))
+	if (IS_IVYBRIDGE(dev))
+		dev_priv->num_pipe = 3;
+	else if (IS_MOBILE(dev) || !IS_GEN2(dev))
 		dev_priv->num_pipe = 2;
 	else
 		dev_priv->num_pipe = 1;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f07e4252b708..4c8d681c2151 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -79,11 +79,11 @@ MODULE_PARM_DESC(lvds_downclock,
 		"Use panel (LVDS/eDP) downclocking for power savings "
 		"(default: false)");
 
-unsigned int i915_panel_use_ssc __read_mostly = 1;
+unsigned int i915_panel_use_ssc __read_mostly = -1;
 module_param_named(lvds_use_ssc, i915_panel_use_ssc, int, 0600);
 MODULE_PARM_DESC(lvds_use_ssc,
 		"Use Spread Spectrum Clock with panels [LVDS/eDP] "
-		"(default: true)");
+		"(default: auto from VBT)");
 
 int i915_vbt_sdvo_panel_type __read_mostly = -1;
 module_param_named(vbt_sdvo_panel_type, i915_vbt_sdvo_panel_type, int, 0600);
@@ -294,7 +294,7 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE	0x1c00
 #define INTEL_PCH_PPT_DEVICE_ID_TYPE	0x1e00
 
-void intel_detect_pch (struct drm_device *dev)
+void intel_detect_pch(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct pci_dev *pch;
@@ -377,7 +377,7 @@ void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
-	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES ) {
+	if (dev_priv->gt_fifo_count < GT_FIFO_NUM_RESERVED_ENTRIES) {
 		int loop = 500;
 		u32 fifo = I915_READ_NOTRACE(GT_FIFO_FREE_ENTRIES);
 		while (fifo <= GT_FIFO_NUM_RESERVED_ENTRIES && loop--) {
@@ -471,6 +471,9 @@ static int i915_drm_thaw(struct drm_device *dev)
 		error = i915_gem_init_ringbuffer(dev);
 		mutex_unlock(&dev->struct_mutex);
 
+		if (HAS_PCH_SPLIT(dev))
+			ironlake_init_pch_refclk(dev);
+
 		drm_mode_config_reset(dev);
 		drm_irq_install(dev);
 
@@ -770,12 +773,12 @@ static int i915_pm_poweroff(struct device *dev)
 }
 
 static const struct dev_pm_ops i915_pm_ops = {
-     .suspend = i915_pm_suspend,
-     .resume = i915_pm_resume,
-     .freeze = i915_pm_freeze,
-     .thaw = i915_pm_thaw,
-     .poweroff = i915_pm_poweroff,
-     .restore = i915_pm_resume,
+	.suspend = i915_pm_suspend,
+	.resume = i915_pm_resume,
+	.freeze = i915_pm_freeze,
+	.thaw = i915_pm_thaw,
+	.poweroff = i915_pm_poweroff,
+	.restore = i915_pm_resume,
 };
 
 static struct vm_operations_struct i915_gem_vm_ops = {
@@ -895,3 +898,43 @@ module_exit(i915_exit);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE))
+
+#define __i915_read(x, y) \
+u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
+	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+	return val; \
+}
+
+__i915_read(8, b)
+__i915_read(16, w)
+__i915_read(32, l)
+__i915_read(64, q)
+#undef __i915_read
+
+#define __i915_write(x, y) \
+void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
+	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
+	write##y(val, dev_priv->regs + reg); \
+}
+__i915_write(8, b)
+__i915_write(16, w)
+__i915_write(32, l)
+__i915_write(64, q)
+#undef __i915_write
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7916bd97d5c1..06a37f4fd74b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -139,7 +139,6 @@ struct sdvo_device_mapping {
 	u8 slave_addr;
 	u8 dvo_wiring;
 	u8 i2c_pin;
-	u8 i2c_speed;
 	u8 ddc_pin;
 };
 
@@ -209,6 +208,8 @@ struct drm_i915_display_funcs {
 			     struct drm_display_mode *adjusted_mode,
 			     int x, int y,
 			     struct drm_framebuffer *old_fb);
+	void (*write_eld)(struct drm_connector *connector,
+			  struct drm_crtc *crtc);
 	void (*fdi_link_train)(struct drm_crtc *crtc);
 	void (*init_clock_gating)(struct drm_device *dev);
 	void (*init_pch_clock_gating)(struct drm_device *dev);
@@ -226,26 +227,26 @@ struct drm_i915_display_funcs {
 
 struct intel_device_info {
 	u8 gen;
-	u8 is_mobile : 1;
-	u8 is_i85x : 1;
-	u8 is_i915g : 1;
-	u8 is_i945gm : 1;
-	u8 is_g33 : 1;
-	u8 need_gfx_hws : 1;
-	u8 is_g4x : 1;
-	u8 is_pineview : 1;
-	u8 is_broadwater : 1;
-	u8 is_crestline : 1;
-	u8 is_ivybridge : 1;
-	u8 has_fbc : 1;
-	u8 has_pipe_cxsr : 1;
-	u8 has_hotplug : 1;
-	u8 cursor_needs_physical : 1;
-	u8 has_overlay : 1;
-	u8 overlay_needs_physical : 1;
-	u8 supports_tv : 1;
-	u8 has_bsd_ring : 1;
-	u8 has_blt_ring : 1;
+	u8 is_mobile:1;
+	u8 is_i85x:1;
+	u8 is_i915g:1;
+	u8 is_i945gm:1;
+	u8 is_g33:1;
+	u8 need_gfx_hws:1;
+	u8 is_g4x:1;
+	u8 is_pineview:1;
+	u8 is_broadwater:1;
+	u8 is_crestline:1;
+	u8 is_ivybridge:1;
+	u8 has_fbc:1;
+	u8 has_pipe_cxsr:1;
+	u8 has_hotplug:1;
+	u8 cursor_needs_physical:1;
+	u8 has_overlay:1;
+	u8 overlay_needs_physical:1;
+	u8 supports_tv:1;
+	u8 has_bsd_ring:1;
+	u8 has_blt_ring:1;
 };
 
 enum no_fbc_reason {
@@ -347,7 +348,6 @@ typedef struct drm_i915_private {
 	/* LVDS info */
 	int backlight_level;  /* restore backlight to this value */
 	bool backlight_enabled;
-	struct drm_display_mode *panel_fixed_mode;
 	struct drm_display_mode *lfp_lvds_vbt_mode; /* if any */
 	struct drm_display_mode *sdvo_lvds_vbt_mode; /* if any */
 
@@ -357,6 +357,7 @@ typedef struct drm_i915_private {
 	unsigned int lvds_vbt:1;
 	unsigned int int_crt_support:1;
 	unsigned int lvds_use_ssc:1;
+	unsigned int display_clock_mode:1;
 	int lvds_ssc_freq;
 	struct {
 		int rate;
@@ -672,10 +673,9 @@ typedef struct drm_i915_private {
 	unsigned int lvds_border_bits;
 	/* Panel fitter placement and size for Ironlake+ */
 	u32 pch_pf_pos, pch_pf_size;
-	int panel_t3, panel_t12;
 
-	struct drm_crtc *plane_to_crtc_mapping[2];
-	struct drm_crtc *pipe_to_crtc_mapping[2];
+	struct drm_crtc *plane_to_crtc_mapping[3];
+	struct drm_crtc *pipe_to_crtc_mapping[3];
 	wait_queue_head_t pending_flip_queue;
 	bool flip_pending_is_done;
 
@@ -759,19 +759,19 @@ struct drm_i915_gem_object {
 	 * (has pending rendering), and is not set if it's on inactive (ready
 	 * to be unbound).
 	 */
-	unsigned int active : 1;
+	unsigned int active:1;
 
 	/**
 	 * This is set if the object has been written to since last bound
 	 * to the GTT
 	 */
-	unsigned int dirty : 1;
+	unsigned int dirty:1;
 
 	/**
 	 * This is set if the object has been written to since the last
 	 * GPU flush.
 	 */
-	unsigned int pending_gpu_write : 1;
+	unsigned int pending_gpu_write:1;
 
 	/**
 	 * Fence register bits (if any) for this object.  Will be set
@@ -780,18 +780,18 @@ struct drm_i915_gem_object {
 	 *
 	 * Size: 4 bits for 16 fences + sign (for FENCE_REG_NONE)
 	 */
-	signed int fence_reg : 5;
+	signed int fence_reg:5;
 
 	/**
 	 * Advice: are the backing pages purgeable?
 	 */
-	unsigned int madv : 2;
+	unsigned int madv:2;
 
 	/**
 	 * Current tiling mode for the object.
 	 */
-	unsigned int tiling_mode : 2;
-	unsigned int tiling_changed : 1;
+	unsigned int tiling_mode:2;
+	unsigned int tiling_changed:1;
 
 	/** How many users have pinned this object in GTT space. The following
 	 * users can each hold at most one reference: pwrite/pread, pin_ioctl
@@ -802,22 +802,22 @@ struct drm_i915_gem_object {
 	 *
 	 * In the worst case this is 1 + 1 + 1 + 2*2 = 7. That would fit into 3
 	 * bits with absolutely no headroom. So use 4 bits. */
-	unsigned int pin_count : 4;
+	unsigned int pin_count:4;
 #define DRM_I915_GEM_OBJECT_MAX_PIN_COUNT 0xf
 
 	/**
 	 * Is the object at the current location in the gtt mappable and
 	 * fenceable? Used to avoid costly recalculations.
 	 */
-	unsigned int map_and_fenceable : 1;
+	unsigned int map_and_fenceable:1;
 
 	/**
 	 * Whether the current gtt mapping needs to be mappable (and isn't just
 	 * mappable by accident). Track pin and fault separate for a more
 	 * accurate mappable working set.
 	 */
-	unsigned int fault_mappable : 1;
-	unsigned int pin_mappable : 1;
+	unsigned int fault_mappable:1;
+	unsigned int pin_mappable:1;
 
 	/*
 	 * Is the GPU currently using a fence to access this buffer,
@@ -1056,7 +1056,7 @@ i915_enable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
 void
 i915_disable_pipestat(drm_i915_private_t *dev_priv, int pipe, u32 mask);
 
-void intel_enable_asle (struct drm_device *dev);
+void intel_enable_asle(struct drm_device *dev);
 
 #ifdef CONFIG_DEBUG_FS
 extern void i915_destroy_error_state(struct drm_device *dev);
@@ -1146,7 +1146,7 @@ int i915_gem_dumb_create(struct drm_file *file_priv,
 int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
 		      uint32_t handle, uint64_t *offset);
 int i915_gem_dumb_destroy(struct drm_file *file_priv, struct drm_device *dev,
-			  uint32_t handle);			  
+			  uint32_t handle);
 /**
  * Returns true if seq1 is later than seq2.
  */
@@ -1301,10 +1301,11 @@ extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
 extern bool intel_fbc_enabled(struct drm_device *dev);
 extern void intel_disable_fbc(struct drm_device *dev);
 extern bool ironlake_set_drps(struct drm_device *dev, u8 val);
+extern void ironlake_init_pch_refclk(struct drm_device *dev);
 extern void ironlake_enable_rc6(struct drm_device *dev);
 extern void gen6_set_rps(struct drm_device *dev, u8 val);
-extern void intel_detect_pch (struct drm_device *dev);
-extern int intel_trans_dp_port_sel (struct drm_crtc *crtc);
+extern void intel_detect_pch(struct drm_device *dev);
+extern int intel_trans_dp_port_sel(struct drm_crtc *crtc);
 
 /* overlay */
 #ifdef CONFIG_DEBUG_FS
@@ -1354,18 +1355,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 	((reg) != FORCEWAKE))
 
 #define __i915_read(x, y) \
-static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = 0; \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		gen6_gt_force_wake_get(dev_priv); \
-		val = read##y(dev_priv->regs + reg); \
-		gen6_gt_force_wake_put(dev_priv); \
-	} else { \
-		val = read##y(dev_priv->regs + reg); \
-	} \
-	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
-	return val; \
-}
+	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
 
 __i915_read(8, b)
 __i915_read(16, w)
@@ -1374,13 +1364,8 @@ __i915_read(64, q)
 #undef __i915_read
 
 #define __i915_write(x, y) \
-static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
-	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_wait_for_fifo(dev_priv); \
-	} \
-	write##y(val, dev_priv->regs + reg); \
-}
+	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
+
 __i915_write(8, b)
 __i915_write(16, w)
 __i915_write(32, l)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a546a71fb060..6651c36b6e8a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -179,7 +179,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&dev->struct_mutex);
 
 	args->aper_size = dev_priv->mm.gtt_total;
-	args->aper_available_size = args->aper_size -pinned;
+	args->aper_available_size = args->aper_size - pinned;
 
 	return 0;
 }
@@ -195,6 +195,8 @@ i915_gem_create(struct drm_file *file,
 	u32 handle;
 
 	size = roundup(size, PAGE_SIZE);
+	if (size == 0)
+		return -EINVAL;
 
 	/* Allocate the new object */
 	obj = i915_gem_alloc_object(dev, size);
@@ -800,11 +802,11 @@ i915_gem_shmem_pwrite_fast(struct drm_device *dev,
 		if (IS_ERR(page))
 			return PTR_ERR(page);
 
-		vaddr = kmap_atomic(page, KM_USER0);
+		vaddr = kmap_atomic(page);
 		ret = __copy_from_user_inatomic(vaddr + page_offset,
 						user_data,
 						page_length);
-		kunmap_atomic(vaddr, KM_USER0);
+		kunmap_atomic(vaddr);
 
 		set_page_dirty(page);
 		mark_page_accessed(page);
@@ -1265,74 +1267,6 @@ out:
 }
 
 /**
- * i915_gem_create_mmap_offset - create a fake mmap offset for an object
- * @obj: obj in question
- *
- * GEM memory mapping works by handing back to userspace a fake mmap offset
- * it can use in a subsequent mmap(2) call.  The DRM core code then looks
- * up the object based on the offset and sets up the various memory mapping
- * structures.
- *
- * This routine allocates and attaches a fake offset for @obj.
- */
-static int
-i915_gem_create_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list;
-	struct drm_local_map *map;
-	int ret = 0;
-
-	/* Set the object up for mmap'ing */
-	list = &obj->base.map_list;
-	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
-	if (!list->map)
-		return -ENOMEM;
-
-	map = list->map;
-	map->type = _DRM_GEM;
-	map->size = obj->base.size;
-	map->handle = obj;
-
-	/* Get a DRM GEM mmap offset allocated... */
-	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
-						    obj->base.size / PAGE_SIZE,
-						    0, 0);
-	if (!list->file_offset_node) {
-		DRM_ERROR("failed to allocate offset for bo %d\n",
-			  obj->base.name);
-		ret = -ENOSPC;
-		goto out_free_list;
-	}
-
-	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
-						  obj->base.size / PAGE_SIZE,
-						  0);
-	if (!list->file_offset_node) {
-		ret = -ENOMEM;
-		goto out_free_list;
-	}
-
-	list->hash.key = list->file_offset_node->start;
-	ret = drm_ht_insert_item(&mm->offset_hash, &list->hash);
-	if (ret) {
-		DRM_ERROR("failed to add to map hash\n");
-		goto out_free_mm;
-	}
-
-	return 0;
-
-out_free_mm:
-	drm_mm_put_block(list->file_offset_node);
-out_free_list:
-	kfree(list->map);
-	list->map = NULL;
-
-	return ret;
-}
-
-/**
  * i915_gem_release_mmap - remove physical page mappings
  * @obj: obj in question
  *
@@ -1360,19 +1294,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	obj->fault_mappable = false;
 }
 
-static void
-i915_gem_free_mmap_offset(struct drm_i915_gem_object *obj)
-{
-	struct drm_device *dev = obj->base.dev;
-	struct drm_gem_mm *mm = dev->mm_private;
-	struct drm_map_list *list = &obj->base.map_list;
-
-	drm_ht_remove_item(&mm->offset_hash, &list->hash);
-	drm_mm_put_block(list->file_offset_node);
-	kfree(list->map);
-	list->map = NULL;
-}
-
 static uint32_t
 i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
 {
@@ -1485,7 +1406,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
 	}
 
 	if (!obj->base.map_list.map) {
-		ret = i915_gem_create_mmap_offset(obj);
+		ret = drm_gem_create_mmap_offset(&obj->base);
 		if (ret)
 			goto out;
 	}
@@ -1557,7 +1478,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj,
 		obj->pages[i] = page;
 	}
 
-	if (obj->tiling_mode != I915_TILING_NONE)
+	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_do_bit_17_swizzle(obj);
 
 	return 0;
@@ -1579,7 +1500,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
 
 	BUG_ON(obj->madv == __I915_MADV_PURGED);
 
-	if (obj->tiling_mode != I915_TILING_NONE)
+	if (i915_gem_object_needs_bit17_swizzle(obj))
 		i915_gem_object_save_bit_17_swizzle(obj);
 
 	if (obj->madv == I915_MADV_DONTNEED)
@@ -1856,7 +1777,7 @@ void i915_gem_reset(struct drm_device *dev)
 	 * lost bo to the inactive list.
 	 */
 	while (!list_empty(&dev_priv->mm.flushing_list)) {
-		obj= list_first_entry(&dev_priv->mm.flushing_list,
+		obj = list_first_entry(&dev_priv->mm.flushing_list,
 				      struct drm_i915_gem_object,
 				      mm_list);
 
@@ -1922,7 +1843,7 @@ i915_gem_retire_requests_ring(struct intel_ring_buffer *ring)
 	while (!list_empty(&ring->active_list)) {
 		struct drm_i915_gem_object *obj;
 
-		obj= list_first_entry(&ring->active_list,
+		obj = list_first_entry(&ring->active_list,
 				      struct drm_i915_gem_object,
 				      ring_list);
 
@@ -2272,14 +2193,8 @@ int
 i915_gpu_idle(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
-	bool lists_empty;
 	int ret, i;
 
-	lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
-		       list_empty(&dev_priv->mm.active_list));
-	if (lists_empty)
-		return 0;
-
 	/* Flush everything onto the inactive list. */
 	for (i = 0; i < I915_NUM_RINGS; i++) {
 		ret = i915_ring_idle(&dev_priv->ring[i]);
@@ -2882,7 +2797,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
 
 	fenceable =
 		obj->gtt_space->size == fence_size &&
-		(obj->gtt_space->start & (fence_alignment -1)) == 0;
+		(obj->gtt_space->start & (fence_alignment - 1)) == 0;
 
 	mappable =
 		obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end;
@@ -3598,7 +3513,7 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 			 */
 			request = kzalloc(sizeof(*request), GFP_KERNEL);
 			if (request)
-				ret = i915_add_request(obj->ring, NULL,request);
+				ret = i915_add_request(obj->ring, NULL, request);
 			else
 				ret = -ENOMEM;
 		}
@@ -3623,7 +3538,7 @@ int
 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
-    return i915_gem_ring_throttle(dev, file_priv);
+	return i915_gem_ring_throttle(dev, file_priv);
 }
 
 int
@@ -3752,7 +3667,7 @@ static void i915_gem_free_object_tail(struct drm_i915_gem_object *obj)
 	trace_i915_gem_object_destroy(obj);
 
 	if (obj->base.map_list.map)
-		i915_gem_free_mmap_offset(obj);
+		drm_gem_free_mmap_offset(&obj->base);
 
 	drm_gem_object_release(&obj->base);
 	i915_gem_info_remove_obj(dev_priv, obj->base.size);
diff --git a/drivers/gpu/drm/i915/i915_gem_debug.c b/drivers/gpu/drm/i915/i915_gem_debug.c
index 8da1899bd24f..cc93cac242d6 100644
--- a/drivers/gpu/drm/i915/i915_gem_debug.c
+++ b/drivers/gpu/drm/i915/i915_gem_debug.c
@@ -72,7 +72,7 @@ i915_verify_lists(struct drm_device *dev)
 			break;
 		} else if (!obj->active ||
 			   (obj->base.write_domain & I915_GEM_GPU_DOMAINS) == 0 ||
-			   list_empty(&obj->gpu_write_list)){
+			   list_empty(&obj->gpu_write_list)) {
 			DRM_ERROR("invalid flushing %p (a %d w %x gwl %d)\n",
 				  obj,
 				  obj->active,
@@ -157,7 +157,7 @@ i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle)
 	for (page = 0; page < obj->size / PAGE_SIZE; page++) {
 		int i;
 
-		backing_map = kmap_atomic(obj->pages[page], KM_USER0);
+		backing_map = kmap_atomic(obj->pages[page]);
 
 		if (backing_map == NULL) {
 			DRM_ERROR("failed to map backing page\n");
@@ -181,13 +181,13 @@ i915_gem_object_check_coherency(struct drm_i915_gem_object *obj, int handle)
 				}
 			}
 		}
-		kunmap_atomic(backing_map, KM_USER0);
+		kunmap_atomic(backing_map);
 		backing_map = NULL;
 	}
 
  out:
 	if (backing_map != NULL)
-		kunmap_atomic(backing_map, KM_USER0);
+		kunmap_atomic(backing_map);
 	iounmap(gtt_mapping);
 
 	/* give syslog time to catch up */
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index da05a2692a75..ead5d00f91b0 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -122,7 +122,7 @@ i915_gem_evict_something(struct drm_device *dev, int min_size,
 			goto found;
 	}
 	list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) {
-		if (! obj->base.write_domain || obj->pin_count)
+		if (!obj->base.write_domain || obj->pin_count)
 			continue;
 
 		if (mark_free(obj, &unwind_list))
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 4934cf84c320..3693e83a97f3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -784,7 +784,8 @@ i915_gem_execbuffer_sync_rings(struct drm_i915_gem_object *obj,
 	}
 
 	from->sync_seqno[idx] = seqno;
-	return intel_ring_sync(to, from, seqno - 1);
+
+	return to->sync_to(to, from, seqno - 1);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7a709cd8d543..6042c5e6d278 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -49,6 +49,28 @@ static unsigned int cache_level_to_agp_type(struct drm_device *dev,
 	}
 }
 
+static bool do_idling(struct drm_i915_private *dev_priv)
+{
+	bool ret = dev_priv->mm.interruptible;
+
+	if (unlikely(dev_priv->mm.gtt->do_idle_maps)) {
+		dev_priv->mm.interruptible = false;
+		if (i915_gpu_idle(dev_priv->dev)) {
+			DRM_ERROR("Couldn't idle GPU\n");
+			/* Wait a bit, in hopes it avoids the hang */
+			udelay(10);
+		}
+	}
+
+	return ret;
+}
+
+static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
+{
+	if (unlikely(dev_priv->mm.gtt->do_idle_maps))
+		dev_priv->mm.interruptible = interruptible;
+}
+
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -117,6 +139,12 @@ void i915_gem_gtt_rebind_object(struct drm_i915_gem_object *obj,
 
 void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 {
+	struct drm_device *dev = obj->base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	bool interruptible;
+
+	interruptible = do_idling(dev_priv);
+
 	intel_gtt_clear_range(obj->gtt_space->start >> PAGE_SHIFT,
 			      obj->base.size >> PAGE_SHIFT);
 
@@ -124,4 +152,6 @@ void i915_gem_gtt_unbind_object(struct drm_i915_gem_object *obj)
 		intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
 		obj->sg_list = NULL;
 	}
+
+	undo_idling(dev_priv, interruptible);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 99c4faa59d8f..31d334d9d9da 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -92,7 +92,10 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
-	if (INTEL_INFO(dev)->gen >= 5) {
+	if (INTEL_INFO(dev)->gen >= 6) {
+		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
+		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
+	} else if (IS_GEN5(dev)) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
 		 */
@@ -440,14 +443,9 @@ i915_gem_swizzle_page(struct page *page)
 void
 i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
-	if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
-		return;
-
 	if (obj->bit_17 == NULL)
 		return;
 
@@ -464,14 +462,9 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
 void
 i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
 {
-	struct drm_device *dev = obj->base.dev;
-	drm_i915_private_t *dev_priv = dev->dev_private;
 	int page_count = obj->base.size >> PAGE_SHIFT;
 	int i;
 
-	if (dev_priv->mm.bit_6_swizzle_x != I915_BIT_6_SWIZZLE_9_10_17)
-		return;
-
 	if (obj->bit_17 == NULL) {
 		obj->bit_17 = kmalloc(BITS_TO_LONGS(page_count) *
 					   sizeof(long), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9cbb0cd8f46a..9ee2729fe5c6 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -383,6 +383,7 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	pm_iir = dev_priv->pm_iir;
 	dev_priv->pm_iir = 0;
 	pm_imr = I915_READ(GEN6_PMIMR);
+	I915_WRITE(GEN6_PMIMR, 0);
 	spin_unlock_irq(&dev_priv->rps_lock);
 
 	if (!pm_iir)
@@ -420,7 +421,6 @@ static void gen6_pm_rps_work(struct work_struct *work)
 	 * an *extremely* unlikely race with gen6_rps_enable() that is prevented
 	 * by holding struct_mutex for the duration of the write.
 	 */
-	I915_WRITE(GEN6_PMIMR, pm_imr & ~pm_iir);
 	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
@@ -536,8 +536,9 @@ static irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
 		unsigned long flags;
 		spin_lock_irqsave(&dev_priv->rps_lock, flags);
 		WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
-		I915_WRITE(GEN6_PMIMR, pm_iir);
 		dev_priv->pm_iir |= pm_iir;
+		I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir);
+		POSTING_READ(GEN6_PMIMR);
 		spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
 		queue_work(dev_priv->wq, &dev_priv->rps_work);
 	}
@@ -649,8 +650,9 @@ static irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS)
 		unsigned long flags;
 		spin_lock_irqsave(&dev_priv->rps_lock, flags);
 		WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
-		I915_WRITE(GEN6_PMIMR, pm_iir);
 		dev_priv->pm_iir |= pm_iir;
+		I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir);
+		POSTING_READ(GEN6_PMIMR);
 		spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
 		queue_work(dev_priv->wq, &dev_priv->rps_work);
 	}
@@ -711,7 +713,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
 
 	page_count = src->base.size / PAGE_SIZE;
 
-	dst = kmalloc(sizeof(*dst) + page_count * sizeof (u32 *), GFP_ATOMIC);
+	dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
 	if (dst == NULL)
 		return NULL;
 
@@ -1493,7 +1495,7 @@ static int ironlake_enable_vblank(struct drm_device *dev, int pipe)
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-				    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+				    DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 
 	return 0;
@@ -1541,7 +1543,7 @@ static void ironlake_disable_vblank(struct drm_device *dev, int pipe)
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
 	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-				     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+				     DE_PIPEA_VBLANK : DE_PIPEB_VBLANK);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -1777,6 +1779,26 @@ static void ironlake_irq_preinstall(struct drm_device *dev)
 	POSTING_READ(SDEIER);
 }
 
+/*
+ * Enable digital hotplug on the PCH, and configure the DP short pulse
+ * duration to 2ms (which is the minimum in the Display Port spec)
+ *
+ * This register is the same on all known PCH chips.
+ */
+
+static void ironlake_enable_pch_hotplug(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	u32	hotplug;
+
+	hotplug = I915_READ(PCH_PORT_HOTPLUG);
+	hotplug &= ~(PORTD_PULSE_DURATION_MASK|PORTC_PULSE_DURATION_MASK|PORTB_PULSE_DURATION_MASK);
+	hotplug |= PORTD_HOTPLUG_ENABLE | PORTD_PULSE_DURATION_2ms;
+	hotplug |= PORTC_HOTPLUG_ENABLE | PORTC_PULSE_DURATION_2ms;
+	hotplug |= PORTB_HOTPLUG_ENABLE | PORTB_PULSE_DURATION_2ms;
+	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
+}
+
 static int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -1839,6 +1861,8 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(SDEIER, hotplug_mask);
 	POSTING_READ(SDEIER);
 
+	ironlake_enable_pch_hotplug(dev);
+
 	if (IS_IRONLAKE_M(dev)) {
 		/* Clear & enable PCU event interrupts */
 		I915_WRITE(DEIIR, DE_PCU_EVENT);
@@ -1896,6 +1920,8 @@ static int ivybridge_irq_postinstall(struct drm_device *dev)
 	I915_WRITE(SDEIER, hotplug_mask);
 	POSTING_READ(SDEIER);
 
+	ironlake_enable_pch_hotplug(dev);
+
 	return 0;
 }
 
@@ -2020,6 +2046,10 @@ static void ironlake_irq_uninstall(struct drm_device *dev)
 	I915_WRITE(GTIMR, 0xffffffff);
 	I915_WRITE(GTIER, 0x0);
 	I915_WRITE(GTIIR, I915_READ(GTIIR));
+
+	I915_WRITE(SDEIMR, 0xffffffff);
+	I915_WRITE(SDEIER, 0x0);
+	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
 }
 
 static void i915_driver_irq_uninstall(struct drm_device * dev)
diff --git a/drivers/gpu/drm/i915/i915_mem.c b/drivers/gpu/drm/i915/i915_mem.c
index 83b7b81bb2b8..cc8f6d49cf20 100644
--- a/drivers/gpu/drm/i915/i915_mem.c
+++ b/drivers/gpu/drm/i915/i915_mem.c
@@ -202,7 +202,7 @@ static int init_heap(struct mem_block **heap, int start, int size)
 	blocks->next = blocks->prev = *heap;
 
 	memset(*heap, 0, sizeof(**heap));
-	(*heap)->file_priv = (struct drm_file *) - 1;
+	(*heap)->file_priv = (struct drm_file *) -1;
 	(*heap)->next = (*heap)->prev = blocks;
 	return 0;
 }
@@ -359,19 +359,19 @@ int i915_mem_init_heap(struct drm_device *dev, void *data,
 	return init_heap(heap, initheap->start, initheap->size);
 }
 
-int i915_mem_destroy_heap( struct drm_device *dev, void *data,
-			   struct drm_file *file_priv )
+int i915_mem_destroy_heap(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
 {
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	drm_i915_mem_destroy_heap_t *destroyheap = data;
 	struct mem_block **heap;
 
-	if ( !dev_priv ) {
-		DRM_ERROR( "called with no initialization\n" );
+	if (!dev_priv) {
+		DRM_ERROR("called with no initialization\n");
 		return -EINVAL;
 	}
 
-	heap = get_heap( dev_priv, destroyheap->region );
+	heap = get_heap(dev_priv, destroyheap->region);
 	if (!heap) {
 		DRM_ERROR("get_heap failed");
 		return -EFAULT;
@@ -382,6 +382,6 @@ int i915_mem_destroy_heap( struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	i915_mem_takedown( heap );
+	i915_mem_takedown(heap);
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 542453f7498c..5a09416e611f 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -156,7 +156,7 @@
 #define MI_SUSPEND_FLUSH	MI_INSTR(0x0b, 0)
 #define   MI_SUSPEND_FLUSH_EN	(1<<0)
 #define MI_REPORT_HEAD		MI_INSTR(0x07, 0)
-#define MI_OVERLAY_FLIP		MI_INSTR(0x11,0)
+#define MI_OVERLAY_FLIP		MI_INSTR(0x11, 0)
 #define   MI_OVERLAY_CONTINUE	(0x0<<21)
 #define   MI_OVERLAY_ON		(0x1<<21)
 #define   MI_OVERLAY_OFF	(0x2<<21)
@@ -194,6 +194,13 @@
 #define  MI_SEMAPHORE_UPDATE	    (1<<21)
 #define  MI_SEMAPHORE_COMPARE	    (1<<20)
 #define  MI_SEMAPHORE_REGISTER	    (1<<18)
+#define  MI_SEMAPHORE_SYNC_RV	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_RB	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VR	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_VB	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BR	    (2<<16)
+#define  MI_SEMAPHORE_SYNC_BV	    (0<<16)
+#define  MI_SEMAPHORE_SYNC_INVALID  (1<<0)
 /*
  * 3D instructions used by the kernel
  */
@@ -235,16 +242,22 @@
 #define   ASYNC_FLIP                (1<<22)
 #define   DISPLAY_PLANE_A           (0<<20)
 #define   DISPLAY_PLANE_B           (1<<20)
-#define GFX_OP_PIPE_CONTROL	((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
-#define   PIPE_CONTROL_QW_WRITE	(1<<14)
-#define   PIPE_CONTROL_DEPTH_STALL (1<<13)
-#define   PIPE_CONTROL_WC_FLUSH	(1<<12)
-#define   PIPE_CONTROL_IS_FLUSH	(1<<11) /* MBZ on Ironlake */
-#define   PIPE_CONTROL_TC_FLUSH (1<<10) /* GM45+ only */
-#define   PIPE_CONTROL_ISP_DIS	(1<<9)
-#define   PIPE_CONTROL_NOTIFY	(1<<8)
+#define GFX_OP_PIPE_CONTROL(len)	((0x3<<29)|(0x3<<27)|(0x2<<24)|(len-2))
+#define   PIPE_CONTROL_CS_STALL				(1<<20)
+#define   PIPE_CONTROL_QW_WRITE				(1<<14)
+#define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
+#define   PIPE_CONTROL_WRITE_FLUSH			(1<<12)
+#define   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH	(1<<12) /* gen6+ */
+#define   PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE	(1<<11) /* MBZ on Ironlake */
+#define   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE		(1<<10) /* GM45+ only */
+#define   PIPE_CONTROL_INDIRECT_STATE_DISABLE		(1<<9)
+#define   PIPE_CONTROL_NOTIFY				(1<<8)
+#define   PIPE_CONTROL_VF_CACHE_INVALIDATE		(1<<4)
+#define   PIPE_CONTROL_CONST_CACHE_INVALIDATE		(1<<3)
+#define   PIPE_CONTROL_STATE_CACHE_INVALIDATE		(1<<2)
+#define   PIPE_CONTROL_STALL_AT_SCOREBOARD		(1<<1)
+#define   PIPE_CONTROL_DEPTH_CACHE_FLUSH		(1<<0)
 #define   PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-#define   PIPE_CONTROL_STALL_EN	(1<<1) /* in addr word, Ironlake+ only */
 
 
 /*
@@ -296,6 +309,12 @@
 #define RING_CTL(base)		((base)+0x3c)
 #define RING_SYNC_0(base)	((base)+0x40)
 #define RING_SYNC_1(base)	((base)+0x44)
+#define GEN6_RVSYNC (RING_SYNC_0(RENDER_RING_BASE))
+#define GEN6_RBSYNC (RING_SYNC_1(RENDER_RING_BASE))
+#define GEN6_VRSYNC (RING_SYNC_1(GEN6_BSD_RING_BASE))
+#define GEN6_VBSYNC (RING_SYNC_0(GEN6_BSD_RING_BASE))
+#define GEN6_BRSYNC (RING_SYNC_0(BLT_RING_BASE))
+#define GEN6_BVSYNC (RING_SYNC_1(BLT_RING_BASE))
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
@@ -470,7 +489,7 @@
 
 /* Enables non-sequential data reads through arbiter
  */
-#define   MI_ARB_DUAL_DATA_PHASE_DISABLE       	(1 << 9)
+#define   MI_ARB_DUAL_DATA_PHASE_DISABLE	(1 << 9)
 
 /* Disable FSB snooping of cacheable write cycles from binner/render
  * command stream
@@ -626,7 +645,7 @@
 
 #define ILK_DISPLAY_CHICKEN1	0x42000
 #define   ILK_FBCQ_DIS		(1<<22)
-#define   ILK_PABSTRETCH_DIS 	(1<<21)
+#define	  ILK_PABSTRETCH_DIS	(1<<21)
 
 
 /*
@@ -2358,7 +2377,7 @@
 
 #define DSPFW1			0x70034
 #define   DSPFW_SR_SHIFT	23
-#define   DSPFW_SR_MASK 	(0x1ff<<23)
+#define   DSPFW_SR_MASK		(0x1ff<<23)
 #define   DSPFW_CURSORB_SHIFT	16
 #define   DSPFW_CURSORB_MASK	(0x3f<<16)
 #define   DSPFW_PLANEB_SHIFT	8
@@ -2416,6 +2435,7 @@
 #define  WM0_PIPE_CURSOR_MASK	(0x1f)
 
 #define WM0_PIPEB_ILK		0x45104
+#define WM0_PIPEC_IVB		0x45200
 #define WM1_LP_ILK		0x45108
 #define  WM1_LP_SR_EN		(1<<31)
 #define  WM1_LP_LATENCY_SHIFT	24
@@ -2554,10 +2574,18 @@
 #define _CURBBASE		0x700c4
 #define _CURBPOS			0x700c8
 
+#define _CURBCNTR_IVB		0x71080
+#define _CURBBASE_IVB		0x71084
+#define _CURBPOS_IVB		0x71088
+
 #define CURCNTR(pipe) _PIPE(pipe, _CURACNTR, _CURBCNTR)
 #define CURBASE(pipe) _PIPE(pipe, _CURABASE, _CURBBASE)
 #define CURPOS(pipe) _PIPE(pipe, _CURAPOS, _CURBPOS)
 
+#define CURCNTR_IVB(pipe) _PIPE(pipe, _CURACNTR, _CURBCNTR_IVB)
+#define CURBASE_IVB(pipe) _PIPE(pipe, _CURABASE, _CURBBASE_IVB)
+#define CURPOS_IVB(pipe) _PIPE(pipe, _CURAPOS, _CURBPOS_IVB)
+
 /* Display A control */
 #define _DSPACNTR                0x70180
 #define   DISPLAY_PLANE_ENABLE			(1<<31)
@@ -2903,12 +2931,13 @@
 #define SDEIER  0xc400c
 
 /* digital port hotplug */
-#define PCH_PORT_HOTPLUG        0xc4030
+#define PCH_PORT_HOTPLUG        0xc4030		/* SHOTPLUG_CTL */
 #define PORTD_HOTPLUG_ENABLE            (1 << 20)
 #define PORTD_PULSE_DURATION_2ms        (0)
 #define PORTD_PULSE_DURATION_4_5ms      (1 << 18)
 #define PORTD_PULSE_DURATION_6ms        (2 << 18)
 #define PORTD_PULSE_DURATION_100ms      (3 << 18)
+#define PORTD_PULSE_DURATION_MASK	(3 << 18)
 #define PORTD_HOTPLUG_NO_DETECT         (0)
 #define PORTD_HOTPLUG_SHORT_DETECT      (1 << 16)
 #define PORTD_HOTPLUG_LONG_DETECT       (1 << 17)
@@ -2917,6 +2946,7 @@
 #define PORTC_PULSE_DURATION_4_5ms      (1 << 10)
 #define PORTC_PULSE_DURATION_6ms        (2 << 10)
 #define PORTC_PULSE_DURATION_100ms      (3 << 10)
+#define PORTC_PULSE_DURATION_MASK	(3 << 10)
 #define PORTC_HOTPLUG_NO_DETECT         (0)
 #define PORTC_HOTPLUG_SHORT_DETECT      (1 << 8)
 #define PORTC_HOTPLUG_LONG_DETECT       (1 << 9)
@@ -2925,6 +2955,7 @@
 #define PORTB_PULSE_DURATION_4_5ms      (1 << 2)
 #define PORTB_PULSE_DURATION_6ms        (2 << 2)
 #define PORTB_PULSE_DURATION_100ms      (3 << 2)
+#define PORTB_PULSE_DURATION_MASK	(3 << 2)
 #define PORTB_HOTPLUG_NO_DETECT         (0)
 #define PORTB_HOTPLUG_SHORT_DETECT      (1 << 0)
 #define PORTB_HOTPLUG_LONG_DETECT       (1 << 1)
@@ -2945,15 +2976,15 @@
 
 #define _PCH_DPLL_A              0xc6014
 #define _PCH_DPLL_B              0xc6018
-#define PCH_DPLL(pipe) _PIPE(pipe, _PCH_DPLL_A, _PCH_DPLL_B)
+#define PCH_DPLL(pipe) (pipe == 0 ?  _PCH_DPLL_A : _PCH_DPLL_B)
 
 #define _PCH_FPA0                0xc6040
 #define  FP_CB_TUNE		(0x3<<22)
 #define _PCH_FPA1                0xc6044
 #define _PCH_FPB0                0xc6048
 #define _PCH_FPB1                0xc604c
-#define PCH_FP0(pipe) _PIPE(pipe, _PCH_FPA0, _PCH_FPB0)
-#define PCH_FP1(pipe) _PIPE(pipe, _PCH_FPA1, _PCH_FPB1)
+#define PCH_FP0(pipe) (pipe == 0 ? _PCH_FPA0 : _PCH_FPB0)
+#define PCH_FP1(pipe) (pipe == 0 ? _PCH_FPA1 : _PCH_FPB1)
 
 #define PCH_DPLL_TEST           0xc606c
 
@@ -3167,6 +3198,7 @@
 #define  FDI_LINK_TRAIN_NONE_IVB            (3<<8)
 
 /* both Tx and Rx */
+#define  FDI_COMPOSITE_SYNC		(1<<11)
 #define  FDI_LINK_TRAIN_AUTO		(1<<10)
 #define  FDI_SCRAMBLING_ENABLE          (0<<7)
 #define  FDI_SCRAMBLING_DISABLE         (1<<7)
@@ -3308,15 +3340,35 @@
 #define PCH_PP_STATUS		0xc7200
 #define PCH_PP_CONTROL		0xc7204
 #define  PANEL_UNLOCK_REGS	(0xabcd << 16)
+#define  PANEL_UNLOCK_MASK	(0xffff << 16)
 #define  EDP_FORCE_VDD		(1 << 3)
 #define  EDP_BLC_ENABLE		(1 << 2)
 #define  PANEL_POWER_RESET	(1 << 1)
 #define  PANEL_POWER_OFF	(0 << 0)
 #define  PANEL_POWER_ON		(1 << 0)
 #define PCH_PP_ON_DELAYS	0xc7208
+#define  PANEL_PORT_SELECT_MASK	(3 << 30)
+#define  PANEL_PORT_SELECT_LVDS	(0 << 30)
+#define  PANEL_PORT_SELECT_DPA	(1 << 30)
 #define  EDP_PANEL		(1 << 30)
+#define  PANEL_PORT_SELECT_DPC	(2 << 30)
+#define  PANEL_PORT_SELECT_DPD	(3 << 30)
+#define  PANEL_POWER_UP_DELAY_MASK	(0x1fff0000)
+#define  PANEL_POWER_UP_DELAY_SHIFT	16
+#define  PANEL_LIGHT_ON_DELAY_MASK	(0x1fff)
+#define  PANEL_LIGHT_ON_DELAY_SHIFT	0
+
 #define PCH_PP_OFF_DELAYS	0xc720c
+#define  PANEL_POWER_DOWN_DELAY_MASK	(0x1fff0000)
+#define  PANEL_POWER_DOWN_DELAY_SHIFT	16
+#define  PANEL_LIGHT_OFF_DELAY_MASK	(0x1fff)
+#define  PANEL_LIGHT_OFF_DELAY_SHIFT	0
+
 #define PCH_PP_DIVISOR		0xc7210
+#define  PP_REFERENCE_DIVIDER_MASK	(0xffffff00)
+#define  PP_REFERENCE_DIVIDER_SHIFT	8
+#define  PANEL_POWER_CYCLE_DELAY_MASK	(0x1f)
+#define  PANEL_POWER_CYCLE_DELAY_SHIFT	0
 
 #define PCH_DP_B		0xe4100
 #define PCH_DPB_AUX_CH_CTL	0xe4110
@@ -3470,4 +3522,29 @@
 #define GEN6_PCODE_DATA				0x138128
 #define   GEN6_PCODE_FREQ_IA_RATIO_SHIFT	8
 
+#define G4X_AUD_VID_DID			0x62020
+#define INTEL_AUDIO_DEVCL		0x808629FB
+#define INTEL_AUDIO_DEVBLC		0x80862801
+#define INTEL_AUDIO_DEVCTG		0x80862802
+
+#define G4X_AUD_CNTL_ST			0x620B4
+#define G4X_ELDV_DEVCL_DEVBLC		(1 << 13)
+#define G4X_ELDV_DEVCTG			(1 << 14)
+#define G4X_ELD_ADDR			(0xf << 5)
+#define G4X_ELD_ACK			(1 << 4)
+#define G4X_HDMIW_HDMIEDID		0x6210C
+
+#define GEN5_HDMIW_HDMIEDID_A		0xE2050
+#define GEN5_AUD_CNTL_ST_A		0xE20B4
+#define GEN5_ELD_BUFFER_SIZE		(0x1f << 10)
+#define GEN5_ELD_ADDRESS		(0x1f << 5)
+#define GEN5_ELD_ACK			(1 << 4)
+#define GEN5_AUD_CNTL_ST2		0xE20C0
+#define GEN5_ELD_VALIDB			(1 << 0)
+#define GEN5_CP_READYB			(1 << 1)
+
+#define GEN7_HDMIW_HDMIEDID_A		0xE5050
+#define GEN7_AUD_CNTRL_ST_A		0xE50B4
+#define GEN7_AUD_CNTRL_ST2		0xE50C0
+
 #endif /* _I915_REG_H_ */
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index f10742359ec9..f8f602d76650 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -60,7 +60,7 @@ static void i915_save_palette(struct drm_device *dev, enum pipe pipe)
 	else
 		array = dev_priv->save_palette_b;
 
-	for(i = 0; i < 256; i++)
+	for (i = 0; i < 256; i++)
 		array[i] = I915_READ(reg + (i << 2));
 }
 
@@ -82,7 +82,7 @@ static void i915_restore_palette(struct drm_device *dev, enum pipe pipe)
 	else
 		array = dev_priv->save_palette_b;
 
-	for(i = 0; i < 256; i++)
+	for (i = 0; i < 256; i++)
 		I915_WRITE(reg + (i << 2), array[i]);
 }
 
@@ -887,10 +887,10 @@ int i915_restore_state(struct drm_device *dev)
 	mutex_lock(&dev->struct_mutex);
 
 	/* Cache mode state */
-	I915_WRITE (CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
+	I915_WRITE(CACHE_MODE_0, dev_priv->saveCACHE_MODE_0 | 0xffff0000);
 
 	/* Memory arbitration state */
-	I915_WRITE (MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000);
+	I915_WRITE(MI_ARB_STATE, dev_priv->saveMI_ARB_STATE | 0xffff0000);
 
 	for (i = 0; i < 16; i++) {
 		I915_WRITE(SWF00 + (i << 2), dev_priv->saveSWF0[i]);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index d623fefbfaca..dac7bba4d9da 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -385,29 +385,29 @@ TRACE_EVENT(i915_flip_complete,
 );
 
 TRACE_EVENT(i915_reg_rw,
-           TP_PROTO(bool write, u32 reg, u64 val, int len),
-
-           TP_ARGS(write, reg, val, len),
-
-           TP_STRUCT__entry(
-                   __field(u64, val)
-                   __field(u32, reg)
-                   __field(u16, write)
-                   __field(u16, len)
-                   ),
-
-           TP_fast_assign(
-                   __entry->val = (u64)val;
-                   __entry->reg = reg;
-                   __entry->write = write;
-                   __entry->len = len;
-                   ),
-
-           TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
-                     __entry->write ? "write" : "read",
-		     __entry->reg, __entry->len,
-		     (u32)(__entry->val & 0xffffffff),
-		     (u32)(__entry->val >> 32))
+	TP_PROTO(bool write, u32 reg, u64 val, int len),
+
+	TP_ARGS(write, reg, val, len),
+
+	TP_STRUCT__entry(
+		__field(u64, val)
+		__field(u32, reg)
+		__field(u16, write)
+		__field(u16, len)
+		),
+
+	TP_fast_assign(
+		__entry->val = (u64)val;
+		__entry->reg = reg;
+		__entry->write = write;
+		__entry->len = len;
+		),
+
+	TP_printk("%s reg=0x%x, len=%d, val=(0x%x, 0x%x)",
+		__entry->write ? "write" : "read",
+		__entry->reg, __entry->len,
+		(u32)(__entry->val & 0xffffffff),
+		(u32)(__entry->val >> 32))
 );
 
 #endif /* _I915_TRACE_H_ */
diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c
index 2cb8e0b9f1ee..cb912106d1a2 100644
--- a/drivers/gpu/drm/i915/intel_acpi.c
+++ b/drivers/gpu/drm/i915/intel_acpi.c
@@ -64,7 +64,7 @@ static int intel_dsm(acpi_handle handle, int func, int arg)
 
 	case ACPI_TYPE_BUFFER:
 		if (obj->buffer.length == 4) {
-			result =(obj->buffer.pointer[0] |
+			result = (obj->buffer.pointer[0] |
 				(obj->buffer.pointer[1] <<  8) |
 				(obj->buffer.pointer[2] << 16) |
 				(obj->buffer.pointer[3] << 24));
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index 61abef8a8119..63880e2e5cfd 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -1,5 +1,5 @@
 /*
- * Copyright � 2006 Intel Corporation
+ * Copyright © 2006 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -309,6 +309,13 @@ parse_general_features(struct drm_i915_private *dev_priv,
 		dev_priv->lvds_use_ssc = general->enable_ssc;
 		dev_priv->lvds_ssc_freq =
 			intel_bios_ssc_frequency(dev, general->ssc_freq);
+		dev_priv->display_clock_mode = general->display_clock_mode;
+		DRM_DEBUG_KMS("BDB_GENERAL_FEATURES int_tv_support %d int_crt_support %d lvds_use_ssc %d lvds_ssc_freq %d display_clock_mode %d\n",
+			      dev_priv->int_tv_support,
+			      dev_priv->int_crt_support,
+			      dev_priv->lvds_use_ssc,
+			      dev_priv->lvds_ssc_freq,
+			      dev_priv->display_clock_mode);
 	}
 }
 
@@ -381,7 +388,7 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 		if (p_child->dvo_port != DEVICE_PORT_DVOB &&
 			p_child->dvo_port != DEVICE_PORT_DVOC) {
 			/* skip the incorrect SDVO port */
-			DRM_DEBUG_KMS("Incorrect SDVO port. Skip it \n");
+			DRM_DEBUG_KMS("Incorrect SDVO port. Skip it\n");
 			continue;
 		}
 		DRM_DEBUG_KMS("the SDVO device with slave addr %2x is found on"
@@ -396,15 +403,13 @@ parse_sdvo_device_mapping(struct drm_i915_private *dev_priv,
 			p_mapping->dvo_wiring = p_child->dvo_wiring;
 			p_mapping->ddc_pin = p_child->ddc_pin;
 			p_mapping->i2c_pin = p_child->i2c_pin;
-			p_mapping->i2c_speed = p_child->i2c_speed;
 			p_mapping->initialized = 1;
-			DRM_DEBUG_KMS("SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d, i2c_speed=%d\n",
+			DRM_DEBUG_KMS("SDVO device: dvo=%x, addr=%x, wiring=%d, ddc_pin=%d, i2c_pin=%d\n",
 				      p_mapping->dvo_port,
 				      p_mapping->slave_addr,
 				      p_mapping->dvo_wiring,
 				      p_mapping->ddc_pin,
-				      p_mapping->i2c_pin,
-				      p_mapping->i2c_speed);
+				      p_mapping->i2c_pin);
 		} else {
 			DRM_DEBUG_KMS("Maybe one SDVO port is shared by "
 					 "two SDVO device.\n");
@@ -564,7 +569,7 @@ parse_device_mapping(struct drm_i915_private *dev_priv,
 		count++;
 	}
 	if (!count) {
-		DRM_DEBUG_KMS("no child dev is parsed from VBT \n");
+		DRM_DEBUG_KMS("no child dev is parsed from VBT\n");
 		return;
 	}
 	dev_priv->child_dev = kzalloc(sizeof(*p_child) * count, GFP_KERNEL);
@@ -610,7 +615,7 @@ init_vbt_defaults(struct drm_i915_private *dev_priv)
 	/* Default to using SSC */
 	dev_priv->lvds_use_ssc = 1;
 	dev_priv->lvds_ssc_freq = intel_bios_ssc_frequency(dev, 1);
-	DRM_DEBUG("Set default to SSC at %dMHz\n", dev_priv->lvds_ssc_freq);
+	DRM_DEBUG_KMS("Set default to SSC at %dMHz\n", dev_priv->lvds_ssc_freq);
 
 	/* eDP data */
 	dev_priv->edp.bpp = 18;
@@ -639,7 +644,7 @@ intel_parse_bios(struct drm_device *dev)
 	if (dev_priv->opregion.vbt) {
 		struct vbt_header *vbt = dev_priv->opregion.vbt;
 		if (memcmp(vbt->signature, "$VBT", 4) == 0) {
-			DRM_DEBUG_DRIVER("Using VBT from OpRegion: %20s\n",
+			DRM_DEBUG_KMS("Using VBT from OpRegion: %20s\n",
 					 vbt->signature);
 			bdb = (struct bdb_header *)((char *)vbt + vbt->bdb_offset);
 		} else
diff --git a/drivers/gpu/drm/i915/intel_bios.h b/drivers/gpu/drm/i915/intel_bios.h
index 5f8e4edcbbb9..8af3735e27c6 100644
--- a/drivers/gpu/drm/i915/intel_bios.h
+++ b/drivers/gpu/drm/i915/intel_bios.h
@@ -1,5 +1,5 @@
 /*
- * Copyright � 2006 Intel Corporation
+ * Copyright © 2006 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -120,7 +120,9 @@ struct bdb_general_features {
 	u8 ssc_freq:1;
 	u8 enable_lfp_on_override:1;
 	u8 disable_ssc_ddt:1;
-	u8 rsvd8:3; /* finish byte */
+	u8 rsvd7:1;
+	u8 display_clock_mode:1;
+	u8 rsvd8:1; /* finish byte */
 
         /* bits 3 */
 	u8 disable_smooth_vision:1;
@@ -133,7 +135,10 @@ struct bdb_general_features {
         /* bits 5 */
 	u8 int_crt_support:1;
 	u8 int_tv_support:1;
-	u8 rsvd11:6; /* finish byte */
+	u8 int_efp_support:1;
+	u8 dp_ssc_enb:1;	/* PCH attached eDP supports SSC */
+	u8 dp_ssc_freq:1;	/* SSC freq for PCH attached eDP */
+	u8 rsvd11:3; /* finish byte */
 } __attribute__((packed));
 
 /* pre-915 */
@@ -197,8 +202,7 @@ struct bdb_general_features {
 struct child_device_config {
 	u16 handle;
 	u16 device_type;
-	u8  i2c_speed;
-	u8  rsvd[9];
+	u8  device_id[10]; /* ascii string */
 	u16 addin_offset;
 	u8  dvo_port; /* See Device_PORT_* above */
 	u8  i2c_pin;
@@ -240,7 +244,7 @@ struct bdb_general_definitions {
 	 * And the device num is related with the size of general definition
 	 * block. It is obtained by using the following formula:
 	 * number = (block_size - sizeof(bdb_general_definitions))/
-	 * 		sizeof(child_device_config);
+	 *	     sizeof(child_device_config);
 	 */
 	struct child_device_config devices[0];
 } __attribute__((packed));
@@ -446,11 +450,11 @@ struct bdb_driver_features {
 #define EDP_VSWING_1_2V		3
 
 struct edp_power_seq {
-	u16 t3;
-	u16 t7;
+	u16 t1_t3;
+	u16 t8;
 	u16 t9;
 	u16 t10;
-	u16 t12;
+	u16 t11_t12;
 } __attribute__ ((packed));
 
 struct edp_link_params {
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 0979d8877880..fee0ad02c6d0 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -69,7 +69,7 @@ static void intel_crt_dpms(struct drm_encoder *encoder, int mode)
 	temp &= ~(ADPA_HSYNC_CNTL_DISABLE | ADPA_VSYNC_CNTL_DISABLE);
 	temp &= ~ADPA_DAC_ENABLE;
 
-	switch(mode) {
+	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 		temp |= ADPA_DAC_ENABLE;
 		break;
@@ -152,17 +152,13 @@ static void intel_crt_mode_set(struct drm_encoder *encoder,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 		adpa |= ADPA_VSYNC_ACTIVE_HIGH;
 
-	if (intel_crtc->pipe == 0) {
-		if (HAS_PCH_CPT(dev))
-			adpa |= PORT_TRANS_A_SEL_CPT;
-		else
-			adpa |= ADPA_PIPE_A_SELECT;
-	} else {
-		if (HAS_PCH_CPT(dev))
-			adpa |= PORT_TRANS_B_SEL_CPT;
-		else
-			adpa |= ADPA_PIPE_B_SELECT;
-	}
+	/* For CPT allow 3 pipe config, for others just use A or B */
+	if (HAS_PCH_CPT(dev))
+		adpa |= PORT_TRANS_SEL_CPT(intel_crtc->pipe);
+	else if (intel_crtc->pipe == 0)
+		adpa |= ADPA_PIPE_A_SELECT;
+	else
+		adpa |= ADPA_PIPE_B_SELECT;
 
 	if (!HAS_PCH_SPLIT(dev))
 		I915_WRITE(BCLRPAT(intel_crtc->pipe), 0);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 04411ad2e779..981b1f1c04d8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/vgaarb.h>
+#include <drm/drm_edid.h>
 #include "drmP.h"
 #include "intel_drv.h"
 #include "i915_drm.h"
@@ -42,39 +43,39 @@
 
 #define HAS_eDP (intel_pipe_has_type(crtc, INTEL_OUTPUT_EDP))
 
-bool intel_pipe_has_type (struct drm_crtc *crtc, int type);
+bool intel_pipe_has_type(struct drm_crtc *crtc, int type);
 static void intel_update_watermarks(struct drm_device *dev);
 static void intel_increase_pllclock(struct drm_crtc *crtc);
 static void intel_crtc_update_cursor(struct drm_crtc *crtc, bool on);
 
 typedef struct {
-    /* given values */
-    int n;
-    int m1, m2;
-    int p1, p2;
-    /* derived values */
-    int	dot;
-    int	vco;
-    int	m;
-    int	p;
+	/* given values */
+	int n;
+	int m1, m2;
+	int p1, p2;
+	/* derived values */
+	int	dot;
+	int	vco;
+	int	m;
+	int	p;
 } intel_clock_t;
 
 typedef struct {
-    int	min, max;
+	int	min, max;
 } intel_range_t;
 
 typedef struct {
-    int	dot_limit;
-    int	p2_slow, p2_fast;
+	int	dot_limit;
+	int	p2_slow, p2_fast;
 } intel_p2_t;
 
 #define INTEL_P2_NUM		      2
 typedef struct intel_limit intel_limit_t;
 struct intel_limit {
-    intel_range_t   dot, vco, n, m, m1, m2, p, p1;
-    intel_p2_t	    p2;
-    bool (* find_pll)(const intel_limit_t *, struct drm_crtc *,
-		      int, int, intel_clock_t *);
+	intel_range_t   dot, vco, n, m, m1, m2, p, p1;
+	intel_p2_t	    p2;
+	bool (* find_pll)(const intel_limit_t *, struct drm_crtc *,
+			int, int, intel_clock_t *);
 };
 
 /* FDI */
@@ -105,56 +106,56 @@ intel_fdi_link_freq(struct drm_device *dev)
 }
 
 static const intel_limit_t intel_limits_i8xx_dvo = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 930000, .max = 1400000 },
-        .n = { .min = 3, .max = 16 },
-        .m = { .min = 96, .max = 140 },
-        .m1 = { .min = 18, .max = 26 },
-        .m2 = { .min = 6, .max = 16 },
-        .p = { .min = 4, .max = 128 },
-        .p1 = { .min = 2, .max = 33 },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 930000, .max = 1400000 },
+	.n = { .min = 3, .max = 16 },
+	.m = { .min = 96, .max = 140 },
+	.m1 = { .min = 18, .max = 26 },
+	.m2 = { .min = 6, .max = 16 },
+	.p = { .min = 4, .max = 128 },
+	.p1 = { .min = 2, .max = 33 },
 	.p2 = { .dot_limit = 165000,
 		.p2_slow = 4, .p2_fast = 2 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i8xx_lvds = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 930000, .max = 1400000 },
-        .n = { .min = 3, .max = 16 },
-        .m = { .min = 96, .max = 140 },
-        .m1 = { .min = 18, .max = 26 },
-        .m2 = { .min = 6, .max = 16 },
-        .p = { .min = 4, .max = 128 },
-        .p1 = { .min = 1, .max = 6 },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 930000, .max = 1400000 },
+	.n = { .min = 3, .max = 16 },
+	.m = { .min = 96, .max = 140 },
+	.m1 = { .min = 18, .max = 26 },
+	.m2 = { .min = 6, .max = 16 },
+	.p = { .min = 4, .max = 128 },
+	.p1 = { .min = 1, .max = 6 },
 	.p2 = { .dot_limit = 165000,
 		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_sdvo = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1400000, .max = 2800000 },
-        .n = { .min = 1, .max = 6 },
-        .m = { .min = 70, .max = 120 },
-        .m1 = { .min = 10, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 5, .max = 80 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1400000, .max = 2800000 },
+	.n = { .min = 1, .max = 6 },
+	.m = { .min = 70, .max = 120 },
+	.m1 = { .min = 10, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 200000,
 		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_lvds = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1400000, .max = 2800000 },
-        .n = { .min = 1, .max = 6 },
-        .m = { .min = 70, .max = 120 },
-        .m1 = { .min = 10, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 7, .max = 98 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1400000, .max = 2800000 },
+	.n = { .min = 1, .max = 6 },
+	.m = { .min = 70, .max = 120 },
+	.m1 = { .min = 10, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 7, .max = 98 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 112000,
 		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
@@ -222,44 +223,44 @@ static const intel_limit_t intel_limits_g4x_dual_channel_lvds = {
 };
 
 static const intel_limit_t intel_limits_g4x_display_port = {
-        .dot = { .min = 161670, .max = 227000 },
-        .vco = { .min = 1750000, .max = 3500000},
-        .n = { .min = 1, .max = 2 },
-        .m = { .min = 97, .max = 108 },
-        .m1 = { .min = 0x10, .max = 0x12 },
-        .m2 = { .min = 0x05, .max = 0x06 },
-        .p = { .min = 10, .max = 20 },
-        .p1 = { .min = 1, .max = 2},
-        .p2 = { .dot_limit = 0,
+	.dot = { .min = 161670, .max = 227000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 97, .max = 108 },
+	.m1 = { .min = 0x10, .max = 0x12 },
+	.m2 = { .min = 0x05, .max = 0x06 },
+	.p = { .min = 10, .max = 20 },
+	.p1 = { .min = 1, .max = 2},
+	.p2 = { .dot_limit = 0,
 		.p2_slow = 10, .p2_fast = 10 },
-        .find_pll = intel_find_pll_g4x_dp,
+	.find_pll = intel_find_pll_g4x_dp,
 };
 
 static const intel_limit_t intel_limits_pineview_sdvo = {
-        .dot = { .min = 20000, .max = 400000},
-        .vco = { .min = 1700000, .max = 3500000 },
+	.dot = { .min = 20000, .max = 400000},
+	.vco = { .min = 1700000, .max = 3500000 },
 	/* Pineview's Ncounter is a ring counter */
-        .n = { .min = 3, .max = 6 },
-        .m = { .min = 2, .max = 256 },
+	.n = { .min = 3, .max = 6 },
+	.m = { .min = 2, .max = 256 },
 	/* Pineview only has one combined m divider, which we treat as m2. */
-        .m1 = { .min = 0, .max = 0 },
-        .m2 = { .min = 0, .max = 254 },
-        .p = { .min = 5, .max = 80 },
-        .p1 = { .min = 1, .max = 8 },
+	.m1 = { .min = 0, .max = 0 },
+	.m2 = { .min = 0, .max = 254 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 200000,
 		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_pineview_lvds = {
-        .dot = { .min = 20000, .max = 400000 },
-        .vco = { .min = 1700000, .max = 3500000 },
-        .n = { .min = 3, .max = 6 },
-        .m = { .min = 2, .max = 256 },
-        .m1 = { .min = 0, .max = 0 },
-        .m2 = { .min = 0, .max = 254 },
-        .p = { .min = 7, .max = 112 },
-        .p1 = { .min = 1, .max = 8 },
+	.dot = { .min = 20000, .max = 400000 },
+	.vco = { .min = 1700000, .max = 3500000 },
+	.n = { .min = 3, .max = 6 },
+	.m = { .min = 2, .max = 256 },
+	.m1 = { .min = 0, .max = 0 },
+	.m2 = { .min = 0, .max = 254 },
+	.p = { .min = 7, .max = 112 },
+	.p1 = { .min = 1, .max = 8 },
 	.p2 = { .dot_limit = 112000,
 		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_find_best_PLL,
@@ -321,7 +322,7 @@ static const intel_limit_t intel_limits_ironlake_single_lvds_100m = {
 	.m1 = { .min = 12, .max = 22 },
 	.m2 = { .min = 5, .max = 9 },
 	.p = { .min = 28, .max = 112 },
-	.p1 = { .min = 2,.max = 8 },
+	.p1 = { .min = 2, .max = 8 },
 	.p2 = { .dot_limit = 225000,
 		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
@@ -335,24 +336,24 @@ static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = {
 	.m1 = { .min = 12, .max = 22 },
 	.m2 = { .min = 5, .max = 9 },
 	.p = { .min = 14, .max = 42 },
-	.p1 = { .min = 2,.max = 6 },
+	.p1 = { .min = 2, .max = 6 },
 	.p2 = { .dot_limit = 225000,
 		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_display_port = {
-        .dot = { .min = 25000, .max = 350000 },
-        .vco = { .min = 1760000, .max = 3510000},
-        .n = { .min = 1, .max = 2 },
-        .m = { .min = 81, .max = 90 },
-        .m1 = { .min = 12, .max = 22 },
-        .m2 = { .min = 5, .max = 9 },
-        .p = { .min = 10, .max = 20 },
-        .p1 = { .min = 1, .max = 2},
-        .p2 = { .dot_limit = 0,
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000},
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 81, .max = 90 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 10, .max = 20 },
+	.p1 = { .min = 1, .max = 2},
+	.p2 = { .dot_limit = 0,
 		.p2_slow = 10, .p2_fast = 10 },
-        .find_pll = intel_find_pll_ironlake_dp,
+	.find_pll = intel_find_pll_ironlake_dp,
 };
 
 static const intel_limit_t *intel_ironlake_limit(struct drm_crtc *crtc,
@@ -404,7 +405,7 @@ static const intel_limit_t *intel_g4x_limit(struct drm_crtc *crtc)
 		limit = &intel_limits_g4x_hdmi;
 	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_SDVO)) {
 		limit = &intel_limits_g4x_sdvo;
-	} else if (intel_pipe_has_type (crtc, INTEL_OUTPUT_DISPLAYPORT)) {
+	} else if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
 		limit = &intel_limits_g4x_display_port;
 	} else /* The option is for other outputs */
 		limit = &intel_limits_i9xx_sdvo;
@@ -488,26 +489,26 @@ static bool intel_PLL_is_valid(struct drm_device *dev,
 			       const intel_clock_t *clock)
 {
 	if (clock->p1  < limit->p1.min  || limit->p1.max  < clock->p1)
-		INTELPllInvalid ("p1 out of range\n");
+		INTELPllInvalid("p1 out of range\n");
 	if (clock->p   < limit->p.min   || limit->p.max   < clock->p)
-		INTELPllInvalid ("p out of range\n");
+		INTELPllInvalid("p out of range\n");
 	if (clock->m2  < limit->m2.min  || limit->m2.max  < clock->m2)
-		INTELPllInvalid ("m2 out of range\n");
+		INTELPllInvalid("m2 out of range\n");
 	if (clock->m1  < limit->m1.min  || limit->m1.max  < clock->m1)
-		INTELPllInvalid ("m1 out of range\n");
+		INTELPllInvalid("m1 out of range\n");
 	if (clock->m1 <= clock->m2 && !IS_PINEVIEW(dev))
-		INTELPllInvalid ("m1 <= m2\n");
+		INTELPllInvalid("m1 <= m2\n");
 	if (clock->m   < limit->m.min   || limit->m.max   < clock->m)
-		INTELPllInvalid ("m out of range\n");
+		INTELPllInvalid("m out of range\n");
 	if (clock->n   < limit->n.min   || limit->n.max   < clock->n)
-		INTELPllInvalid ("n out of range\n");
+		INTELPllInvalid("n out of range\n");
 	if (clock->vco < limit->vco.min || limit->vco.max < clock->vco)
-		INTELPllInvalid ("vco out of range\n");
+		INTELPllInvalid("vco out of range\n");
 	/* XXX: We may need to be checking "Dot clock" depending on the multiplier,
 	 * connector, etc., rather than just a single range.
 	 */
 	if (clock->dot < limit->dot.min || limit->dot.max < clock->dot)
-		INTELPllInvalid ("dot out of range\n");
+		INTELPllInvalid("dot out of range\n");
 
 	return true;
 }
@@ -542,7 +543,7 @@ intel_find_best_PLL(const intel_limit_t *limit, struct drm_crtc *crtc,
 			clock.p2 = limit->p2.p2_fast;
 	}
 
-	memset (best_clock, 0, sizeof (*best_clock));
+	memset(best_clock, 0, sizeof(*best_clock));
 
 	for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max;
 	     clock.m1++) {
@@ -802,6 +803,19 @@ static void assert_pch_pll(struct drm_i915_private *dev_priv,
 	u32 val;
 	bool cur_state;
 
+	if (HAS_PCH_CPT(dev_priv->dev)) {
+		u32 pch_dpll;
+
+		pch_dpll = I915_READ(PCH_DPLL_SEL);
+
+		/* Make sure the selected PLL is enabled to the transcoder */
+		WARN(!((pch_dpll >> (4 * pipe)) & 8),
+		     "transcoder %d PLL not enabled\n", pipe);
+
+		/* Convert the transcoder pipe number to a pll pipe number */
+		pipe = (pch_dpll >> (4 * pipe)) & 1;
+	}
+
 	reg = PCH_DPLL(pipe);
 	val = I915_READ(reg);
 	cur_state = !!(val & DPLL_VCO_ENABLE);
@@ -1171,6 +1185,9 @@ static void intel_enable_pch_pll(struct drm_i915_private *dev_priv,
 	int reg;
 	u32 val;
 
+	if (pipe > 1)
+		return;
+
 	/* PCH only available on ILK+ */
 	BUG_ON(dev_priv->info->gen < 5);
 
@@ -1191,6 +1208,9 @@ static void intel_disable_pch_pll(struct drm_i915_private *dev_priv,
 	int reg;
 	u32 val;
 
+	if (pipe > 1)
+		return;
+
 	/* PCH only available on ILK+ */
 	BUG_ON(dev_priv->info->gen < 5);
 
@@ -1256,7 +1276,7 @@ static void intel_disable_transcoder(struct drm_i915_private *dev_priv,
 	I915_WRITE(reg, val);
 	/* wait for PCH transcoder off, transcoder state */
 	if (wait_for((I915_READ(reg) & TRANS_STATE_ENABLE) == 0, 50))
-		DRM_ERROR("failed to disable transcoder\n");
+		DRM_ERROR("failed to disable transcoder %d\n", pipe);
 }
 
 /**
@@ -2085,6 +2105,7 @@ static int ironlake_update_plane(struct drm_crtc *crtc,
 	switch (plane) {
 	case 0:
 	case 1:
+	case 2:
 		break;
 	default:
 		DRM_ERROR("Can't update plane %d in SAREA\n", plane);
@@ -2184,6 +2205,10 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y,
 	case 0:
 	case 1:
 		break;
+	case 2:
+		if (IS_IVYBRIDGE(dev))
+			break;
+		/* fall through otherwise */
 	default:
 		DRM_ERROR("no plane for crtc\n");
 		return -EINVAL;
@@ -2440,7 +2465,7 @@ static void ironlake_fdi_link_train(struct drm_crtc *crtc)
 
 }
 
-static const int snb_b_fdi_train_param [] = {
+static const int snb_b_fdi_train_param[] = {
 	FDI_LINK_TRAIN_400MV_0DB_SNB_B,
 	FDI_LINK_TRAIN_400MV_6DB_SNB_B,
 	FDI_LINK_TRAIN_600MV_3_5DB_SNB_B,
@@ -2496,7 +2521,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc)
 	if (HAS_PCH_CPT(dev))
 		cpt_phase_pointer_enable(dev, pipe);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2545,7 +2570,7 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc)
 	POSTING_READ(reg);
 	udelay(150);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2600,6 +2625,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
 	temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
 	temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
 	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
+	temp |= FDI_COMPOSITE_SYNC;
 	I915_WRITE(reg, temp | FDI_TX_ENABLE);
 
 	reg = FDI_RX_CTL(pipe);
@@ -2607,6 +2633,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
 	temp &= ~FDI_LINK_TRAIN_AUTO;
 	temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
 	temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
+	temp |= FDI_COMPOSITE_SYNC;
 	I915_WRITE(reg, temp | FDI_RX_ENABLE);
 
 	POSTING_READ(reg);
@@ -2615,7 +2642,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
 	if (HAS_PCH_CPT(dev))
 		cpt_phase_pointer_enable(dev, pipe);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2657,7 +2684,7 @@ static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
 	POSTING_READ(reg);
 	udelay(150);
 
-	for (i = 0; i < 4; i++ ) {
+	for (i = 0; i < 4; i++) {
 		reg = FDI_TX_CTL(pipe);
 		temp = I915_READ(reg);
 		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
@@ -2866,7 +2893,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
-	u32 reg, temp;
+	u32 reg, temp, transc_sel;
 
 	/* For PCH output, training FDI link */
 	dev_priv->display.fdi_link_train(crtc);
@@ -2874,12 +2901,21 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
 	intel_enable_pch_pll(dev_priv, pipe);
 
 	if (HAS_PCH_CPT(dev)) {
+		transc_sel = intel_crtc->use_pll_a ? TRANSC_DPLLA_SEL :
+			TRANSC_DPLLB_SEL;
+
 		/* Be sure PCH DPLL SEL is set */
 		temp = I915_READ(PCH_DPLL_SEL);
-		if (pipe == 0 && (temp & TRANSA_DPLL_ENABLE) == 0)
+		if (pipe == 0) {
+			temp &= ~(TRANSA_DPLLB_SEL);
 			temp |= (TRANSA_DPLL_ENABLE | TRANSA_DPLLA_SEL);
-		else if (pipe == 1 && (temp & TRANSB_DPLL_ENABLE) == 0)
+		} else if (pipe == 1) {
+			temp &= ~(TRANSB_DPLLB_SEL);
 			temp |= (TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL);
+		} else if (pipe == 2) {
+			temp &= ~(TRANSC_DPLLB_SEL);
+			temp |= (TRANSC_DPLL_ENABLE | transc_sel);
+		}
 		I915_WRITE(PCH_DPLL_SEL, temp);
 	}
 
@@ -2935,6 +2971,24 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
 	intel_enable_transcoder(dev_priv, pipe);
 }
 
+void intel_cpt_verify_modeset(struct drm_device *dev, int pipe)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int dslreg = PIPEDSL(pipe), tc2reg = TRANS_CHICKEN2(pipe);
+	u32 temp;
+
+	temp = I915_READ(dslreg);
+	udelay(500);
+	if (wait_for(I915_READ(dslreg) != temp, 5)) {
+		/* Without this, mode sets may fail silently on FDI */
+		I915_WRITE(tc2reg, TRANS_AUTOTRAIN_GEN_STALL_DIS);
+		udelay(250);
+		I915_WRITE(tc2reg, 0);
+		if (wait_for(I915_READ(dslreg) != temp, 5))
+			DRM_ERROR("mode set failed: pipe %d stuck\n", pipe);
+	}
+}
+
 static void ironlake_crtc_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
@@ -3045,13 +3099,13 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 		temp = I915_READ(PCH_DPLL_SEL);
 		switch (pipe) {
 		case 0:
-			temp &= ~(TRANSA_DPLL_ENABLE | TRANSA_DPLLA_SEL);
+			temp &= ~(TRANSA_DPLL_ENABLE | TRANSA_DPLLB_SEL);
 			break;
 		case 1:
 			temp &= ~(TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL);
 			break;
 		case 2:
-			/* FIXME: manage transcoder PLLs? */
+			/* C shares PLL A or B */
 			temp &= ~(TRANSC_DPLL_ENABLE | TRANSC_DPLLB_SEL);
 			break;
 		default:
@@ -3061,7 +3115,8 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 	}
 
 	/* disable PCH DPLL */
-	intel_disable_pch_pll(dev_priv, pipe);
+	if (!intel_crtc->no_pll)
+		intel_disable_pch_pll(dev_priv, pipe);
 
 	/* Switch from PCDclk to Rawclk */
 	reg = FDI_RX_CTL(pipe);
@@ -3293,18 +3348,25 @@ static void ironlake_crtc_commit(struct drm_crtc *crtc)
 	ironlake_crtc_enable(crtc);
 }
 
-void intel_encoder_prepare (struct drm_encoder *encoder)
+void intel_encoder_prepare(struct drm_encoder *encoder)
 {
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 	/* lvds has its own version of prepare see intel_lvds_prepare */
 	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_OFF);
 }
 
-void intel_encoder_commit (struct drm_encoder *encoder)
+void intel_encoder_commit(struct drm_encoder *encoder)
 {
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
+	struct drm_device *dev = encoder->dev;
+	struct intel_encoder *intel_encoder = to_intel_encoder(encoder);
+	struct intel_crtc *intel_crtc = to_intel_crtc(intel_encoder->base.crtc);
+
 	/* lvds has its own version of commit see intel_lvds_commit */
 	encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
+
+	if (HAS_PCH_CPT(dev))
+		intel_cpt_verify_modeset(dev, intel_crtc->pipe);
 }
 
 void intel_encoder_destroy(struct drm_encoder *encoder)
@@ -4478,6 +4540,20 @@ static void sandybridge_update_wm(struct drm_device *dev)
 		enabled |= 2;
 	}
 
+	/* IVB has 3 pipes */
+	if (IS_IVYBRIDGE(dev) &&
+	    g4x_compute_wm0(dev, 2,
+			    &sandybridge_display_wm_info, latency,
+			    &sandybridge_cursor_wm_info, latency,
+			    &plane_wm, &cursor_wm)) {
+		I915_WRITE(WM0_PIPEC_IVB,
+			   (plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm);
+		DRM_DEBUG_KMS("FIFO watermarks For pipe C -"
+			      " plane %d, cursor: %d\n",
+			      plane_wm, cursor_wm);
+		enabled |= 3;
+	}
+
 	/*
 	 * Calculate and update the self-refresh watermark only when one
 	 * display plane is used.
@@ -4584,7 +4660,9 @@ static void intel_update_watermarks(struct drm_device *dev)
 
 static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 {
-	return dev_priv->lvds_use_ssc && i915_panel_use_ssc
+	if (i915_panel_use_ssc >= 0)
+		return i915_panel_use_ssc != 0;
+	return dev_priv->lvds_use_ssc
 		&& !(dev_priv->quirks & QUIRK_LVDS_SSC_DISABLE);
 }
 
@@ -5107,36 +5185,52 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
 	return ret;
 }
 
-static void ironlake_update_pch_refclk(struct drm_device *dev)
+/*
+ * Initialize reference clocks when the driver loads
+ */
+void ironlake_init_pch_refclk(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct drm_crtc *crtc;
 	struct intel_encoder *encoder;
-	struct intel_encoder *has_edp_encoder = NULL;
 	u32 temp;
 	bool has_lvds = false;
+	bool has_cpu_edp = false;
+	bool has_pch_edp = false;
+	bool has_panel = false;
+	bool has_ck505 = false;
+	bool can_ssc = false;
 
 	/* We need to take the global config into account */
-	list_for_each_entry(crtc, &mode_config->crtc_list, head) {
-		if (!crtc->enabled)
-			continue;
-
-		list_for_each_entry(encoder, &mode_config->encoder_list,
-				    base.head) {
-			if (encoder->base.crtc != crtc)
-				continue;
-
-			switch (encoder->type) {
-			case INTEL_OUTPUT_LVDS:
-				has_lvds = true;
-			case INTEL_OUTPUT_EDP:
-				has_edp_encoder = encoder;
-				break;
-			}
+	list_for_each_entry(encoder, &mode_config->encoder_list,
+			    base.head) {
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			has_panel = true;
+			has_lvds = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			has_panel = true;
+			if (intel_encoder_is_pch_edp(&encoder->base))
+				has_pch_edp = true;
+			else
+				has_cpu_edp = true;
+			break;
 		}
 	}
 
+	if (HAS_PCH_IBX(dev)) {
+		has_ck505 = dev_priv->display_clock_mode;
+		can_ssc = has_ck505;
+	} else {
+		has_ck505 = false;
+		can_ssc = true;
+	}
+
+	DRM_DEBUG_KMS("has_panel %d has_lvds %d has_pch_edp %d has_cpu_edp %d has_ck505 %d\n",
+		      has_panel, has_lvds, has_pch_edp, has_cpu_edp,
+		      has_ck505);
+
 	/* Ironlake: try to setup display ref clock before DPLL
 	 * enabling. This is only under driver's control after
 	 * PCH B stepping, previous chipset stepping should be
@@ -5145,43 +5239,102 @@ static void ironlake_update_pch_refclk(struct drm_device *dev)
 	temp = I915_READ(PCH_DREF_CONTROL);
 	/* Always enable nonspread source */
 	temp &= ~DREF_NONSPREAD_SOURCE_MASK;
-	temp |= DREF_NONSPREAD_SOURCE_ENABLE;
-	temp &= ~DREF_SSC_SOURCE_MASK;
-	temp |= DREF_SSC_SOURCE_ENABLE;
-	I915_WRITE(PCH_DREF_CONTROL, temp);
 
-	POSTING_READ(PCH_DREF_CONTROL);
-	udelay(200);
+	if (has_ck505)
+		temp |= DREF_NONSPREAD_CK505_ENABLE;
+	else
+		temp |= DREF_NONSPREAD_SOURCE_ENABLE;
 
-	if (has_edp_encoder) {
-		if (intel_panel_use_ssc(dev_priv)) {
-			temp |= DREF_SSC1_ENABLE;
-			I915_WRITE(PCH_DREF_CONTROL, temp);
+	if (has_panel) {
+		temp &= ~DREF_SSC_SOURCE_MASK;
+		temp |= DREF_SSC_SOURCE_ENABLE;
 
-			POSTING_READ(PCH_DREF_CONTROL);
-			udelay(200);
+		/* SSC must be turned on before enabling the CPU output  */
+		if (intel_panel_use_ssc(dev_priv) && can_ssc) {
+			DRM_DEBUG_KMS("Using SSC on panel\n");
+			temp |= DREF_SSC1_ENABLE;
 		}
+
+		/* Get SSC going before enabling the outputs */
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
+
 		temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
 
 		/* Enable CPU source on CPU attached eDP */
-		if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			if (intel_panel_use_ssc(dev_priv))
+		if (has_cpu_edp) {
+			if (intel_panel_use_ssc(dev_priv) && can_ssc) {
+				DRM_DEBUG_KMS("Using SSC on eDP\n");
 				temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
+			}
 			else
 				temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
-		} else {
-			/* Enable SSC on PCH eDP if needed */
-			if (intel_panel_use_ssc(dev_priv)) {
-				DRM_ERROR("enabling SSC on PCH\n");
-				temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
-			}
-		}
+		} else
+			temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
+	} else {
+		DRM_DEBUG_KMS("Disabling SSC entirely\n");
+
+		temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+		/* Turn off CPU output */
+		temp |= DREF_CPU_SOURCE_OUTPUT_DISABLE;
+
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
+
+		/* Turn off the SSC source */
+		temp &= ~DREF_SSC_SOURCE_MASK;
+		temp |= DREF_SSC_SOURCE_DISABLE;
+
+		/* Turn off SSC1 */
+		temp &= ~ DREF_SSC1_ENABLE;
+
 		I915_WRITE(PCH_DREF_CONTROL, temp);
 		POSTING_READ(PCH_DREF_CONTROL);
 		udelay(200);
 	}
 }
 
+static int ironlake_get_refclk(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_encoder *encoder;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *edp_encoder = NULL;
+	int num_connectors = 0;
+	bool is_lvds = false;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		if (encoder->base.crtc != crtc)
+			continue;
+
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			edp_encoder = encoder;
+			break;
+		}
+		num_connectors++;
+	}
+
+	if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) {
+		DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n",
+			      dev_priv->lvds_ssc_freq);
+		return dev_priv->lvds_ssc_freq * 1000;
+	}
+
+	return 120000;
+}
+
 static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 				  struct drm_display_mode *mode,
 				  struct drm_display_mode *adjusted_mode,
@@ -5241,16 +5394,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		num_connectors++;
 	}
 
-	if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) {
-		refclk = dev_priv->lvds_ssc_freq * 1000;
-		DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n",
-			      refclk / 1000);
-	} else {
-		refclk = 96000;
-		if (!has_edp_encoder ||
-		    intel_encoder_is_pch_edp(&has_edp_encoder->base))
-			refclk = 120000; /* 120Mhz refclk */
-	}
+	refclk = ironlake_get_refclk(crtc);
 
 	/*
 	 * Returns a set of divisors for the desired target clock with the given
@@ -5377,8 +5521,6 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw,
 			     &m_n);
 
-	ironlake_update_pch_refclk(dev);
-
 	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
 	if (has_reduced_clock)
 		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
@@ -5450,39 +5592,32 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	/* Set up the display plane register */
 	dspcntr = DISPPLANE_GAMMA_ENABLE;
 
-	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
+	DRM_DEBUG_KMS("Mode for pipe %d:\n", pipe);
 	drm_mode_debug_printmodeline(mode);
 
 	/* PCH eDP needs FDI, but CPU eDP does not */
-	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-		I915_WRITE(PCH_FP0(pipe), fp);
-		I915_WRITE(PCH_DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
-
-		POSTING_READ(PCH_DPLL(pipe));
-		udelay(150);
-	}
+	if (!intel_crtc->no_pll) {
+		if (!has_edp_encoder ||
+		    intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+			I915_WRITE(PCH_FP0(pipe), fp);
+			I915_WRITE(PCH_DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 
-	/* enable transcoder DPLL */
-	if (HAS_PCH_CPT(dev)) {
-		temp = I915_READ(PCH_DPLL_SEL);
-		switch (pipe) {
-		case 0:
-			temp |= TRANSA_DPLL_ENABLE | TRANSA_DPLLA_SEL;
-			break;
-		case 1:
-			temp |=	TRANSB_DPLL_ENABLE | TRANSB_DPLLB_SEL;
-			break;
-		case 2:
-			/* FIXME: manage transcoder PLLs? */
-			temp |= TRANSC_DPLL_ENABLE | TRANSC_DPLLB_SEL;
-			break;
-		default:
-			BUG();
+			POSTING_READ(PCH_DPLL(pipe));
+			udelay(150);
+		}
+	} else {
+		if (dpll == (I915_READ(PCH_DPLL(0)) & 0x7fffffff) &&
+		    fp == I915_READ(PCH_FP0(0))) {
+			intel_crtc->use_pll_a = true;
+			DRM_DEBUG_KMS("using pipe a dpll\n");
+		} else if (dpll == (I915_READ(PCH_DPLL(1)) & 0x7fffffff) &&
+			   fp == I915_READ(PCH_FP0(1))) {
+			intel_crtc->use_pll_a = false;
+			DRM_DEBUG_KMS("using pipe b dpll\n");
+		} else {
+			DRM_DEBUG_KMS("no matching PLL configuration for pipe 2\n");
+			return -EINVAL;
 		}
-		I915_WRITE(PCH_DPLL_SEL, temp);
-
-		POSTING_READ(PCH_DPLL_SEL);
-		udelay(150);
 	}
 
 	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
@@ -5492,17 +5627,13 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	if (is_lvds) {
 		temp = I915_READ(PCH_LVDS);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
-		if (pipe == 1) {
-			if (HAS_PCH_CPT(dev))
-				temp |= PORT_TRANS_B_SEL_CPT;
-			else
-				temp |= LVDS_PIPEB_SELECT;
-		} else {
-			if (HAS_PCH_CPT(dev))
-				temp &= ~PORT_TRANS_SEL_MASK;
-			else
-				temp &= ~LVDS_PIPEB_SELECT;
-		}
+		if (HAS_PCH_CPT(dev))
+			temp |= PORT_TRANS_SEL_CPT(pipe);
+		else if (pipe == 1)
+			temp |= LVDS_PIPEB_SELECT;
+		else
+			temp &= ~LVDS_PIPEB_SELECT;
+
 		/* set the corresponsding LVDS_BORDER bit */
 		temp |= dev_priv->lvds_border_bits;
 		/* Set the B0-B3 data pairs corresponding to whether we're going to
@@ -5552,8 +5683,9 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 		I915_WRITE(TRANSDPLINK_N1(pipe), 0);
 	}
 
-	if (!has_edp_encoder ||
-	    intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+	if (!intel_crtc->no_pll &&
+	    (!has_edp_encoder ||
+	     intel_encoder_is_pch_edp(&has_edp_encoder->base))) {
 		I915_WRITE(PCH_DPLL(pipe), dpll);
 
 		/* Wait for the clocks to stabilize. */
@@ -5569,18 +5701,20 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
 	}
 
 	intel_crtc->lowfreq_avail = false;
-	if (is_lvds && has_reduced_clock && i915_powersave) {
-		I915_WRITE(PCH_FP1(pipe), fp2);
-		intel_crtc->lowfreq_avail = true;
-		if (HAS_PIPE_CXSR(dev)) {
-			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
-			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
-		}
-	} else {
-		I915_WRITE(PCH_FP1(pipe), fp);
-		if (HAS_PIPE_CXSR(dev)) {
-			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
-			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
+	if (!intel_crtc->no_pll) {
+		if (is_lvds && has_reduced_clock && i915_powersave) {
+			I915_WRITE(PCH_FP1(pipe), fp2);
+			intel_crtc->lowfreq_avail = true;
+			if (HAS_PIPE_CXSR(dev)) {
+				DRM_DEBUG_KMS("enabling CxSR downclocking\n");
+				pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
+			}
+		} else {
+			I915_WRITE(PCH_FP1(pipe), fp);
+			if (HAS_PIPE_CXSR(dev)) {
+				DRM_DEBUG_KMS("disabling CxSR downclocking\n");
+				pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
+			}
 		}
 	}
 
@@ -5677,6 +5811,131 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	return ret;
 }
 
+static void g4x_write_eld(struct drm_connector *connector,
+			  struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	uint8_t *eld = connector->eld;
+	uint32_t eldv;
+	uint32_t len;
+	uint32_t i;
+
+	i = I915_READ(G4X_AUD_VID_DID);
+
+	if (i == INTEL_AUDIO_DEVBLC || i == INTEL_AUDIO_DEVCL)
+		eldv = G4X_ELDV_DEVCL_DEVBLC;
+	else
+		eldv = G4X_ELDV_DEVCTG;
+
+	i = I915_READ(G4X_AUD_CNTL_ST);
+	i &= ~(eldv | G4X_ELD_ADDR);
+	len = (i >> 9) & 0x1f;		/* ELD buffer size */
+	I915_WRITE(G4X_AUD_CNTL_ST, i);
+
+	if (!eld[0])
+		return;
+
+	len = min_t(uint8_t, eld[2], len);
+	DRM_DEBUG_DRIVER("ELD size %d\n", len);
+	for (i = 0; i < len; i++)
+		I915_WRITE(G4X_HDMIW_HDMIEDID, *((uint32_t *)eld + i));
+
+	i = I915_READ(G4X_AUD_CNTL_ST);
+	i |= eldv;
+	I915_WRITE(G4X_AUD_CNTL_ST, i);
+}
+
+static void ironlake_write_eld(struct drm_connector *connector,
+				     struct drm_crtc *crtc)
+{
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	uint8_t *eld = connector->eld;
+	uint32_t eldv;
+	uint32_t i;
+	int len;
+	int hdmiw_hdmiedid;
+	int aud_cntl_st;
+	int aud_cntrl_st2;
+
+	if (IS_IVYBRIDGE(connector->dev)) {
+		hdmiw_hdmiedid = GEN7_HDMIW_HDMIEDID_A;
+		aud_cntl_st = GEN7_AUD_CNTRL_ST_A;
+		aud_cntrl_st2 = GEN7_AUD_CNTRL_ST2;
+	} else {
+		hdmiw_hdmiedid = GEN5_HDMIW_HDMIEDID_A;
+		aud_cntl_st = GEN5_AUD_CNTL_ST_A;
+		aud_cntrl_st2 = GEN5_AUD_CNTL_ST2;
+	}
+
+	i = to_intel_crtc(crtc)->pipe;
+	hdmiw_hdmiedid += i * 0x100;
+	aud_cntl_st += i * 0x100;
+
+	DRM_DEBUG_DRIVER("ELD on pipe %c\n", pipe_name(i));
+
+	i = I915_READ(aud_cntl_st);
+	i = (i >> 29) & 0x3;		/* DIP_Port_Select, 0x1 = PortB */
+	if (!i) {
+		DRM_DEBUG_DRIVER("Audio directed to unknown port\n");
+		/* operate blindly on all ports */
+		eldv = GEN5_ELD_VALIDB;
+		eldv |= GEN5_ELD_VALIDB << 4;
+		eldv |= GEN5_ELD_VALIDB << 8;
+	} else {
+		DRM_DEBUG_DRIVER("ELD on port %c\n", 'A' + i);
+		eldv = GEN5_ELD_VALIDB << ((i - 1) * 4);
+	}
+
+	i = I915_READ(aud_cntrl_st2);
+	i &= ~eldv;
+	I915_WRITE(aud_cntrl_st2, i);
+
+	if (!eld[0])
+		return;
+
+	if (intel_pipe_has_type(crtc, INTEL_OUTPUT_DISPLAYPORT)) {
+		DRM_DEBUG_DRIVER("ELD: DisplayPort detected\n");
+		eld[5] |= (1 << 2);	/* Conn_Type, 0x1 = DisplayPort */
+	}
+
+	i = I915_READ(aud_cntl_st);
+	i &= ~GEN5_ELD_ADDRESS;
+	I915_WRITE(aud_cntl_st, i);
+
+	len = min_t(uint8_t, eld[2], 21);	/* 84 bytes of hw ELD buffer */
+	DRM_DEBUG_DRIVER("ELD size %d\n", len);
+	for (i = 0; i < len; i++)
+		I915_WRITE(hdmiw_hdmiedid, *((uint32_t *)eld + i));
+
+	i = I915_READ(aud_cntrl_st2);
+	i |= eldv;
+	I915_WRITE(aud_cntrl_st2, i);
+}
+
+void intel_write_eld(struct drm_encoder *encoder,
+		     struct drm_display_mode *mode)
+{
+	struct drm_crtc *crtc = encoder->crtc;
+	struct drm_connector *connector;
+	struct drm_device *dev = encoder->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	connector = drm_select_eld(encoder, mode);
+	if (!connector)
+		return;
+
+	DRM_DEBUG_DRIVER("ELD on [CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
+			 connector->base.id,
+			 drm_get_connector_name(connector),
+			 connector->encoder->base.id,
+			 drm_get_encoder_name(connector->encoder));
+
+	connector->eld[6] = drm_av_sync_delay(connector, mode) / 2;
+
+	if (dev_priv->display.write_eld)
+		dev_priv->display.write_eld(connector, crtc);
+}
+
 /** Loads the palette/gamma unit for the CRTC with the prepared values */
 void intel_crtc_load_lut(struct drm_crtc *crtc)
 {
@@ -5758,6 +6017,31 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base)
 	I915_WRITE(CURBASE(pipe), base);
 }
 
+static void ivb_update_cursor(struct drm_crtc *crtc, u32 base)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	bool visible = base != 0;
+
+	if (intel_crtc->cursor_visible != visible) {
+		uint32_t cntl = I915_READ(CURCNTR_IVB(pipe));
+		if (base) {
+			cntl &= ~CURSOR_MODE;
+			cntl |= CURSOR_MODE_64_ARGB_AX | MCURSOR_GAMMA_ENABLE;
+		} else {
+			cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE);
+			cntl |= CURSOR_MODE_DISABLE;
+		}
+		I915_WRITE(CURCNTR_IVB(pipe), cntl);
+
+		intel_crtc->cursor_visible = visible;
+	}
+	/* and commit changes on next vblank */
+	I915_WRITE(CURBASE_IVB(pipe), base);
+}
+
 /* If no-part of the cursor is visible on the framebuffer, then the GPU may hang... */
 static void intel_crtc_update_cursor(struct drm_crtc *crtc,
 				     bool on)
@@ -5805,11 +6089,16 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc,
 	if (!visible && !intel_crtc->cursor_visible)
 		return;
 
-	I915_WRITE(CURPOS(pipe), pos);
-	if (IS_845G(dev) || IS_I865G(dev))
-		i845_update_cursor(crtc, base);
-	else
-		i9xx_update_cursor(crtc, base);
+	if (IS_IVYBRIDGE(dev)) {
+		I915_WRITE(CURPOS_IVB(pipe), pos);
+		ivb_update_cursor(crtc, base);
+	} else {
+		I915_WRITE(CURPOS(pipe), pos);
+		if (IS_845G(dev) || IS_I865G(dev))
+			i845_update_cursor(crtc, base);
+		else
+			i9xx_update_cursor(crtc, base);
+	}
 
 	if (visible)
 		intel_mark_busy(dev, to_intel_framebuffer(crtc->fb)->obj);
@@ -7071,6 +7360,8 @@ static void intel_crtc_init(struct drm_device *dev, int pipe)
 	intel_crtc->bpp = 24; /* default for pre-Ironlake */
 
 	if (HAS_PCH_SPLIT(dev)) {
+		if (pipe == 2 && IS_IVYBRIDGE(dev))
+			intel_crtc->no_pll = true;
 		intel_helper_funcs.prepare = ironlake_crtc_prepare;
 		intel_helper_funcs.commit = ironlake_crtc_commit;
 	} else {
@@ -7250,6 +7541,9 @@ static void intel_setup_outputs(struct drm_device *dev)
 
 	/* disable all the possible outputs/crtcs before entering KMS mode */
 	drm_helper_disable_unused_functions(dev);
+
+	if (HAS_PCH_SPLIT(dev))
+		ironlake_init_pch_refclk(dev);
 }
 
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
@@ -7494,6 +7788,10 @@ void gen6_disable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	I915_WRITE(GEN6_PMIER, 0);
+	/* Complete PM interrupt masking here doesn't race with the rps work
+	 * item again unmasking PM interrupts because that is using a different
+	 * register (PMIMR) to mask PM interrupts. The only risk is in leaving
+	 * stale bits in PMIIR and PMIMR which gen6_enable_rps will clean up. */
 
 	spin_lock_irq(&dev_priv->rps_lock);
 	dev_priv->pm_iir = 0;
@@ -8154,7 +8452,7 @@ static void intel_init_display(struct drm_device *dev)
 	}
 
 	/* Returns the core display clock speed */
-	if (IS_I945G(dev) || (IS_G33(dev) && ! IS_PINEVIEW_M(dev)))
+	if (IS_I945G(dev) || (IS_G33(dev) && !IS_PINEVIEW_M(dev)))
 		dev_priv->display.get_display_clock_speed =
 			i945_get_display_clock_speed;
 	else if (IS_I915G(dev))
@@ -8193,6 +8491,7 @@ static void intel_init_display(struct drm_device *dev)
 			}
 			dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
 			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else if (IS_GEN6(dev)) {
 			if (SNB_READ_WM0_LATENCY()) {
 				dev_priv->display.update_wm = sandybridge_update_wm;
@@ -8203,6 +8502,7 @@ static void intel_init_display(struct drm_device *dev)
 			}
 			dev_priv->display.fdi_link_train = gen6_fdi_link_train;
 			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else if (IS_IVYBRIDGE(dev)) {
 			/* FIXME: detect B0+ stepping and use auto training */
 			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
@@ -8214,7 +8514,7 @@ static void intel_init_display(struct drm_device *dev)
 				dev_priv->display.update_wm = NULL;
 			}
 			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
-
+			dev_priv->display.write_eld = ironlake_write_eld;
 		} else
 			dev_priv->display.update_wm = NULL;
 	} else if (IS_PINEVIEW(dev)) {
@@ -8225,7 +8525,7 @@ static void intel_init_display(struct drm_device *dev)
 			DRM_INFO("failed to find known CxSR latency "
 				 "(found ddr%s fsb freq %d, mem freq %d), "
 				 "disabling CxSR\n",
-				 (dev_priv->is_ddr3 == 1) ? "3": "2",
+				 (dev_priv->is_ddr3 == 1) ? "3" : "2",
 				 dev_priv->fsb_freq, dev_priv->mem_freq);
 			/* Disable CxSR and never update its watermark again */
 			pineview_disable_cxsr(dev);
@@ -8234,6 +8534,7 @@ static void intel_init_display(struct drm_device *dev)
 			dev_priv->display.update_wm = pineview_update_wm;
 		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
 	} else if (IS_G4X(dev)) {
+		dev_priv->display.write_eld = g4x_write_eld;
 		dev_priv->display.update_wm = g4x_update_wm;
 		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
 	} else if (IS_GEN4(dev)) {
@@ -8294,7 +8595,7 @@ static void intel_init_display(struct drm_device *dev)
  * resume, or other times.  This quirk makes sure that's the case for
  * affected systems.
  */
-static void quirk_pipea_force (struct drm_device *dev)
+static void quirk_pipea_force(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
@@ -8322,7 +8623,7 @@ struct intel_quirk intel_quirks[] = {
 	/* HP Compaq 2730p needs pipe A force quirk (LP: #291555) */
 	{ 0x2a42, 0x103c, 0x30eb, quirk_pipea_force },
 	/* HP Mini needs pipe A force quirk (LP: #322104) */
-	{ 0x27ae,0x103c, 0x361a, quirk_pipea_force },
+	{ 0x27ae, 0x103c, 0x361a, quirk_pipea_force },
 
 	/* Thinkpad R31 needs pipe A force quirk */
 	{ 0x3577, 0x1014, 0x0505, quirk_pipea_force },
@@ -8488,6 +8789,7 @@ void intel_modeset_cleanup(struct drm_device *dev)
 	 * enqueue unpin/hotplug work. */
 	drm_irq_uninstall(dev);
 	cancel_work_sync(&dev_priv->hotplug_work);
+	cancel_work_sync(&dev_priv->rps_work);
 
 	/* flush any delayed tasks or pending work */
 	flush_scheduled_work();
@@ -8573,7 +8875,7 @@ struct intel_display_error_state {
 struct intel_display_error_state *
 intel_display_capture_error_state(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_display_error_state *error;
 	int i;
 
@@ -8589,7 +8891,7 @@ intel_display_capture_error_state(struct drm_device *dev)
 		error->plane[i].control = I915_READ(DSPCNTR(i));
 		error->plane[i].stride = I915_READ(DSPSTRIDE(i));
 		error->plane[i].size = I915_READ(DSPSIZE(i));
-		error->plane[i].pos= I915_READ(DSPPOS(i));
+		error->plane[i].pos = I915_READ(DSPPOS(i));
 		error->plane[i].addr = I915_READ(DSPADDR(i));
 		if (INTEL_INFO(dev)->gen >= 4) {
 			error->plane[i].surface = I915_READ(DSPSURF(i));
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 44fef5e1c490..fc1a0832af4f 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -36,7 +36,7 @@
 #include "i915_drv.h"
 #include "drm_dp_helper.h"
 
-
+#define DP_RECEIVER_CAP_SIZE	0xf
 #define DP_LINK_STATUS_SIZE	6
 #define DP_LINK_CHECK_TIMEOUT	(10 * 1000)
 
@@ -53,12 +53,21 @@ struct intel_dp {
 	int dpms_mode;
 	uint8_t link_bw;
 	uint8_t lane_count;
-	uint8_t dpcd[8];
+	uint8_t dpcd[DP_RECEIVER_CAP_SIZE];
 	struct i2c_adapter adapter;
 	struct i2c_algo_dp_aux_data algo;
 	bool is_pch_edp;
 	uint8_t	train_set[4];
 	uint8_t link_status[DP_LINK_STATUS_SIZE];
+	int panel_power_up_delay;
+	int panel_power_down_delay;
+	int panel_power_cycle_delay;
+	int backlight_on_delay;
+	int backlight_off_delay;
+	struct drm_display_mode *panel_fixed_mode;  /* for eDP */
+	struct delayed_work panel_vdd_work;
+	bool want_panel_vdd;
+	unsigned long panel_off_jiffies;
 };
 
 /**
@@ -86,6 +95,17 @@ static bool is_pch_edp(struct intel_dp *intel_dp)
 	return intel_dp->is_pch_edp;
 }
 
+/**
+ * is_cpu_edp - is the port on the CPU and attached to an eDP panel?
+ * @intel_dp: DP struct
+ *
+ * Returns true if the given DP struct corresponds to a CPU eDP port.
+ */
+static bool is_cpu_edp(struct intel_dp *intel_dp)
+{
+	return is_edp(intel_dp) && !is_pch_edp(intel_dp);
+}
+
 static struct intel_dp *enc_to_intel_dp(struct drm_encoder *encoder)
 {
 	return container_of(encoder, struct intel_dp, base.base);
@@ -121,7 +141,7 @@ static void intel_dp_complete_link_train(struct intel_dp *intel_dp);
 static void intel_dp_link_down(struct intel_dp *intel_dp);
 
 void
-intel_edp_link_config (struct intel_encoder *intel_encoder,
+intel_edp_link_config(struct intel_encoder *intel_encoder,
 		       int *lane_num, int *link_bw)
 {
 	struct intel_dp *intel_dp = container_of(intel_encoder, struct intel_dp, base);
@@ -175,9 +195,25 @@ intel_dp_link_clock(uint8_t link_bw)
 		return 162000;
 }
 
-/* I think this is a fiction */
+/*
+ * The units on the numbers in the next two are... bizarre.  Examples will
+ * make it clearer; this one parallels an example in the eDP spec.
+ *
+ * intel_dp_max_data_rate for one lane of 2.7GHz evaluates as:
+ *
+ *     270000 * 1 * 8 / 10 == 216000
+ *
+ * The actual data capacity of that configuration is 2.16Gbit/s, so the
+ * units are decakilobits.  ->clock in a drm_display_mode is in kilohertz -
+ * or equivalently, kilopixels per second - so for 1680x1050R it'd be
+ * 119000.  At 18bpp that's 2142000 kilobits per second.
+ *
+ * Thus the strange-looking division by 10 in intel_dp_link_required, to
+ * get the result in decakilobits instead of kilobits.
+ */
+
 static int
-intel_dp_link_required(struct drm_device *dev, struct intel_dp *intel_dp, int pixel_clock)
+intel_dp_link_required(struct intel_dp *intel_dp, int pixel_clock)
 {
 	struct drm_crtc *crtc = intel_dp->base.base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
@@ -186,7 +222,7 @@ intel_dp_link_required(struct drm_device *dev, struct intel_dp *intel_dp, int pi
 	if (intel_crtc)
 		bpp = intel_crtc->bpp;
 
-	return (pixel_clock * bpp + 7) / 8;
+	return (pixel_clock * bpp + 9) / 10;
 }
 
 static int
@@ -200,24 +236,19 @@ intel_dp_mode_valid(struct drm_connector *connector,
 		    struct drm_display_mode *mode)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	struct drm_device *dev = connector->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	int max_link_clock = intel_dp_link_clock(intel_dp_max_link_bw(intel_dp));
 	int max_lanes = intel_dp_max_lane_count(intel_dp);
 
-	if (is_edp(intel_dp) && dev_priv->panel_fixed_mode) {
-		if (mode->hdisplay > dev_priv->panel_fixed_mode->hdisplay)
+	if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
+		if (mode->hdisplay > intel_dp->panel_fixed_mode->hdisplay)
 			return MODE_PANEL;
 
-		if (mode->vdisplay > dev_priv->panel_fixed_mode->vdisplay)
+		if (mode->vdisplay > intel_dp->panel_fixed_mode->vdisplay)
 			return MODE_PANEL;
 	}
 
-	/* only refuse the mode on non eDP since we have seen some weird eDP panels
-	   which are outside spec tolerances but somehow work by magic */
-	if (!is_edp(intel_dp) &&
-	    (intel_dp_link_required(connector->dev, intel_dp, mode->clock)
-	     > intel_dp_max_data_rate(max_link_clock, max_lanes)))
+	if (intel_dp_link_required(intel_dp, mode->clock)
+	    > intel_dp_max_data_rate(max_link_clock, max_lanes))
 		return MODE_CLOCK_HIGH;
 
 	if (mode->clock < 10000)
@@ -279,6 +310,38 @@ intel_hrawclk(struct drm_device *dev)
 	}
 }
 
+static bool ironlake_edp_have_panel_power(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	return (I915_READ(PCH_PP_STATUS) & PP_ON) != 0;
+}
+
+static bool ironlake_edp_have_panel_vdd(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	return (I915_READ(PCH_PP_CONTROL) & EDP_FORCE_VDD) != 0;
+}
+
+static void
+intel_dp_check_edp(struct intel_dp *intel_dp)
+{
+	struct drm_device *dev = intel_dp->base.base.dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!is_edp(intel_dp))
+		return;
+	if (!ironlake_edp_have_panel_power(intel_dp) && !ironlake_edp_have_panel_vdd(intel_dp)) {
+		WARN(1, "eDP powered off while attempting aux channel communication.\n");
+		DRM_DEBUG_KMS("Status 0x%08x Control 0x%08x\n",
+			      I915_READ(PCH_PP_STATUS),
+			      I915_READ(PCH_PP_CONTROL));
+	}
+}
+
 static int
 intel_dp_aux_ch(struct intel_dp *intel_dp,
 		uint8_t *send, int send_bytes,
@@ -295,6 +358,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	uint32_t aux_clock_divider;
 	int try, precharge;
 
+	intel_dp_check_edp(intel_dp);
 	/* The clock divider is based off the hrawclk,
 	 * and would like to run at 2MHz. So, take the
 	 * hrawclk value and divide by 2 and use that
@@ -302,7 +366,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	 * Note that PCH attached eDP panels should use a 125MHz input
 	 * clock divider.
 	 */
-	if (is_edp(intel_dp) && !is_pch_edp(intel_dp)) {
+	if (is_cpu_edp(intel_dp)) {
 		if (IS_GEN6(dev))
 			aux_clock_divider = 200; /* SNB eDP input clock at 400Mhz */
 		else
@@ -337,7 +401,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 		for (i = 0; i < send_bytes; i += 4)
 			I915_WRITE(ch_data + i,
 				   pack_aux(send + i, send_bytes - i));
-	
+
 		/* Send the command and wait for it to complete */
 		I915_WRITE(ch_ctl,
 			   DP_AUX_CH_CTL_SEND_BUSY |
@@ -354,7 +418,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 				break;
 			udelay(100);
 		}
-	
+
 		/* Clear done status and any errors */
 		I915_WRITE(ch_ctl,
 			   status |
@@ -390,7 +454,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 		      DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT);
 	if (recv_bytes > recv_size)
 		recv_bytes = recv_size;
-	
+
 	for (i = 0; i < recv_bytes; i += 4)
 		unpack_aux(I915_READ(ch_data + i),
 			   recv + i, recv_bytes - i);
@@ -408,6 +472,7 @@ intel_dp_aux_native_write(struct intel_dp *intel_dp,
 	int msg_bytes;
 	uint8_t	ack;
 
+	intel_dp_check_edp(intel_dp);
 	if (send_bytes > 16)
 		return -1;
 	msg[0] = AUX_NATIVE_WRITE << 4;
@@ -450,6 +515,7 @@ intel_dp_aux_native_read(struct intel_dp *intel_dp,
 	uint8_t ack;
 	int ret;
 
+	intel_dp_check_edp(intel_dp);
 	msg[0] = AUX_NATIVE_READ << 4;
 	msg[1] = address >> 8;
 	msg[2] = address & 0xff;
@@ -493,6 +559,7 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
 	int reply_bytes;
 	int ret;
 
+	intel_dp_check_edp(intel_dp);
 	/* Set up the command byte */
 	if (mode & MODE_I2C_READ)
 		msg[0] = AUX_I2C_READ << 4;
@@ -573,24 +640,32 @@ intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
 	return -EREMOTEIO;
 }
 
+static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp);
+static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync);
+
 static int
 intel_dp_i2c_init(struct intel_dp *intel_dp,
 		  struct intel_connector *intel_connector, const char *name)
 {
+	int	ret;
+
 	DRM_DEBUG_KMS("i2c_init %s\n", name);
 	intel_dp->algo.running = false;
 	intel_dp->algo.address = 0;
 	intel_dp->algo.aux_ch = intel_dp_i2c_aux_ch;
 
-	memset(&intel_dp->adapter, '\0', sizeof (intel_dp->adapter));
+	memset(&intel_dp->adapter, '\0', sizeof(intel_dp->adapter));
 	intel_dp->adapter.owner = THIS_MODULE;
 	intel_dp->adapter.class = I2C_CLASS_DDC;
-	strncpy (intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
+	strncpy(intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
 	intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0';
 	intel_dp->adapter.algo_data = &intel_dp->algo;
 	intel_dp->adapter.dev.parent = &intel_connector->base.kdev;
 
-	return i2c_dp_aux_add_bus(&intel_dp->adapter);
+	ironlake_edp_panel_vdd_on(intel_dp);
+	ret = i2c_dp_aux_add_bus(&intel_dp->adapter);
+	ironlake_edp_panel_vdd_off(intel_dp, false);
+	return ret;
 }
 
 static bool
@@ -598,29 +673,28 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		    struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	int lane_count, clock;
 	int max_lane_count = intel_dp_max_lane_count(intel_dp);
 	int max_clock = intel_dp_max_link_bw(intel_dp) == DP_LINK_BW_2_7 ? 1 : 0;
 	static int bws[2] = { DP_LINK_BW_1_62, DP_LINK_BW_2_7 };
 
-	if (is_edp(intel_dp) && dev_priv->panel_fixed_mode) {
-		intel_fixed_panel_mode(dev_priv->panel_fixed_mode, adjusted_mode);
+	if (is_edp(intel_dp) && intel_dp->panel_fixed_mode) {
+		intel_fixed_panel_mode(intel_dp->panel_fixed_mode, adjusted_mode);
 		intel_pch_panel_fitting(dev, DRM_MODE_SCALE_FULLSCREEN,
 					mode, adjusted_mode);
 		/*
 		 * the mode->clock is used to calculate the Data&Link M/N
 		 * of the pipe. For the eDP the fixed clock should be used.
 		 */
-		mode->clock = dev_priv->panel_fixed_mode->clock;
+		mode->clock = intel_dp->panel_fixed_mode->clock;
 	}
 
 	for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) {
 		for (clock = 0; clock <= max_clock; clock++) {
 			int link_avail = intel_dp_max_data_rate(intel_dp_link_clock(bws[clock]), lane_count);
 
-			if (intel_dp_link_required(encoder->dev, intel_dp, mode->clock)
+			if (intel_dp_link_required(intel_dp, mode->clock)
 					<= link_avail) {
 				intel_dp->link_bw = bws[clock];
 				intel_dp->lane_count = lane_count;
@@ -634,19 +708,6 @@ intel_dp_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		}
 	}
 
-	if (is_edp(intel_dp)) {
-		/* okay we failed just pick the highest */
-		intel_dp->lane_count = max_lane_count;
-		intel_dp->link_bw = bws[max_clock];
-		adjusted_mode->clock = intel_dp_link_clock(intel_dp->link_bw);
-		DRM_DEBUG_KMS("Force picking display port link bw %02x lane "
-			      "count %d clock %d\n",
-			      intel_dp->link_bw, intel_dp->lane_count,
-			      adjusted_mode->clock);
-
-		return true;
-	}
-
 	return false;
 }
 
@@ -740,6 +801,9 @@ intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
 	}
 }
 
+static void ironlake_edp_pll_on(struct drm_encoder *encoder);
+static void ironlake_edp_pll_off(struct drm_encoder *encoder);
+
 static void
 intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		  struct drm_display_mode *adjusted_mode)
@@ -749,6 +813,14 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	struct drm_crtc *crtc = intel_dp->base.base.crtc;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 
+	/* Turn on the eDP PLL if needed */
+	if (is_edp(intel_dp)) {
+		if (!is_pch_edp(intel_dp))
+			ironlake_edp_pll_on(encoder);
+		else
+			ironlake_edp_pll_off(encoder);
+	}
+
 	intel_dp->DP = DP_VOLTAGE_0_4 | DP_PRE_EMPHASIS_0;
 	intel_dp->DP |= intel_dp->color_range;
 
@@ -757,7 +829,7 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_PVSYNC)
 		intel_dp->DP |= DP_SYNC_VS_HIGH;
 
-	if (HAS_PCH_CPT(dev) && !is_edp(intel_dp))
+	if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
 		intel_dp->DP |= DP_LINK_TRAIN_OFF_CPT;
 	else
 		intel_dp->DP |= DP_LINK_TRAIN_OFF;
@@ -773,8 +845,12 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		intel_dp->DP |= DP_PORT_WIDTH_4;
 		break;
 	}
-	if (intel_dp->has_audio)
+	if (intel_dp->has_audio) {
+		DRM_DEBUG_DRIVER("Enabling DP audio on pipe %c\n",
+				 pipe_name(intel_crtc->pipe));
 		intel_dp->DP |= DP_AUDIO_OUTPUT_ENABLE;
+		intel_write_eld(encoder, adjusted_mode);
+	}
 
 	memset(intel_dp->link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
 	intel_dp->link_configuration[0] = intel_dp->link_bw;
@@ -794,7 +870,7 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	if (intel_crtc->pipe == 1 && !HAS_PCH_CPT(dev))
 		intel_dp->DP |= DP_PIPEB_SELECT;
 
-	if (is_edp(intel_dp) && !is_pch_edp(intel_dp)) {
+	if (is_cpu_edp(intel_dp)) {
 		/* don't miss out required setting for eDP */
 		intel_dp->DP |= DP_PLL_ENABLE;
 		if (adjusted_mode->clock < 200000)
@@ -804,58 +880,150 @@ intel_dp_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	}
 }
 
+static void ironlake_wait_panel_off(struct intel_dp *intel_dp)
+{
+	unsigned long	off_time;
+	unsigned long	delay;
+
+	DRM_DEBUG_KMS("Wait for panel power off time\n");
+
+	if (ironlake_edp_have_panel_power(intel_dp) ||
+	    ironlake_edp_have_panel_vdd(intel_dp))
+	{
+		DRM_DEBUG_KMS("Panel still on, no delay needed\n");
+		return;
+	}
+
+	off_time = intel_dp->panel_off_jiffies + msecs_to_jiffies(intel_dp->panel_power_down_delay);
+	if (time_after(jiffies, off_time)) {
+		DRM_DEBUG_KMS("Time already passed");
+		return;
+	}
+	delay = jiffies_to_msecs(off_time - jiffies);
+	if (delay > intel_dp->panel_power_down_delay)
+		delay = intel_dp->panel_power_down_delay;
+	DRM_DEBUG_KMS("Waiting an additional %ld ms\n", delay);
+	msleep(delay);
+}
+
 static void ironlake_edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
-	/*
-	 * If the panel wasn't on, make sure there's not a currently
-	 * active PP sequence before enabling AUX VDD.
-	 */
-	if (!(I915_READ(PCH_PP_STATUS) & PP_ON))
-		msleep(dev_priv->panel_t3);
+	if (!is_edp(intel_dp))
+		return;
+	DRM_DEBUG_KMS("Turn eDP VDD on\n");
+
+	WARN(intel_dp->want_panel_vdd,
+	     "eDP VDD already requested on\n");
+
+	intel_dp->want_panel_vdd = true;
+	if (ironlake_edp_have_panel_vdd(intel_dp)) {
+		DRM_DEBUG_KMS("eDP VDD already on\n");
+		return;
+	}
 
+	ironlake_wait_panel_off(intel_dp);
 	pp = I915_READ(PCH_PP_CONTROL);
+	pp &= ~PANEL_UNLOCK_MASK;
+	pp |= PANEL_UNLOCK_REGS;
 	pp |= EDP_FORCE_VDD;
 	I915_WRITE(PCH_PP_CONTROL, pp);
 	POSTING_READ(PCH_PP_CONTROL);
+	DRM_DEBUG_KMS("PCH_PP_STATUS: 0x%08x PCH_PP_CONTROL: 0x%08x\n",
+		      I915_READ(PCH_PP_STATUS), I915_READ(PCH_PP_CONTROL));
+
+	/*
+	 * If the panel wasn't on, delay before accessing aux channel
+	 */
+	if (!ironlake_edp_have_panel_power(intel_dp)) {
+		DRM_DEBUG_KMS("eDP was not running\n");
+		msleep(intel_dp->panel_power_up_delay);
+	}
 }
 
-static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp)
+static void ironlake_panel_vdd_off_sync(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
-	pp = I915_READ(PCH_PP_CONTROL);
-	pp &= ~EDP_FORCE_VDD;
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
+	if (!intel_dp->want_panel_vdd && ironlake_edp_have_panel_vdd(intel_dp)) {
+		pp = I915_READ(PCH_PP_CONTROL);
+		pp &= ~PANEL_UNLOCK_MASK;
+		pp |= PANEL_UNLOCK_REGS;
+		pp &= ~EDP_FORCE_VDD;
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+
+		/* Make sure sequencer is idle before allowing subsequent activity */
+		DRM_DEBUG_KMS("PCH_PP_STATUS: 0x%08x PCH_PP_CONTROL: 0x%08x\n",
+			      I915_READ(PCH_PP_STATUS), I915_READ(PCH_PP_CONTROL));
+		intel_dp->panel_off_jiffies = jiffies;
+	}
+}
+
+static void ironlake_panel_vdd_work(struct work_struct *__work)
+{
+	struct intel_dp *intel_dp = container_of(to_delayed_work(__work),
+						 struct intel_dp, panel_vdd_work);
+	struct drm_device *dev = intel_dp->base.base.dev;
 
-	/* Make sure sequencer is idle before allowing subsequent activity */
-	msleep(dev_priv->panel_t12);
+	mutex_lock(&dev->struct_mutex);
+	ironlake_panel_vdd_off_sync(intel_dp);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static void ironlake_edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync)
+{
+	if (!is_edp(intel_dp))
+		return;
+
+	DRM_DEBUG_KMS("Turn eDP VDD off %d\n", intel_dp->want_panel_vdd);
+	WARN(!intel_dp->want_panel_vdd, "eDP VDD not forced on");
+	
+	intel_dp->want_panel_vdd = false;
+
+	if (sync) {
+		ironlake_panel_vdd_off_sync(intel_dp);
+	} else {
+		/*
+		 * Queue the timer to fire a long
+		 * time from now (relative to the power down delay)
+		 * to keep the panel power up across a sequence of operations
+		 */
+		schedule_delayed_work(&intel_dp->panel_vdd_work,
+				      msecs_to_jiffies(intel_dp->panel_power_cycle_delay * 5));
+	}
 }
 
 /* Returns true if the panel was already on when called */
-static bool ironlake_edp_panel_on (struct intel_dp *intel_dp)
+static void ironlake_edp_panel_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp, idle_on_mask = PP_ON | PP_SEQUENCE_STATE_ON_IDLE;
 
-	if (I915_READ(PCH_PP_STATUS) & PP_ON)
-		return true;
+	if (!is_edp(intel_dp))
+		return;
+	if (ironlake_edp_have_panel_power(intel_dp))
+		return;
 
+	ironlake_wait_panel_off(intel_dp);
 	pp = I915_READ(PCH_PP_CONTROL);
+	pp &= ~PANEL_UNLOCK_MASK;
+	pp |= PANEL_UNLOCK_REGS;
+
+	if (IS_GEN5(dev)) {
+		/* ILK workaround: disable reset around power sequence */
+		pp &= ~PANEL_POWER_RESET;
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+	}
 
-	/* ILK workaround: disable reset around power sequence */
-	pp &= ~PANEL_POWER_RESET;
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
-
-	pp |= PANEL_UNLOCK_REGS | POWER_TARGET_ON;
+	pp |= POWER_TARGET_ON;
 	I915_WRITE(PCH_PP_CONTROL, pp);
 	POSTING_READ(PCH_PP_CONTROL);
 
@@ -864,44 +1032,64 @@ static bool ironlake_edp_panel_on (struct intel_dp *intel_dp)
 		DRM_ERROR("panel on wait timed out: 0x%08x\n",
 			  I915_READ(PCH_PP_STATUS));
 
-	pp |= PANEL_POWER_RESET; /* restore panel reset bit */
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
-
-	return false;
+	if (IS_GEN5(dev)) {
+		pp |= PANEL_POWER_RESET; /* restore panel reset bit */
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+	}
 }
 
-static void ironlake_edp_panel_off (struct drm_device *dev)
+static void ironlake_edp_panel_off(struct drm_encoder *encoder)
 {
+	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
+	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp, idle_off_mask = PP_ON | PP_SEQUENCE_MASK |
 		PP_CYCLE_DELAY_ACTIVE | PP_SEQUENCE_STATE_MASK;
 
+	if (!is_edp(intel_dp))
+		return;
 	pp = I915_READ(PCH_PP_CONTROL);
+	pp &= ~PANEL_UNLOCK_MASK;
+	pp |= PANEL_UNLOCK_REGS;
+
+	if (IS_GEN5(dev)) {
+		/* ILK workaround: disable reset around power sequence */
+		pp &= ~PANEL_POWER_RESET;
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+	}
 
-	/* ILK workaround: disable reset around power sequence */
-	pp &= ~PANEL_POWER_RESET;
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
+	intel_dp->panel_off_jiffies = jiffies;
 
-	pp &= ~POWER_TARGET_ON;
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
+	if (IS_GEN5(dev)) {
+		pp &= ~POWER_TARGET_ON;
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+		pp &= ~POWER_TARGET_ON;
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+		msleep(intel_dp->panel_power_cycle_delay);
 
-	if (wait_for((I915_READ(PCH_PP_STATUS) & idle_off_mask) == 0, 5000))
-		DRM_ERROR("panel off wait timed out: 0x%08x\n",
-			  I915_READ(PCH_PP_STATUS));
+		if (wait_for((I915_READ(PCH_PP_STATUS) & idle_off_mask) == 0, 5000))
+			DRM_ERROR("panel off wait timed out: 0x%08x\n",
+				  I915_READ(PCH_PP_STATUS));
 
-	pp |= PANEL_POWER_RESET; /* restore panel reset bit */
-	I915_WRITE(PCH_PP_CONTROL, pp);
-	POSTING_READ(PCH_PP_CONTROL);
+		pp |= PANEL_POWER_RESET; /* restore panel reset bit */
+		I915_WRITE(PCH_PP_CONTROL, pp);
+		POSTING_READ(PCH_PP_CONTROL);
+	}
 }
 
-static void ironlake_edp_backlight_on (struct drm_device *dev)
+static void ironlake_edp_backlight_on(struct intel_dp *intel_dp)
 {
+	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
+	if (!is_edp(intel_dp))
+		return;
+
 	DRM_DEBUG_KMS("\n");
 	/*
 	 * If we enable the backlight right away following a panel power
@@ -909,21 +1097,32 @@ static void ironlake_edp_backlight_on (struct drm_device *dev)
 	 * link.  So delay a bit to make sure the image is solid before
 	 * allowing it to appear.
 	 */
-	msleep(300);
+	msleep(intel_dp->backlight_on_delay);
 	pp = I915_READ(PCH_PP_CONTROL);
+	pp &= ~PANEL_UNLOCK_MASK;
+	pp |= PANEL_UNLOCK_REGS;
 	pp |= EDP_BLC_ENABLE;
 	I915_WRITE(PCH_PP_CONTROL, pp);
+	POSTING_READ(PCH_PP_CONTROL);
 }
 
-static void ironlake_edp_backlight_off (struct drm_device *dev)
+static void ironlake_edp_backlight_off(struct intel_dp *intel_dp)
 {
+	struct drm_device *dev = intel_dp->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 
+	if (!is_edp(intel_dp))
+		return;
+
 	DRM_DEBUG_KMS("\n");
 	pp = I915_READ(PCH_PP_CONTROL);
+	pp &= ~PANEL_UNLOCK_MASK;
+	pp |= PANEL_UNLOCK_REGS;
 	pp &= ~EDP_BLC_ENABLE;
 	I915_WRITE(PCH_PP_CONTROL, pp);
+	POSTING_READ(PCH_PP_CONTROL);
+	msleep(intel_dp->backlight_off_delay);
 }
 
 static void ironlake_edp_pll_on(struct drm_encoder *encoder)
@@ -986,43 +1185,39 @@ static void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
 static void intel_dp_prepare(struct drm_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
-	struct drm_device *dev = encoder->dev;
 
 	/* Wake up the sink first */
+	ironlake_edp_panel_vdd_on(intel_dp);
 	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
+	ironlake_edp_panel_vdd_off(intel_dp, false);
 
-	if (is_edp(intel_dp)) {
-		ironlake_edp_backlight_off(dev);
-		ironlake_edp_panel_off(dev);
-		if (!is_pch_edp(intel_dp))
-			ironlake_edp_pll_on(encoder);
-		else
-			ironlake_edp_pll_off(encoder);
-	}
+	/* Make sure the panel is off before trying to
+	 * change the mode
+	 */
+	ironlake_edp_backlight_off(intel_dp);
 	intel_dp_link_down(intel_dp);
+	ironlake_edp_panel_off(encoder);
 }
 
 static void intel_dp_commit(struct drm_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 	struct drm_device *dev = encoder->dev;
+	struct intel_crtc *intel_crtc = to_intel_crtc(intel_dp->base.base.crtc);
 
-	if (is_edp(intel_dp))
-		ironlake_edp_panel_vdd_on(intel_dp);
-
+	ironlake_edp_panel_vdd_on(intel_dp);
+	intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
 	intel_dp_start_link_train(intel_dp);
-
-	if (is_edp(intel_dp)) {
-		ironlake_edp_panel_on(intel_dp);
-		ironlake_edp_panel_vdd_off(intel_dp);
-	}
+	ironlake_edp_panel_on(intel_dp);
+	ironlake_edp_panel_vdd_off(intel_dp, true);
 
 	intel_dp_complete_link_train(intel_dp);
-
-	if (is_edp(intel_dp))
-		ironlake_edp_backlight_on(dev);
+	ironlake_edp_backlight_on(intel_dp);
 
 	intel_dp->dpms_mode = DRM_MODE_DPMS_ON;
+
+	if (HAS_PCH_CPT(dev))
+		intel_cpt_verify_modeset(dev, intel_crtc->pipe);
 }
 
 static void
@@ -1034,28 +1229,27 @@ intel_dp_dpms(struct drm_encoder *encoder, int mode)
 	uint32_t dp_reg = I915_READ(intel_dp->output_reg);
 
 	if (mode != DRM_MODE_DPMS_ON) {
+		ironlake_edp_panel_vdd_on(intel_dp);
 		if (is_edp(intel_dp))
-			ironlake_edp_backlight_off(dev);
+			ironlake_edp_backlight_off(intel_dp);
 		intel_dp_sink_dpms(intel_dp, mode);
 		intel_dp_link_down(intel_dp);
-		if (is_edp(intel_dp))
-			ironlake_edp_panel_off(dev);
+		ironlake_edp_panel_off(encoder);
 		if (is_edp(intel_dp) && !is_pch_edp(intel_dp))
 			ironlake_edp_pll_off(encoder);
+		ironlake_edp_panel_vdd_off(intel_dp, false);
 	} else {
-		if (is_edp(intel_dp))
-			ironlake_edp_panel_vdd_on(intel_dp);
+		ironlake_edp_panel_vdd_on(intel_dp);
 		intel_dp_sink_dpms(intel_dp, mode);
 		if (!(dp_reg & DP_PORT_EN)) {
 			intel_dp_start_link_train(intel_dp);
-			if (is_edp(intel_dp)) {
-				ironlake_edp_panel_on(intel_dp);
-				ironlake_edp_panel_vdd_off(intel_dp);
-			}
+			ironlake_edp_panel_on(intel_dp);
+			ironlake_edp_panel_vdd_off(intel_dp, true);
 			intel_dp_complete_link_train(intel_dp);
-		}
-		if (is_edp(intel_dp))
-			ironlake_edp_backlight_on(dev);
+			ironlake_edp_backlight_on(intel_dp);
+		} else
+			ironlake_edp_panel_vdd_off(intel_dp, false);
+		ironlake_edp_backlight_on(intel_dp);
 	}
 	intel_dp->dpms_mode = mode;
 }
@@ -1364,7 +1558,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
 				  DP_LINK_CONFIGURATION_SIZE);
 
 	DP |= DP_PORT_EN;
-	if (HAS_PCH_CPT(dev) && !is_edp(intel_dp))
+	if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
 		DP &= ~DP_LINK_TRAIN_MASK_CPT;
 	else
 		DP &= ~DP_LINK_TRAIN_MASK;
@@ -1383,7 +1577,7 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
 			DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels;
 		}
 
-		if (HAS_PCH_CPT(dev) && !is_edp(intel_dp))
+		if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
 			reg = DP | DP_LINK_TRAIN_PAT_1_CPT;
 		else
 			reg = DP | DP_LINK_TRAIN_PAT_1;
@@ -1458,7 +1652,7 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
 			DP = (DP & ~(DP_VOLTAGE_MASK|DP_PRE_EMPHASIS_MASK)) | signal_levels;
 		}
 
-		if (HAS_PCH_CPT(dev) && !is_edp(intel_dp))
+		if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
 			reg = DP | DP_LINK_TRAIN_PAT_2_CPT;
 		else
 			reg = DP | DP_LINK_TRAIN_PAT_2;
@@ -1499,7 +1693,7 @@ intel_dp_complete_link_train(struct intel_dp *intel_dp)
 		++tries;
 	}
 
-	if (HAS_PCH_CPT(dev) && !is_edp(intel_dp))
+	if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp))
 		reg = DP | DP_LINK_TRAIN_OFF_CPT;
 	else
 		reg = DP | DP_LINK_TRAIN_OFF;
@@ -1529,7 +1723,7 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 		udelay(100);
 	}
 
-	if (HAS_PCH_CPT(dev) && !is_edp(intel_dp)) {
+	if (HAS_PCH_CPT(dev) && !is_cpu_edp(intel_dp)) {
 		DP &= ~DP_LINK_TRAIN_MASK_CPT;
 		I915_WRITE(intel_dp->output_reg, DP | DP_LINK_TRAIN_PAT_IDLE_CPT);
 	} else {
@@ -1578,13 +1772,14 @@ intel_dp_link_down(struct intel_dp *intel_dp)
 
 	I915_WRITE(intel_dp->output_reg, DP & ~DP_PORT_EN);
 	POSTING_READ(intel_dp->output_reg);
+	msleep(intel_dp->panel_power_down_delay);
 }
 
 static bool
 intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
 	if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd,
-					   sizeof (intel_dp->dpcd)) &&
+					   sizeof(intel_dp->dpcd)) &&
 	    (intel_dp->dpcd[DP_DPCD_REV] != 0)) {
 		return true;
 	}
@@ -1592,6 +1787,27 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 	return false;
 }
 
+static bool
+intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
+{
+	int ret;
+
+	ret = intel_dp_aux_native_read_retry(intel_dp,
+					     DP_DEVICE_SERVICE_IRQ_VECTOR,
+					     sink_irq_vector, 1);
+	if (!ret)
+		return false;
+
+	return true;
+}
+
+static void
+intel_dp_handle_test_request(struct intel_dp *intel_dp)
+{
+	/* NAK by default */
+	intel_dp_aux_native_write_1(intel_dp, DP_TEST_RESPONSE, DP_TEST_ACK);
+}
+
 /*
  * According to DP spec
  * 5.1.2:
@@ -1604,6 +1820,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 static void
 intel_dp_check_link_status(struct intel_dp *intel_dp)
 {
+	u8 sink_irq_vector;
+
 	if (intel_dp->dpms_mode != DRM_MODE_DPMS_ON)
 		return;
 
@@ -1622,6 +1840,20 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 		return;
 	}
 
+	/* Try to read the source of the interrupt */
+	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
+	    intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) {
+		/* Clear interrupt source */
+		intel_dp_aux_native_write_1(intel_dp,
+					    DP_DEVICE_SERVICE_IRQ_VECTOR,
+					    sink_irq_vector);
+
+		if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST)
+			intel_dp_handle_test_request(intel_dp);
+		if (sink_irq_vector & (DP_CP_IRQ | DP_SINK_SPECIFIC_IRQ))
+			DRM_DEBUG_DRIVER("CP or sink specific irq unhandled\n");
+	}
+
 	if (!intel_channel_eq_ok(intel_dp)) {
 		DRM_DEBUG_KMS("%s: channel EQ not ok, retraining\n",
 			      drm_get_encoder_name(&intel_dp->base.base));
@@ -1683,6 +1915,31 @@ g4x_dp_detect(struct intel_dp *intel_dp)
 	return intel_dp_detect_dpcd(intel_dp);
 }
 
+static struct edid *
+intel_dp_get_edid(struct drm_connector *connector, struct i2c_adapter *adapter)
+{
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	struct edid	*edid;
+
+	ironlake_edp_panel_vdd_on(intel_dp);
+	edid = drm_get_edid(connector, adapter);
+	ironlake_edp_panel_vdd_off(intel_dp, false);
+	return edid;
+}
+
+static int
+intel_dp_get_edid_modes(struct drm_connector *connector, struct i2c_adapter *adapter)
+{
+	struct intel_dp *intel_dp = intel_attached_dp(connector);
+	int	ret;
+
+	ironlake_edp_panel_vdd_on(intel_dp);
+	ret = intel_ddc_get_modes(connector, adapter);
+	ironlake_edp_panel_vdd_off(intel_dp, false);
+	return ret;
+}
+
+
 /**
  * Uses CRT_HOTPLUG_EN and CRT_HOTPLUG_STAT to detect DP connection.
  *
@@ -1715,7 +1972,7 @@ intel_dp_detect(struct drm_connector *connector, bool force)
 	if (intel_dp->force_audio) {
 		intel_dp->has_audio = intel_dp->force_audio > 0;
 	} else {
-		edid = drm_get_edid(connector, &intel_dp->adapter);
+		edid = intel_dp_get_edid(connector, &intel_dp->adapter);
 		if (edid) {
 			intel_dp->has_audio = drm_detect_monitor_audio(edid);
 			connector->display_info.raw_edid = NULL;
@@ -1736,28 +1993,36 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 	/* We should parse the EDID data and find out if it has an audio sink
 	 */
 
-	ret = intel_ddc_get_modes(connector, &intel_dp->adapter);
+	ret = intel_dp_get_edid_modes(connector, &intel_dp->adapter);
 	if (ret) {
-		if (is_edp(intel_dp) && !dev_priv->panel_fixed_mode) {
+		if (is_edp(intel_dp) && !intel_dp->panel_fixed_mode) {
 			struct drm_display_mode *newmode;
 			list_for_each_entry(newmode, &connector->probed_modes,
 					    head) {
-				if (newmode->type & DRM_MODE_TYPE_PREFERRED) {
-					dev_priv->panel_fixed_mode =
+				if ((newmode->type & DRM_MODE_TYPE_PREFERRED)) {
+					intel_dp->panel_fixed_mode =
 						drm_mode_duplicate(dev, newmode);
 					break;
 				}
 			}
 		}
-
 		return ret;
 	}
 
 	/* if eDP has no EDID, try to use fixed panel mode from VBT */
 	if (is_edp(intel_dp)) {
-		if (dev_priv->panel_fixed_mode != NULL) {
+		/* initialize panel mode from VBT if available for eDP */
+		if (intel_dp->panel_fixed_mode == NULL && dev_priv->lfp_lvds_vbt_mode != NULL) {
+			intel_dp->panel_fixed_mode =
+				drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
+			if (intel_dp->panel_fixed_mode) {
+				intel_dp->panel_fixed_mode->type |=
+					DRM_MODE_TYPE_PREFERRED;
+			}
+		}
+		if (intel_dp->panel_fixed_mode) {
 			struct drm_display_mode *mode;
-			mode = drm_mode_duplicate(dev, dev_priv->panel_fixed_mode);
+			mode = drm_mode_duplicate(dev, intel_dp->panel_fixed_mode);
 			drm_mode_probed_add(connector, mode);
 			return 1;
 		}
@@ -1772,7 +2037,7 @@ intel_dp_detect_audio(struct drm_connector *connector)
 	struct edid *edid;
 	bool has_audio = false;
 
-	edid = drm_get_edid(connector, &intel_dp->adapter);
+	edid = intel_dp_get_edid(connector, &intel_dp->adapter);
 	if (edid) {
 		has_audio = drm_detect_monitor_audio(edid);
 
@@ -1839,7 +2104,7 @@ done:
 }
 
 static void
-intel_dp_destroy (struct drm_connector *connector)
+intel_dp_destroy(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 
@@ -1857,6 +2122,10 @@ static void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 
 	i2c_del_adapter(&intel_dp->adapter);
 	drm_encoder_cleanup(encoder);
+	if (is_edp(intel_dp)) {
+		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
+		ironlake_panel_vdd_off_sync(intel_dp);
+	}
 	kfree(intel_dp);
 }
 
@@ -1896,7 +2165,7 @@ intel_dp_hot_plug(struct intel_encoder *intel_encoder)
 
 /* Return which DP Port should be selected for Transcoder DP control */
 int
-intel_trans_dp_port_sel (struct drm_crtc *crtc)
+intel_trans_dp_port_sel(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_mode_config *mode_config = &dev->mode_config;
@@ -1993,10 +2262,13 @@ intel_dp_init(struct drm_device *dev, int output_reg)
 	else if (output_reg == DP_D || output_reg == PCH_DP_D)
 		intel_encoder->clone_mask = (1 << INTEL_DP_D_CLONE_BIT);
 
-	if (is_edp(intel_dp))
+	if (is_edp(intel_dp)) {
 		intel_encoder->clone_mask = (1 << INTEL_EDP_CLONE_BIT);
+		INIT_DELAYED_WORK(&intel_dp->panel_vdd_work,
+				  ironlake_panel_vdd_work);
+	}
 
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 	connector->interlace_allowed = true;
 	connector->doublescan_allowed = 0;
 
@@ -2032,25 +2304,60 @@ intel_dp_init(struct drm_device *dev, int output_reg)
 			break;
 	}
 
-	intel_dp_i2c_init(intel_dp, intel_connector, name);
-
 	/* Cache some DPCD data in the eDP case */
 	if (is_edp(intel_dp)) {
 		bool ret;
-		u32 pp_on, pp_div;
+		struct edp_power_seq	cur, vbt;
+		u32 pp_on, pp_off, pp_div;
 
 		pp_on = I915_READ(PCH_PP_ON_DELAYS);
+		pp_off = I915_READ(PCH_PP_OFF_DELAYS);
 		pp_div = I915_READ(PCH_PP_DIVISOR);
 
-		/* Get T3 & T12 values (note: VESA not bspec terminology) */
-		dev_priv->panel_t3 = (pp_on & 0x1fff0000) >> 16;
-		dev_priv->panel_t3 /= 10; /* t3 in 100us units */
-		dev_priv->panel_t12 = pp_div & 0xf;
-		dev_priv->panel_t12 *= 100; /* t12 in 100ms units */
+		/* Pull timing values out of registers */
+		cur.t1_t3 = (pp_on & PANEL_POWER_UP_DELAY_MASK) >>
+			PANEL_POWER_UP_DELAY_SHIFT;
+
+		cur.t8 = (pp_on & PANEL_LIGHT_ON_DELAY_MASK) >>
+			PANEL_LIGHT_ON_DELAY_SHIFT;
+		
+		cur.t9 = (pp_off & PANEL_LIGHT_OFF_DELAY_MASK) >>
+			PANEL_LIGHT_OFF_DELAY_SHIFT;
+
+		cur.t10 = (pp_off & PANEL_POWER_DOWN_DELAY_MASK) >>
+			PANEL_POWER_DOWN_DELAY_SHIFT;
+
+		cur.t11_t12 = ((pp_div & PANEL_POWER_CYCLE_DELAY_MASK) >>
+			       PANEL_POWER_CYCLE_DELAY_SHIFT) * 1000;
+
+		DRM_DEBUG_KMS("cur t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
+			      cur.t1_t3, cur.t8, cur.t9, cur.t10, cur.t11_t12);
+
+		vbt = dev_priv->edp.pps;
+
+		DRM_DEBUG_KMS("vbt t1_t3 %d t8 %d t9 %d t10 %d t11_t12 %d\n",
+			      vbt.t1_t3, vbt.t8, vbt.t9, vbt.t10, vbt.t11_t12);
+
+#define get_delay(field)	((max(cur.field, vbt.field) + 9) / 10)
+
+		intel_dp->panel_power_up_delay = get_delay(t1_t3);
+		intel_dp->backlight_on_delay = get_delay(t8);
+		intel_dp->backlight_off_delay = get_delay(t9);
+		intel_dp->panel_power_down_delay = get_delay(t10);
+		intel_dp->panel_power_cycle_delay = get_delay(t11_t12);
+
+		DRM_DEBUG_KMS("panel power up delay %d, power down delay %d, power cycle delay %d\n",
+			      intel_dp->panel_power_up_delay, intel_dp->panel_power_down_delay,
+			      intel_dp->panel_power_cycle_delay);
+
+		DRM_DEBUG_KMS("backlight on delay %d, off delay %d\n",
+			      intel_dp->backlight_on_delay, intel_dp->backlight_off_delay);
+
+		intel_dp->panel_off_jiffies = jiffies - intel_dp->panel_power_down_delay;
 
 		ironlake_edp_panel_vdd_on(intel_dp);
 		ret = intel_dp_get_dpcd(intel_dp);
-		ironlake_edp_panel_vdd_off(intel_dp);
+		ironlake_edp_panel_vdd_off(intel_dp, false);
 		if (ret) {
 			if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
 				dev_priv->no_aux_handshake =
@@ -2065,18 +2372,11 @@ intel_dp_init(struct drm_device *dev, int output_reg)
 		}
 	}
 
+	intel_dp_i2c_init(intel_dp, intel_connector, name);
+
 	intel_encoder->hot_plug = intel_dp_hot_plug;
 
 	if (is_edp(intel_dp)) {
-		/* initialize panel mode from VBT if available for eDP */
-		if (dev_priv->lfp_lvds_vbt_mode) {
-			dev_priv->panel_fixed_mode =
-				drm_mode_duplicate(dev, dev_priv->lfp_lvds_vbt_mode);
-			if (dev_priv->panel_fixed_mode) {
-				dev_priv->panel_fixed_mode->type |=
-					DRM_MODE_TYPE_PREFERRED;
-			}
-		}
 		dev_priv->int_edp_connector = connector;
 		intel_panel_setup_backlight(dev);
 	}
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index fe1099d8817e..bd9a604b73da 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -34,7 +34,7 @@
 #define _wait_for(COND, MS, W) ({ \
 	unsigned long timeout__ = jiffies + msecs_to_jiffies(MS);	\
 	int ret__ = 0;							\
-	while (! (COND)) {						\
+	while (!(COND)) {						\
 		if (time_after(jiffies, timeout__)) {			\
 			ret__ = -ETIMEDOUT;				\
 			break;						\
@@ -49,10 +49,10 @@
 
 #define MSLEEP(x) do { \
 	if (in_dbg_master()) \
-	       	mdelay(x); \
+		mdelay(x); \
 	else \
 		msleep(x); \
-} while(0)
+} while (0)
 
 #define KHz(x) (1000*x)
 #define MHz(x) KHz(1000*x)
@@ -171,6 +171,9 @@ struct intel_crtc {
 	int16_t cursor_width, cursor_height;
 	bool cursor_visible;
 	unsigned int bpp;
+
+	bool no_pll; /* tertiary pipe for IVB */
+	bool use_pll_a;
 };
 
 #define to_intel_crtc(x) container_of(x, struct intel_crtc, base)
@@ -184,7 +187,7 @@ struct intel_crtc {
 #define DIP_VERSION_AVI 0x2
 #define DIP_LEN_AVI     13
 
-#define DIP_TYPE_SPD	0x3
+#define DIP_TYPE_SPD	0x83
 #define DIP_VERSION_SPD	0x1
 #define DIP_LEN_SPD	25
 #define DIP_SPD_UNKNOWN	0
@@ -284,7 +287,7 @@ void
 intel_dp_set_m_n(struct drm_crtc *crtc, struct drm_display_mode *mode,
 		 struct drm_display_mode *adjusted_mode);
 extern bool intel_dpd_is_edp(struct drm_device *dev);
-extern void intel_edp_link_config (struct intel_encoder *, int *, int *);
+extern void intel_edp_link_config(struct intel_encoder *, int *, int *);
 extern bool intel_encoder_is_pch_edp(struct drm_encoder *encoder);
 
 /* intel_panel.c */
@@ -304,8 +307,8 @@ extern void intel_panel_destroy_backlight(struct drm_device *dev);
 extern enum drm_connector_status intel_panel_detect(struct drm_device *dev);
 
 extern void intel_crtc_load_lut(struct drm_crtc *crtc);
-extern void intel_encoder_prepare (struct drm_encoder *encoder);
-extern void intel_encoder_commit (struct drm_encoder *encoder);
+extern void intel_encoder_prepare(struct drm_encoder *encoder);
+extern void intel_encoder_commit(struct drm_encoder *encoder);
 extern void intel_encoder_destroy(struct drm_encoder *encoder);
 
 static inline struct intel_encoder *intel_attached_encoder(struct drm_connector *connector)
@@ -377,4 +380,8 @@ extern void intel_fb_output_poll_changed(struct drm_device *dev);
 extern void intel_fb_restore_mode(struct drm_device *dev);
 
 extern void intel_init_clock_gating(struct drm_device *dev);
+extern void intel_write_eld(struct drm_encoder *encoder,
+			    struct drm_display_mode *mode);
+extern void intel_cpt_verify_modeset(struct drm_device *dev, int pipe);
+
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 226ba830f383..d4f5a0b2120d 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -69,8 +69,7 @@ void intel_dip_infoframe_csum(struct dip_infoframe *frame)
 	frame->checksum = 0;
 	frame->ecc = 0;
 
-	/* Header isn't part of the checksum */
-	for (i = 5; i < frame->len; i++)
+	for (i = 0; i < frame->len + DIP_HEADER_SIZE; i++)
 		sum += data[i];
 
 	frame->checksum = 0x100 - sum;
@@ -104,7 +103,7 @@ static u32 intel_infoframe_flags(struct dip_infoframe *frame)
 		flags |= VIDEO_DIP_ENABLE_AVI | VIDEO_DIP_FREQ_VSYNC;
 		break;
 	case DIP_TYPE_SPD:
-		flags |= VIDEO_DIP_ENABLE_SPD | VIDEO_DIP_FREQ_2VSYNC;
+		flags |= VIDEO_DIP_ENABLE_SPD | VIDEO_DIP_FREQ_VSYNC;
 		break;
 	default:
 		DRM_DEBUG_DRIVER("unknown info frame type %d\n", frame->type);
@@ -165,9 +164,9 @@ static void ironlake_write_infoframe(struct drm_encoder *encoder,
 
 	flags = intel_infoframe_index(frame);
 
-	val &= ~VIDEO_DIP_SELECT_MASK;
+	val &= ~(VIDEO_DIP_SELECT_MASK | 0xf); /* clear DIP data offset */
 
-	I915_WRITE(reg, val | flags);
+	I915_WRITE(reg, VIDEO_DIP_ENABLE | val | flags);
 
 	for (i = 0; i < len; i += 4) {
 		I915_WRITE(TVIDEO_DIP_DATA(intel_crtc->pipe), *data);
@@ -245,16 +244,17 @@ static void intel_hdmi_mode_set(struct drm_encoder *encoder,
 		sdvox |= HDMI_MODE_SELECT;
 
 	if (intel_hdmi->has_audio) {
+		DRM_DEBUG_DRIVER("Enabling HDMI audio on pipe %c\n",
+				 pipe_name(intel_crtc->pipe));
 		sdvox |= SDVO_AUDIO_ENABLE;
 		sdvox |= SDVO_NULL_PACKETS_DURING_VSYNC;
+		intel_write_eld(encoder, adjusted_mode);
 	}
 
-	if (intel_crtc->pipe == 1) {
-		if (HAS_PCH_CPT(dev))
-			sdvox |= PORT_TRANS_B_SEL_CPT;
-		else
-			sdvox |= SDVO_PIPE_B_SELECT;
-	}
+	if (HAS_PCH_CPT(dev))
+		sdvox |= PORT_TRANS_SEL_CPT(intel_crtc->pipe);
+	else if (intel_crtc->pipe == 1)
+		sdvox |= SDVO_PIPE_B_SELECT;
 
 	I915_WRITE(intel_hdmi->sdvox_reg, sdvox);
 	POSTING_READ(intel_hdmi->sdvox_reg);
@@ -486,6 +486,7 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg)
 	struct intel_encoder *intel_encoder;
 	struct intel_connector *intel_connector;
 	struct intel_hdmi *intel_hdmi;
+	int i;
 
 	intel_hdmi = kzalloc(sizeof(struct intel_hdmi), GFP_KERNEL);
 	if (!intel_hdmi)
@@ -511,7 +512,7 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg)
 	connector->polled = DRM_CONNECTOR_POLL_HPD;
 	connector->interlace_allowed = 0;
 	connector->doublescan_allowed = 0;
-	intel_encoder->crtc_mask = (1 << 0) | (1 << 1);
+	intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 
 	/* Set up the DDC bus. */
 	if (sdvox_reg == SDVOB) {
@@ -538,10 +539,14 @@ void intel_hdmi_init(struct drm_device *dev, int sdvox_reg)
 
 	intel_hdmi->sdvox_reg = sdvox_reg;
 
-	if (!HAS_PCH_SPLIT(dev))
+	if (!HAS_PCH_SPLIT(dev)) {
 		intel_hdmi->write_infoframe = i9xx_write_infoframe;
-	else
+		I915_WRITE(VIDEO_DIP_CTL, 0);
+	} else {
 		intel_hdmi->write_infoframe = ironlake_write_infoframe;
+		for_each_pipe(i)
+			I915_WRITE(TVIDEO_DIP_CTL(i), 0);
+	}
 
 	drm_encoder_helper_add(&intel_encoder->base, &intel_hdmi_helper_funcs);
 
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index d98cee60b602..9ed5380e5a53 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -422,13 +422,7 @@ void intel_gmbus_set_speed(struct i2c_adapter *adapter, int speed)
 {
 	struct intel_gmbus *bus = to_intel_gmbus(adapter);
 
-	/* speed:
-	 * 0x0 = 100 KHz
-	 * 0x1 = 50 KHz
-	 * 0x2 = 400 KHz
-	 * 0x3 = 1000 Khz
-	 */
-	bus->reg0 = (bus->reg0 & ~(0x3 << 8)) | (speed << 8);
+	bus->reg0 = (bus->reg0 & ~(0x3 << 8)) | speed;
 }
 
 void intel_gmbus_force_bit(struct i2c_adapter *adapter, bool force_bit)
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 31da77f5c051..42f165a520de 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -888,9 +888,11 @@ bool intel_lvds_init(struct drm_device *dev)
 	intel_encoder->type = INTEL_OUTPUT_LVDS;
 
 	intel_encoder->clone_mask = (1 << INTEL_LVDS_CLONE_BIT);
-	intel_encoder->crtc_mask = (1 << 1);
-	if (INTEL_INFO(dev)->gen >= 5)
-		intel_encoder->crtc_mask |= (1 << 0);
+	if (HAS_PCH_SPLIT(dev))
+		intel_encoder->crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
+	else
+		intel_encoder->crtc_mask = (1 << 1);
+
 	drm_encoder_helper_add(encoder, &intel_lvds_helper_funcs);
 	drm_connector_helper_add(connector, &intel_lvds_connector_helper_funcs);
 	connector->display_info.subpixel_order = SubPixelHorizontalRGB;
diff --git a/drivers/gpu/drm/i915/intel_modes.c b/drivers/gpu/drm/i915/intel_modes.c
index 3b26a3ba02dd..be2c6fe07d12 100644
--- a/drivers/gpu/drm/i915/intel_modes.c
+++ b/drivers/gpu/drm/i915/intel_modes.c
@@ -26,6 +26,7 @@
 #include <linux/slab.h>
 #include <linux/i2c.h>
 #include <linux/fb.h>
+#include <drm/drm_edid.h>
 #include "drmP.h"
 #include "intel_drv.h"
 #include "i915_drv.h"
@@ -74,6 +75,7 @@ int intel_ddc_get_modes(struct drm_connector *connector,
 	if (edid) {
 		drm_mode_connector_update_edid_property(connector, edid);
 		ret = drm_add_edid_modes(connector, edid);
+		drm_edid_to_eld(connector, edid);
 		connector->display_info.raw_edid = NULL;
 		kfree(edid);
 	}
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index b8e8158bb16e..289140bc83cb 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -51,61 +51,61 @@
 #define MBOX_ASLE      (1<<2)
 
 struct opregion_header {
-       u8 signature[16];
-       u32 size;
-       u32 opregion_ver;
-       u8 bios_ver[32];
-       u8 vbios_ver[16];
-       u8 driver_ver[16];
-       u32 mboxes;
-       u8 reserved[164];
+	u8 signature[16];
+	u32 size;
+	u32 opregion_ver;
+	u8 bios_ver[32];
+	u8 vbios_ver[16];
+	u8 driver_ver[16];
+	u32 mboxes;
+	u8 reserved[164];
 } __attribute__((packed));
 
 /* OpRegion mailbox #1: public ACPI methods */
 struct opregion_acpi {
-       u32 drdy;       /* driver readiness */
-       u32 csts;       /* notification status */
-       u32 cevt;       /* current event */
-       u8 rsvd1[20];
-       u32 didl[8];    /* supported display devices ID list */
-       u32 cpdl[8];    /* currently presented display list */
-       u32 cadl[8];    /* currently active display list */
-       u32 nadl[8];    /* next active devices list */
-       u32 aslp;       /* ASL sleep time-out */
-       u32 tidx;       /* toggle table index */
-       u32 chpd;       /* current hotplug enable indicator */
-       u32 clid;       /* current lid state*/
-       u32 cdck;       /* current docking state */
-       u32 sxsw;       /* Sx state resume */
-       u32 evts;       /* ASL supported events */
-       u32 cnot;       /* current OS notification */
-       u32 nrdy;       /* driver status */
-       u8 rsvd2[60];
+	u32 drdy;       /* driver readiness */
+	u32 csts;       /* notification status */
+	u32 cevt;       /* current event */
+	u8 rsvd1[20];
+	u32 didl[8];    /* supported display devices ID list */
+	u32 cpdl[8];    /* currently presented display list */
+	u32 cadl[8];    /* currently active display list */
+	u32 nadl[8];    /* next active devices list */
+	u32 aslp;       /* ASL sleep time-out */
+	u32 tidx;       /* toggle table index */
+	u32 chpd;       /* current hotplug enable indicator */
+	u32 clid;       /* current lid state*/
+	u32 cdck;       /* current docking state */
+	u32 sxsw;       /* Sx state resume */
+	u32 evts;       /* ASL supported events */
+	u32 cnot;       /* current OS notification */
+	u32 nrdy;       /* driver status */
+	u8 rsvd2[60];
 } __attribute__((packed));
 
 /* OpRegion mailbox #2: SWSCI */
 struct opregion_swsci {
-       u32 scic;       /* SWSCI command|status|data */
-       u32 parm;       /* command parameters */
-       u32 dslp;       /* driver sleep time-out */
-       u8 rsvd[244];
+	u32 scic;       /* SWSCI command|status|data */
+	u32 parm;       /* command parameters */
+	u32 dslp;       /* driver sleep time-out */
+	u8 rsvd[244];
 } __attribute__((packed));
 
 /* OpRegion mailbox #3: ASLE */
 struct opregion_asle {
-       u32 ardy;       /* driver readiness */
-       u32 aslc;       /* ASLE interrupt command */
-       u32 tche;       /* technology enabled indicator */
-       u32 alsi;       /* current ALS illuminance reading */
-       u32 bclp;       /* backlight brightness to set */
-       u32 pfit;       /* panel fitting state */
-       u32 cblv;       /* current brightness level */
-       u16 bclm[20];   /* backlight level duty cycle mapping table */
-       u32 cpfm;       /* current panel fitting mode */
-       u32 epfm;       /* enabled panel fitting modes */
-       u8 plut[74];    /* panel LUT and identifier */
-       u32 pfmb;       /* PWM freq and min brightness */
-       u8 rsvd[102];
+	u32 ardy;       /* driver readiness */
+	u32 aslc;       /* ASLE interrupt command */
+	u32 tche;       /* technology enabled indicator */
+	u32 alsi;       /* current ALS illuminance reading */
+	u32 bclp;       /* backlight brightness to set */
+	u32 pfit;       /* panel fitting state */
+	u32 cblv;       /* current brightness level */
+	u16 bclm[20];   /* backlight level duty cycle mapping table */
+	u32 cpfm;       /* current panel fitting mode */
+	u32 epfm;       /* enabled panel fitting modes */
+	u8 plut[74];    /* panel LUT and identifier */
+	u32 pfmb;       /* PWM freq and min brightness */
+	u8 rsvd[102];
 } __attribute__((packed));
 
 /* ASLE irq request bits */
@@ -361,7 +361,7 @@ static void intel_didl_outputs(struct drm_device *dev)
 
 	list_for_each_entry(acpi_cdev, &acpi_video_bus->children, node) {
 		if (i >= 8) {
-			dev_printk (KERN_ERR, &dev->pdev->dev,
+			dev_printk(KERN_ERR, &dev->pdev->dev,
 				    "More than 8 outputs detected\n");
 			return;
 		}
@@ -387,7 +387,7 @@ blind_set:
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		int output_type = ACPI_OTHER_OUTPUT;
 		if (i >= 8) {
-			dev_printk (KERN_ERR, &dev->pdev->dev,
+			dev_printk(KERN_ERR, &dev->pdev->dev,
 				    "More than 8 outputs detected\n");
 			return;
 		}
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index d36038086826..cdf17d4cc1f7 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -117,57 +117,57 @@
 
 /* memory bufferd overlay registers */
 struct overlay_registers {
-    u32 OBUF_0Y;
-    u32 OBUF_1Y;
-    u32 OBUF_0U;
-    u32 OBUF_0V;
-    u32 OBUF_1U;
-    u32 OBUF_1V;
-    u32 OSTRIDE;
-    u32 YRGB_VPH;
-    u32 UV_VPH;
-    u32 HORZ_PH;
-    u32 INIT_PHS;
-    u32 DWINPOS;
-    u32 DWINSZ;
-    u32 SWIDTH;
-    u32 SWIDTHSW;
-    u32 SHEIGHT;
-    u32 YRGBSCALE;
-    u32 UVSCALE;
-    u32 OCLRC0;
-    u32 OCLRC1;
-    u32 DCLRKV;
-    u32 DCLRKM;
-    u32 SCLRKVH;
-    u32 SCLRKVL;
-    u32 SCLRKEN;
-    u32 OCONFIG;
-    u32 OCMD;
-    u32 RESERVED1; /* 0x6C */
-    u32 OSTART_0Y;
-    u32 OSTART_1Y;
-    u32 OSTART_0U;
-    u32 OSTART_0V;
-    u32 OSTART_1U;
-    u32 OSTART_1V;
-    u32 OTILEOFF_0Y;
-    u32 OTILEOFF_1Y;
-    u32 OTILEOFF_0U;
-    u32 OTILEOFF_0V;
-    u32 OTILEOFF_1U;
-    u32 OTILEOFF_1V;
-    u32 FASTHSCALE; /* 0xA0 */
-    u32 UVSCALEV; /* 0xA4 */
-    u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
-    u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
-    u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
-    u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
-    u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
-    u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
-    u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
-    u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
-    u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
+	u32 OBUF_0Y;
+	u32 OBUF_1Y;
+	u32 OBUF_0U;
+	u32 OBUF_0V;
+	u32 OBUF_1U;
+	u32 OBUF_1V;
+	u32 OSTRIDE;
+	u32 YRGB_VPH;
+	u32 UV_VPH;
+	u32 HORZ_PH;
+	u32 INIT_PHS;
+	u32 DWINPOS;
+	u32 DWINSZ;
+	u32 SWIDTH;
+	u32 SWIDTHSW;
+	u32 SHEIGHT;
+	u32 YRGBSCALE;
+	u32 UVSCALE;
+	u32 OCLRC0;
+	u32 OCLRC1;
+	u32 DCLRKV;
+	u32 DCLRKM;
+	u32 SCLRKVH;
+	u32 SCLRKVL;
+	u32 SCLRKEN;
+	u32 OCONFIG;
+	u32 OCMD;
+	u32 RESERVED1; /* 0x6C */
+	u32 OSTART_0Y;
+	u32 OSTART_1Y;
+	u32 OSTART_0U;
+	u32 OSTART_0V;
+	u32 OSTART_1U;
+	u32 OSTART_1V;
+	u32 OTILEOFF_0Y;
+	u32 OTILEOFF_1Y;
+	u32 OTILEOFF_0U;
+	u32 OTILEOFF_0V;
+	u32 OTILEOFF_1U;
+	u32 OTILEOFF_1V;
+	u32 FASTHSCALE; /* 0xA0 */
+	u32 UVSCALEV; /* 0xA4 */
+	u32 RESERVEDC[(0x200 - 0xA8) / 4]; /* 0xA8 - 0x1FC */
+	u16 Y_VCOEFS[N_VERT_Y_TAPS * N_PHASES]; /* 0x200 */
+	u16 RESERVEDD[0x100 / 2 - N_VERT_Y_TAPS * N_PHASES];
+	u16 Y_HCOEFS[N_HORIZ_Y_TAPS * N_PHASES]; /* 0x300 */
+	u16 RESERVEDE[0x200 / 2 - N_HORIZ_Y_TAPS * N_PHASES];
+	u16 UV_VCOEFS[N_VERT_UV_TAPS * N_PHASES]; /* 0x500 */
+	u16 RESERVEDF[0x100 / 2 - N_VERT_UV_TAPS * N_PHASES];
+	u16 UV_HCOEFS[N_HORIZ_UV_TAPS * N_PHASES]; /* 0x600 */
+	u16 RESERVEDG[0x100 / 2 - N_HORIZ_UV_TAPS * N_PHASES];
 };
 
 struct intel_overlay {
@@ -192,7 +192,7 @@ struct intel_overlay {
 static struct overlay_registers *
 intel_overlay_map_regs(struct intel_overlay *overlay)
 {
-        drm_i915_private_t *dev_priv = overlay->dev->dev_private;
+	drm_i915_private_t *dev_priv = overlay->dev->dev_private;
 	struct overlay_registers *regs;
 
 	if (OVERLAY_NEEDS_PHYSICAL(overlay->dev))
@@ -264,7 +264,7 @@ i830_activate_pipe_a(struct drm_device *dev)
 
 	mode = drm_mode_duplicate(dev, &vesa_640x480);
 	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
-	if(!drm_crtc_helper_set_mode(&crtc->base, mode,
+	if (!drm_crtc_helper_set_mode(&crtc->base, mode,
 				       crtc->base.x, crtc->base.y,
 				       crtc->base.fb))
 		return 0;
@@ -332,7 +332,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 				  bool load_polyphase_filter)
 {
 	struct drm_device *dev = overlay->dev;
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct drm_i915_gem_request *request;
 	u32 flip_addr = overlay->flip_addr;
 	u32 tmp;
@@ -359,7 +359,7 @@ static int intel_overlay_continue(struct intel_overlay *overlay,
 	}
 	OUT_RING(MI_OVERLAY_FLIP | MI_OVERLAY_CONTINUE);
 	OUT_RING(flip_addr);
-        ADVANCE_LP_RING();
+	ADVANCE_LP_RING();
 
 	ret = i915_add_request(LP_RING(dev_priv), NULL, request);
 	if (ret) {
@@ -583,7 +583,7 @@ static u32 calc_swidthsw(struct drm_device *dev, u32 offset, u32 width)
 	ret = ((offset + width + mask) >> shift) - (offset >> shift);
 	if (!IS_GEN2(dev))
 		ret <<= 1;
-	ret -=1;
+	ret -= 1;
 	return ret << 2;
 }
 
@@ -817,7 +817,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
 	regs->SWIDTHSW = calc_swidthsw(overlay->dev,
 				       params->offset_Y, tmp_width);
 	regs->SHEIGHT = params->src_h;
-	regs->OBUF_0Y = new_bo->gtt_offset + params-> offset_Y;
+	regs->OBUF_0Y = new_bo->gtt_offset + params->offset_Y;
 	regs->OSTRIDE = params->stride_Y;
 
 	if (params->format & I915_OVERLAY_YUV_PLANAR) {
@@ -917,7 +917,7 @@ static void update_pfit_vscale_ratio(struct intel_overlay *overlay)
 	 * line with the intel documentation for the i965
 	 */
 	if (INTEL_INFO(dev)->gen >= 4) {
-	       	/* on i965 use the PGM reg to read out the autoscaler values */
+		/* on i965 use the PGM reg to read out the autoscaler values */
 		ratio = I915_READ(PFIT_PGM_RATIOS) >> PFIT_VERT_SCALE_SHIFT_965;
 	} else {
 		if (pfit_control & VERT_AUTO_SCALE)
@@ -1098,7 +1098,7 @@ static int intel_panel_fitter_pipe(struct drm_device *dev)
 }
 
 int intel_overlay_put_image(struct drm_device *dev, void *data,
-                            struct drm_file *file_priv)
+			    struct drm_file *file_priv)
 {
 	struct drm_intel_overlay_put_image *put_image_rec = data;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -1301,10 +1301,10 @@ static int check_gamma(struct drm_intel_overlay_attrs *attrs)
 }
 
 int intel_overlay_attrs(struct drm_device *dev, void *data,
-                        struct drm_file *file_priv)
+			struct drm_file *file_priv)
 {
 	struct drm_intel_overlay_attrs *attrs = data;
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay;
 	struct overlay_registers *regs;
 	int ret;
@@ -1393,7 +1393,7 @@ out_unlock:
 
 void intel_setup_overlay(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay;
 	struct drm_i915_gem_object *reg_bo;
 	struct overlay_registers *regs;
@@ -1421,24 +1421,24 @@ void intel_setup_overlay(struct drm_device *dev)
 		ret = i915_gem_attach_phys_object(dev, reg_bo,
 						  I915_GEM_PHYS_OVERLAY_REGS,
 						  PAGE_SIZE);
-                if (ret) {
-                        DRM_ERROR("failed to attach phys overlay regs\n");
-                        goto out_free_bo;
-                }
+		if (ret) {
+			DRM_ERROR("failed to attach phys overlay regs\n");
+			goto out_free_bo;
+		}
 		overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
 	} else {
 		ret = i915_gem_object_pin(reg_bo, PAGE_SIZE, true);
 		if (ret) {
-                        DRM_ERROR("failed to pin overlay register bo\n");
-                        goto out_free_bo;
-                }
+			DRM_ERROR("failed to pin overlay register bo\n");
+			goto out_free_bo;
+		}
 		overlay->flip_addr = reg_bo->gtt_offset;
 
 		ret = i915_gem_object_set_to_gtt_domain(reg_bo, true);
 		if (ret) {
-                        DRM_ERROR("failed to move overlay register bo into the GTT\n");
-                        goto out_unpin_bo;
-                }
+			DRM_ERROR("failed to move overlay register bo into the GTT\n");
+			goto out_unpin_bo;
+		}
 	}
 
 	/* init all values */
@@ -1525,7 +1525,7 @@ static void intel_overlay_unmap_regs_atomic(struct intel_overlay *overlay,
 struct intel_overlay_error_state *
 intel_overlay_capture_error_state(struct drm_device *dev)
 {
-        drm_i915_private_t *dev_priv = dev->dev_private;
+	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_overlay *overlay = dev_priv->overlay;
 	struct intel_overlay_error_state *error;
 	struct overlay_registers __iomem *regs;
diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c
index a9e0c7bcd317..499d4c0dbeeb 100644
--- a/drivers/gpu/drm/i915/intel_panel.c
+++ b/drivers/gpu/drm/i915/intel_panel.c
@@ -84,7 +84,7 @@ intel_pch_panel_fitting(struct drm_device *dev,
 			if (scaled_width > scaled_height) { /* pillar */
 				width = scaled_height / mode->vdisplay;
 				if (width & 1)
-				    	width++;
+					width++;
 				x = (adjusted_mode->hdisplay - width + 1) / 2;
 				y = 0;
 				height = adjusted_mode->vdisplay;
@@ -206,7 +206,7 @@ u32 intel_panel_get_backlight(struct drm_device *dev)
 		if (IS_PINEVIEW(dev))
 			val >>= 1;
 
-		if (is_backlight_combination_mode(dev)){
+		if (is_backlight_combination_mode(dev)) {
 			u8 lbpc;
 
 			val &= ~1;
@@ -226,7 +226,7 @@ static void intel_pch_panel_set_backlight(struct drm_device *dev, u32 level)
 	I915_WRITE(BLC_PWM_CPU_CTL, val | level);
 }
 
-void intel_panel_set_backlight(struct drm_device *dev, u32 level)
+static void intel_panel_actually_set_backlight(struct drm_device *dev, u32 level)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 tmp;
@@ -236,7 +236,7 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
 	if (HAS_PCH_SPLIT(dev))
 		return intel_pch_panel_set_backlight(dev, level);
 
-	if (is_backlight_combination_mode(dev)){
+	if (is_backlight_combination_mode(dev)) {
 		u32 max = intel_panel_get_max_backlight(dev);
 		u8 lbpc;
 
@@ -254,16 +254,21 @@ void intel_panel_set_backlight(struct drm_device *dev, u32 level)
 	I915_WRITE(BLC_PWM_CTL, tmp | level);
 }
 
-void intel_panel_disable_backlight(struct drm_device *dev)
+void intel_panel_set_backlight(struct drm_device *dev, u32 level)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->backlight_enabled) {
-		dev_priv->backlight_level = intel_panel_get_backlight(dev);
-		dev_priv->backlight_enabled = false;
-	}
+	dev_priv->backlight_level = level;
+	if (dev_priv->backlight_enabled)
+		intel_panel_actually_set_backlight(dev, level);
+}
+
+void intel_panel_disable_backlight(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	intel_panel_set_backlight(dev, 0);
+	dev_priv->backlight_enabled = false;
+	intel_panel_actually_set_backlight(dev, 0);
 }
 
 void intel_panel_enable_backlight(struct drm_device *dev)
@@ -273,8 +278,8 @@ void intel_panel_enable_backlight(struct drm_device *dev)
 	if (dev_priv->backlight_level == 0)
 		dev_priv->backlight_level = intel_panel_get_max_backlight(dev);
 
-	intel_panel_set_backlight(dev, dev_priv->backlight_level);
 	dev_priv->backlight_enabled = true;
+	intel_panel_actually_set_backlight(dev, dev_priv->backlight_level);
 }
 
 static void intel_panel_init_backlight(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c30626ea9f93..ca70e2f10445 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -34,6 +34,16 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/*
+ * 965+ support PIPE_CONTROL commands, which provide finer grained control
+ * over cache flushing.
+ */
+struct pipe_control {
+	struct drm_i915_gem_object *obj;
+	volatile u32 *cpu_page;
+	u32 gtt_offset;
+};
+
 static inline int ring_space(struct intel_ring_buffer *ring)
 {
 	int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
@@ -123,6 +133,118 @@ render_ring_flush(struct intel_ring_buffer *ring,
 	return 0;
 }
 
+/**
+ * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
+ * implementing two workarounds on gen6.  From section 1.4.7.1
+ * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
+ *
+ * [DevSNB-C+{W/A}] Before any depth stall flush (including those
+ * produced by non-pipelined state commands), software needs to first
+ * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
+ * 0.
+ *
+ * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
+ * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
+ *
+ * And the workaround for these two requires this workaround first:
+ *
+ * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
+ * BEFORE the pipe-control with a post-sync op and no write-cache
+ * flushes.
+ *
+ * And this last workaround is tricky because of the requirements on
+ * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
+ * volume 2 part 1:
+ *
+ *     "1 of the following must also be set:
+ *      - Render Target Cache Flush Enable ([12] of DW1)
+ *      - Depth Cache Flush Enable ([0] of DW1)
+ *      - Stall at Pixel Scoreboard ([1] of DW1)
+ *      - Depth Stall ([13] of DW1)
+ *      - Post-Sync Operation ([13] of DW1)
+ *      - Notify Enable ([8] of DW1)"
+ *
+ * The cache flushes require the workaround flush that triggered this
+ * one, so we can't use it.  Depth stall would trigger the same.
+ * Post-sync nonzero is what triggered this second workaround, so we
+ * can't use that one either.  Notify enable is IRQs, which aren't
+ * really our business.  That leaves only stall at scoreboard.
+ */
+static int
+intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
+{
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
+			PIPE_CONTROL_STALL_AT_SCOREBOARD);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0); /* low dword */
+	intel_ring_emit(ring, 0); /* high dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, 0);
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int
+gen6_render_ring_flush(struct intel_ring_buffer *ring,
+                         u32 invalidate_domains, u32 flush_domains)
+{
+	u32 flags = 0;
+	struct pipe_control *pc = ring->private;
+	u32 scratch_addr = pc->gtt_offset + 128;
+	int ret;
+
+	/* Force SNB workarounds for PIPE_CONTROL flushes */
+	intel_emit_post_sync_nonzero_flush(ring);
+
+	/* Just flush everything.  Experiments have shown that reducing the
+	 * number of bits based on the write domains has little performance
+	 * impact.
+	 */
+	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+	flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
+	flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
+	intel_ring_emit(ring, flags);
+	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
+	intel_ring_emit(ring, 0); /* lower dword */
+	intel_ring_emit(ring, 0); /* uppwer dword */
+	intel_ring_emit(ring, MI_NOOP);
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
 static void ring_write_tail(struct intel_ring_buffer *ring,
 			    u32 value)
 {
@@ -206,16 +328,6 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 	return 0;
 }
 
-/*
- * 965+ support PIPE_CONTROL commands, which provide finer grained control
- * over cache flushing.
- */
-struct pipe_control {
-	struct drm_i915_gem_object *obj;
-	volatile u32 *cpu_page;
-	u32 gtt_offset;
-};
-
 static int
 init_pipe_control(struct intel_ring_buffer *ring)
 {
@@ -296,8 +408,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 				   GFX_MODE_ENABLE(GFX_REPLAY_MODE));
 	}
 
-	if (INTEL_INFO(dev)->gen >= 6) {
-	} else if (IS_GEN5(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		ret = init_pipe_control(ring);
 		if (ret)
 			return ret;
@@ -315,83 +426,131 @@ static void render_ring_cleanup(struct intel_ring_buffer *ring)
 }
 
 static void
-update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
+update_mboxes(struct intel_ring_buffer *ring,
+	    u32 seqno,
+	    u32 mmio_offset)
 {
-	struct drm_device *dev = ring->dev;
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	int id;
-
-	/*
-	 * cs -> 1 = vcs, 0 = bcs
-	 * vcs -> 1 = bcs, 0 = cs,
-	 * bcs -> 1 = cs, 0 = vcs.
-	 */
-	id = ring - dev_priv->ring;
-	id += 2 - i;
-	id %= 3;
-
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			MI_SEMAPHORE_UPDATE);
+	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
+			      MI_SEMAPHORE_GLOBAL_GTT |
+			      MI_SEMAPHORE_REGISTER |
+			      MI_SEMAPHORE_UPDATE);
 	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring,
-			RING_SYNC_0(dev_priv->ring[id].mmio_base) + 4*i);
+	intel_ring_emit(ring, mmio_offset);
 }
 
+/**
+ * gen6_add_request - Update the semaphore mailbox registers
+ * 
+ * @ring - ring that is adding a request
+ * @seqno - return seqno stuck into the ring
+ *
+ * Update the mailbox registers in the *other* rings with the current seqno.
+ * This acts like a signal in the canonical semaphore.
+ */
 static int
 gen6_add_request(struct intel_ring_buffer *ring,
-		 u32 *result)
+		 u32 *seqno)
 {
-	u32 seqno;
+	u32 mbox1_reg;
+	u32 mbox2_reg;
 	int ret;
 
 	ret = intel_ring_begin(ring, 10);
 	if (ret)
 		return ret;
 
-	seqno = i915_gem_get_seqno(ring->dev);
-	update_semaphore(ring, 0, seqno);
-	update_semaphore(ring, 1, seqno);
+	mbox1_reg = ring->signal_mbox[0];
+	mbox2_reg = ring->signal_mbox[1];
 
+	*seqno = i915_gem_get_seqno(ring->dev);
+
+	update_mboxes(ring, *seqno, mbox1_reg);
+	update_mboxes(ring, *seqno, mbox2_reg);
 	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
 	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
-	intel_ring_emit(ring, seqno);
+	intel_ring_emit(ring, *seqno);
 	intel_ring_emit(ring, MI_USER_INTERRUPT);
 	intel_ring_advance(ring);
 
-	*result = seqno;
 	return 0;
 }
 
-int
-intel_ring_sync(struct intel_ring_buffer *ring,
-		struct intel_ring_buffer *to,
+/**
+ * intel_ring_sync - sync the waiter to the signaller on seqno
+ *
+ * @waiter - ring that is waiting
+ * @signaller - ring which has, or will signal
+ * @seqno - seqno which the waiter will block on
+ */
+static int
+intel_ring_sync(struct intel_ring_buffer *waiter,
+		struct intel_ring_buffer *signaller,
+		int ring,
 		u32 seqno)
 {
 	int ret;
+	u32 dw1 = MI_SEMAPHORE_MBOX |
+		  MI_SEMAPHORE_COMPARE |
+		  MI_SEMAPHORE_REGISTER;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(waiter, 4);
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring,
-			MI_SEMAPHORE_MBOX |
-			MI_SEMAPHORE_REGISTER |
-			intel_ring_sync_index(ring, to) << 17 |
-			MI_SEMAPHORE_COMPARE);
-	intel_ring_emit(ring, seqno);
-	intel_ring_emit(ring, 0);
-	intel_ring_emit(ring, MI_NOOP);
-	intel_ring_advance(ring);
+	intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
+	intel_ring_emit(waiter, seqno);
+	intel_ring_emit(waiter, 0);
+	intel_ring_emit(waiter, MI_NOOP);
+	intel_ring_advance(waiter);
 
 	return 0;
 }
 
+/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
+int
+render_ring_sync_to(struct intel_ring_buffer *waiter,
+		    struct intel_ring_buffer *signaller,
+		    u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       RCS,
+			       seqno);
+}
+
+/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
+int
+gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       VCS,
+			       seqno);
+}
+
+/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
+int
+gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
+		      struct intel_ring_buffer *signaller,
+		      u32 seqno)
+{
+	WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
+	return intel_ring_sync(waiter,
+			       signaller,
+			       BCS,
+			       seqno);
+}
+
+
+
 #define PIPE_CONTROL_FLUSH(ring__, addr__)					\
 do {									\
-	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |		\
-		 PIPE_CONTROL_DEPTH_STALL | 2);				\
+	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
+		 PIPE_CONTROL_DEPTH_STALL);				\
 	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
 	intel_ring_emit(ring__, 0);							\
 	intel_ring_emit(ring__, 0);							\
@@ -419,8 +578,9 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	if (ret)
 		return ret;
 
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
+			PIPE_CONTROL_WRITE_FLUSH |
+			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, seqno);
 	intel_ring_emit(ring, 0);
@@ -435,8 +595,9 @@ pc_render_add_request(struct intel_ring_buffer *ring,
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
 	scratch_addr += 128;
 	PIPE_CONTROL_FLUSH(ring, scratch_addr);
-	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
-			PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
+	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
+			PIPE_CONTROL_WRITE_FLUSH |
+			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 			PIPE_CONTROL_NOTIFY);
 	intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
 	intel_ring_emit(ring, seqno);
@@ -1026,7 +1187,12 @@ static const struct intel_ring_buffer render_ring = {
 	.irq_get		= render_ring_get_irq,
 	.irq_put		= render_ring_put_irq,
 	.dispatch_execbuffer	= render_ring_dispatch_execbuffer,
-       .cleanup			= render_ring_cleanup,
+	.cleanup		= render_ring_cleanup,
+	.sync_to		= render_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_RV,
+				   MI_SEMAPHORE_SYNC_RB},
+	.signal_mbox		= {GEN6_VRSYNC, GEN6_BRSYNC},
 };
 
 /* ring buffer for bit-stream decoder */
@@ -1050,23 +1216,23 @@ static const struct intel_ring_buffer bsd_ring = {
 static void gen6_bsd_ring_write_tail(struct intel_ring_buffer *ring,
 				     u32 value)
 {
-       drm_i915_private_t *dev_priv = ring->dev->dev_private;
+	drm_i915_private_t *dev_priv = ring->dev->dev_private;
 
        /* Every tail move must follow the sequence below */
-       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
-       I915_WRITE(GEN6_BSD_RNCID, 0x0);
-
-       if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
-                               GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
-                       50))
-               DRM_ERROR("timed out waiting for IDLE Indicator\n");
-
-       I915_WRITE_TAIL(ring, value);
-       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
-	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
+	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE);
+	I915_WRITE(GEN6_BSD_RNCID, 0x0);
+
+	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
+		GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0,
+		50))
+	DRM_ERROR("timed out waiting for IDLE Indicator\n");
+
+	I915_WRITE_TAIL(ring, value);
+	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK |
+		GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE);
 }
 
 static int gen6_ring_flush(struct intel_ring_buffer *ring,
@@ -1094,18 +1260,18 @@ static int
 gen6_ring_dispatch_execbuffer(struct intel_ring_buffer *ring,
 			      u32 offset, u32 len)
 {
-       int ret;
+	int ret;
 
-       ret = intel_ring_begin(ring, 2);
-       if (ret)
-	       return ret;
+	ret = intel_ring_begin(ring, 2);
+	if (ret)
+		return ret;
 
-       intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
-       /* bit0-7 is the length on GEN6+ */
-       intel_ring_emit(ring, offset);
-       intel_ring_advance(ring);
+	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965);
+	/* bit0-7 is the length on GEN6+ */
+	intel_ring_emit(ring, offset);
+	intel_ring_advance(ring);
 
-       return 0;
+	return 0;
 }
 
 static bool
@@ -1154,6 +1320,11 @@ static const struct intel_ring_buffer gen6_bsd_ring = {
 	.irq_get		= gen6_bsd_ring_get_irq,
 	.irq_put		= gen6_bsd_ring_put_irq,
 	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.sync_to		= gen6_bsd_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_VR,
+				   MI_SEMAPHORE_SYNC_INVALID,
+				   MI_SEMAPHORE_SYNC_VB},
+	.signal_mbox		= {GEN6_RVSYNC, GEN6_BVSYNC},
 };
 
 /* Blitter support (SandyBridge+) */
@@ -1272,19 +1443,24 @@ static void blt_ring_cleanup(struct intel_ring_buffer *ring)
 }
 
 static const struct intel_ring_buffer gen6_blt_ring = {
-       .name			= "blt ring",
-       .id			= RING_BLT,
-       .mmio_base		= BLT_RING_BASE,
-       .size			= 32 * PAGE_SIZE,
-       .init			= blt_ring_init,
-       .write_tail		= ring_write_tail,
-       .flush			= blt_ring_flush,
-       .add_request		= gen6_add_request,
-       .get_seqno		= ring_get_seqno,
-       .irq_get			= blt_ring_get_irq,
-       .irq_put			= blt_ring_put_irq,
-       .dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
-       .cleanup			= blt_ring_cleanup,
+	.name			= "blt ring",
+	.id			= RING_BLT,
+	.mmio_base		= BLT_RING_BASE,
+	.size			= 32 * PAGE_SIZE,
+	.init			= blt_ring_init,
+	.write_tail		= ring_write_tail,
+	.flush			= blt_ring_flush,
+	.add_request		= gen6_add_request,
+	.get_seqno		= ring_get_seqno,
+	.irq_get		= blt_ring_get_irq,
+	.irq_put		= blt_ring_put_irq,
+	.dispatch_execbuffer	= gen6_ring_dispatch_execbuffer,
+	.cleanup		= blt_ring_cleanup,
+	.sync_to		= gen6_blt_ring_sync_to,
+	.semaphore_register	= {MI_SEMAPHORE_SYNC_BR,
+				   MI_SEMAPHORE_SYNC_BV,
+				   MI_SEMAPHORE_SYNC_INVALID},
+	.signal_mbox		= {GEN6_RBSYNC, GEN6_VBSYNC},
 };
 
 int intel_init_render_ring_buffer(struct drm_device *dev)
@@ -1295,6 +1471,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
 	*ring = render_ring;
 	if (INTEL_INFO(dev)->gen >= 6) {
 		ring->add_request = gen6_add_request;
+		ring->flush = gen6_render_ring_flush;
 		ring->irq_get = gen6_render_ring_get_irq;
 		ring->irq_put = gen6_render_ring_put_irq;
 	} else if (IS_GEN5(dev)) {
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 39ac2b634ae5..68281c96c558 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -2,10 +2,10 @@
 #define _INTEL_RINGBUFFER_H_
 
 enum {
-    RCS = 0x0,
-    VCS,
-    BCS,
-    I915_NUM_RINGS,
+	RCS = 0x0,
+	VCS,
+	BCS,
+	I915_NUM_RINGS,
 };
 
 struct  intel_hw_status_page {
@@ -75,7 +75,12 @@ struct  intel_ring_buffer {
 	int		(*dispatch_execbuffer)(struct intel_ring_buffer *ring,
 					       u32 offset, u32 length);
 	void		(*cleanup)(struct intel_ring_buffer *ring);
+	int		(*sync_to)(struct intel_ring_buffer *ring,
+				   struct intel_ring_buffer *to,
+				   u32 seqno);
 
+	u32		semaphore_register[3]; /*our mbox written by others */
+	u32		signal_mbox[2]; /* mboxes this ring signals to */
 	/**
 	 * List of objects currently involved in rendering from the
 	 * ringbuffer.
@@ -180,9 +185,6 @@ static inline void intel_ring_emit(struct intel_ring_buffer *ring,
 void intel_ring_advance(struct intel_ring_buffer *ring);
 
 u32 intel_ring_get_seqno(struct intel_ring_buffer *ring);
-int intel_ring_sync(struct intel_ring_buffer *ring,
-		    struct intel_ring_buffer *to,
-		    u32 seqno);
 
 int intel_init_render_ring_buffer(struct drm_device *dev);
 int intel_init_bsd_ring_buffer(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 6348c499616f..6db3b1ccb6eb 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -43,7 +43,7 @@
 #define SDVO_TV_MASK   (SDVO_OUTPUT_CVBS0 | SDVO_OUTPUT_SVID0)
 
 #define SDVO_OUTPUT_MASK (SDVO_TMDS_MASK | SDVO_RGB_MASK | SDVO_LVDS_MASK |\
-                         SDVO_TV_MASK)
+			SDVO_TV_MASK)
 
 #define IS_TV(c)	(c->output_flag & SDVO_TV_MASK)
 #define IS_TMDS(c)	(c->output_flag & SDVO_TMDS_MASK)
@@ -288,117 +288,117 @@ static const struct _sdvo_cmd_name {
 	u8 cmd;
 	const char *name;
 } sdvo_cmd_names[] = {
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
-
-    /* Add the op code for SDVO enhancements */
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_VPOS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_2D),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SHARPNESS),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DOT_CRAWL),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DOT_CRAWL),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_CHROMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_LUMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_LUMA_FILTER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_LUMA_FILTER),
-
-    /* HDMI op code */
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA),
-    SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_RESET),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DEVICE_CAPS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FIRMWARE_REV),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TRAINED_INPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_OUTPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_OUTPUTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_IN_OUT_MAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_IN_OUT_MAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ATTACHED_DISPLAYS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HOT_PLUG_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ACTIVE_HOT_PLUG),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ACTIVE_HOT_PLUG),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_INPUT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TARGET_OUTPUT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_INPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OUTPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_TIMINGS_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_CREATE_PREFERRED_INPUT_TIMING),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART1),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PREFERRED_INPUT_TIMING_PART2),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_INPUT_PIXEL_CLOCK_RANGE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OUTPUT_PIXEL_CLOCK_RANGE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_CLOCK_RATE_MULTS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CLOCK_RATE_MULT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CLOCK_RATE_MULT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_TV_FORMATS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_FORMAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_FORMAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_POWER_STATES),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODER_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DISPLAY_POWER_STATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTROL_BUS_SWITCH),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS),
+
+	/* Add the op code for SDVO enhancements */
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_VPOS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SATURATION),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HUE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_CONTRAST),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_BRIGHTNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_H),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_OVERSCAN_V),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_ADAPTIVE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_FLICKER_FILTER_2D),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_SHARPNESS),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_DOT_CRAWL),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_DOT_CRAWL),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_CHROMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_MAX_TV_LUMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_TV_LUMA_FILTER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_TV_LUMA_FILTER),
+
+	/* HDMI op code */
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_SUPP_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_ENCODE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_PIXEL_REPLI),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_PIXEL_REPLI),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY_CAP),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_COLORIMETRY),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_COLORIMETRY),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_ENCRYPT_PREFER),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_AUDIO_STAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_AUDIO_STAT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INDEX),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_INDEX),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_INFO),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_AV_SPLIT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_AV_SPLIT),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_TXRATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_TXRATE),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_SET_HBUF_DATA),
+	SDVO_CMD_NAME_ENTRY(SDVO_CMD_GET_HBUF_DATA),
 };
 
 #define IS_SDVOB(reg)	(reg == SDVOB || reg == PCH_SDVOB)
@@ -1232,8 +1232,7 @@ static bool
 intel_sdvo_multifunc_encoder(struct intel_sdvo *intel_sdvo)
 {
 	/* Is there more than one type of output? */
-	int caps = intel_sdvo->caps.output_flags & 0xf;
-	return caps & -caps;
+	return hweight16(intel_sdvo->caps.output_flags) > 1;
 }
 
 static struct edid *
@@ -1254,7 +1253,7 @@ intel_sdvo_get_analog_edid(struct drm_connector *connector)
 }
 
 enum drm_connector_status
-intel_sdvo_hdmi_sink_detect(struct drm_connector *connector)
+intel_sdvo_tmds_sink_detect(struct drm_connector *connector)
 {
 	struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
 	enum drm_connector_status status;
@@ -1349,7 +1348,7 @@ intel_sdvo_detect(struct drm_connector *connector, bool force)
 	if ((intel_sdvo_connector->output_flag & response) == 0)
 		ret = connector_status_disconnected;
 	else if (IS_TMDS(intel_sdvo_connector))
-		ret = intel_sdvo_hdmi_sink_detect(connector);
+		ret = intel_sdvo_tmds_sink_detect(connector);
 	else {
 		struct edid *edid;
 
@@ -1896,7 +1895,7 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,
 			  struct intel_sdvo *sdvo, u32 reg)
 {
 	struct sdvo_device_mapping *mapping;
-	u8 pin, speed;
+	u8 pin;
 
 	if (IS_SDVOB(reg))
 		mapping = &dev_priv->sdvo_mappings[0];
@@ -1904,18 +1903,16 @@ intel_sdvo_select_i2c_bus(struct drm_i915_private *dev_priv,
 		mapping = &dev_priv->sdvo_mappings[1];
 
 	pin = GMBUS_PORT_DPB;
-	speed = GMBUS_RATE_1MHZ >> 8;
-	if (mapping->initialized) {
+	if (mapping->initialized)
 		pin = mapping->i2c_pin;
-		speed = mapping->i2c_speed;
-	}
 
 	if (pin < GMBUS_NUM_PORTS) {
 		sdvo->i2c = &dev_priv->gmbus[pin].adapter;
-		intel_gmbus_set_speed(sdvo->i2c, speed);
+		intel_gmbus_set_speed(sdvo->i2c, GMBUS_RATE_1MHZ);
 		intel_gmbus_force_bit(sdvo->i2c, true);
-	} else
+	} else {
 		sdvo->i2c = &dev_priv->gmbus[GMBUS_PORT_DPB].adapter;
+	}
 }
 
 static bool
@@ -2206,7 +2203,7 @@ intel_sdvo_output_setup(struct intel_sdvo *intel_sdvo, uint16_t flags)
 			      bytes[0], bytes[1]);
 		return false;
 	}
-	intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1);
+	intel_sdvo->base.crtc_mask = (1 << 0) | (1 << 1) | (1 << 2);
 
 	return true;
 }
@@ -2275,7 +2272,7 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo,
 		DRM_DEBUG_KMS(#name ": max %d, default %d, current %d\n", \
 			      data_value[0], data_value[1], response); \
 	} \
-} while(0)
+} while (0)
 
 static bool
 intel_sdvo_create_enhance_property_tv(struct intel_sdvo *intel_sdvo,
@@ -2442,7 +2439,7 @@ static bool intel_sdvo_create_enhance_property(struct intel_sdvo *intel_sdvo,
 
 	if (IS_TV(intel_sdvo_connector))
 		return intel_sdvo_create_enhance_property_tv(intel_sdvo, intel_sdvo_connector, enhancements.reply);
-	else if(IS_LVDS(intel_sdvo_connector))
+	else if (IS_LVDS(intel_sdvo_connector))
 		return intel_sdvo_create_enhance_property_lvds(intel_sdvo, intel_sdvo_connector, enhancements.reply);
 	else
 		return true;
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
index 4f4e23bc2d16..4aa6f343e49a 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -46,63 +46,63 @@
 #define SDVO_OUTPUT_LAST    (14)
 
 struct intel_sdvo_caps {
-    u8 vendor_id;
-    u8 device_id;
-    u8 device_rev_id;
-    u8 sdvo_version_major;
-    u8 sdvo_version_minor;
-    unsigned int sdvo_inputs_mask:2;
-    unsigned int smooth_scaling:1;
-    unsigned int sharp_scaling:1;
-    unsigned int up_scaling:1;
-    unsigned int down_scaling:1;
-    unsigned int stall_support:1;
-    unsigned int pad:1;
-    u16 output_flags;
+	u8 vendor_id;
+	u8 device_id;
+	u8 device_rev_id;
+	u8 sdvo_version_major;
+	u8 sdvo_version_minor;
+	unsigned int sdvo_inputs_mask:2;
+	unsigned int smooth_scaling:1;
+	unsigned int sharp_scaling:1;
+	unsigned int up_scaling:1;
+	unsigned int down_scaling:1;
+	unsigned int stall_support:1;
+	unsigned int pad:1;
+	u16 output_flags;
 } __attribute__((packed));
 
 /** This matches the EDID DTD structure, more or less */
 struct intel_sdvo_dtd {
-    struct {
-	u16 clock;		/**< pixel clock, in 10kHz units */
-	u8 h_active;		/**< lower 8 bits (pixels) */
-	u8 h_blank;		/**< lower 8 bits (pixels) */
-	u8 h_high;		/**< upper 4 bits each h_active, h_blank */
-	u8 v_active;		/**< lower 8 bits (lines) */
-	u8 v_blank;		/**< lower 8 bits (lines) */
-	u8 v_high;		/**< upper 4 bits each v_active, v_blank */
-    } part1;
-
-    struct {
-	u8 h_sync_off;	/**< lower 8 bits, from hblank start */
-	u8 h_sync_width;	/**< lower 8 bits (pixels) */
-	/** lower 4 bits each vsync offset, vsync width */
-	u8 v_sync_off_width;
-	/**
-	 * 2 high bits of hsync offset, 2 high bits of hsync width,
-	 * bits 4-5 of vsync offset, and 2 high bits of vsync width.
-	 */
-	u8 sync_off_width_high;
-	u8 dtd_flags;
-	u8 sdvo_flags;
-	/** bits 6-7 of vsync offset at bits 6-7 */
-	u8 v_sync_off_high;
-	u8 reserved;
-    } part2;
+	struct {
+		u16 clock;	/**< pixel clock, in 10kHz units */
+		u8 h_active;	/**< lower 8 bits (pixels) */
+		u8 h_blank;	/**< lower 8 bits (pixels) */
+		u8 h_high;	/**< upper 4 bits each h_active, h_blank */
+		u8 v_active;	/**< lower 8 bits (lines) */
+		u8 v_blank;	/**< lower 8 bits (lines) */
+		u8 v_high;	/**< upper 4 bits each v_active, v_blank */
+	} part1;
+
+	struct {
+		u8 h_sync_off;	/**< lower 8 bits, from hblank start */
+		u8 h_sync_width;	/**< lower 8 bits (pixels) */
+		/** lower 4 bits each vsync offset, vsync width */
+		u8 v_sync_off_width;
+		/**
+		* 2 high bits of hsync offset, 2 high bits of hsync width,
+		* bits 4-5 of vsync offset, and 2 high bits of vsync width.
+		*/
+		u8 sync_off_width_high;
+		u8 dtd_flags;
+		u8 sdvo_flags;
+		/** bits 6-7 of vsync offset at bits 6-7 */
+		u8 v_sync_off_high;
+		u8 reserved;
+	} part2;
 } __attribute__((packed));
 
 struct intel_sdvo_pixel_clock_range {
-    u16 min;			/**< pixel clock, in 10kHz units */
-    u16 max;			/**< pixel clock, in 10kHz units */
+	u16 min;	/**< pixel clock, in 10kHz units */
+	u16 max;	/**< pixel clock, in 10kHz units */
 } __attribute__((packed));
 
 struct intel_sdvo_preferred_input_timing_args {
-    u16 clock;
-    u16 width;
-    u16 height;
-    u8	interlace:1;
-    u8	scaled:1;
-    u8	pad:6;
+	u16 clock;
+	u16 width;
+	u16 height;
+	u8	interlace:1;
+	u8	scaled:1;
+	u8	pad:6;
 } __attribute__((packed));
 
 /* I2C registers for SDVO */
@@ -154,9 +154,9 @@ struct intel_sdvo_preferred_input_timing_args {
  */
 #define SDVO_CMD_GET_TRAINED_INPUTS			0x03
 struct intel_sdvo_get_trained_inputs_response {
-    unsigned int input0_trained:1;
-    unsigned int input1_trained:1;
-    unsigned int pad:6;
+	unsigned int input0_trained:1;
+	unsigned int input1_trained:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 /** Returns a struct intel_sdvo_output_flags of active outputs. */
@@ -177,7 +177,7 @@ struct intel_sdvo_get_trained_inputs_response {
  */
 #define SDVO_CMD_GET_IN_OUT_MAP				0x06
 struct intel_sdvo_in_out_map {
-    u16 in0, in1;
+	u16 in0, in1;
 };
 
 /**
@@ -210,10 +210,10 @@ struct intel_sdvo_in_out_map {
 
 #define SDVO_CMD_GET_INTERRUPT_EVENT_SOURCE		0x0f
 struct intel_sdvo_get_interrupt_event_source_response {
-    u16 interrupt_status;
-    unsigned int ambient_light_interrupt:1;
-    unsigned int hdmi_audio_encrypt_change:1;
-    unsigned int pad:6;
+	u16 interrupt_status;
+	unsigned int ambient_light_interrupt:1;
+	unsigned int hdmi_audio_encrypt_change:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 /**
@@ -225,8 +225,8 @@ struct intel_sdvo_get_interrupt_event_source_response {
  */
 #define SDVO_CMD_SET_TARGET_INPUT			0x10
 struct intel_sdvo_set_target_input_args {
-    unsigned int target_1:1;
-    unsigned int pad:7;
+	unsigned int target_1:1;
+	unsigned int pad:7;
 } __attribute__((packed));
 
 /**
@@ -314,57 +314,57 @@ struct intel_sdvo_set_target_input_args {
 #define SDVO_CMD_GET_SUPPORTED_TV_FORMATS		0x27
 /** 6 bytes of bit flags for TV formats shared by all TV format functions */
 struct intel_sdvo_tv_format {
-    unsigned int ntsc_m:1;
-    unsigned int ntsc_j:1;
-    unsigned int ntsc_443:1;
-    unsigned int pal_b:1;
-    unsigned int pal_d:1;
-    unsigned int pal_g:1;
-    unsigned int pal_h:1;
-    unsigned int pal_i:1;
-
-    unsigned int pal_m:1;
-    unsigned int pal_n:1;
-    unsigned int pal_nc:1;
-    unsigned int pal_60:1;
-    unsigned int secam_b:1;
-    unsigned int secam_d:1;
-    unsigned int secam_g:1;
-    unsigned int secam_k:1;
-
-    unsigned int secam_k1:1;
-    unsigned int secam_l:1;
-    unsigned int secam_60:1;
-    unsigned int hdtv_std_smpte_240m_1080i_59:1;
-    unsigned int hdtv_std_smpte_240m_1080i_60:1;
-    unsigned int hdtv_std_smpte_260m_1080i_59:1;
-    unsigned int hdtv_std_smpte_260m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080i_50:1;
-
-    unsigned int hdtv_std_smpte_274m_1080i_59:1;
-    unsigned int hdtv_std_smpte_274m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080p_23:1;
-    unsigned int hdtv_std_smpte_274m_1080p_24:1;
-    unsigned int hdtv_std_smpte_274m_1080p_25:1;
-    unsigned int hdtv_std_smpte_274m_1080p_29:1;
-    unsigned int hdtv_std_smpte_274m_1080p_30:1;
-    unsigned int hdtv_std_smpte_274m_1080p_50:1;
-
-    unsigned int hdtv_std_smpte_274m_1080p_59:1;
-    unsigned int hdtv_std_smpte_274m_1080p_60:1;
-    unsigned int hdtv_std_smpte_295m_1080i_50:1;
-    unsigned int hdtv_std_smpte_295m_1080p_50:1;
-    unsigned int hdtv_std_smpte_296m_720p_59:1;
-    unsigned int hdtv_std_smpte_296m_720p_60:1;
-    unsigned int hdtv_std_smpte_296m_720p_50:1;
-    unsigned int hdtv_std_smpte_293m_480p_59:1;
-
-    unsigned int hdtv_std_smpte_170m_480i_59:1;
-    unsigned int hdtv_std_iturbt601_576i_50:1;
-    unsigned int hdtv_std_iturbt601_576p_50:1;
-    unsigned int hdtv_std_eia_7702a_480i_60:1;
-    unsigned int hdtv_std_eia_7702a_480p_60:1;
-    unsigned int pad:3;
+	unsigned int ntsc_m:1;
+	unsigned int ntsc_j:1;
+	unsigned int ntsc_443:1;
+	unsigned int pal_b:1;
+	unsigned int pal_d:1;
+	unsigned int pal_g:1;
+	unsigned int pal_h:1;
+	unsigned int pal_i:1;
+
+	unsigned int pal_m:1;
+	unsigned int pal_n:1;
+	unsigned int pal_nc:1;
+	unsigned int pal_60:1;
+	unsigned int secam_b:1;
+	unsigned int secam_d:1;
+	unsigned int secam_g:1;
+	unsigned int secam_k:1;
+
+	unsigned int secam_k1:1;
+	unsigned int secam_l:1;
+	unsigned int secam_60:1;
+	unsigned int hdtv_std_smpte_240m_1080i_59:1;
+	unsigned int hdtv_std_smpte_240m_1080i_60:1;
+	unsigned int hdtv_std_smpte_260m_1080i_59:1;
+	unsigned int hdtv_std_smpte_260m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080i_50:1;
+
+	unsigned int hdtv_std_smpte_274m_1080i_59:1;
+	unsigned int hdtv_std_smpte_274m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080p_23:1;
+	unsigned int hdtv_std_smpte_274m_1080p_24:1;
+	unsigned int hdtv_std_smpte_274m_1080p_25:1;
+	unsigned int hdtv_std_smpte_274m_1080p_29:1;
+	unsigned int hdtv_std_smpte_274m_1080p_30:1;
+	unsigned int hdtv_std_smpte_274m_1080p_50:1;
+
+	unsigned int hdtv_std_smpte_274m_1080p_59:1;
+	unsigned int hdtv_std_smpte_274m_1080p_60:1;
+	unsigned int hdtv_std_smpte_295m_1080i_50:1;
+	unsigned int hdtv_std_smpte_295m_1080p_50:1;
+	unsigned int hdtv_std_smpte_296m_720p_59:1;
+	unsigned int hdtv_std_smpte_296m_720p_60:1;
+	unsigned int hdtv_std_smpte_296m_720p_50:1;
+	unsigned int hdtv_std_smpte_293m_480p_59:1;
+
+	unsigned int hdtv_std_smpte_170m_480i_59:1;
+	unsigned int hdtv_std_iturbt601_576i_50:1;
+	unsigned int hdtv_std_iturbt601_576p_50:1;
+	unsigned int hdtv_std_eia_7702a_480i_60:1;
+	unsigned int hdtv_std_eia_7702a_480p_60:1;
+	unsigned int pad:3;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_TV_FORMAT				0x28
@@ -374,53 +374,53 @@ struct intel_sdvo_tv_format {
 /** Returns the resolutiosn that can be used with the given TV format */
 #define SDVO_CMD_GET_SDTV_RESOLUTION_SUPPORT		0x83
 struct intel_sdvo_sdtv_resolution_request {
-    unsigned int ntsc_m:1;
-    unsigned int ntsc_j:1;
-    unsigned int ntsc_443:1;
-    unsigned int pal_b:1;
-    unsigned int pal_d:1;
-    unsigned int pal_g:1;
-    unsigned int pal_h:1;
-    unsigned int pal_i:1;
-
-    unsigned int pal_m:1;
-    unsigned int pal_n:1;
-    unsigned int pal_nc:1;
-    unsigned int pal_60:1;
-    unsigned int secam_b:1;
-    unsigned int secam_d:1;
-    unsigned int secam_g:1;
-    unsigned int secam_k:1;
-
-    unsigned int secam_k1:1;
-    unsigned int secam_l:1;
-    unsigned int secam_60:1;
-    unsigned int pad:5;
+	unsigned int ntsc_m:1;
+	unsigned int ntsc_j:1;
+	unsigned int ntsc_443:1;
+	unsigned int pal_b:1;
+	unsigned int pal_d:1;
+	unsigned int pal_g:1;
+	unsigned int pal_h:1;
+	unsigned int pal_i:1;
+
+	unsigned int pal_m:1;
+	unsigned int pal_n:1;
+	unsigned int pal_nc:1;
+	unsigned int pal_60:1;
+	unsigned int secam_b:1;
+	unsigned int secam_d:1;
+	unsigned int secam_g:1;
+	unsigned int secam_k:1;
+
+	unsigned int secam_k1:1;
+	unsigned int secam_l:1;
+	unsigned int secam_60:1;
+	unsigned int pad:5;
 } __attribute__((packed));
 
 struct intel_sdvo_sdtv_resolution_reply {
-    unsigned int res_320x200:1;
-    unsigned int res_320x240:1;
-    unsigned int res_400x300:1;
-    unsigned int res_640x350:1;
-    unsigned int res_640x400:1;
-    unsigned int res_640x480:1;
-    unsigned int res_704x480:1;
-    unsigned int res_704x576:1;
-
-    unsigned int res_720x350:1;
-    unsigned int res_720x400:1;
-    unsigned int res_720x480:1;
-    unsigned int res_720x540:1;
-    unsigned int res_720x576:1;
-    unsigned int res_768x576:1;
-    unsigned int res_800x600:1;
-    unsigned int res_832x624:1;
-
-    unsigned int res_920x766:1;
-    unsigned int res_1024x768:1;
-    unsigned int res_1280x1024:1;
-    unsigned int pad:5;
+	unsigned int res_320x200:1;
+	unsigned int res_320x240:1;
+	unsigned int res_400x300:1;
+	unsigned int res_640x350:1;
+	unsigned int res_640x400:1;
+	unsigned int res_640x480:1;
+	unsigned int res_704x480:1;
+	unsigned int res_704x576:1;
+
+	unsigned int res_720x350:1;
+	unsigned int res_720x400:1;
+	unsigned int res_720x480:1;
+	unsigned int res_720x540:1;
+	unsigned int res_720x576:1;
+	unsigned int res_768x576:1;
+	unsigned int res_800x600:1;
+	unsigned int res_832x624:1;
+
+	unsigned int res_920x766:1;
+	unsigned int res_1024x768:1;
+	unsigned int res_1280x1024:1;
+	unsigned int pad:5;
 } __attribute__((packed));
 
 /* Get supported resolution with squire pixel aspect ratio that can be
@@ -428,90 +428,90 @@ struct intel_sdvo_sdtv_resolution_reply {
 #define SDVO_CMD_GET_SCALED_HDTV_RESOLUTION_SUPPORT		0x85
 
 struct intel_sdvo_hdtv_resolution_request {
-    unsigned int hdtv_std_smpte_240m_1080i_59:1;
-    unsigned int hdtv_std_smpte_240m_1080i_60:1;
-    unsigned int hdtv_std_smpte_260m_1080i_59:1;
-    unsigned int hdtv_std_smpte_260m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080i_50:1;
-    unsigned int hdtv_std_smpte_274m_1080i_59:1;
-    unsigned int hdtv_std_smpte_274m_1080i_60:1;
-    unsigned int hdtv_std_smpte_274m_1080p_23:1;
-
-    unsigned int hdtv_std_smpte_274m_1080p_24:1;
-    unsigned int hdtv_std_smpte_274m_1080p_25:1;
-    unsigned int hdtv_std_smpte_274m_1080p_29:1;
-    unsigned int hdtv_std_smpte_274m_1080p_30:1;
-    unsigned int hdtv_std_smpte_274m_1080p_50:1;
-    unsigned int hdtv_std_smpte_274m_1080p_59:1;
-    unsigned int hdtv_std_smpte_274m_1080p_60:1;
-    unsigned int hdtv_std_smpte_295m_1080i_50:1;
-
-    unsigned int hdtv_std_smpte_295m_1080p_50:1;
-    unsigned int hdtv_std_smpte_296m_720p_59:1;
-    unsigned int hdtv_std_smpte_296m_720p_60:1;
-    unsigned int hdtv_std_smpte_296m_720p_50:1;
-    unsigned int hdtv_std_smpte_293m_480p_59:1;
-    unsigned int hdtv_std_smpte_170m_480i_59:1;
-    unsigned int hdtv_std_iturbt601_576i_50:1;
-    unsigned int hdtv_std_iturbt601_576p_50:1;
-
-    unsigned int hdtv_std_eia_7702a_480i_60:1;
-    unsigned int hdtv_std_eia_7702a_480p_60:1;
-    unsigned int pad:6;
+	unsigned int hdtv_std_smpte_240m_1080i_59:1;
+	unsigned int hdtv_std_smpte_240m_1080i_60:1;
+	unsigned int hdtv_std_smpte_260m_1080i_59:1;
+	unsigned int hdtv_std_smpte_260m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080i_50:1;
+	unsigned int hdtv_std_smpte_274m_1080i_59:1;
+	unsigned int hdtv_std_smpte_274m_1080i_60:1;
+	unsigned int hdtv_std_smpte_274m_1080p_23:1;
+
+	unsigned int hdtv_std_smpte_274m_1080p_24:1;
+	unsigned int hdtv_std_smpte_274m_1080p_25:1;
+	unsigned int hdtv_std_smpte_274m_1080p_29:1;
+	unsigned int hdtv_std_smpte_274m_1080p_30:1;
+	unsigned int hdtv_std_smpte_274m_1080p_50:1;
+	unsigned int hdtv_std_smpte_274m_1080p_59:1;
+	unsigned int hdtv_std_smpte_274m_1080p_60:1;
+	unsigned int hdtv_std_smpte_295m_1080i_50:1;
+
+	unsigned int hdtv_std_smpte_295m_1080p_50:1;
+	unsigned int hdtv_std_smpte_296m_720p_59:1;
+	unsigned int hdtv_std_smpte_296m_720p_60:1;
+	unsigned int hdtv_std_smpte_296m_720p_50:1;
+	unsigned int hdtv_std_smpte_293m_480p_59:1;
+	unsigned int hdtv_std_smpte_170m_480i_59:1;
+	unsigned int hdtv_std_iturbt601_576i_50:1;
+	unsigned int hdtv_std_iturbt601_576p_50:1;
+
+	unsigned int hdtv_std_eia_7702a_480i_60:1;
+	unsigned int hdtv_std_eia_7702a_480p_60:1;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 struct intel_sdvo_hdtv_resolution_reply {
-    unsigned int res_640x480:1;
-    unsigned int res_800x600:1;
-    unsigned int res_1024x768:1;
-    unsigned int res_1280x960:1;
-    unsigned int res_1400x1050:1;
-    unsigned int res_1600x1200:1;
-    unsigned int res_1920x1440:1;
-    unsigned int res_2048x1536:1;
-
-    unsigned int res_2560x1920:1;
-    unsigned int res_3200x2400:1;
-    unsigned int res_3840x2880:1;
-    unsigned int pad1:5;
-
-    unsigned int res_848x480:1;
-    unsigned int res_1064x600:1;
-    unsigned int res_1280x720:1;
-    unsigned int res_1360x768:1;
-    unsigned int res_1704x960:1;
-    unsigned int res_1864x1050:1;
-    unsigned int res_1920x1080:1;
-    unsigned int res_2128x1200:1;
-
-    unsigned int res_2560x1400:1;
-    unsigned int res_2728x1536:1;
-    unsigned int res_3408x1920:1;
-    unsigned int res_4264x2400:1;
-    unsigned int res_5120x2880:1;
-    unsigned int pad2:3;
-
-    unsigned int res_768x480:1;
-    unsigned int res_960x600:1;
-    unsigned int res_1152x720:1;
-    unsigned int res_1124x768:1;
-    unsigned int res_1536x960:1;
-    unsigned int res_1680x1050:1;
-    unsigned int res_1728x1080:1;
-    unsigned int res_1920x1200:1;
-
-    unsigned int res_2304x1440:1;
-    unsigned int res_2456x1536:1;
-    unsigned int res_3072x1920:1;
-    unsigned int res_3840x2400:1;
-    unsigned int res_4608x2880:1;
-    unsigned int pad3:3;
-
-    unsigned int res_1280x1024:1;
-    unsigned int pad4:7;
-
-    unsigned int res_1280x768:1;
-    unsigned int pad5:7;
+	unsigned int res_640x480:1;
+	unsigned int res_800x600:1;
+	unsigned int res_1024x768:1;
+	unsigned int res_1280x960:1;
+	unsigned int res_1400x1050:1;
+	unsigned int res_1600x1200:1;
+	unsigned int res_1920x1440:1;
+	unsigned int res_2048x1536:1;
+
+	unsigned int res_2560x1920:1;
+	unsigned int res_3200x2400:1;
+	unsigned int res_3840x2880:1;
+	unsigned int pad1:5;
+
+	unsigned int res_848x480:1;
+	unsigned int res_1064x600:1;
+	unsigned int res_1280x720:1;
+	unsigned int res_1360x768:1;
+	unsigned int res_1704x960:1;
+	unsigned int res_1864x1050:1;
+	unsigned int res_1920x1080:1;
+	unsigned int res_2128x1200:1;
+
+	unsigned int res_2560x1400:1;
+	unsigned int res_2728x1536:1;
+	unsigned int res_3408x1920:1;
+	unsigned int res_4264x2400:1;
+	unsigned int res_5120x2880:1;
+	unsigned int pad2:3;
+
+	unsigned int res_768x480:1;
+	unsigned int res_960x600:1;
+	unsigned int res_1152x720:1;
+	unsigned int res_1124x768:1;
+	unsigned int res_1536x960:1;
+	unsigned int res_1680x1050:1;
+	unsigned int res_1728x1080:1;
+	unsigned int res_1920x1200:1;
+
+	unsigned int res_2304x1440:1;
+	unsigned int res_2456x1536:1;
+	unsigned int res_3072x1920:1;
+	unsigned int res_3840x2400:1;
+	unsigned int res_4608x2880:1;
+	unsigned int pad3:3;
+
+	unsigned int res_1280x1024:1;
+	unsigned int pad4:7;
+
+	unsigned int res_1280x768:1;
+	unsigned int pad5:7;
 } __attribute__((packed));
 
 /* Get supported power state returns info for encoder and monitor, rely on
@@ -539,25 +539,25 @@ struct intel_sdvo_hdtv_resolution_reply {
  * The high fields are bits 8:9 of the 10-bit values.
  */
 struct sdvo_panel_power_sequencing {
-    u8 t0;
-    u8 t1;
-    u8 t2;
-    u8 t3;
-    u8 t4;
-
-    unsigned int t0_high:2;
-    unsigned int t1_high:2;
-    unsigned int t2_high:2;
-    unsigned int t3_high:2;
-
-    unsigned int t4_high:2;
-    unsigned int pad:6;
+	u8 t0;
+	u8 t1;
+	u8 t2;
+	u8 t3;
+	u8 t4;
+
+	unsigned int t0_high:2;
+	unsigned int t1_high:2;
+	unsigned int t2_high:2;
+	unsigned int t3_high:2;
+
+	unsigned int t4_high:2;
+	unsigned int pad:6;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_MAX_BACKLIGHT_LEVEL		0x30
 struct sdvo_max_backlight_reply {
-    u8 max_value;
-    u8 default_value;
+	u8 max_value;
+	u8 default_value;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_BACKLIGHT_LEVEL			0x31
@@ -565,16 +565,16 @@ struct sdvo_max_backlight_reply {
 
 #define SDVO_CMD_GET_AMBIENT_LIGHT			0x33
 struct sdvo_get_ambient_light_reply {
-    u16 trip_low;
-    u16 trip_high;
-    u16 value;
+	u16 trip_low;
+	u16 trip_high;
+	u16 value;
 } __attribute__((packed));
 #define SDVO_CMD_SET_AMBIENT_LIGHT			0x34
 struct sdvo_set_ambient_light_reply {
-    u16 trip_low;
-    u16 trip_high;
-    unsigned int enable:1;
-    unsigned int pad:7;
+	u16 trip_low;
+	u16 trip_high;
+	unsigned int enable:1;
+	unsigned int pad:7;
 } __attribute__((packed));
 
 /* Set display power state */
@@ -586,23 +586,23 @@ struct sdvo_set_ambient_light_reply {
 
 #define SDVO_CMD_GET_SUPPORTED_ENHANCEMENTS		0x84
 struct intel_sdvo_enhancements_reply {
-    unsigned int flicker_filter:1;
-    unsigned int flicker_filter_adaptive:1;
-    unsigned int flicker_filter_2d:1;
-    unsigned int saturation:1;
-    unsigned int hue:1;
-    unsigned int brightness:1;
-    unsigned int contrast:1;
-    unsigned int overscan_h:1;
-
-    unsigned int overscan_v:1;
-    unsigned int hpos:1;
-    unsigned int vpos:1;
-    unsigned int sharpness:1;
-    unsigned int dot_crawl:1;
-    unsigned int dither:1;
-    unsigned int tv_chroma_filter:1;
-    unsigned int tv_luma_filter:1;
+	unsigned int flicker_filter:1;
+	unsigned int flicker_filter_adaptive:1;
+	unsigned int flicker_filter_2d:1;
+	unsigned int saturation:1;
+	unsigned int hue:1;
+	unsigned int brightness:1;
+	unsigned int contrast:1;
+	unsigned int overscan_h:1;
+
+	unsigned int overscan_v:1;
+	unsigned int hpos:1;
+	unsigned int vpos:1;
+	unsigned int sharpness:1;
+	unsigned int dot_crawl:1;
+	unsigned int dither:1;
+	unsigned int tv_chroma_filter:1;
+	unsigned int tv_luma_filter:1;
 } __attribute__((packed));
 
 /* Picture enhancement limits below are dependent on the current TV format,
@@ -623,8 +623,8 @@ struct intel_sdvo_enhancements_reply {
 #define SDVO_CMD_GET_MAX_TV_CHROMA_FILTER		0x74
 #define SDVO_CMD_GET_MAX_TV_LUMA_FILTER			0x77
 struct intel_sdvo_enhancement_limits_reply {
-    u16 max_value;
-    u16 default_value;
+	u16 max_value;
+	u16 default_value;
 } __attribute__((packed));
 
 #define SDVO_CMD_GET_LVDS_PANEL_INFORMATION		0x7f
@@ -665,8 +665,8 @@ struct intel_sdvo_enhancement_limits_reply {
 #define SDVO_CMD_GET_TV_LUMA_FILTER			0x78
 #define SDVO_CMD_SET_TV_LUMA_FILTER			0x79
 struct intel_sdvo_enhancements_arg {
-    u16 value;
-}__attribute__((packed));
+	u16 value;
+} __attribute__((packed));
 
 #define SDVO_CMD_GET_DOT_CRAWL				0x70
 #define SDVO_CMD_SET_DOT_CRAWL				0x71
@@ -717,7 +717,7 @@ struct intel_sdvo_enhancements_arg {
 #define SDVO_CMD_GET_AUDIO_TX_INFO	0x9c
 #define SDVO_NEED_TO_STALL  (1 << 7)
 
-struct intel_sdvo_encode{
-    u8 dvi_rev;
-    u8 hdmi_rev;
+struct intel_sdvo_encode {
+	u8 dvi_rev;
+	u8 hdmi_rev;
 } __attribute__ ((packed));
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 210d570fd516..f3c6a9a8b081 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -194,10 +194,10 @@ static const u32 filter_table[] = {
  *
  *     if (f >= 1) {
  *         exp = 0x7;
- * 	   mant = 1 << 8;
+ *	   mant = 1 << 8;
  *     } else {
  *         for (exp = 0; exp < 3 && f < 0.5; exp++)
- * 	       f *= 2.0;
+ *	   f *= 2.0;
  *         mant = (f * (1 << 9) + 0.5);
  *         if (mant >= (1 << 9))
  *             mant = (1 << 9) - 1;
@@ -430,7 +430,7 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -472,7 +472,7 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1 = 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -515,7 +515,7 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1	= 6,	    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena      = true,	    .veq_start_f1	= 0,
 		.veq_start_f2 = 1,	    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -558,7 +558,7 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1	= 6,		    .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 20,		    .vi_end_f2		= 21,
@@ -602,14 +602,14 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1	= 6,	   .vsync_start_f2	= 7,
 		.vsync_len	= 6,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,		    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,		    .veq_len		= 18,
 
 		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
 		.nbr_end	= 286,
 
 		.burst_ena	= true,
-		.hburst_start = 73,	    	    .hburst_len		= 34,
+		.hburst_start = 73,	    .hburst_len		= 34,
 		.vburst_start_f1 = 8,	    .vburst_end_f1	= 285,
 		.vburst_start_f2 = 8,	    .vburst_end_f2	= 286,
 		.vburst_start_f3 = 9,	    .vburst_end_f3	= 286,
@@ -646,7 +646,7 @@ static const struct tv_mode tv_modes[] = {
 		.vsync_start_f1	= 5,	    .vsync_start_f2	= 6,
 		.vsync_len	= 5,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 0,
+		.veq_ena	= true,	    .veq_start_f1	= 0,
 		.veq_start_f2	= 1,	    .veq_len		= 15,
 
 		.vi_end_f1	= 24,		    .vi_end_f2		= 25,
@@ -675,7 +675,7 @@ static const struct tv_mode tv_modes[] = {
 	},
 	{
 		.name       = "480p@59.94Hz",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 59940,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -683,7 +683,7 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 64,               .hblank_end         = 122,
 		.hblank_start   = 842,              .htotal             = 857,
 
-		.progressive    = true,.trilevel_sync = false,
+		.progressive    = true,		    .trilevel_sync = false,
 
 		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
 		.vsync_len      = 12,
@@ -699,7 +699,7 @@ static const struct tv_mode tv_modes[] = {
 	},
 	{
 		.name       = "480p@60Hz",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 60000,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -707,7 +707,7 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 64,               .hblank_end         = 122,
 		.hblank_start   = 842,              .htotal             = 856,
 
-		.progressive    = true,.trilevel_sync = false,
+		.progressive    = true,		    .trilevel_sync = false,
 
 		.vsync_start_f1 = 12,               .vsync_start_f2     = 12,
 		.vsync_len      = 12,
@@ -723,7 +723,7 @@ static const struct tv_mode tv_modes[] = {
 	},
 	{
 		.name       = "576p",
-		.clock 	= 107520,
+		.clock		= 107520,
 		.refresh	= 50000,
 		.oversample     = TV_OVERSAMPLE_4X,
 		.component_only = 1,
@@ -755,7 +755,7 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1649,
 
-		.progressive    = true, 	    .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -779,7 +779,7 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1651,
 
-		.progressive    = true, 	    .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -803,7 +803,7 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 80,               .hblank_end         = 300,
 		.hblank_start   = 1580,             .htotal             = 1979,
 
-		.progressive    = true, 	        .trilevel_sync = true,
+		.progressive	= true,		    .trilevel_sync = true,
 
 		.vsync_start_f1 = 10,               .vsync_start_f2     = 10,
 		.vsync_len      = 10,
@@ -828,12 +828,12 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2639,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	  .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,              .vsync_start_f2     = 5,
 		.vsync_len      = 10,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_ena	= true,	    .veq_start_f1	= 4,
 		.veq_start_f2   = 4,	    .veq_len		= 10,
 
 
@@ -854,12 +854,12 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2199,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	    .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,               .vsync_start_f2     = 5,
 		.vsync_len      = 10,
 
-		.veq_ena	= true,		    .veq_start_f1    	= 4,
+		.veq_ena	= true,		    .veq_start_f1	= 4,
 		.veq_start_f2	= 4,		    .veq_len		= 10,
 
 
@@ -880,16 +880,16 @@ static const struct tv_mode tv_modes[] = {
 		.hsync_end      = 88,               .hblank_end         = 235,
 		.hblank_start   = 2155,             .htotal             = 2201,
 
-		.progressive    = false, 	    .trilevel_sync = true,
+		.progressive	= false,	    .trilevel_sync = true,
 
 		.vsync_start_f1 = 4,            .vsync_start_f2    = 5,
 		.vsync_len      = 10,
 
 		.veq_ena	= true,		    .veq_start_f1	= 4,
-		.veq_start_f2 = 4,	    	    .veq_len = 10,
+		.veq_start_f2	= 4,		.veq_len	  = 10,
 
 
-		.vi_end_f1      = 21,           .vi_end_f2         	= 22,
+		.vi_end_f1	= 21,		.vi_end_f2	  = 22,
 		.nbr_end        = 539,
 
 		.burst_ena      = false,
@@ -916,7 +916,7 @@ intel_tv_dpms(struct drm_encoder *encoder, int mode)
 	struct drm_device *dev = encoder->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
-	switch(mode) {
+	switch (mode) {
 	case DRM_MODE_DPMS_ON:
 		I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE);
 		break;
@@ -933,7 +933,7 @@ intel_tv_mode_lookup(const char *tv_format)
 {
 	int i;
 
-	for (i = 0; i < sizeof(tv_modes) / sizeof (tv_modes[0]); i++) {
+	for (i = 0; i < sizeof(tv_modes) / sizeof(tv_modes[0]); i++) {
 		const struct tv_mode *tv_mode = &tv_modes[i];
 
 		if (!strcmp(tv_format, tv_mode->name))
@@ -1128,7 +1128,7 @@ intel_tv_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	if (color_conversion) {
 		I915_WRITE(TV_CSC_Y, (color_conversion->ry << 16) |
 			   color_conversion->gy);
-		I915_WRITE(TV_CSC_Y2,(color_conversion->by << 16) |
+		I915_WRITE(TV_CSC_Y2, (color_conversion->by << 16) |
 			   color_conversion->ay);
 		I915_WRITE(TV_CSC_U, (color_conversion->ru << 16) |
 			   color_conversion->gu);
@@ -1232,7 +1232,7 @@ static const struct drm_display_mode reported_modes[] = {
  * \return false if TV is disconnected.
  */
 static int
-intel_tv_detect_type (struct intel_tv *intel_tv,
+intel_tv_detect_type(struct intel_tv *intel_tv,
 		      struct drm_connector *connector)
 {
 	struct drm_encoder *encoder = &intel_tv->base.base;
@@ -1486,7 +1486,7 @@ intel_tv_get_modes(struct drm_connector *connector)
 }
 
 static void
-intel_tv_destroy (struct drm_connector *connector)
+intel_tv_destroy(struct drm_connector *connector)
 {
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index 0583677e4581..35ef5b1e3566 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -21,16 +21,17 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
              nv40_grctx.o nv50_grctx.o nvc0_grctx.o \
              nv84_crypt.o \
              nva3_copy.o nvc0_copy.o \
-             nv40_mpeg.o nv50_mpeg.o \
+             nv31_mpeg.o nv50_mpeg.o \
              nv04_instmem.o nv50_instmem.o nvc0_instmem.o \
-             nv50_evo.o nv50_crtc.o nv50_dac.o nv50_sor.o \
-             nv50_cursor.o nv50_display.o \
              nv04_dac.o nv04_dfp.o nv04_tv.o nv17_tv.o nv17_tv_modes.o \
              nv04_crtc.o nv04_display.o nv04_cursor.o \
+             nv50_evo.o nv50_crtc.o nv50_dac.o nv50_sor.o \
+             nv50_cursor.o nv50_display.o \
+             nvd0_display.o \
              nv04_fbcon.o nv50_fbcon.o nvc0_fbcon.o \
              nv10_gpio.o nv50_gpio.o \
 	     nv50_calc.o \
-	     nv04_pm.o nv50_pm.o nva3_pm.o \
+	     nv04_pm.o nv40_pm.o nv50_pm.o nva3_pm.o nvc0_pm.o \
 	     nv50_vram.o nvc0_vram.o \
 	     nv50_vm.o nvc0_vm.o
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 00a55dfdba82..fa22b28e8777 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -37,8 +37,10 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 #include "nouveau_reg.h"
+#include "nouveau_encoder.h"
 
-static int nv40_get_intensity(struct backlight_device *bd)
+static int
+nv40_get_intensity(struct backlight_device *bd)
 {
 	struct drm_device *dev = bl_get_data(bd);
 	int val = (nv_rd32(dev, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK)
@@ -47,7 +49,8 @@ static int nv40_get_intensity(struct backlight_device *bd)
 	return val;
 }
 
-static int nv40_set_intensity(struct backlight_device *bd)
+static int
+nv40_set_intensity(struct backlight_device *bd)
 {
 	struct drm_device *dev = bl_get_data(bd);
 	int val = bd->props.brightness;
@@ -65,30 +68,8 @@ static const struct backlight_ops nv40_bl_ops = {
 	.update_status = nv40_set_intensity,
 };
 
-static int nv50_get_intensity(struct backlight_device *bd)
-{
-	struct drm_device *dev = bl_get_data(bd);
-
-	return nv_rd32(dev, NV50_PDISPLAY_SOR_BACKLIGHT);
-}
-
-static int nv50_set_intensity(struct backlight_device *bd)
-{
-	struct drm_device *dev = bl_get_data(bd);
-	int val = bd->props.brightness;
-
-	nv_wr32(dev, NV50_PDISPLAY_SOR_BACKLIGHT,
-		val | NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE);
-	return 0;
-}
-
-static const struct backlight_ops nv50_bl_ops = {
-	.options = BL_CORE_SUSPENDRESUME,
-	.get_brightness = nv50_get_intensity,
-	.update_status = nv50_set_intensity,
-};
-
-static int nouveau_nv40_backlight_init(struct drm_connector *connector)
+static int
+nv40_backlight_init(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -113,34 +94,129 @@ static int nouveau_nv40_backlight_init(struct drm_connector *connector)
 	return 0;
 }
 
-static int nouveau_nv50_backlight_init(struct drm_connector *connector)
+static int
+nv50_get_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div = 1025;
+	u32 val;
+
+	val  = nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(or));
+	val &= NV50_PDISP_SOR_PWM_CTL_VAL;
+	return ((val * 100) + (div / 2)) / div;
+}
+
+static int
+nv50_set_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div = 1025;
+	u32 val = (bd->props.brightness * div) / 100;
+
+	nv_wr32(dev, NV50_PDISP_SOR_PWM_CTL(or),
+		     NV50_PDISP_SOR_PWM_CTL_NEW | val);
+	return 0;
+}
+
+static const struct backlight_ops nv50_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.get_brightness = nv50_get_intensity,
+	.update_status = nv50_set_intensity,
+};
+
+static int
+nva3_get_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div, val;
+
+	div  = nv_rd32(dev, NV50_PDISP_SOR_PWM_DIV(or));
+	val  = nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(or));
+	val &= NVA3_PDISP_SOR_PWM_CTL_VAL;
+	if (div && div >= val)
+		return ((val * 100) + (div / 2)) / div;
+
+	return 100;
+}
+
+static int
+nva3_set_intensity(struct backlight_device *bd)
+{
+	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
+	struct drm_device *dev = nv_encoder->base.base.dev;
+	int or = nv_encoder->or;
+	u32 div, val;
+
+	div = nv_rd32(dev, NV50_PDISP_SOR_PWM_DIV(or));
+	val = (bd->props.brightness * div) / 100;
+	if (div) {
+		nv_wr32(dev, NV50_PDISP_SOR_PWM_CTL(or), val |
+			     NV50_PDISP_SOR_PWM_CTL_NEW |
+			     NVA3_PDISP_SOR_PWM_CTL_UNK);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static const struct backlight_ops nva3_bl_ops = {
+	.options = BL_CORE_SUSPENDRESUME,
+	.get_brightness = nva3_get_intensity,
+	.update_status = nva3_set_intensity,
+};
+
+static int
+nv50_backlight_init(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_encoder *nv_encoder;
 	struct backlight_properties props;
 	struct backlight_device *bd;
+	const struct backlight_ops *ops;
+
+	nv_encoder = find_encoder(connector, OUTPUT_LVDS);
+	if (!nv_encoder) {
+		nv_encoder = find_encoder(connector, OUTPUT_DP);
+		if (!nv_encoder)
+			return -ENODEV;
+	}
 
-	if (!nv_rd32(dev, NV50_PDISPLAY_SOR_BACKLIGHT))
+	if (!nv_rd32(dev, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
 		return 0;
 
+	if (dev_priv->chipset <= 0xa0 ||
+	    dev_priv->chipset == 0xaa ||
+	    dev_priv->chipset == 0xac)
+		ops = &nv50_bl_ops;
+	else
+		ops = &nva3_bl_ops;
+
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_RAW;
-	props.max_brightness = 1025;
-	bd = backlight_device_register("nv_backlight", &connector->kdev, dev,
-				       &nv50_bl_ops, &props);
+	props.max_brightness = 100;
+	bd = backlight_device_register("nv_backlight", &connector->kdev,
+				       nv_encoder, ops, &props);
 	if (IS_ERR(bd))
 		return PTR_ERR(bd);
 
 	dev_priv->backlight = bd;
-	bd->props.brightness = nv50_get_intensity(bd);
+	bd->props.brightness = bd->ops->get_brightness(bd);
 	backlight_update_status(bd);
 	return 0;
 }
 
-int nouveau_backlight_init(struct drm_connector *connector)
+int
+nouveau_backlight_init(struct drm_device *dev)
 {
-	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct drm_connector *connector;
 
 #ifdef CONFIG_ACPI
 	if (acpi_video_backlight_support()) {
@@ -150,21 +226,28 @@ int nouveau_backlight_init(struct drm_connector *connector)
 	}
 #endif
 
-	switch (dev_priv->card_type) {
-	case NV_40:
-		return nouveau_nv40_backlight_init(connector);
-	case NV_50:
-		return nouveau_nv50_backlight_init(connector);
-	default:
-		break;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
+		    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
+			continue;
+
+		switch (dev_priv->card_type) {
+		case NV_40:
+			return nv40_backlight_init(connector);
+		case NV_50:
+			return nv50_backlight_init(connector);
+		default:
+			break;
+		}
 	}
 
+
 	return 0;
 }
 
-void nouveau_backlight_exit(struct drm_connector *connector)
+void
+nouveau_backlight_exit(struct drm_device *dev)
 {
-	struct drm_device *dev = connector->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
 	if (dev_priv->backlight) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index b311faba34f8..032a82098136 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -296,6 +296,11 @@ munge_reg(struct nvbios *bios, uint32_t reg)
 	if (dev_priv->card_type < NV_50)
 		return reg;
 
+	if (reg & 0x80000000) {
+		BUG_ON(bios->display.crtc < 0);
+		reg += bios->display.crtc * 0x800;
+	}
+
 	if (reg & 0x40000000) {
 		BUG_ON(!dcbent);
 
@@ -304,7 +309,7 @@ munge_reg(struct nvbios *bios, uint32_t reg)
 			reg += 0x00000080;
 	}
 
-	reg &= ~0x60000000;
+	reg &= ~0xe0000000;
 	return reg;
 }
 
@@ -1174,22 +1179,19 @@ init_dp_condition(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 	 *
 	 */
 
-	struct bit_displayport_encoder_table *dpe = NULL;
 	struct dcb_entry *dcb = bios->display.output;
 	struct drm_device *dev = bios->dev;
 	uint8_t cond = bios->data[offset + 1];
-	int dummy;
+	uint8_t *table, *entry;
 
 	BIOSLOG(bios, "0x%04X: subop 0x%02X\n", offset, cond);
 
 	if (!iexec->execute)
 		return 3;
 
-	dpe = nouveau_bios_dp_table(dev, dcb, &dummy);
-	if (!dpe) {
-		NV_ERROR(dev, "0x%04X: INIT_3A: no encoder table!!\n", offset);
+	table = nouveau_dp_bios_data(dev, dcb, &entry);
+	if (!table)
 		return 3;
-	}
 
 	switch (cond) {
 	case 0:
@@ -1203,7 +1205,7 @@ init_dp_condition(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 		break;
 	case 1:
 	case 2:
-		if (!(dpe->unknown & cond))
+		if (!(entry[5] & cond))
 			iexec->execute = false;
 		break;
 	case 5:
@@ -3221,6 +3223,49 @@ init_8d(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 	return 1;
 }
 
+static void
+init_gpio_unknv50(struct nvbios *bios, struct dcb_gpio_entry *gpio)
+{
+	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
+	u32 r, s, v;
+
+	/* Not a clue, needs de-magicing */
+	r = nv50_gpio_ctl[gpio->line >> 4];
+	s = (gpio->line & 0x0f);
+	v = bios_rd32(bios, r) & ~(0x00010001 << s);
+	switch ((gpio->entry & 0x06000000) >> 25) {
+	case 1:
+		v |= (0x00000001 << s);
+		break;
+	case 2:
+		v |= (0x00010000 << s);
+		break;
+	default:
+		break;
+	}
+
+	bios_wr32(bios, r, v);
+}
+
+static void
+init_gpio_unknvd0(struct nvbios *bios, struct dcb_gpio_entry *gpio)
+{
+	u32 v, i;
+
+	v  = bios_rd32(bios, 0x00d610 + (gpio->line * 4));
+	v &= 0xffffff00;
+	v |= (gpio->entry & 0x00ff0000) >> 16;
+	bios_wr32(bios, 0x00d610 + (gpio->line * 4), v);
+
+	i = (gpio->entry & 0x1f000000) >> 24;
+	if (i) {
+		v  = bios_rd32(bios, 0x00d640 + ((i - 1) * 4));
+		v &= 0xffffff00;
+		v |= gpio->line;
+		bios_wr32(bios, 0x00d640 + ((i - 1) * 4), v);
+	}
+}
+
 static int
 init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 {
@@ -3235,7 +3280,6 @@ init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 
 	struct drm_nouveau_private *dev_priv = bios->dev->dev_private;
 	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
-	const uint32_t nv50_gpio_ctl[2] = { 0xe100, 0xe28c };
 	int i;
 
 	if (dev_priv->card_type < NV_50) {
@@ -3248,33 +3292,20 @@ init_gpio(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 
 	for (i = 0; i < bios->dcb.gpio.entries; i++) {
 		struct dcb_gpio_entry *gpio = &bios->dcb.gpio.entry[i];
-		uint32_t r, s, v;
 
 		BIOSLOG(bios, "0x%04X: Entry: 0x%08X\n", offset, gpio->entry);
 
 		BIOSLOG(bios, "0x%04X: set gpio 0x%02x, state %d\n",
 			offset, gpio->tag, gpio->state_default);
-		if (bios->execute)
-			pgpio->set(bios->dev, gpio->tag, gpio->state_default);
 
-		/* The NVIDIA binary driver doesn't appear to actually do
-		 * any of this, my VBIOS does however.
-		 */
-		/* Not a clue, needs de-magicing */
-		r = nv50_gpio_ctl[gpio->line >> 4];
-		s = (gpio->line & 0x0f);
-		v = bios_rd32(bios, r) & ~(0x00010001 << s);
-		switch ((gpio->entry & 0x06000000) >> 25) {
-		case 1:
-			v |= (0x00000001 << s);
-			break;
-		case 2:
-			v |= (0x00010000 << s);
-			break;
-		default:
-			break;
-		}
-		bios_wr32(bios, r, v);
+		if (!bios->execute)
+			continue;
+
+		pgpio->set(bios->dev, gpio->tag, gpio->state_default);
+		if (dev_priv->card_type < NV_D0)
+			init_gpio_unknv50(bios, gpio);
+		else
+			init_gpio_unknvd0(bios, gpio);
 	}
 
 	return 1;
@@ -3737,6 +3768,10 @@ parse_init_table(struct nvbios *bios, uint16_t offset, struct init_exec *iexec)
 	int count = 0, i, ret;
 	uint8_t id;
 
+	/* catch NULL script pointers */
+	if (offset == 0)
+		return 0;
+
 	/*
 	 * Loop until INIT_DONE causes us to break out of the loop
 	 * (or until offset > bios length just in case... )
@@ -4389,86 +4424,37 @@ int nouveau_bios_parse_lvds_table(struct drm_device *dev, int pxclk, bool *dl, b
 	return 0;
 }
 
-static uint8_t *
-bios_output_config_match(struct drm_device *dev, struct dcb_entry *dcbent,
-			 uint16_t record, int record_len, int record_nr,
-			 bool match_link)
+/* BIT 'U'/'d' table encoder subtables have hashes matching them to
+ * a particular set of encoders.
+ *
+ * This function returns true if a particular DCB entry matches.
+ */
+bool
+bios_encoder_match(struct dcb_entry *dcb, u32 hash)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->vbios;
-	uint32_t entry;
-	uint16_t table;
-	int i, v;
+	if ((hash & 0x000000f0) != (dcb->location << 4))
+		return false;
+	if ((hash & 0x0000000f) != dcb->type)
+		return false;
+	if (!(hash & (dcb->or << 16)))
+		return false;
 
-	switch (dcbent->type) {
+	switch (dcb->type) {
 	case OUTPUT_TMDS:
 	case OUTPUT_LVDS:
 	case OUTPUT_DP:
-		break;
-	default:
-		match_link = false;
-		break;
-	}
-
-	for (i = 0; i < record_nr; i++, record += record_len) {
-		table = ROM16(bios->data[record]);
-		if (!table)
-			continue;
-		entry = ROM32(bios->data[table]);
-
-		if (match_link) {
-			v = (entry & 0x00c00000) >> 22;
-			if (!(v & dcbent->sorconf.link))
-				continue;
+		if (hash & 0x00c00000) {
+			if (!(hash & (dcb->sorconf.link << 22)))
+				return false;
 		}
-
-		v = (entry & 0x000f0000) >> 16;
-		if (!(v & dcbent->or))
-			continue;
-
-		v = (entry & 0x000000f0) >> 4;
-		if (v != dcbent->location)
-			continue;
-
-		v = (entry & 0x0000000f);
-		if (v != dcbent->type)
-			continue;
-
-		return &bios->data[table];
-	}
-
-	return NULL;
-}
-
-void *
-nouveau_bios_dp_table(struct drm_device *dev, struct dcb_entry *dcbent,
-		      int *length)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvbios *bios = &dev_priv->vbios;
-	uint8_t *table;
-
-	if (!bios->display.dp_table_ptr) {
-		NV_ERROR(dev, "No pointer to DisplayPort table\n");
-		return NULL;
-	}
-	table = &bios->data[bios->display.dp_table_ptr];
-
-	if (table[0] != 0x20 && table[0] != 0x21) {
-		NV_ERROR(dev, "DisplayPort table version 0x%02x unknown\n",
-			 table[0]);
-		return NULL;
+	default:
+		return true;
 	}
-
-	*length = table[4];
-	return bios_output_config_match(dev, dcbent,
-					bios->display.dp_table_ptr + table[1],
-					table[2], table[3], table[0] >= 0x21);
 }
 
 int
-nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
-			       uint32_t sub, int pxclk)
+nouveau_bios_run_display_table(struct drm_device *dev, u16 type, int pclk,
+			       struct dcb_entry *dcbent, int crtc)
 {
 	/*
 	 * The display script table is located by the BIT 'U' table.
@@ -4498,7 +4484,7 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 	uint8_t *table = &bios->data[bios->display.script_table_ptr];
 	uint8_t *otable = NULL;
 	uint16_t script;
-	int i = 0;
+	int i;
 
 	if (!bios->display.script_table_ptr) {
 		NV_ERROR(dev, "No pointer to output script table\n");
@@ -4550,30 +4536,33 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 
 	NV_DEBUG_KMS(dev, "Searching for output entry for %d %d %d\n",
 			dcbent->type, dcbent->location, dcbent->or);
-	otable = bios_output_config_match(dev, dcbent, table[1] +
-					  bios->display.script_table_ptr,
-					  table[2], table[3], table[0] >= 0x21);
+	for (i = 0; i < table[3]; i++) {
+		otable = ROMPTR(bios, table[table[1] + (i * table[2])]);
+		if (otable && bios_encoder_match(dcbent, ROM32(otable[0])))
+			break;
+	}
+
 	if (!otable) {
 		NV_DEBUG_KMS(dev, "failed to match any output table\n");
 		return 1;
 	}
 
-	if (pxclk < -2 || pxclk > 0) {
+	if (pclk < -2 || pclk > 0) {
 		/* Try to find matching script table entry */
 		for (i = 0; i < otable[5]; i++) {
-			if (ROM16(otable[table[4] + i*6]) == sub)
+			if (ROM16(otable[table[4] + i*6]) == type)
 				break;
 		}
 
 		if (i == otable[5]) {
 			NV_ERROR(dev, "Table 0x%04x not found for %d/%d, "
 				      "using first\n",
-				 sub, dcbent->type, dcbent->or);
+				 type, dcbent->type, dcbent->or);
 			i = 0;
 		}
 	}
 
-	if (pxclk == 0) {
+	if (pclk == 0) {
 		script = ROM16(otable[6]);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "output script 0 not found\n");
@@ -4581,9 +4570,9 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk == -1) {
+	if (pclk == -1) {
 		script = ROM16(otable[8]);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "output script 1 not found\n");
@@ -4591,9 +4580,9 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk == -2) {
+	if (pclk == -2) {
 		if (table[4] >= 12)
 			script = ROM16(otable[10]);
 		else
@@ -4604,31 +4593,31 @@ nouveau_bios_run_display_table(struct drm_device *dev, struct dcb_entry *dcbent,
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing output script 2\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk > 0) {
+	if (pclk > 0) {
 		script = ROM16(otable[table[4] + i*6 + 2]);
 		if (script)
-			script = clkcmptable(bios, script, pxclk);
+			script = clkcmptable(bios, script, pclk);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "clock script 0 not found\n");
 			return 1;
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 0\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	} else
-	if (pxclk < 0) {
+	if (pclk < 0) {
 		script = ROM16(otable[table[4] + i*6 + 4]);
 		if (script)
-			script = clkcmptable(bios, script, -pxclk);
+			script = clkcmptable(bios, script, -pclk);
 		if (!script) {
 			NV_DEBUG_KMS(dev, "clock script 1 not found\n");
 			return 1;
 		}
 
 		NV_DEBUG_KMS(dev, "0x%04X: parsing clock script 1\n", script);
-		nouveau_bios_run_init_table(dev, script, dcbent);
+		nouveau_bios_run_init_table(dev, script, dcbent, crtc);
 	}
 
 	return 0;
@@ -5478,14 +5467,6 @@ parse_bit_U_tbl_entry(struct drm_device *dev, struct nvbios *bios,
 	return 0;
 }
 
-static int
-parse_bit_displayport_tbl_entry(struct drm_device *dev, struct nvbios *bios,
-				struct bit_entry *bitentry)
-{
-	bios->display.dp_table_ptr = ROM16(bios->data[bitentry->offset]);
-	return 0;
-}
-
 struct bit_table {
 	const char id;
 	int (* const parse_fn)(struct drm_device *, struct nvbios *, struct bit_entry *);
@@ -5559,7 +5540,6 @@ parse_bit_structure(struct nvbios *bios, const uint16_t bitoffset)
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('L', lvds));
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('T', tmds));
 	parse_bit_table(bios, bitoffset, &BIT_TABLE('U', U));
-	parse_bit_table(bios, bitoffset, &BIT_TABLE('d', displayport));
 
 	return 0;
 }
@@ -5884,9 +5864,15 @@ parse_dcb_gpio_table(struct nvbios *bios)
 			}
 
 			e->line = (e->entry & 0x0000001f) >> 0;
-			e->state_default = (e->entry & 0x01000000) >> 24;
-			e->state[0] = (e->entry & 0x18000000) >> 27;
-			e->state[1] = (e->entry & 0x60000000) >> 29;
+			if (gpio[0] == 0x40) {
+				e->state_default = (e->entry & 0x01000000) >> 24;
+				e->state[0] = (e->entry & 0x18000000) >> 27;
+				e->state[1] = (e->entry & 0x60000000) >> 29;
+			} else {
+				e->state_default = (e->entry & 0x00000080) >> 7;
+				e->state[0] = (entry[4] >> 4) & 3;
+				e->state[1] = (entry[4] >> 6) & 3;
+			}
 		}
 	}
 
@@ -6156,7 +6142,14 @@ parse_dcb20_entry(struct drm_device *dev, struct dcb_table *dcb,
 	}
 	case OUTPUT_DP:
 		entry->dpconf.sor.link = (conf & 0x00000030) >> 4;
-		entry->dpconf.link_bw = (conf & 0x00e00000) >> 21;
+		switch ((conf & 0x00e00000) >> 21) {
+		case 0:
+			entry->dpconf.link_bw = 162000;
+			break;
+		default:
+			entry->dpconf.link_bw = 270000;
+			break;
+		}
 		switch ((conf & 0x0f000000) >> 24) {
 		case 0xf:
 			entry->dpconf.link_nr = 4;
@@ -6769,7 +6762,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 
 void
 nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
-			    struct dcb_entry *dcbent)
+			    struct dcb_entry *dcbent, int crtc)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nvbios *bios = &dev_priv->vbios;
@@ -6777,11 +6770,22 @@ nouveau_bios_run_init_table(struct drm_device *dev, uint16_t table,
 
 	spin_lock_bh(&bios->lock);
 	bios->display.output = dcbent;
+	bios->display.crtc = crtc;
 	parse_init_table(bios, table, &iexec);
 	bios->display.output = NULL;
 	spin_unlock_bh(&bios->lock);
 }
 
+void
+nouveau_bios_init_exec(struct drm_device *dev, uint16_t table)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	struct init_exec iexec = { true, false };
+
+	parse_init_table(bios, table, &iexec);
+}
+
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -6863,9 +6867,8 @@ nouveau_run_vbios_init(struct drm_device *dev)
 
 	if (dev_priv->card_type >= NV_50) {
 		for (i = 0; i < bios->dcb.entries; i++) {
-			nouveau_bios_run_display_table(dev,
-						       &bios->dcb.entry[i],
-						       0, 0);
+			nouveau_bios_run_display_table(dev, 0, 0,
+						       &bios->dcb.entry[i], -1);
 		}
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 050c314119df..8adb69e4a6b1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -289,8 +289,8 @@ struct nvbios {
 
 	struct {
 		struct dcb_entry *output;
+		int crtc;
 		uint16_t script_table_ptr;
-		uint16_t dp_table_ptr;
 	} display;
 
 	struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 890d50e4d682..7226f419e178 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -956,7 +956,7 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 			break;
 		}
 
-		if (dev_priv->card_type == NV_C0)
+		if (dev_priv->card_type >= NV_C0)
 			page_shift = node->page_shift;
 		else
 			page_shift = 12;
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index b0d753f45bbd..a319d5646ea9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -411,13 +411,17 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
 		return ret;
 	init->channel  = chan->id;
 
-	if (chan->dma.ib_max)
-		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
-					NOUVEAU_GEM_DOMAIN_GART;
-	else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+	if (nouveau_vram_pushbuf == 0) {
+		if (chan->dma.ib_max)
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM |
+						NOUVEAU_GEM_DOMAIN_GART;
+		else if (chan->pushbuf_bo->bo.mem.mem_type == TTM_PL_VRAM)
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
+		else
+			init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+	} else {
 		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_VRAM;
-	else
-		init->pushbuf_domains = NOUVEAU_GEM_DOMAIN_GART;
+	}
 
 	if (dev_priv->card_type < NV_C0) {
 		init->subchan[0].handle = NvM2MF;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 939d4df07777..e0d275e1c96c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -39,7 +39,7 @@
 
 static void nouveau_connector_hotplug(void *, int);
 
-static struct nouveau_encoder *
+struct nouveau_encoder *
 find_encoder(struct drm_connector *connector, int type)
 {
 	struct drm_device *dev = connector->dev;
@@ -116,10 +116,6 @@ nouveau_connector_destroy(struct drm_connector *connector)
 				      nouveau_connector_hotplug, connector);
 	}
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		nouveau_backlight_exit(connector);
-
 	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(connector);
 	drm_connector_cleanup(connector);
@@ -712,11 +708,8 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 	case OUTPUT_TV:
 		return get_slave_funcs(encoder)->mode_valid(encoder, mode);
 	case OUTPUT_DP:
-		if (nv_encoder->dp.link_bw == DP_LINK_BW_2_7)
-			max_clock = nv_encoder->dp.link_nr * 270000;
-		else
-			max_clock = nv_encoder->dp.link_nr * 162000;
-
+		max_clock  = nv_encoder->dp.link_nr;
+		max_clock *= nv_encoder->dp.link_bw;
 		clock = clock * nouveau_connector_bpp(connector) / 8;
 		break;
 	default:
@@ -871,7 +864,6 @@ nouveau_connector_create(struct drm_device *dev, int index)
 					dev->mode_config.scaling_mode_property,
 					nv_connector->scaling_mode);
 		}
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		/* fall-through */
 	case DCB_CONNECTOR_TV_0:
 	case DCB_CONNECTOR_TV_1:
@@ -888,27 +880,20 @@ nouveau_connector_create(struct drm_device *dev, int index)
 				dev->mode_config.dithering_mode_property,
 				nv_connector->use_dithering ?
 				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
-
-		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) {
-			if (dev_priv->card_type >= NV_50)
-				connector->polled = DRM_CONNECTOR_POLL_HPD;
-			else
-				connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-		}
 		break;
 	}
 
-	if (pgpio->irq_register) {
+	if (nv_connector->dcb->gpio_tag != 0xff && pgpio->irq_register) {
 		pgpio->irq_register(dev, nv_connector->dcb->gpio_tag,
 				    nouveau_connector_hotplug, connector);
+
+		connector->polled = DRM_CONNECTOR_POLL_HPD;
+	} else {
+		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 	}
 
 	drm_sysfs_connector_add(connector);
 
-	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS ||
-	    connector->connector_type == DRM_MODE_CONNECTOR_eDP)
-		nouveau_backlight_init(connector);
-
 	dcb->drm = connector;
 	return dcb->drm;
 
@@ -925,22 +910,13 @@ nouveau_connector_hotplug(void *data, int plugged)
 	struct drm_connector *connector = data;
 	struct drm_device *dev = connector->dev;
 
-	NV_INFO(dev, "%splugged %s\n", plugged ? "" : "un",
-		drm_get_connector_name(connector));
-
-	if (connector->encoder && connector->encoder->crtc &&
-	    connector->encoder->crtc->enabled) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(connector->encoder);
-		struct drm_encoder_helper_funcs *helper =
-			connector->encoder->helper_private;
+	NV_DEBUG(dev, "%splugged %s\n", plugged ? "" : "un",
+		 drm_get_connector_name(connector));
 
-		if (nv_encoder->dcb->type == OUTPUT_DP) {
-			if (plugged)
-				helper->dpms(connector->encoder, DRM_MODE_DPMS_ON);
-			else
-				helper->dpms(connector->encoder, DRM_MODE_DPMS_OFF);
-		}
-	}
+	if (plugged)
+		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+	else
+		drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
 
 	drm_helper_hpd_irq_event(dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index cb1ce2a09162..bf8e1289953d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -82,14 +82,13 @@ static inline struct drm_crtc *to_drm_crtc(struct nouveau_crtc *crtc)
 }
 
 int nv50_crtc_create(struct drm_device *dev, int index);
-int nv50_cursor_init(struct nouveau_crtc *);
-void nv50_cursor_fini(struct nouveau_crtc *);
 int nv50_crtc_cursor_set(struct drm_crtc *drm_crtc, struct drm_file *file_priv,
 			 uint32_t buffer_handle, uint32_t width,
 			 uint32_t height);
 int nv50_crtc_cursor_move(struct drm_crtc *drm_crtc, int x, int y);
 
 int nv04_cursor_init(struct nouveau_crtc *);
+int nv50_cursor_init(struct nouveau_crtc *);
 
 struct nouveau_connector *
 nouveau_crtc_connector_get(struct nouveau_crtc *crtc);
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index eb514ea29377..ddbabefb4273 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -105,9 +105,12 @@ nouveau_framebuffer_init(struct drm_device *dev,
 		if (dev_priv->chipset == 0x50)
 			nv_fb->r_format |= (tile_flags << 8);
 
-		if (!tile_flags)
-			nv_fb->r_pitch = 0x00100000 | fb->pitch;
-		else {
+		if (!tile_flags) {
+			if (dev_priv->card_type < NV_D0)
+				nv_fb->r_pitch = 0x00100000 | fb->pitch;
+			else
+				nv_fb->r_pitch = 0x01000000 | fb->pitch;
+		} else {
 			u32 mode = nvbo->tile_mode;
 			if (dev_priv->card_type >= NV_C0)
 				mode >>= 4;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 7beb82a0315d..de5efe71fefd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -28,418 +28,619 @@
 #include "nouveau_i2c.h"
 #include "nouveau_connector.h"
 #include "nouveau_encoder.h"
+#include "nouveau_crtc.h"
+
+/******************************************************************************
+ * aux channel util functions
+ *****************************************************************************/
+#define AUX_DBG(fmt, args...) do {                                             \
+	if (nouveau_reg_debug & NOUVEAU_REG_DEBUG_AUXCH) {                     \
+		NV_PRINTK(KERN_DEBUG, dev, "AUXCH(%d): " fmt, ch, ##args);     \
+	}                                                                      \
+} while (0)
+#define AUX_ERR(fmt, args...) NV_ERROR(dev, "AUXCH(%d): " fmt, ch, ##args)
+
+static void
+auxch_fini(struct drm_device *dev, int ch)
+{
+	nv_mask(dev, 0x00e4e4 + (ch * 0x50), 0x00310000, 0x00000000);
+}
 
 static int
-auxch_rd(struct drm_encoder *encoder, int address, uint8_t *buf, int size)
+auxch_init(struct drm_device *dev, int ch)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_i2c_chan *auxch;
-	int ret;
-
-	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
-	if (!auxch)
-		return -ENODEV;
-
-	ret = nouveau_dp_auxch(auxch, 9, address, buf, size);
-	if (ret)
-		return ret;
+	const u32 unksel = 1; /* nfi which to use, or if it matters.. */
+	const u32 ureq = unksel ? 0x00100000 : 0x00200000;
+	const u32 urep = unksel ? 0x01000000 : 0x02000000;
+	u32 ctrl, timeout;
+
+	/* wait up to 1ms for any previous transaction to be done... */
+	timeout = 1000;
+	do {
+		ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+		udelay(1);
+		if (!timeout--) {
+			AUX_ERR("begin idle timeout 0x%08x", ctrl);
+			return -EBUSY;
+		}
+	} while (ctrl & 0x03010000);
+
+	/* set some magic, and wait up to 1ms for it to appear */
+	nv_mask(dev, 0x00e4e4 + (ch * 0x50), 0x00300000, ureq);
+	timeout = 1000;
+	do {
+		ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+		udelay(1);
+		if (!timeout--) {
+			AUX_ERR("magic wait 0x%08x\n", ctrl);
+			auxch_fini(dev, ch);
+			return -EBUSY;
+		}
+	} while ((ctrl & 0x03000000) != urep);
 
 	return 0;
 }
 
 static int
-auxch_wr(struct drm_encoder *encoder, int address, uint8_t *buf, int size)
+auxch_tx(struct drm_device *dev, int ch, u8 type, u32 addr, u8 *data, u8 size)
 {
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_i2c_chan *auxch;
-	int ret;
+	u32 ctrl, stat, timeout, retries;
+	u32 xbuf[4] = {};
+	int ret, i;
 
-	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
-	if (!auxch)
-		return -ENODEV;
+	AUX_DBG("%d: 0x%08x %d\n", type, addr, size);
 
-	ret = nouveau_dp_auxch(auxch, 8, address, buf, size);
-	return ret;
-}
+	ret = auxch_init(dev, ch);
+	if (ret)
+		goto out;
 
-static int
-nouveau_dp_lane_count_set(struct drm_encoder *encoder, uint8_t cmd)
-{
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
-
-	tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-	tmp &= ~(NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED |
-		 NV50_SOR_DP_CTRL_LANE_MASK);
-	tmp |= ((1 << (cmd & DP_LANE_COUNT_MASK)) - 1) << 16;
-	if (cmd & DP_LANE_COUNT_ENHANCED_FRAME_EN)
-		tmp |= NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED;
-	nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp);
-
-	return auxch_wr(encoder, DP_LANE_COUNT_SET, &cmd, 1);
-}
+	stat = nv_rd32(dev, 0x00e4e8 + (ch * 0x50));
+	if (!(stat & 0x10000000)) {
+		AUX_DBG("sink not detected\n");
+		ret = -ENXIO;
+		goto out;
+	}
 
-static int
-nouveau_dp_link_bw_set(struct drm_encoder *encoder, uint8_t cmd)
-{
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	int reg = 0x614300 + (nv_encoder->or * 0x800);
+	if (!(type & 1)) {
+		memcpy(xbuf, data, size);
+		for (i = 0; i < 16; i += 4) {
+			AUX_DBG("wr 0x%08x\n", xbuf[i / 4]);
+			nv_wr32(dev, 0x00e4c0 + (ch * 0x50) + i, xbuf[i / 4]);
+		}
+	}
 
-	tmp  = nv_rd32(dev, reg);
-	tmp &= 0xfff3ffff;
-	if (cmd == DP_LINK_BW_2_7)
-		tmp |= 0x00040000;
-	nv_wr32(dev, reg, tmp);
+	ctrl  = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+	ctrl &= ~0x0001f0ff;
+	ctrl |= type << 12;
+	ctrl |= size - 1;
+	nv_wr32(dev, 0x00e4e0 + (ch * 0x50), addr);
+
+	/* retry transaction a number of times on failure... */
+	ret = -EREMOTEIO;
+	for (retries = 0; retries < 32; retries++) {
+		/* reset, and delay a while if this is a retry */
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x80000000 | ctrl);
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x00000000 | ctrl);
+		if (retries)
+			udelay(400);
+
+		/* transaction request, wait up to 1ms for it to complete */
+		nv_wr32(dev, 0x00e4e4 + (ch * 0x50), 0x00010000 | ctrl);
+
+		timeout = 1000;
+		do {
+			ctrl = nv_rd32(dev, 0x00e4e4 + (ch * 0x50));
+			udelay(1);
+			if (!timeout--) {
+				AUX_ERR("tx req timeout 0x%08x\n", ctrl);
+				goto out;
+			}
+		} while (ctrl & 0x00010000);
 
-	return auxch_wr(encoder, DP_LINK_BW_SET, &cmd, 1);
-}
+		/* read status, and check if transaction completed ok */
+		stat = nv_mask(dev, 0x00e4e8 + (ch * 0x50), 0, 0);
+		if (!(stat & 0x000f0f00)) {
+			ret = 0;
+			break;
+		}
 
-static int
-nouveau_dp_link_train_set(struct drm_encoder *encoder, int pattern)
-{
-	struct drm_device *dev = encoder->dev;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	uint32_t tmp;
-	uint8_t cmd;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
-	int ret;
+		AUX_DBG("%02d 0x%08x 0x%08x\n", retries, ctrl, stat);
+	}
 
-	tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-	tmp &= ~NV50_SOR_DP_CTRL_TRAINING_PATTERN;
-	tmp |= (pattern << 24);
-	nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp);
+	if (type & 1) {
+		for (i = 0; i < 16; i += 4) {
+			xbuf[i / 4] = nv_rd32(dev, 0x00e4d0 + (ch * 0x50) + i);
+			AUX_DBG("rd 0x%08x\n", xbuf[i / 4]);
+		}
+		memcpy(data, xbuf, size);
+	}
 
-	ret = auxch_rd(encoder, DP_TRAINING_PATTERN_SET, &cmd, 1);
-	if (ret)
-		return ret;
-	cmd &= ~DP_TRAINING_PATTERN_MASK;
-	cmd |= (pattern & DP_TRAINING_PATTERN_MASK);
-	return auxch_wr(encoder, DP_TRAINING_PATTERN_SET, &cmd, 1);
+out:
+	auxch_fini(dev, ch);
+	return ret;
 }
 
-static int
-nouveau_dp_max_voltage_swing(struct drm_encoder *encoder)
+static u32
+dp_link_bw_get(struct drm_device *dev, int or, int link)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int i, dpe_headerlen, max_vs = 0;
-
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
+	u32 ctrl = nv_rd32(dev, 0x614300 + (or * 0x800));
+	if (!(ctrl & 0x000c0000))
+		return 162000;
+	return 270000;
+}
 
-	for (i = 0; i < dpe_headerlen; i++, dpse++) {
-		if (dpse->vs_level > max_vs)
-			max_vs = dpse->vs_level;
+static int
+dp_lane_count_get(struct drm_device *dev, int or, int link)
+{
+	u32 ctrl = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
+	switch (ctrl & 0x000f0000) {
+	case 0x00010000: return 1;
+	case 0x00030000: return 2;
+	default:
+		return 4;
 	}
-
-	return max_vs;
 }
 
-static int
-nouveau_dp_max_pre_emphasis(struct drm_encoder *encoder, int vs)
+void
+nouveau_dp_tu_update(struct drm_device *dev, int or, int link, u32 clk, u32 bpp)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int i, dpe_headerlen, max_pre = 0;
+	const u32 symbol = 100000;
+	int bestTU = 0, bestVTUi = 0, bestVTUf = 0, bestVTUa = 0;
+	int TU, VTUi, VTUf, VTUa;
+	u64 link_data_rate, link_ratio, unk;
+	u32 best_diff = 64 * symbol;
+	u32 link_nr, link_bw, r;
+
+	/* calculate packed data rate for each lane */
+	link_nr = dp_lane_count_get(dev, or, link);
+	link_data_rate = (clk * bpp / 8) / link_nr;
+
+	/* calculate ratio of packed data rate to link symbol rate */
+	link_bw = dp_link_bw_get(dev, or, link);
+	link_ratio = link_data_rate * symbol;
+	r = do_div(link_ratio, link_bw);
+
+	for (TU = 64; TU >= 32; TU--) {
+		/* calculate average number of valid symbols in each TU */
+		u32 tu_valid = link_ratio * TU;
+		u32 calc, diff;
+
+		/* find a hw representation for the fraction.. */
+		VTUi = tu_valid / symbol;
+		calc = VTUi * symbol;
+		diff = tu_valid - calc;
+		if (diff) {
+			if (diff >= (symbol / 2)) {
+				VTUf = symbol / (symbol - diff);
+				if (symbol - (VTUf * diff))
+					VTUf++;
+
+				if (VTUf <= 15) {
+					VTUa  = 1;
+					calc += symbol - (symbol / VTUf);
+				} else {
+					VTUa  = 0;
+					VTUf  = 1;
+					calc += symbol;
+				}
+			} else {
+				VTUa  = 0;
+				VTUf  = min((int)(symbol / diff), 15);
+				calc += symbol / VTUf;
+			}
 
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
+			diff = calc - tu_valid;
+		} else {
+			/* no remainder, but the hw doesn't like the fractional
+			 * part to be zero.  decrement the integer part and
+			 * have the fraction add a whole symbol back
+			 */
+			VTUa = 0;
+			VTUf = 1;
+			VTUi--;
+		}
 
-	for (i = 0; i < dpe_headerlen; i++, dpse++) {
-		if (dpse->vs_level != vs)
-			continue;
+		if (diff < best_diff) {
+			best_diff = diff;
+			bestTU = TU;
+			bestVTUa = VTUa;
+			bestVTUf = VTUf;
+			bestVTUi = VTUi;
+			if (diff == 0)
+				break;
+		}
+	}
 
-		if (dpse->pre_level > max_pre)
-			max_pre = dpse->pre_level;
+	if (!bestTU) {
+		NV_ERROR(dev, "DP: unable to find suitable config\n");
+		return;
 	}
 
-	return max_pre;
+	/* XXX close to vbios numbers, but not right */
+	unk  = (symbol - link_ratio) * bestTU;
+	unk *= link_ratio;
+	r = do_div(unk, symbol);
+	r = do_div(unk, symbol);
+	unk += 6;
+
+	nv_mask(dev, NV50_SOR_DP_CTRL(or, link), 0x000001fc, bestTU << 2);
+	nv_mask(dev, NV50_SOR_DP_SCFG(or, link), 0x010f7f3f, bestVTUa << 24 |
+							     bestVTUf << 16 |
+							     bestVTUi << 8 |
+							     unk);
 }
 
-static bool
-nouveau_dp_link_train_adjust(struct drm_encoder *encoder, uint8_t *config)
+u8 *
+nouveau_dp_bios_data(struct drm_device *dev, struct dcb_entry *dcb, u8 **entry)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table *dpe;
-	int ret, i, dpe_headerlen, vs = 0, pre = 0;
-	uint8_t request[2];
-
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-
-	ret = auxch_rd(encoder, DP_ADJUST_REQUEST_LANE0_1, request, 2);
-	if (ret)
-		return false;
-
-	NV_DEBUG_KMS(dev, "\t\tadjust 0x%02x 0x%02x\n", request[0], request[1]);
-
-	/* Keep all lanes at the same level.. */
-	for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-		int lane_req = (request[i >> 1] >> ((i & 1) << 2)) & 0xf;
-		int lane_vs = lane_req & 3;
-		int lane_pre = (lane_req >> 2) & 3;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	struct bit_entry d;
+	u8 *table;
+	int i;
+
+	if (bit_table(dev, 'd', &d)) {
+		NV_ERROR(dev, "BIT 'd' table not found\n");
+		return NULL;
+	}
 
-		if (lane_vs > vs)
-			vs = lane_vs;
-		if (lane_pre > pre)
-			pre = lane_pre;
+	if (d.version != 1) {
+		NV_ERROR(dev, "BIT 'd' table version %d unknown\n", d.version);
+		return NULL;
 	}
 
-	if (vs >= nouveau_dp_max_voltage_swing(encoder)) {
-		vs  = nouveau_dp_max_voltage_swing(encoder);
-		vs |= 4;
+	table = ROMPTR(bios, d.data[0]);
+	if (!table) {
+		NV_ERROR(dev, "displayport table pointer invalid\n");
+		return NULL;
 	}
 
-	if (pre >= nouveau_dp_max_pre_emphasis(encoder, vs & 3)) {
-		pre  = nouveau_dp_max_pre_emphasis(encoder, vs & 3);
-		pre |= 4;
+	switch (table[0]) {
+	case 0x20:
+	case 0x21:
+	case 0x30:
+		break;
+	default:
+		NV_ERROR(dev, "displayport table 0x%02x unknown\n", table[0]);
+		return NULL;
 	}
 
-	/* Update the configuration for all lanes.. */
-	for (i = 0; i < nv_encoder->dp.link_nr; i++)
-		config[i] = (pre << 3) | vs;
+	for (i = 0; i < table[3]; i++) {
+		*entry = ROMPTR(bios, table[table[1] + (i * table[2])]);
+		if (*entry && bios_encoder_match(dcb, ROM32((*entry)[0])))
+			return table;
+	}
 
-	return true;
+	NV_ERROR(dev, "displayport encoder table not found\n");
+	return NULL;
 }
 
-static bool
-nouveau_dp_link_train_commit(struct drm_encoder *encoder, uint8_t *config)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct drm_device *dev = encoder->dev;
-	struct bit_displayport_encoder_table_entry *dpse;
-	struct bit_displayport_encoder_table *dpe;
-	int or = nv_encoder->or, link = !(nv_encoder->dcb->sorconf.link & 1);
-	int dpe_headerlen, ret, i;
+/******************************************************************************
+ * link training
+ *****************************************************************************/
+struct dp_state {
+	struct dcb_entry *dcb;
+	u8 *table;
+	u8 *entry;
+	int auxch;
+	int crtc;
+	int or;
+	int link;
+	u8 *dpcd;
+	int link_nr;
+	u32 link_bw;
+	u8  stat[6];
+	u8  conf[4];
+};
 
-	NV_DEBUG_KMS(dev, "\t\tconfig 0x%02x 0x%02x 0x%02x 0x%02x\n",
-		 config[0], config[1], config[2], config[3]);
+static void
+dp_set_link_config(struct drm_device *dev, struct dp_state *dp)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int or = dp->or, link = dp->link;
+	u8 *entry, sink[2];
+	u32 dp_ctrl;
+	u16 script;
+
+	NV_DEBUG_KMS(dev, "%d lanes at %d KB/s\n", dp->link_nr, dp->link_bw);
+
+	/* set selected link rate on source */
+	switch (dp->link_bw) {
+	case 270000:
+		nv_mask(dev, 0x614300 + (or * 0x800), 0x000c0000, 0x00040000);
+		sink[0] = DP_LINK_BW_2_7;
+		break;
+	default:
+		nv_mask(dev, 0x614300 + (or * 0x800), 0x000c0000, 0x00000000);
+		sink[0] = DP_LINK_BW_1_62;
+		break;
+	}
 
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe)
-		return false;
-	dpse = (void *)((char *)dpe + dpe_headerlen);
+	/* offset +0x0a of each dp encoder table entry is a pointer to another
+	 * table, that has (among other things) pointers to more scripts that
+	 * need to be executed, this time depending on link speed.
+	 */
+	entry = ROMPTR(&dev_priv->vbios, dp->entry[10]);
+	if (entry) {
+		if (dp->table[0] < 0x30) {
+			while (dp->link_bw < (ROM16(entry[0]) * 10))
+				entry += 4;
+			script = ROM16(entry[2]);
+		} else {
+			while (dp->link_bw < (entry[0] * 27000))
+				entry += 3;
+			script = ROM16(entry[1]);
+		}
 
-	for (i = 0; i < dpe->record_nr; i++, dpse++) {
-		if (dpse->vs_level == (config[0] & 3) &&
-		    dpse->pre_level == ((config[0] >> 3) & 3))
-			break;
+		nouveau_bios_run_init_table(dev, script, dp->dcb, dp->crtc);
 	}
-	BUG_ON(i == dpe->record_nr);
-
-	for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-		const int shift[4] = { 16, 8, 0, 24 };
-		uint32_t mask = 0xff << shift[i];
-		uint32_t reg0, reg1, reg2;
-
-		reg0  = nv_rd32(dev, NV50_SOR_DP_UNK118(or, link)) & ~mask;
-		reg0 |= (dpse->reg0 << shift[i]);
-		reg1  = nv_rd32(dev, NV50_SOR_DP_UNK120(or, link)) & ~mask;
-		reg1 |= (dpse->reg1 << shift[i]);
-		reg2  = nv_rd32(dev, NV50_SOR_DP_UNK130(or, link)) & 0xffff00ff;
-		reg2 |= (dpse->reg2 << 8);
-		nv_wr32(dev, NV50_SOR_DP_UNK118(or, link), reg0);
-		nv_wr32(dev, NV50_SOR_DP_UNK120(or, link), reg1);
-		nv_wr32(dev, NV50_SOR_DP_UNK130(or, link), reg2);
+
+	/* configure lane count on the source */
+	dp_ctrl = ((1 << dp->link_nr) - 1) << 16;
+	sink[1] = dp->link_nr;
+	if (dp->dpcd[2] & DP_ENHANCED_FRAME_CAP) {
+		dp_ctrl |= 0x00004000;
+		sink[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
 	}
 
-	ret = auxch_wr(encoder, DP_TRAINING_LANE0_SET, config, 4);
-	if (ret)
-		return false;
+	nv_mask(dev, NV50_SOR_DP_CTRL(or, link), 0x001f4000, dp_ctrl);
 
-	return true;
+	/* inform the sink of the new configuration */
+	auxch_tx(dev, dp->auxch, 8, DP_LINK_BW_SET, sink, 2);
 }
 
-bool
-nouveau_dp_link_train(struct drm_encoder *encoder)
+static void
+dp_set_training_pattern(struct drm_device *dev, struct dp_state *dp, u8 tp)
 {
-	struct drm_device *dev = encoder->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
-	struct bit_displayport_encoder_table *dpe;
-	int dpe_headerlen;
-	uint8_t config[4], status[3];
-	bool cr_done, cr_max_vs, eq_done, hpd_state;
-	int ret = 0, i, tries, voltage;
+	u8 sink_tp;
 
-	NV_DEBUG_KMS(dev, "link training!!\n");
+	NV_DEBUG_KMS(dev, "training pattern %d\n", tp);
 
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (!nv_connector)
-		return false;
+	nv_mask(dev, NV50_SOR_DP_CTRL(dp->or, dp->link), 0x0f000000, tp << 24);
 
-	dpe = nouveau_bios_dp_table(dev, nv_encoder->dcb, &dpe_headerlen);
-	if (!dpe) {
-		NV_ERROR(dev, "SOR-%d: no DP encoder table!\n", nv_encoder->or);
-		return false;
-	}
+	auxch_tx(dev, dp->auxch, 9, DP_TRAINING_PATTERN_SET, &sink_tp, 1);
+	sink_tp &= ~DP_TRAINING_PATTERN_MASK;
+	sink_tp |= tp;
+	auxch_tx(dev, dp->auxch, 8, DP_TRAINING_PATTERN_SET, &sink_tp, 1);
+}
 
-	/* disable hotplug detect, this flips around on some panels during
-	 * link training.
-	 */
-	hpd_state = pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, false);
+static const u8 nv50_lane_map[] = { 16, 8, 0, 24 };
+static const u8 nvaf_lane_map[] = { 24, 16, 8, 0 };
+
+static int
+dp_link_train_commit(struct drm_device *dev, struct dp_state *dp)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 mask = 0, drv = 0, pre = 0, unk = 0;
+	const u8 *shifts;
+	int link = dp->link;
+	int or = dp->or;
+	int i;
+
+	if (dev_priv->chipset != 0xaf)
+		shifts = nv50_lane_map;
+	else
+		shifts = nvaf_lane_map;
+
+	for (i = 0; i < dp->link_nr; i++) {
+		u8 *conf = dp->entry + dp->table[4];
+		u8 lane = (dp->stat[4 + (i >> 1)] >> ((i & 1) * 4)) & 0xf;
+		u8 lpre = (lane & 0x0c) >> 2;
+		u8 lvsw = (lane & 0x03) >> 0;
+
+		mask |= 0xff << shifts[i];
+		unk |= 1 << (shifts[i] >> 3);
+
+		dp->conf[i] = (lpre << 3) | lvsw;
+		if (lvsw == DP_TRAIN_VOLTAGE_SWING_1200)
+			dp->conf[i] |= DP_TRAIN_MAX_SWING_REACHED;
+		if (lpre == DP_TRAIN_PRE_EMPHASIS_9_5)
+			dp->conf[i] |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+
+		NV_DEBUG_KMS(dev, "config lane %d %02x\n", i, dp->conf[i]);
+
+		if (dp->table[0] < 0x30) {
+			u8 *last = conf + (dp->entry[4] * dp->table[5]);
+			while (lvsw != conf[0] || lpre != conf[1]) {
+				conf += dp->table[5];
+				if (conf >= last)
+					return -EINVAL;
+			}
+
+			conf += 2;
+		} else {
+			/* no lookup table anymore, set entries for each
+			 * combination of voltage swing and pre-emphasis
+			 * level allowed by the DP spec.
+			 */
+			switch (lvsw) {
+			case 0: lpre += 0; break;
+			case 1: lpre += 4; break;
+			case 2: lpre += 7; break;
+			case 3: lpre += 9; break;
+			}
+
+			conf = conf + (lpre * dp->table[5]);
+			conf++;
+		}
 
-	if (dpe->script0) {
-		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 0\n", nv_encoder->or);
-		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script0),
-					    nv_encoder->dcb);
+		drv |= conf[0] << shifts[i];
+		pre |= conf[1] << shifts[i];
+		unk  = (unk & ~0x0000ff00) | (conf[2] << 8);
 	}
 
-train:
-	cr_done = eq_done = false;
+	nv_mask(dev, NV50_SOR_DP_UNK118(or, link), mask, drv);
+	nv_mask(dev, NV50_SOR_DP_UNK120(or, link), mask, pre);
+	nv_mask(dev, NV50_SOR_DP_UNK130(or, link), 0x0000ff0f, unk);
 
-	/* set link configuration */
-	NV_DEBUG_KMS(dev, "\tbegin train: bw %d, lanes %d\n",
-		 nv_encoder->dp.link_bw, nv_encoder->dp.link_nr);
+	return auxch_tx(dev, dp->auxch, 8, DP_TRAINING_LANE0_SET, dp->conf, 4);
+}
 
-	ret = nouveau_dp_link_bw_set(encoder, nv_encoder->dp.link_bw);
-	if (ret)
-		return false;
+static int
+dp_link_train_update(struct drm_device *dev, struct dp_state *dp, u32 delay)
+{
+	int ret;
 
-	config[0] = nv_encoder->dp.link_nr;
-	if (nv_encoder->dp.dpcd_version >= 0x11 &&
-	    nv_encoder->dp.enhanced_frame)
-		config[0] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	udelay(delay);
 
-	ret = nouveau_dp_lane_count_set(encoder, config[0]);
+	ret = auxch_tx(dev, dp->auxch, 9, DP_LANE0_1_STATUS, dp->stat, 6);
 	if (ret)
-		return false;
+		return ret;
 
-	/* clock recovery */
-	NV_DEBUG_KMS(dev, "\tbegin cr\n");
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_1);
-	if (ret)
-		goto stop;
+	NV_DEBUG_KMS(dev, "status %02x %02x %02x %02x %02x %02x\n",
+		     dp->stat[0], dp->stat[1], dp->stat[2], dp->stat[3],
+		     dp->stat[4], dp->stat[5]);
+	return 0;
+}
 
-	tries = 0;
-	voltage = -1;
-	memset(config, 0x00, sizeof(config));
-	for (;;) {
-		if (!nouveau_dp_link_train_commit(encoder, config))
-			break;
+static int
+dp_link_train_cr(struct drm_device *dev, struct dp_state *dp)
+{
+	bool cr_done = false, abort = false;
+	int voltage = dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+	int tries = 0, i;
 
-		udelay(100);
+	dp_set_training_pattern(dev, dp, DP_TRAINING_PATTERN_1);
 
-		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 2);
-		if (ret)
+	do {
+		if (dp_link_train_commit(dev, dp) ||
+		    dp_link_train_update(dev, dp, 100))
 			break;
-		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
-			 status[0], status[1]);
 
 		cr_done = true;
-		cr_max_vs = false;
-		for (i = 0; i < nv_encoder->dp.link_nr; i++) {
-			int lane = (status[i >> 1] >> ((i & 1) * 4)) & 0xf;
-
+		for (i = 0; i < dp->link_nr; i++) {
+			u8 lane = (dp->stat[i >> 1] >> ((i & 1) * 4)) & 0xf;
 			if (!(lane & DP_LANE_CR_DONE)) {
 				cr_done = false;
-				if (config[i] & DP_TRAIN_MAX_PRE_EMPHASIS_REACHED)
-					cr_max_vs = true;
+				if (dp->conf[i] & DP_TRAIN_MAX_SWING_REACHED)
+					abort = true;
 				break;
 			}
 		}
 
-		if ((config[0] & DP_TRAIN_VOLTAGE_SWING_MASK) != voltage) {
-			voltage = config[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+		if ((dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK) != voltage) {
+			voltage = dp->conf[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
 			tries = 0;
 		}
+	} while (!cr_done && !abort && ++tries < 5);
 
-		if (cr_done || cr_max_vs || (++tries == 5))
-			break;
-
-		if (!nouveau_dp_link_train_adjust(encoder, config))
-			break;
-	}
-
-	if (!cr_done)
-		goto stop;
+	return cr_done ? 0 : -1;
+}
 
-	/* channel equalisation */
-	NV_DEBUG_KMS(dev, "\tbegin eq\n");
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_2);
-	if (ret)
-		goto stop;
+static int
+dp_link_train_eq(struct drm_device *dev, struct dp_state *dp)
+{
+	bool eq_done, cr_done = true;
+	int tries = 0, i;
 
-	for (tries = 0; tries <= 5; tries++) {
-		udelay(400);
+	dp_set_training_pattern(dev, dp, DP_TRAINING_PATTERN_2);
 
-		ret = auxch_rd(encoder, DP_LANE0_1_STATUS, status, 3);
-		if (ret)
+	do {
+		if (dp_link_train_update(dev, dp, 400))
 			break;
-		NV_DEBUG_KMS(dev, "\t\tstatus: 0x%02x 0x%02x\n",
-			 status[0], status[1]);
 
-		eq_done = true;
-		if (!(status[2] & DP_INTERLANE_ALIGN_DONE))
-			eq_done = false;
-
-		for (i = 0; eq_done && i < nv_encoder->dp.link_nr; i++) {
-			int lane = (status[i >> 1] >> ((i & 1) * 4)) & 0xf;
-
-			if (!(lane & DP_LANE_CR_DONE)) {
+		eq_done = !!(dp->stat[2] & DP_INTERLANE_ALIGN_DONE);
+		for (i = 0; i < dp->link_nr && eq_done; i++) {
+			u8 lane = (dp->stat[i >> 1] >> ((i & 1) * 4)) & 0xf;
+			if (!(lane & DP_LANE_CR_DONE))
 				cr_done = false;
-				break;
-			}
-
 			if (!(lane & DP_LANE_CHANNEL_EQ_DONE) ||
-			    !(lane & DP_LANE_SYMBOL_LOCKED)) {
+			    !(lane & DP_LANE_SYMBOL_LOCKED))
 				eq_done = false;
-				break;
-			}
 		}
 
-		if (eq_done || !cr_done)
+		if (dp_link_train_commit(dev, dp))
 			break;
+	} while (!eq_done && cr_done && ++tries <= 5);
 
-		if (!nouveau_dp_link_train_adjust(encoder, config) ||
-		    !nouveau_dp_link_train_commit(encoder, config))
-			break;
-	}
+	return eq_done ? 0 : -1;
+}
 
-stop:
-	/* end link training */
-	ret = nouveau_dp_link_train_set(encoder, DP_TRAINING_PATTERN_DISABLE);
-	if (ret)
+bool
+nouveau_dp_link_train(struct drm_encoder *encoder, u32 datarate)
+{
+	struct drm_nouveau_private *dev_priv = encoder->dev->dev_private;
+	struct nouveau_gpio_engine *pgpio = &dev_priv->engine.gpio;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector =
+		nouveau_encoder_connector_get(nv_encoder);
+	struct drm_device *dev = encoder->dev;
+	struct nouveau_i2c_chan *auxch;
+	const u32 bw_list[] = { 270000, 162000, 0 };
+	const u32 *link_bw = bw_list;
+	struct dp_state dp;
+
+	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
+	if (!auxch)
 		return false;
 
-	/* retry at a lower setting, if possible */
-	if (!ret && !(eq_done && cr_done)) {
-		NV_DEBUG_KMS(dev, "\twe failed\n");
-		if (nv_encoder->dp.link_bw != DP_LINK_BW_1_62) {
-			NV_DEBUG_KMS(dev, "retry link training at low rate\n");
-			nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
-			goto train;
-		}
+	dp.table = nouveau_dp_bios_data(dev, nv_encoder->dcb, &dp.entry);
+	if (!dp.table)
+		return -EINVAL;
+
+	dp.dcb = nv_encoder->dcb;
+	dp.crtc = nv_crtc->index;
+	dp.auxch = auxch->rd;
+	dp.or = nv_encoder->or;
+	dp.link = !(nv_encoder->dcb->sorconf.link & 1);
+	dp.dpcd = nv_encoder->dp.dpcd;
+
+	/* some sinks toggle hotplug in response to some of the actions
+	 * we take during link training (DP_SET_POWER is one), we need
+	 * to ignore them for the moment to avoid races.
+	 */
+	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, false);
+
+	/* enable down-spreading, if possible */
+	if (dp.table[1] >= 16) {
+		u16 script = ROM16(dp.entry[14]);
+		if (nv_encoder->dp.dpcd[3] & 1)
+			script = ROM16(dp.entry[12]);
+
+		nouveau_bios_run_init_table(dev, script, dp.dcb, dp.crtc);
 	}
 
-	if (dpe->script1) {
-		NV_DEBUG_KMS(dev, "SOR-%d: running DP script 1\n", nv_encoder->or);
-		nouveau_bios_run_init_table(dev, le16_to_cpu(dpe->script1),
-					    nv_encoder->dcb);
+	/* execute pre-train script from vbios */
+	nouveau_bios_run_init_table(dev, ROM16(dp.entry[6]), dp.dcb, dp.crtc);
+
+	/* start off at highest link rate supported by encoder and display */
+	while (*link_bw > nv_encoder->dp.link_bw)
+		link_bw++;
+
+	while (link_bw[0]) {
+		/* find minimum required lane count at this link rate */
+		dp.link_nr = nv_encoder->dp.link_nr;
+		while ((dp.link_nr >> 1) * link_bw[0] > datarate)
+			dp.link_nr >>= 1;
+
+		/* drop link rate to minimum with this lane count */
+		while ((link_bw[1] * dp.link_nr) > datarate)
+			link_bw++;
+		dp.link_bw = link_bw[0];
+
+		/* program selected link configuration */
+		dp_set_link_config(dev, &dp);
+
+		/* attempt to train the link at this configuration */
+		memset(dp.stat, 0x00, sizeof(dp.stat));
+		if (!dp_link_train_cr(dev, &dp) &&
+		    !dp_link_train_eq(dev, &dp))
+			break;
+
+		/* retry at lower rate */
+		link_bw++;
 	}
 
-	/* re-enable hotplug detect */
-	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, hpd_state);
+	/* finish link training */
+	dp_set_training_pattern(dev, &dp, DP_TRAINING_PATTERN_DISABLE);
 
-	return eq_done;
+	/* execute post-train script from vbios */
+	nouveau_bios_run_init_table(dev, ROM16(dp.entry[8]), dp.dcb, dp.crtc);
+
+	/* re-enable hotplug detect */
+	pgpio->irq_enable(dev, nv_connector->dcb->gpio_tag, true);
+	return true;
 }
 
 bool
@@ -447,31 +648,34 @@ nouveau_dp_detect(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
-	uint8_t dpcd[4];
+	struct nouveau_i2c_chan *auxch;
+	u8 *dpcd = nv_encoder->dp.dpcd;
 	int ret;
 
-	ret = auxch_rd(encoder, 0x0000, dpcd, 4);
-	if (ret)
+	auxch = nouveau_i2c_find(dev, nv_encoder->dcb->i2c_index);
+	if (!auxch)
 		return false;
 
-	NV_DEBUG_KMS(dev, "encoder: link_bw %d, link_nr %d\n"
-		      "display: link_bw %d, link_nr %d version 0x%02x\n",
-		 nv_encoder->dcb->dpconf.link_bw,
-		 nv_encoder->dcb->dpconf.link_nr,
-		 dpcd[1], dpcd[2] & 0x0f, dpcd[0]);
+	ret = auxch_tx(dev, auxch->rd, 9, DP_DPCD_REV, dpcd, 8);
+	if (ret)
+		return false;
 
-	nv_encoder->dp.dpcd_version = dpcd[0];
+	nv_encoder->dp.link_bw = 27000 * dpcd[1];
+	nv_encoder->dp.link_nr = dpcd[2] & DP_MAX_LANE_COUNT_MASK;
 
-	nv_encoder->dp.link_bw = dpcd[1];
-	if (nv_encoder->dp.link_bw != DP_LINK_BW_1_62 &&
-	    !nv_encoder->dcb->dpconf.link_bw)
-		nv_encoder->dp.link_bw = DP_LINK_BW_1_62;
+	NV_DEBUG_KMS(dev, "display: %dx%d dpcd 0x%02x\n",
+		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw, dpcd[0]);
+	NV_DEBUG_KMS(dev, "encoder: %dx%d\n",
+		     nv_encoder->dcb->dpconf.link_nr,
+		     nv_encoder->dcb->dpconf.link_bw);
 
-	nv_encoder->dp.link_nr = dpcd[2] & DP_MAX_LANE_COUNT_MASK;
-	if (nv_encoder->dp.link_nr > nv_encoder->dcb->dpconf.link_nr)
+	if (nv_encoder->dcb->dpconf.link_nr < nv_encoder->dp.link_nr)
 		nv_encoder->dp.link_nr = nv_encoder->dcb->dpconf.link_nr;
+	if (nv_encoder->dcb->dpconf.link_bw < nv_encoder->dp.link_bw)
+		nv_encoder->dp.link_bw = nv_encoder->dcb->dpconf.link_bw;
 
-	nv_encoder->dp.enhanced_frame = (dpcd[2] & DP_ENHANCED_FRAME_CAP);
+	NV_DEBUG_KMS(dev, "maximum: %dx%d\n",
+		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw);
 
 	return true;
 }
@@ -480,105 +684,13 @@ int
 nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 		 uint8_t *data, int data_nr)
 {
-	struct drm_device *dev = auxch->dev;
-	uint32_t tmp, ctrl, stat = 0, data32[4] = {};
-	int ret = 0, i, index = auxch->rd;
-
-	NV_DEBUG_KMS(dev, "ch %d cmd %d addr 0x%x len %d\n", index, cmd, addr, data_nr);
-
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	nv_wr32(dev, NV50_AUXCH_CTRL(auxch->rd), tmp | 0x00100000);
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	if (!(tmp & 0x01000000)) {
-		NV_ERROR(dev, "expected bit 24 == 1, got 0x%08x\n", tmp);
-		ret = -EIO;
-		goto out;
-	}
-
-	for (i = 0; i < 3; i++) {
-		tmp = nv_rd32(dev, NV50_AUXCH_STAT(auxch->rd));
-		if (tmp & NV50_AUXCH_STAT_STATE_READY)
-			break;
-		udelay(100);
-	}
-
-	if (i == 3) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	if (!(cmd & 1)) {
-		memcpy(data32, data, data_nr);
-		for (i = 0; i < 4; i++) {
-			NV_DEBUG_KMS(dev, "wr %d: 0x%08x\n", i, data32[i]);
-			nv_wr32(dev, NV50_AUXCH_DATA_OUT(index, i), data32[i]);
-		}
-	}
-
-	nv_wr32(dev, NV50_AUXCH_ADDR(index), addr);
-	ctrl  = nv_rd32(dev, NV50_AUXCH_CTRL(index));
-	ctrl &= ~(NV50_AUXCH_CTRL_CMD | NV50_AUXCH_CTRL_LEN);
-	ctrl |= (cmd << NV50_AUXCH_CTRL_CMD_SHIFT);
-	ctrl |= ((data_nr - 1) << NV50_AUXCH_CTRL_LEN_SHIFT);
-
-	for (i = 0; i < 16; i++) {
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x80000000);
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl);
-		nv_wr32(dev, NV50_AUXCH_CTRL(index), ctrl | 0x00010000);
-		if (!nv_wait(dev, NV50_AUXCH_CTRL(index),
-			     0x00010000, 0x00000000)) {
-			NV_ERROR(dev, "expected bit 16 == 0, got 0x%08x\n",
-				 nv_rd32(dev, NV50_AUXCH_CTRL(index)));
-			ret = -EBUSY;
-			goto out;
-		}
-
-		udelay(400);
-
-		stat = nv_rd32(dev, NV50_AUXCH_STAT(index));
-		if ((stat & NV50_AUXCH_STAT_REPLY_AUX) !=
-			    NV50_AUXCH_STAT_REPLY_AUX_DEFER)
-			break;
-	}
-
-	if (i == 16) {
-		NV_ERROR(dev, "auxch DEFER too many times, bailing\n");
-		ret = -EREMOTEIO;
-		goto out;
-	}
-
-	if (cmd & 1) {
-		if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) {
-			ret = -EREMOTEIO;
-			goto out;
-		}
-
-		for (i = 0; i < 4; i++) {
-			data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i));
-			NV_DEBUG_KMS(dev, "rd %d: 0x%08x\n", i, data32[i]);
-		}
-		memcpy(data, data32, data_nr);
-	}
-
-out:
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	nv_wr32(dev, NV50_AUXCH_CTRL(auxch->rd), tmp & ~0x00100000);
-	tmp = nv_rd32(dev, NV50_AUXCH_CTRL(auxch->rd));
-	if (tmp & 0x01000000) {
-		NV_ERROR(dev, "expected bit 24 == 0, got 0x%08x\n", tmp);
-		ret = -EIO;
-	}
-
-	udelay(400);
-
-	return ret ? ret : (stat & NV50_AUXCH_STAT_REPLY);
+	return auxch_tx(auxch->dev, auxch->rd, cmd, addr, data, data_nr);
 }
 
 static int
 nouveau_dp_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 {
 	struct nouveau_i2c_chan *auxch = (struct nouveau_i2c_chan *)adap;
-	struct drm_device *dev = auxch->dev;
 	struct i2c_msg *msg = msgs;
 	int ret, mcnt = num;
 
@@ -602,19 +714,6 @@ nouveau_dp_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs, int num)
 			if (ret < 0)
 				return ret;
 
-			switch (ret & NV50_AUXCH_STAT_REPLY_I2C) {
-			case NV50_AUXCH_STAT_REPLY_I2C_ACK:
-				break;
-			case NV50_AUXCH_STAT_REPLY_I2C_NACK:
-				return -EREMOTEIO;
-			case NV50_AUXCH_STAT_REPLY_I2C_DEFER:
-				udelay(100);
-				continue;
-			default:
-				NV_ERROR(dev, "bad auxch reply: 0x%08x\n", ret);
-				return -EREMOTEIO;
-			}
-
 			ptr += cnt;
 			remaining -= cnt;
 		}
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index b30ddd8d2e2a..c1e01f37b9d1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -41,7 +41,7 @@ int nouveau_agpmode = -1;
 module_param_named(agpmode, nouveau_agpmode, int, 0400);
 
 MODULE_PARM_DESC(modeset, "Enable kernel modesetting");
-static int nouveau_modeset = -1; /* kms */
+int nouveau_modeset = -1;
 module_param_named(modeset, nouveau_modeset, int, 0400);
 
 MODULE_PARM_DESC(vbios, "Override default VBIOS location");
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index d7d51deb34b6..29837da1098b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -414,12 +414,13 @@ struct nouveau_gpio_engine {
 };
 
 struct nouveau_pm_voltage_level {
-	u8 voltage;
-	u8 vid;
+	u32 voltage; /* microvolts */
+	u8  vid;
 };
 
 struct nouveau_pm_voltage {
 	bool supported;
+	u8 version;
 	u8 vid_mask;
 
 	struct nouveau_pm_voltage_level *level;
@@ -428,17 +429,48 @@ struct nouveau_pm_voltage {
 
 struct nouveau_pm_memtiming {
 	int id;
-	u32 reg_100220;
-	u32 reg_100224;
-	u32 reg_100228;
-	u32 reg_10022c;
-	u32 reg_100230;
-	u32 reg_100234;
-	u32 reg_100238;
-	u32 reg_10023c;
-	u32 reg_100240;
+	u32 reg_0; /* 0x10f290 on Fermi, 0x100220 for older */
+	u32 reg_1;
+	u32 reg_2;
+	u32 reg_3;
+	u32 reg_4;
+	u32 reg_5;
+	u32 reg_6;
+	u32 reg_7;
+	u32 reg_8;
+	/* To be written to 0x1002c0 */
+	u8 CL;
+	u8 WR;
 };
 
+struct nouveau_pm_tbl_header{
+	u8 version;
+	u8 header_len;
+	u8 entry_cnt;
+	u8 entry_len;
+};
+
+struct nouveau_pm_tbl_entry{
+	u8 tWR;
+	u8 tUNK_1;
+	u8 tCL;
+	u8 tRP;		/* Byte 3 */
+	u8 empty_4;
+	u8 tRAS;	/* Byte 5 */
+	u8 empty_6;
+	u8 tRFC;	/* Byte 7 */
+	u8 empty_8;
+	u8 tRC;		/* Byte 9 */
+	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
+	u8 empty_15,empty_16,empty_17;
+	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
+};
+
+/* nouveau_mem.c */
+void nv30_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing);
+
 #define NOUVEAU_PM_MAX_LEVEL 8
 struct nouveau_pm_level {
 	struct device_attribute dev_attr;
@@ -448,11 +480,19 @@ struct nouveau_pm_level {
 	u32 core;
 	u32 memory;
 	u32 shader;
-	u32 unk05;
-	u32 unk0a;
-
-	u8 voltage;
-	u8 fanspeed;
+	u32 rop;
+	u32 copy;
+	u32 daemon;
+	u32 vdec;
+	u32 unk05;	/* nv50:nva3, roughly.. */
+	u32 unka0;	/* nva3:nvc0 */
+	u32 hub01;	/* nvc0- */
+	u32 hub06;	/* nvc0- */
+	u32 hub07;	/* nvc0- */
+
+	u32 volt_min; /* microvolts */
+	u32 volt_max;
+	u8  fanspeed;
 
 	u16 memscript;
 	struct nouveau_pm_memtiming *timing;
@@ -496,6 +536,11 @@ struct nouveau_pm_engine {
 	void *(*clock_pre)(struct drm_device *, struct nouveau_pm_level *,
 			   u32 id, int khz);
 	void (*clock_set)(struct drm_device *, void *);
+
+	int  (*clocks_get)(struct drm_device *, struct nouveau_pm_level *);
+	void *(*clocks_pre)(struct drm_device *, struct nouveau_pm_level *);
+	void (*clocks_set)(struct drm_device *, void *);
+
 	int (*voltage_get)(struct drm_device *);
 	int (*voltage_set)(struct drm_device *, int voltage);
 	int (*fanspeed_get)(struct drm_device *);
@@ -504,7 +549,7 @@ struct nouveau_pm_engine {
 };
 
 struct nouveau_vram_engine {
-	struct nouveau_mm *mm;
+	struct nouveau_mm mm;
 
 	int  (*init)(struct drm_device *);
 	void (*takedown)(struct drm_device *dev);
@@ -623,6 +668,7 @@ enum nouveau_card_type {
 	NV_40      = 0x40,
 	NV_50      = 0x50,
 	NV_C0      = 0xc0,
+	NV_D0      = 0xd0
 };
 
 struct drm_nouveau_private {
@@ -633,8 +679,8 @@ struct drm_nouveau_private {
 	enum nouveau_card_type card_type;
 	/* exact chipset, derived from NV_PMC_BOOT_0 */
 	int chipset;
-	int stepping;
 	int flags;
+	u32 crystal;
 
 	void __iomem *mmio;
 
@@ -721,7 +767,6 @@ struct drm_nouveau_private {
 	uint64_t vram_size;
 	uint64_t vram_sys_base;
 
-	uint64_t fb_phys;
 	uint64_t fb_available_size;
 	uint64_t fb_mappable_pages;
 	uint64_t fb_aper_free;
@@ -784,6 +829,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
 }
 
 /* nouveau_drv.c */
+extern int nouveau_modeset;
 extern int nouveau_agpmode;
 extern int nouveau_duallink;
 extern int nouveau_uscript_lvds;
@@ -824,6 +870,8 @@ extern bool nouveau_wait_eq(struct drm_device *, uint64_t timeout,
 			    uint32_t reg, uint32_t mask, uint32_t val);
 extern bool nouveau_wait_ne(struct drm_device *, uint64_t timeout,
 			    uint32_t reg, uint32_t mask, uint32_t val);
+extern bool nouveau_wait_cb(struct drm_device *, u64 timeout,
+			    bool (*cond)(void *), void *);
 extern bool nouveau_wait_for_idle(struct drm_device *);
 extern int  nouveau_card_init(struct drm_device *);
 
@@ -1006,15 +1054,15 @@ static inline int nouveau_acpi_edid(struct drm_device *dev, struct drm_connector
 
 /* nouveau_backlight.c */
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
-extern int nouveau_backlight_init(struct drm_connector *);
-extern void nouveau_backlight_exit(struct drm_connector *);
+extern int nouveau_backlight_init(struct drm_device *);
+extern void nouveau_backlight_exit(struct drm_device *);
 #else
-static inline int nouveau_backlight_init(struct drm_connector *dev)
+static inline int nouveau_backlight_init(struct drm_device *dev)
 {
 	return 0;
 }
 
-static inline void nouveau_backlight_exit(struct drm_connector *dev) { }
+static inline void nouveau_backlight_exit(struct drm_device *dev) { }
 #endif
 
 /* nouveau_bios.c */
@@ -1022,7 +1070,8 @@ extern int nouveau_bios_init(struct drm_device *);
 extern void nouveau_bios_takedown(struct drm_device *dev);
 extern int nouveau_run_vbios_init(struct drm_device *);
 extern void nouveau_bios_run_init_table(struct drm_device *, uint16_t table,
-					struct dcb_entry *);
+					struct dcb_entry *, int crtc);
+extern void nouveau_bios_init_exec(struct drm_device *, uint16_t table);
 extern struct dcb_gpio_entry *nouveau_bios_gpio_entry(struct drm_device *,
 						      enum dcb_gpio_tag);
 extern struct dcb_connector_table_entry *
@@ -1030,11 +1079,8 @@ nouveau_bios_connector_entry(struct drm_device *, int index);
 extern u32 get_pll_register(struct drm_device *, enum pll_types);
 extern int get_pll_limits(struct drm_device *, uint32_t limit_match,
 			  struct pll_lims *);
-extern int nouveau_bios_run_display_table(struct drm_device *,
-					  struct dcb_entry *,
-					  uint32_t script, int pxclk);
-extern void *nouveau_bios_dp_table(struct drm_device *, struct dcb_entry *,
-				   int *length);
+extern int nouveau_bios_run_display_table(struct drm_device *, u16 id, int clk,
+					  struct dcb_entry *, int crtc);
 extern bool nouveau_bios_fp_mode(struct drm_device *, struct drm_display_mode *);
 extern uint8_t *nouveau_bios_embedded_edid(struct drm_device *);
 extern int nouveau_bios_parse_lvds_table(struct drm_device *, int pxclk,
@@ -1043,6 +1089,7 @@ extern int run_tmds_table(struct drm_device *, struct dcb_entry *,
 			  int head, int pxclk);
 extern int call_lvds_script(struct drm_device *, struct dcb_entry *, int head,
 			    enum LVDS_script, int pxclk);
+bool bios_encoder_match(struct dcb_entry *, u32 hash);
 
 /* nouveau_ttm.c */
 int nouveau_ttm_global_init(struct drm_nouveau_private *);
@@ -1053,7 +1100,9 @@ int nouveau_ttm_mmap(struct file *, struct vm_area_struct *);
 int nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 		     uint8_t *data, int data_nr);
 bool nouveau_dp_detect(struct drm_encoder *);
-bool nouveau_dp_link_train(struct drm_encoder *);
+bool nouveau_dp_link_train(struct drm_encoder *, u32 datarate);
+void nouveau_dp_tu_update(struct drm_device *, int, int, u32, u32);
+u8 *nouveau_dp_bios_data(struct drm_device *, struct dcb_entry *, u8 **);
 
 /* nv04_fb.c */
 extern int  nv04_fb_init(struct drm_device *);
@@ -1179,8 +1228,8 @@ extern int  nva3_copy_create(struct drm_device *dev);
 /* nvc0_copy.c */
 extern int  nvc0_copy_create(struct drm_device *dev, int engine);
 
-/* nv40_mpeg.c */
-extern int  nv40_mpeg_create(struct drm_device *dev);
+/* nv31_mpeg.c */
+extern int  nv31_mpeg_create(struct drm_device *dev);
 
 /* nv50_mpeg.c */
 extern int  nv50_mpeg_create(struct drm_device *dev);
@@ -1265,6 +1314,11 @@ extern int nv04_display_create(struct drm_device *);
 extern int nv04_display_init(struct drm_device *);
 extern void nv04_display_destroy(struct drm_device *);
 
+/* nvd0_display.c */
+extern int nvd0_display_create(struct drm_device *);
+extern int nvd0_display_init(struct drm_device *);
+extern void nvd0_display_destroy(struct drm_device *);
+
 /* nv04_crtc.c */
 extern int nv04_crtc_create(struct drm_device *, int index);
 
@@ -1374,6 +1428,8 @@ int nv50_gpio_init(struct drm_device *dev);
 void nv50_gpio_fini(struct drm_device *dev);
 int nv50_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
 int nv50_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state);
+int nvd0_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag);
+int nvd0_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state);
 int  nv50_gpio_irq_register(struct drm_device *, enum dcb_gpio_tag,
 			    void (*)(void *, int), void *);
 void nv50_gpio_irq_unregister(struct drm_device *, enum dcb_gpio_tag,
@@ -1448,6 +1504,8 @@ static inline void nv_wr08(struct drm_device *dev, unsigned reg, u8 val)
 	nouveau_wait_eq(dev, 2000000000ULL, (reg), (mask), (val))
 #define nv_wait_ne(dev, reg, mask, val) \
 	nouveau_wait_ne(dev, 2000000000ULL, (reg), (mask), (val))
+#define nv_wait_cb(dev, func, data) \
+	nouveau_wait_cb(dev, 2000000000ULL, (func), (data))
 
 /* PRAMIN access */
 static inline u32 nv_ri32(struct drm_device *dev, unsigned offset)
@@ -1514,6 +1572,7 @@ enum {
 	NOUVEAU_REG_DEBUG_RMVIO          = 0x80,
 	NOUVEAU_REG_DEBUG_VGAATTR        = 0x100,
 	NOUVEAU_REG_DEBUG_EVO            = 0x200,
+	NOUVEAU_REG_DEBUG_AUXCH          = 0x400
 };
 
 #define NV_REG_DEBUG(type, dev, fmt, arg...) do { \
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index ae69b61d93db..e5d6e3faff3d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -49,17 +49,17 @@ struct nouveau_encoder {
 
 	union {
 		struct {
-			int mc_unknown;
-			uint32_t unk0;
-			uint32_t unk1;
-			int dpcd_version;
+			u8  dpcd[8];
 			int link_nr;
 			int link_bw;
-			bool enhanced_frame;
+			u32 datarate;
 		} dp;
 	};
 };
 
+struct nouveau_encoder *
+find_encoder(struct drm_connector *connector, int type);
+
 static inline struct nouveau_encoder *nouveau_encoder(struct drm_encoder *enc)
 {
 	struct drm_encoder_slave *slave = to_encoder_slave(enc);
@@ -83,21 +83,4 @@ nouveau_encoder_connector_get(struct nouveau_encoder *encoder);
 int nv50_sor_create(struct drm_connector *, struct dcb_entry *);
 int nv50_dac_create(struct drm_connector *, struct dcb_entry *);
 
-struct bit_displayport_encoder_table {
-	uint32_t match;
-	uint8_t  record_nr;
-	uint8_t  unknown;
-	uint16_t script0;
-	uint16_t script1;
-	uint16_t unknown_table;
-} __attribute__ ((packed));
-
-struct bit_displayport_encoder_table_entry {
-	uint8_t vs_level;
-	uint8_t pre_level;
-	uint8_t reg0;
-	uint8_t reg1;
-	uint8_t reg2;
-} __attribute__ ((packed));
-
 #endif /* __NOUVEAU_ENCODER_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index c919cfc8f2fd..81116cfea275 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -519,7 +519,7 @@ nouveau_fence_channel_init(struct nouveau_channel *chan)
 	if (USE_SEMA(dev) && dev_priv->chipset < 0x84) {
 		struct ttm_mem_reg *mem = &dev_priv->fence.bo->bo.mem;
 
-		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_FROM_MEMORY,
 					     mem->start << PAGE_SHIFT,
 					     mem->size, NV_MEM_ACCESS_RW,
 					     NV_MEM_TARGET_VRAM, &obj);
diff --git a/drivers/gpu/drm/nouveau/nouveau_i2c.c b/drivers/gpu/drm/nouveau/nouveau_i2c.c
index cb389d014326..f6a27fabcfe0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_i2c.c
+++ b/drivers/gpu/drm/nouveau/nouveau_i2c.c
@@ -107,6 +107,13 @@ nv4e_i2c_getsda(void *data)
 	return !!((nv_rd32(dev, i2c->rd) >> 16) & 8);
 }
 
+static const uint32_t nv50_i2c_port[] = {
+	0x00e138, 0x00e150, 0x00e168, 0x00e180,
+	0x00e254, 0x00e274, 0x00e764, 0x00e780,
+	0x00e79c, 0x00e7b8
+};
+#define NV50_I2C_PORTS ARRAY_SIZE(nv50_i2c_port)
+
 static int
 nv50_i2c_getscl(void *data)
 {
@@ -130,28 +137,32 @@ static void
 nv50_i2c_setscl(void *data, int state)
 {
 	struct nouveau_i2c_chan *i2c = data;
-	struct drm_device *dev = i2c->dev;
 
-	nv_wr32(dev, i2c->wr, 4 | (i2c->data ? 2 : 0) | (state ? 1 : 0));
+	nv_wr32(i2c->dev, i2c->wr, 4 | (i2c->data ? 2 : 0) | (state ? 1 : 0));
 }
 
 static void
 nv50_i2c_setsda(void *data, int state)
 {
 	struct nouveau_i2c_chan *i2c = data;
-	struct drm_device *dev = i2c->dev;
 
-	nv_wr32(dev, i2c->wr,
-			(nv_rd32(dev, i2c->rd) & 1) | 4 | (state ? 2 : 0));
+	nv_mask(i2c->dev, i2c->wr, 0x00000006, 4 | (state ? 2 : 0));
 	i2c->data = state;
 }
 
-static const uint32_t nv50_i2c_port[] = {
-	0x00e138, 0x00e150, 0x00e168, 0x00e180,
-	0x00e254, 0x00e274, 0x00e764, 0x00e780,
-	0x00e79c, 0x00e7b8
-};
-#define NV50_I2C_PORTS ARRAY_SIZE(nv50_i2c_port)
+static int
+nvd0_i2c_getscl(void *data)
+{
+	struct nouveau_i2c_chan *i2c = data;
+	return !!(nv_rd32(i2c->dev, i2c->rd) & 0x10);
+}
+
+static int
+nvd0_i2c_getsda(void *data)
+{
+	struct nouveau_i2c_chan *i2c = data;
+	return !!(nv_rd32(i2c->dev, i2c->rd) & 0x20);
+}
 
 int
 nouveau_i2c_init(struct drm_device *dev, struct dcb_i2c_entry *entry, int index)
@@ -163,7 +174,8 @@ nouveau_i2c_init(struct drm_device *dev, struct dcb_i2c_entry *entry, int index)
 	if (entry->chan)
 		return -EEXIST;
 
-	if (dev_priv->card_type >= NV_50 && entry->read >= NV50_I2C_PORTS) {
+	if (dev_priv->card_type >= NV_50 &&
+	    dev_priv->card_type <= NV_C0 && entry->read >= NV50_I2C_PORTS) {
 		NV_ERROR(dev, "unknown i2c port %d\n", entry->read);
 		return -EINVAL;
 	}
@@ -192,10 +204,17 @@ nouveau_i2c_init(struct drm_device *dev, struct dcb_i2c_entry *entry, int index)
 	case 5:
 		i2c->bit.setsda = nv50_i2c_setsda;
 		i2c->bit.setscl = nv50_i2c_setscl;
-		i2c->bit.getsda = nv50_i2c_getsda;
-		i2c->bit.getscl = nv50_i2c_getscl;
-		i2c->rd = nv50_i2c_port[entry->read];
-		i2c->wr = i2c->rd;
+		if (dev_priv->card_type < NV_D0) {
+			i2c->bit.getsda = nv50_i2c_getsda;
+			i2c->bit.getscl = nv50_i2c_getscl;
+			i2c->rd = nv50_i2c_port[entry->read];
+			i2c->wr = i2c->rd;
+		} else {
+			i2c->bit.getsda = nvd0_i2c_getsda;
+			i2c->bit.getscl = nvd0_i2c_getscl;
+			i2c->rd = 0x00d014 + (entry->read * 0x20);
+			i2c->wr = i2c->rd;
+		}
 		break;
 	case 6:
 		i2c->rd = entry->read;
@@ -267,7 +286,10 @@ nouveau_i2c_find(struct drm_device *dev, int index)
 			val  = 0xe001;
 		}
 
-		nv_wr32(dev, reg, (nv_rd32(dev, reg) & ~0xf003) | val);
+		/* nfi, but neither auxch or i2c work if it's 1 */
+		nv_mask(dev, reg + 0x0c, 0x00000001, 0x00000000);
+		/* nfi, but switches auxch vs normal i2c */
+		nv_mask(dev, reg + 0x00, 0x0000f003, val);
 	}
 
 	if (!i2c->chan && nouveau_i2c_init(dev, i2c, index))
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index f9ae2fc3d6f1..36bec4807701 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -408,8 +408,6 @@ nouveau_mem_vram_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	dev_priv->fb_phys = pci_resource_start(dev->pdev, 1);
-
 	ret = nouveau_ttm_global_init(dev_priv);
 	if (ret)
 		return ret;
@@ -504,35 +502,146 @@ nouveau_mem_gart_init(struct drm_device *dev)
 	return 0;
 }
 
+/* XXX: For now a dummy. More samples required, possibly even a card
+ * Called from nouveau_perf.c */
+void nv30_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing) {
+
+	NV_DEBUG(dev,"Timing entry format unknown, please contact nouveau developers");
+}
+
+void nv40_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,
+							struct nouveau_pm_memtiming *timing) {
+
+	timing->reg_0 = (e->tRC << 24 | e->tRFC << 16 | e->tRAS << 8 | e->tRP);
+
+	/* XXX: I don't trust the -1's and +1's... they must come
+	 *      from somewhere! */
+	timing->reg_1 = (e->tWR + 2 + magic_number) << 24 |
+				  1 << 16 |
+				  (e->tUNK_1 + 2 + magic_number) << 8 |
+				  (e->tCL + 2 - magic_number);
+	timing->reg_2 = (magic_number << 24 | e->tUNK_12 << 16 | e->tUNK_11 << 8 | e->tUNK_10);
+	timing->reg_2 |= 0x20200000;
+
+	NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,timing->reg_2);
+}
+
+void nv50_mem_timing_entry(struct drm_device *dev, struct bit_entry *P, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, uint8_t magic_number,struct nouveau_pm_memtiming *timing) {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	uint8_t unk18 = 1,
+		unk19 = 1,
+		unk20 = 0,
+		unk21 = 0;
+
+	switch (min(hdr->entry_len, (u8) 22)) {
+	case 22:
+		unk21 = e->tUNK_21;
+	case 21:
+		unk20 = e->tUNK_20;
+	case 20:
+		unk19 = e->tUNK_19;
+	case 19:
+		unk18 = e->tUNK_18;
+		break;
+	}
+
+	timing->reg_0 = (e->tRC << 24 | e->tRFC << 16 | e->tRAS << 8 | e->tRP);
+
+	/* XXX: I don't trust the -1's and +1's... they must come
+	 *      from somewhere! */
+	timing->reg_1 = (e->tWR + unk19 + 1 + magic_number) << 24 |
+				  max(unk18, (u8) 1) << 16 |
+				  (e->tUNK_1 + unk19 + 1 + magic_number) << 8;
+	if (dev_priv->chipset == 0xa8) {
+		timing->reg_1 |= (e->tCL - 1);
+	} else {
+		timing->reg_1 |= (e->tCL + 2 - magic_number);
+	}
+	timing->reg_2 = (e->tUNK_12 << 16 | e->tUNK_11 << 8 | e->tUNK_10);
+
+	timing->reg_5 = (e->tRAS << 24 | e->tRC);
+	timing->reg_5 += max(e->tUNK_10, e->tUNK_11) << 16;
+
+	if (P->version == 1) {
+		timing->reg_2 |= magic_number << 24;
+		timing->reg_3 = (0x14 + e->tCL) << 24 |
+						0x16 << 16 |
+						(e->tCL - 1) << 8 |
+						(e->tCL - 1);
+		timing->reg_4 = (nv_rd32(dev,0x10022c) & 0xffff0000) | e->tUNK_13 << 8  | e->tUNK_13;
+		timing->reg_5 |= (e->tCL + 2) << 8;
+		timing->reg_7 = 0x4000202 | (e->tCL - 1) << 16;
+	} else {
+		timing->reg_2 |= (unk19 - 1) << 24;
+		/* XXX: reg_10022c for recentish cards pretty much unknown*/
+		timing->reg_3 = e->tCL - 1;
+		timing->reg_4 = (unk20 << 24 | unk21 << 16 |
+							e->tUNK_13 << 8  | e->tUNK_13);
+		/* XXX: +6? */
+		timing->reg_5 |= (unk19 + 6) << 8;
+
+		/* XXX: reg_10023c currently unknown
+		 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
+		timing->reg_7 = 0x202;
+	}
+
+	NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,
+		 timing->reg_2, timing->reg_3);
+	NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
+		 timing->reg_4, timing->reg_5,
+		 timing->reg_6, timing->reg_7);
+	NV_DEBUG(dev, "         240: %08x\n", timing->reg_8);
+}
+
+void nvc0_mem_timing_entry(struct drm_device *dev, struct nouveau_pm_tbl_header *hdr,
+							struct nouveau_pm_tbl_entry *e, struct nouveau_pm_memtiming *timing) {
+	timing->reg_0 = (e->tRC << 24 | (e->tRFC & 0x7f) << 17 | e->tRAS << 8 | e->tRP);
+	timing->reg_1 = (nv_rd32(dev,0x10f294) & 0xff000000) | (e->tUNK_11&0x0f) << 20 | (e->tUNK_19 << 7) | (e->tCL & 0x0f);
+	timing->reg_2 = (nv_rd32(dev,0x10f298) & 0xff0000ff) | e->tWR << 16 | e->tUNK_1 << 8;
+	timing->reg_3 = e->tUNK_20 << 9 | e->tUNK_13;
+	timing->reg_4 = (nv_rd32(dev,0x10f2a0) & 0xfff000ff) | e->tUNK_12 << 15;
+	NV_DEBUG(dev, "Entry %d: 290: %08x %08x %08x %08x\n", timing->id,
+		 timing->reg_0, timing->reg_1,
+		 timing->reg_2, timing->reg_3);
+	NV_DEBUG(dev, "         2a0: %08x %08x %08x %08x\n",
+		 timing->reg_4, timing->reg_5,
+		 timing->reg_6, timing->reg_7);
+}
+
+/**
+ * Processes the Memory Timing BIOS table, stores generated
+ * register values
+ * @pre init scripts were run, memtiming regs are initialized
+ */
 void
 nouveau_mem_timing_init(struct drm_device *dev)
 {
-	/* cards < NVC0 only */
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct nouveau_pm_memtimings *memtimings = &pm->memtimings;
 	struct nvbios *bios = &dev_priv->vbios;
 	struct bit_entry P;
-	u8 tUNK_0, tUNK_1, tUNK_2;
-	u8 tRP;		/* Byte 3 */
-	u8 tRAS;	/* Byte 5 */
-	u8 tRFC;	/* Byte 7 */
-	u8 tRC;		/* Byte 9 */
-	u8 tUNK_10, tUNK_11, tUNK_12, tUNK_13, tUNK_14;
-	u8 tUNK_18, tUNK_19, tUNK_20, tUNK_21;
-	u8 magic_number = 0; /* Yeah... sorry*/
-	u8 *mem = NULL, *entry;
-	int i, recordlen, entries;
+	struct nouveau_pm_tbl_header *hdr = NULL;
+	uint8_t magic_number;
+	u8 *entry;
+	int i;
 
 	if (bios->type == NVBIOS_BIT) {
 		if (bit_table(dev, 'P', &P))
 			return;
 
 		if (P.version == 1)
-			mem = ROMPTR(bios, P.data[4]);
+			hdr = (struct nouveau_pm_tbl_header *) ROMPTR(bios, P.data[4]);
 		else
 		if (P.version == 2)
-			mem = ROMPTR(bios, P.data[8]);
+			hdr = (struct nouveau_pm_tbl_header *) ROMPTR(bios, P.data[8]);
 		else {
 			NV_WARN(dev, "unknown mem for BIT P %d\n", P.version);
 		}
@@ -541,150 +650,56 @@ nouveau_mem_timing_init(struct drm_device *dev)
 		return;
 	}
 
-	if (!mem) {
+	if (!hdr) {
 		NV_DEBUG(dev, "memory timing table pointer invalid\n");
 		return;
 	}
 
-	if (mem[0] != 0x10) {
-		NV_WARN(dev, "memory timing table 0x%02x unknown\n", mem[0]);
+	if (hdr->version != 0x10) {
+		NV_WARN(dev, "memory timing table 0x%02x unknown\n", hdr->version);
 		return;
 	}
 
 	/* validate record length */
-	entries   = mem[2];
-	recordlen = mem[3];
-	if (recordlen < 15) {
-		NV_ERROR(dev, "mem timing table length unknown: %d\n", mem[3]);
+	if (hdr->entry_len < 15) {
+		NV_ERROR(dev, "mem timing table length unknown: %d\n", hdr->entry_len);
 		return;
 	}
 
 	/* parse vbios entries into common format */
 	memtimings->timing =
-		kcalloc(entries, sizeof(*memtimings->timing), GFP_KERNEL);
+		kcalloc(hdr->entry_cnt, sizeof(*memtimings->timing), GFP_KERNEL);
 	if (!memtimings->timing)
 		return;
 
 	/* Get "some number" from the timing reg for NV_40 and NV_50
-	 * Used in calculations later */
-	if (dev_priv->card_type >= NV_40 && dev_priv->chipset < 0x98) {
+	 * Used in calculations later... source unknown */
+	magic_number = 0;
+	if (P.version == 1) {
 		magic_number = (nv_rd32(dev, 0x100228) & 0x0f000000) >> 24;
 	}
 
-	entry = mem + mem[1];
-	for (i = 0; i < entries; i++, entry += recordlen) {
+	entry = (u8*) hdr + hdr->header_len;
+	for (i = 0; i < hdr->entry_cnt; i++, entry += hdr->entry_len) {
 		struct nouveau_pm_memtiming *timing = &pm->memtimings.timing[i];
 		if (entry[0] == 0)
 			continue;
 
-		tUNK_18 = 1;
-		tUNK_19 = 1;
-		tUNK_20 = 0;
-		tUNK_21 = 0;
-		switch (min(recordlen, 22)) {
-		case 22:
-			tUNK_21 = entry[21];
-		case 21:
-			tUNK_20 = entry[20];
-		case 20:
-			tUNK_19 = entry[19];
-		case 19:
-			tUNK_18 = entry[18];
-		default:
-			tUNK_0  = entry[0];
-			tUNK_1  = entry[1];
-			tUNK_2  = entry[2];
-			tRP     = entry[3];
-			tRAS    = entry[5];
-			tRFC    = entry[7];
-			tRC     = entry[9];
-			tUNK_10 = entry[10];
-			tUNK_11 = entry[11];
-			tUNK_12 = entry[12];
-			tUNK_13 = entry[13];
-			tUNK_14 = entry[14];
-			break;
-		}
-
-		timing->reg_100220 = (tRC << 24 | tRFC << 16 | tRAS << 8 | tRP);
-
-		/* XXX: I don't trust the -1's and +1's... they must come
-		 *      from somewhere! */
-		timing->reg_100224 = (tUNK_0 + tUNK_19 + 1 + magic_number) << 24 |
-				      max(tUNK_18, (u8) 1) << 16 |
-				      (tUNK_1 + tUNK_19 + 1 + magic_number) << 8;
-		if (dev_priv->chipset == 0xa8) {
-			timing->reg_100224 |= (tUNK_2 - 1);
-		} else {
-			timing->reg_100224 |= (tUNK_2 + 2 - magic_number);
-		}
-
-		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
-		if (dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa)
-			timing->reg_100228 |= (tUNK_19 - 1) << 24;
-		else
-			timing->reg_100228 |= magic_number << 24;
-
-		if (dev_priv->card_type == NV_40) {
-			/* NV40: don't know what the rest of the regs are..
-			 * And don't need to know either */
-			timing->reg_100228 |= 0x20200000;
-		} else if (dev_priv->card_type >= NV_50) {
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_10022c = (0x14 + tUNK_2) << 24 |
-						     0x16 << 16 |
-						     (tUNK_2 - 1) << 8 |
-						     (tUNK_2 - 1);
-			} else {
-				/* XXX: reg_10022c for recentish cards */
-				timing->reg_10022c = tUNK_2 - 1;
-			}
-
-			timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
-						  tUNK_13 << 8  | tUNK_13);
-
-			timing->reg_100234 = (tRAS << 24 | tRC);
-			timing->reg_100234 += max(tUNK_10, tUNK_11) << 16;
-
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_100234 |= (tUNK_2 + 2) << 8;
-			} else {
-				/* XXX: +6? */
-				timing->reg_100234 |= (tUNK_19 + 6) << 8;
-			}
-
-			/* XXX; reg_100238
-			 * reg_100238: 0x00?????? */
-			timing->reg_10023c = 0x202;
-			if (dev_priv->chipset < 0x98 ||
-			    (dev_priv->chipset == 0x98 &&
-			     dev_priv->stepping <= 0xa1)) {
-				timing->reg_10023c |= 0x4000000 | (tUNK_2 - 1) << 16;
-			} else {
-				/* XXX: reg_10023c
-				 * currently unknown
-				 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
-			}
-
-			/* XXX: reg_100240? */
-		}
 		timing->id = i;
-
-		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
-			 timing->reg_100220, timing->reg_100224,
-			 timing->reg_100228, timing->reg_10022c);
-		NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
-			 timing->reg_100230, timing->reg_100234,
-			 timing->reg_100238, timing->reg_10023c);
-		NV_DEBUG(dev, "         240: %08x\n", timing->reg_100240);
+		timing->WR = entry[0];
+		timing->CL = entry[2];
+
+		if(dev_priv->card_type <= NV_40) {
+			nv40_mem_timing_entry(dev,hdr,(struct nouveau_pm_tbl_entry*) entry,magic_number,&pm->memtimings.timing[i]);
+		} else if(dev_priv->card_type == NV_50){
+			nv50_mem_timing_entry(dev,&P,hdr,(struct nouveau_pm_tbl_entry*) entry,magic_number,&pm->memtimings.timing[i]);
+		} else if(dev_priv->card_type == NV_C0) {
+			nvc0_mem_timing_entry(dev,hdr,(struct nouveau_pm_tbl_entry*) entry,&pm->memtimings.timing[i]);
+		}
 	}
 
-	memtimings->nr_timing = entries;
-	memtimings->supported = (dev_priv->chipset <= 0x98);
+	memtimings->nr_timing = hdr->entry_cnt;
+	memtimings->supported = P.version == 1;
 }
 
 void
@@ -693,7 +708,10 @@ nouveau_mem_timing_fini(struct drm_device *dev)
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_memtimings *mem = &dev_priv->engine.pm.memtimings;
 
-	kfree(mem->timing);
+	if(mem->timing) {
+		kfree(mem->timing);
+		mem->timing = NULL;
+	}
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.c b/drivers/gpu/drm/nouveau/nouveau_mm.c
index 1640dec3b823..b29ffb3d1408 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.c
@@ -27,7 +27,7 @@
 #include "nouveau_mm.h"
 
 static inline void
-region_put(struct nouveau_mm *rmm, struct nouveau_mm_node *a)
+region_put(struct nouveau_mm *mm, struct nouveau_mm_node *a)
 {
 	list_del(&a->nl_entry);
 	list_del(&a->fl_entry);
@@ -35,7 +35,7 @@ region_put(struct nouveau_mm *rmm, struct nouveau_mm_node *a)
 }
 
 static struct nouveau_mm_node *
-region_split(struct nouveau_mm *rmm, struct nouveau_mm_node *a, u32 size)
+region_split(struct nouveau_mm *mm, struct nouveau_mm_node *a, u32 size)
 {
 	struct nouveau_mm_node *b;
 
@@ -57,33 +57,33 @@ region_split(struct nouveau_mm *rmm, struct nouveau_mm_node *a, u32 size)
 	return b;
 }
 
-#define node(root, dir) ((root)->nl_entry.dir == &rmm->nodes) ? NULL : \
+#define node(root, dir) ((root)->nl_entry.dir == &mm->nodes) ? NULL : \
 	list_entry((root)->nl_entry.dir, struct nouveau_mm_node, nl_entry)
 
 void
-nouveau_mm_put(struct nouveau_mm *rmm, struct nouveau_mm_node *this)
+nouveau_mm_put(struct nouveau_mm *mm, struct nouveau_mm_node *this)
 {
 	struct nouveau_mm_node *prev = node(this, prev);
 	struct nouveau_mm_node *next = node(this, next);
 
-	list_add(&this->fl_entry, &rmm->free);
+	list_add(&this->fl_entry, &mm->free);
 	this->type = 0;
 
 	if (prev && prev->type == 0) {
 		prev->length += this->length;
-		region_put(rmm, this);
+		region_put(mm, this);
 		this = prev;
 	}
 
 	if (next && next->type == 0) {
 		next->offset  = this->offset;
 		next->length += this->length;
-		region_put(rmm, this);
+		region_put(mm, this);
 	}
 }
 
 int
-nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
+nouveau_mm_get(struct nouveau_mm *mm, int type, u32 size, u32 size_nc,
 	       u32 align, struct nouveau_mm_node **pnode)
 {
 	struct nouveau_mm_node *prev, *this, *next;
@@ -92,17 +92,17 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
 	u32 splitoff;
 	u32 s, e;
 
-	list_for_each_entry(this, &rmm->free, fl_entry) {
+	list_for_each_entry(this, &mm->free, fl_entry) {
 		e = this->offset + this->length;
 		s = this->offset;
 
 		prev = node(this, prev);
 		if (prev && prev->type != type)
-			s = roundup(s, rmm->block_size);
+			s = roundup(s, mm->block_size);
 
 		next = node(this, next);
 		if (next && next->type != type)
-			e = rounddown(e, rmm->block_size);
+			e = rounddown(e, mm->block_size);
 
 		s  = (s + align_mask) & ~align_mask;
 		e &= ~align_mask;
@@ -110,10 +110,10 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
 			continue;
 
 		splitoff = s - this->offset;
-		if (splitoff && !region_split(rmm, this, splitoff))
+		if (splitoff && !region_split(mm, this, splitoff))
 			return -ENOMEM;
 
-		this = region_split(rmm, this, min(size, e - s));
+		this = region_split(mm, this, min(size, e - s));
 		if (!this)
 			return -ENOMEM;
 
@@ -127,52 +127,49 @@ nouveau_mm_get(struct nouveau_mm *rmm, int type, u32 size, u32 size_nc,
 }
 
 int
-nouveau_mm_init(struct nouveau_mm **prmm, u32 offset, u32 length, u32 block)
+nouveau_mm_init(struct nouveau_mm *mm, u32 offset, u32 length, u32 block)
 {
-	struct nouveau_mm *rmm;
-	struct nouveau_mm_node *heap;
+	struct nouveau_mm_node *node;
+
+	if (block) {
+		mutex_init(&mm->mutex);
+		INIT_LIST_HEAD(&mm->nodes);
+		INIT_LIST_HEAD(&mm->free);
+		mm->block_size = block;
+		mm->heap_nodes = 0;
+	}
 
-	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
-	if (!heap)
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
 		return -ENOMEM;
-	heap->offset = roundup(offset, block);
-	heap->length = rounddown(offset + length, block) - heap->offset;
+	node->offset = roundup(offset, mm->block_size);
+	node->length = rounddown(offset + length, mm->block_size) - node->offset;
 
-	rmm = kzalloc(sizeof(*rmm), GFP_KERNEL);
-	if (!rmm) {
-		kfree(heap);
-		return -ENOMEM;
-	}
-	rmm->block_size = block;
-	mutex_init(&rmm->mutex);
-	INIT_LIST_HEAD(&rmm->nodes);
-	INIT_LIST_HEAD(&rmm->free);
-	list_add(&heap->nl_entry, &rmm->nodes);
-	list_add(&heap->fl_entry, &rmm->free);
-
-	*prmm = rmm;
+	list_add_tail(&node->nl_entry, &mm->nodes);
+	list_add_tail(&node->fl_entry, &mm->free);
+	mm->heap_nodes++;
 	return 0;
 }
 
 int
-nouveau_mm_fini(struct nouveau_mm **prmm)
+nouveau_mm_fini(struct nouveau_mm *mm)
 {
-	struct nouveau_mm *rmm = *prmm;
 	struct nouveau_mm_node *node, *heap =
-		list_first_entry(&rmm->nodes, struct nouveau_mm_node, nl_entry);
-
-	if (!list_is_singular(&rmm->nodes)) {
-		printk(KERN_ERR "nouveau_mm not empty at destroy time!\n");
-		list_for_each_entry(node, &rmm->nodes, nl_entry) {
-			printk(KERN_ERR "0x%02x: 0x%08x 0x%08x\n",
-			       node->type, node->offset, node->length);
+		list_first_entry(&mm->nodes, struct nouveau_mm_node, nl_entry);
+	int nodes = 0;
+
+	list_for_each_entry(node, &mm->nodes, nl_entry) {
+		if (nodes++ == mm->heap_nodes) {
+			printk(KERN_ERR "nouveau_mm in use at destroy time!\n");
+			list_for_each_entry(node, &mm->nodes, nl_entry) {
+				printk(KERN_ERR "0x%02x: 0x%08x 0x%08x\n",
+				       node->type, node->offset, node->length);
+			}
+			WARN_ON(1);
+			return -EBUSY;
 		}
-		WARN_ON(1);
-		return -EBUSY;
 	}
 
 	kfree(heap);
-	kfree(rmm);
-	*prmm = NULL;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_mm.h b/drivers/gpu/drm/nouveau/nouveau_mm.h
index b9c016d21553..57a600c35c95 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_mm.h
@@ -42,10 +42,11 @@ struct nouveau_mm {
 	struct mutex mutex;
 
 	u32 block_size;
+	int heap_nodes;
 };
 
-int  nouveau_mm_init(struct nouveau_mm **, u32 offset, u32 length, u32 block);
-int  nouveau_mm_fini(struct nouveau_mm **);
+int  nouveau_mm_init(struct nouveau_mm *, u32 offset, u32 length, u32 block);
+int  nouveau_mm_fini(struct nouveau_mm *);
 int  nouveau_mm_pre(struct nouveau_mm *);
 int  nouveau_mm_get(struct nouveau_mm *, int type, u32 size, u32 size_nc,
 		    u32 align, struct nouveau_mm_node **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 159b7c437d3f..02222c540aee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -693,6 +693,7 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
 static int
 nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm)
 {
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	struct nouveau_gpuobj *pgd = NULL;
 	struct nouveau_vm_pgd *vpgd;
@@ -722,6 +723,9 @@ nvc0_gpuobj_channel_init(struct nouveau_channel *chan, struct nouveau_vm *vm)
 	nv_wo32(chan->ramin, 0x020c, 0x000000ff);
 
 	/* map display semaphore buffers into channel's vm */
+	if (dev_priv->card_type >= NV_D0)
+		return 0;
+
 	for (i = 0; i < 2; i++) {
 		struct nv50_display_crtc *dispc = &nv50_display(dev)->crtc[i];
 
@@ -746,7 +750,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 	int ret, i;
 
 	NV_DEBUG(dev, "ch%d vram=0x%08x tt=0x%08x\n", chan->id, vram_h, tt_h);
-	if (dev_priv->card_type == NV_C0)
+	if (dev_priv->card_type >= NV_C0)
 		return nvc0_gpuobj_channel_init(chan, vm);
 
 	/* Allocate a chunk of memory for per-channel object storage */
@@ -793,7 +797,7 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 			return ret;
 
 		/* dma objects for display sync channel semaphore blocks */
-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
 			struct nouveau_gpuobj *sem = NULL;
 			struct nv50_display_crtc *dispc =
 				&nv50_display(dev)->crtc[i];
@@ -875,18 +879,18 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	if (dev_priv->card_type >= NV_50) {
+	if (dev_priv->card_type >= NV_50 && dev_priv->card_type <= NV_C0) {
 		struct nv50_display *disp = nv50_display(dev);
 
-		for (i = 0; i < 2; i++) {
+		for (i = 0; i < dev->mode_config.num_crtc; i++) {
 			struct nv50_display_crtc *dispc = &disp->crtc[i];
 			nouveau_bo_vma_del(dispc->sem.bo, &chan->dispc_vma[i]);
 		}
-
-		nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd);
-		nouveau_gpuobj_ref(NULL, &chan->vm_pd);
 	}
 
+	nouveau_vm_ref(NULL, &chan->vm, chan->vm_pd);
+	nouveau_gpuobj_ref(NULL, &chan->vm_pd);
+
 	if (drm_mm_initialized(&chan->ramin_heap))
 		drm_mm_takedown(&chan->ramin_heap);
 	nouveau_gpuobj_ref(NULL, &chan->ramin);
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index ef9dec0e6f8b..9f178aa94162 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -127,13 +127,57 @@ nouveau_perf_timing(struct drm_device *dev, struct bit_entry *P,
 
 	entry += ramcfg * recordlen;
 	if (entry[1] >= pm->memtimings.nr_timing) {
-		NV_WARN(dev, "timingset %d does not exist\n", entry[1]);
+		if (entry[1] != 0xff)
+			NV_WARN(dev, "timingset %d does not exist\n", entry[1]);
 		return NULL;
 	}
 
 	return &pm->memtimings.timing[entry[1]];
 }
 
+static void
+nouveau_perf_voltage(struct drm_device *dev, struct bit_entry *P,
+		     struct nouveau_pm_level *perflvl)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvbios *bios = &dev_priv->vbios;
+	u8 *vmap;
+	int id;
+
+	id = perflvl->volt_min;
+	perflvl->volt_min = 0;
+
+	/* boards using voltage table version <0x40 store the voltage
+	 * level directly in the perflvl entry as a multiple of 10mV
+	 */
+	if (dev_priv->engine.pm.voltage.version < 0x40) {
+		perflvl->volt_min = id * 10000;
+		perflvl->volt_max = perflvl->volt_min;
+		return;
+	}
+
+	/* on newer ones, the perflvl stores an index into yet another
+	 * vbios table containing a min/max voltage value for the perflvl
+	 */
+	if (P->version != 2 || P->length < 34) {
+		NV_DEBUG(dev, "where's our volt map table ptr? %d %d\n",
+			 P->version, P->length);
+		return;
+	}
+
+	vmap = ROMPTR(bios, P->data[32]);
+	if (!vmap) {
+		NV_DEBUG(dev, "volt map table pointer invalid\n");
+		return;
+	}
+
+	if (id < vmap[3]) {
+		vmap += vmap[1] + (vmap[2] * id);
+		perflvl->volt_min = ROM32(vmap[0]);
+		perflvl->volt_max = ROM32(vmap[4]);
+	}
+}
+
 void
 nouveau_perf_init(struct drm_device *dev)
 {
@@ -141,6 +185,8 @@ nouveau_perf_init(struct drm_device *dev)
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct nvbios *bios = &dev_priv->vbios;
 	struct bit_entry P;
+	struct nouveau_pm_memtimings *memtimings = &pm->memtimings;
+	struct nouveau_pm_tbl_header mt_hdr;
 	u8 version, headerlen, recordlen, entries;
 	u8 *perf, *entry;
 	int vid, i;
@@ -188,6 +234,22 @@ nouveau_perf_init(struct drm_device *dev)
 	}
 
 	entry = perf + headerlen;
+
+	/* For version 0x15, initialize memtiming table */
+	if(version == 0x15) {
+		memtimings->timing =
+				kcalloc(entries, sizeof(*memtimings->timing), GFP_KERNEL);
+		if(!memtimings) {
+			NV_WARN(dev,"Could not allocate memtiming table\n");
+			return;
+		}
+
+		mt_hdr.entry_cnt = entries;
+		mt_hdr.entry_len = 14;
+		mt_hdr.version = version;
+		mt_hdr.header_len = 4;
+	}
+
 	for (i = 0; i < entries; i++) {
 		struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl];
 
@@ -203,7 +265,8 @@ nouveau_perf_init(struct drm_device *dev)
 		case 0x13:
 		case 0x15:
 			perflvl->fanspeed = entry[55];
-			perflvl->voltage = (recordlen > 56) ? entry[56] : 0;
+			if (recordlen > 56)
+				perflvl->volt_min = entry[56];
 			perflvl->core = ROM32(entry[1]) * 10;
 			perflvl->memory = ROM32(entry[5]) * 20;
 			break;
@@ -211,9 +274,10 @@ nouveau_perf_init(struct drm_device *dev)
 		case 0x23:
 		case 0x24:
 			perflvl->fanspeed = entry[4];
-			perflvl->voltage = entry[5];
-			perflvl->core = ROM16(entry[6]) * 1000;
-
+			perflvl->volt_min = entry[5];
+			perflvl->shader = ROM16(entry[6]) * 1000;
+			perflvl->core = perflvl->shader;
+			perflvl->core += (signed char)entry[8] * 1000;
 			if (dev_priv->chipset == 0x49 ||
 			    dev_priv->chipset == 0x4b)
 				perflvl->memory = ROM16(entry[11]) * 1000;
@@ -223,7 +287,7 @@ nouveau_perf_init(struct drm_device *dev)
 			break;
 		case 0x25:
 			perflvl->fanspeed = entry[4];
-			perflvl->voltage = entry[5];
+			perflvl->volt_min = entry[5];
 			perflvl->core = ROM16(entry[6]) * 1000;
 			perflvl->shader = ROM16(entry[10]) * 1000;
 			perflvl->memory = ROM16(entry[12]) * 1000;
@@ -232,7 +296,7 @@ nouveau_perf_init(struct drm_device *dev)
 			perflvl->memscript = ROM16(entry[2]);
 		case 0x35:
 			perflvl->fanspeed = entry[6];
-			perflvl->voltage = entry[7];
+			perflvl->volt_min = entry[7];
 			perflvl->core = ROM16(entry[8]) * 1000;
 			perflvl->shader = ROM16(entry[10]) * 1000;
 			perflvl->memory = ROM16(entry[12]) * 1000;
@@ -240,30 +304,34 @@ nouveau_perf_init(struct drm_device *dev)
 			perflvl->unk05 = ROM16(entry[16]) * 1000;
 			break;
 		case 0x40:
-#define subent(n) entry[perf[2] + ((n) * perf[3])]
+#define subent(n) (ROM16(entry[perf[2] + ((n) * perf[3])]) & 0xfff) * 1000
 			perflvl->fanspeed = 0; /*XXX*/
-			perflvl->voltage = entry[2];
+			perflvl->volt_min = entry[2];
 			if (dev_priv->card_type == NV_50) {
-				perflvl->core = ROM16(subent(0)) & 0xfff;
-				perflvl->shader = ROM16(subent(1)) & 0xfff;
-				perflvl->memory = ROM16(subent(2)) & 0xfff;
+				perflvl->core   = subent(0);
+				perflvl->shader = subent(1);
+				perflvl->memory = subent(2);
+				perflvl->vdec   = subent(3);
+				perflvl->unka0  = subent(4);
 			} else {
-				perflvl->shader = ROM16(subent(3)) & 0xfff;
+				perflvl->hub06  = subent(0);
+				perflvl->hub01  = subent(1);
+				perflvl->copy   = subent(2);
+				perflvl->shader = subent(3);
+				perflvl->rop    = subent(4);
+				perflvl->memory = subent(5);
+				perflvl->vdec   = subent(6);
+				perflvl->daemon = subent(10);
+				perflvl->hub07  = subent(11);
 				perflvl->core   = perflvl->shader / 2;
-				perflvl->unk0a  = ROM16(subent(4)) & 0xfff;
-				perflvl->memory = ROM16(subent(5)) & 0xfff;
 			}
-
-			perflvl->core *= 1000;
-			perflvl->shader *= 1000;
-			perflvl->memory *= 1000;
-			perflvl->unk0a *= 1000;
 			break;
 		}
 
 		/* make sure vid is valid */
-		if (pm->voltage.supported && perflvl->voltage) {
-			vid = nouveau_volt_vid_lookup(dev, perflvl->voltage);
+		nouveau_perf_voltage(dev, &P, perflvl);
+		if (pm->voltage.supported && perflvl->volt_min) {
+			vid = nouveau_volt_vid_lookup(dev, perflvl->volt_min);
 			if (vid < 0) {
 				NV_DEBUG(dev, "drop perflvl %d, bad vid\n", i);
 				entry += recordlen;
@@ -272,7 +340,11 @@ nouveau_perf_init(struct drm_device *dev)
 		}
 
 		/* get the corresponding memory timings */
-		if (version > 0x15) {
+		if (version == 0x15) {
+			memtimings->timing[i].id = i;
+			nv30_mem_timing_entry(dev,&mt_hdr,(struct nouveau_pm_tbl_entry*) &entry[41],0,&memtimings->timing[i]);
+			perflvl->timing = &memtimings->timing[i];
+		} else if (version > 0x15) {
 			/* last 3 args are for < 0x40, ignored for >= 0x40 */
 			perflvl->timing =
 				nouveau_perf_timing(dev, &P,
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index da8d994d5e8a..a539fd257921 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -64,18 +64,26 @@ nouveau_pm_perflvl_set(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	if (perflvl == pm->cur)
 		return 0;
 
-	if (pm->voltage.supported && pm->voltage_set && perflvl->voltage) {
-		ret = pm->voltage_set(dev, perflvl->voltage);
+	if (pm->voltage.supported && pm->voltage_set && perflvl->volt_min) {
+		ret = pm->voltage_set(dev, perflvl->volt_min);
 		if (ret) {
 			NV_ERROR(dev, "voltage_set %d failed: %d\n",
-				 perflvl->voltage, ret);
+				 perflvl->volt_min, ret);
 		}
 	}
 
-	nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
-	nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
-	nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
-	nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
+	if (pm->clocks_pre) {
+		void *state = pm->clocks_pre(dev, perflvl);
+		if (IS_ERR(state))
+			return PTR_ERR(state);
+		pm->clocks_set(dev, state);
+	} else
+	if (pm->clock_set) {
+		nouveau_pm_clock_set(dev, perflvl, PLL_CORE, perflvl->core);
+		nouveau_pm_clock_set(dev, perflvl, PLL_SHADER, perflvl->shader);
+		nouveau_pm_clock_set(dev, perflvl, PLL_MEMORY, perflvl->memory);
+		nouveau_pm_clock_set(dev, perflvl, PLL_UNK05, perflvl->unk05);
+	}
 
 	pm->cur = perflvl;
 	return 0;
@@ -92,9 +100,6 @@ nouveau_pm_profile_set(struct drm_device *dev, const char *profile)
 	if (nouveau_perflvl_wr != 7777)
 		return -EPERM;
 
-	if (!pm->clock_set)
-		return -EINVAL;
-
 	if (!strncmp(profile, "boot", 4))
 		perflvl = &pm->boot;
 	else {
@@ -123,31 +128,37 @@ nouveau_pm_perflvl_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	int ret;
 
-	if (!pm->clock_get)
-		return -EINVAL;
-
 	memset(perflvl, 0, sizeof(*perflvl));
 
-	ret = pm->clock_get(dev, PLL_CORE);
-	if (ret > 0)
-		perflvl->core = ret;
+	if (pm->clocks_get) {
+		ret = pm->clocks_get(dev, perflvl);
+		if (ret)
+			return ret;
+	} else
+	if (pm->clock_get) {
+		ret = pm->clock_get(dev, PLL_CORE);
+		if (ret > 0)
+			perflvl->core = ret;
 
-	ret = pm->clock_get(dev, PLL_MEMORY);
-	if (ret > 0)
-		perflvl->memory = ret;
+		ret = pm->clock_get(dev, PLL_MEMORY);
+		if (ret > 0)
+			perflvl->memory = ret;
 
-	ret = pm->clock_get(dev, PLL_SHADER);
-	if (ret > 0)
-		perflvl->shader = ret;
+		ret = pm->clock_get(dev, PLL_SHADER);
+		if (ret > 0)
+			perflvl->shader = ret;
 
-	ret = pm->clock_get(dev, PLL_UNK05);
-	if (ret > 0)
-		perflvl->unk05 = ret;
+		ret = pm->clock_get(dev, PLL_UNK05);
+		if (ret > 0)
+			perflvl->unk05 = ret;
+	}
 
 	if (pm->voltage.supported && pm->voltage_get) {
 		ret = pm->voltage_get(dev);
-		if (ret > 0)
-			perflvl->voltage = ret;
+		if (ret > 0) {
+			perflvl->volt_min = ret;
+			perflvl->volt_max = ret;
+		}
 	}
 
 	return 0;
@@ -156,7 +167,7 @@ nouveau_pm_perflvl_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 static void
 nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 {
-	char c[16], s[16], v[16], f[16], t[16];
+	char c[16], s[16], v[32], f[16], t[16], m[16];
 
 	c[0] = '\0';
 	if (perflvl->core)
@@ -166,9 +177,19 @@ nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 	if (perflvl->shader)
 		snprintf(s, sizeof(s), " shader %dMHz", perflvl->shader / 1000);
 
+	m[0] = '\0';
+	if (perflvl->memory)
+		snprintf(m, sizeof(m), " memory %dMHz", perflvl->memory / 1000);
+
 	v[0] = '\0';
-	if (perflvl->voltage)
-		snprintf(v, sizeof(v), " voltage %dmV", perflvl->voltage * 10);
+	if (perflvl->volt_min && perflvl->volt_min != perflvl->volt_max) {
+		snprintf(v, sizeof(v), " voltage %dmV-%dmV",
+			 perflvl->volt_min / 1000, perflvl->volt_max / 1000);
+	} else
+	if (perflvl->volt_min) {
+		snprintf(v, sizeof(v), " voltage %dmV",
+			 perflvl->volt_min / 1000);
+	}
 
 	f[0] = '\0';
 	if (perflvl->fanspeed)
@@ -178,8 +199,7 @@ nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 	if (perflvl->timing)
 		snprintf(t, sizeof(t), " timing %d", perflvl->timing->id);
 
-	snprintf(ptr, len, "memory %dMHz%s%s%s%s%s\n", perflvl->memory / 1000,
-		 c, s, v, f, t);
+	snprintf(ptr, len, "%s%s%s%s%s%s\n", c, s, m, t, v, f);
 }
 
 static ssize_t
@@ -190,7 +210,7 @@ nouveau_pm_get_perflvl_info(struct device *d,
 	char *ptr = buf;
 	int len = PAGE_SIZE;
 
-	snprintf(ptr, len, "%d: ", perflvl->id);
+	snprintf(ptr, len, "%d:", perflvl->id);
 	ptr += strlen(buf);
 	len -= strlen(buf);
 
@@ -211,9 +231,9 @@ nouveau_pm_get_perflvl(struct device *d, struct device_attribute *a, char *buf)
 	if (!pm->cur)
 		snprintf(ptr, len, "setting: boot\n");
 	else if (pm->cur == &pm->boot)
-		snprintf(ptr, len, "setting: boot\nc: ");
+		snprintf(ptr, len, "setting: boot\nc:");
 	else
-		snprintf(ptr, len, "setting: static %d\nc: ", pm->cur->id);
+		snprintf(ptr, len, "setting: static %d\nc:", pm->cur->id);
 	ptr += strlen(buf);
 	len -= strlen(buf);
 
@@ -292,7 +312,7 @@ nouveau_sysfs_fini(struct drm_device *dev)
 	}
 }
 
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 static ssize_t
 nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
@@ -409,7 +429,7 @@ static const struct attribute_group hwmon_attrgroup = {
 static int
 nouveau_hwmon_init(struct drm_device *dev)
 {
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 	struct device *hwmon_dev;
@@ -442,7 +462,7 @@ nouveau_hwmon_init(struct drm_device *dev)
 static void
 nouveau_hwmon_fini(struct drm_device *dev)
 {
-#ifdef CONFIG_HWMON
+#if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
 
@@ -488,7 +508,7 @@ nouveau_pm_init(struct drm_device *dev)
 	NV_INFO(dev, "%d available performance level(s)\n", pm->nr_perflvl);
 	for (i = 0; i < pm->nr_perflvl; i++) {
 		nouveau_pm_perflvl_info(&pm->perflvl[i], info, sizeof(info));
-		NV_INFO(dev, "%d: %s", pm->perflvl[i].id, info);
+		NV_INFO(dev, "%d:%s", pm->perflvl[i].id, info);
 	}
 
 	/* determine current ("boot") performance level */
@@ -498,7 +518,7 @@ nouveau_pm_init(struct drm_device *dev)
 		pm->cur = &pm->boot;
 
 		nouveau_pm_perflvl_info(&pm->boot, info, sizeof(info));
-		NV_INFO(dev, "c: %s", info);
+		NV_INFO(dev, "c:%s", info);
 	}
 
 	/* switch performance levels now if requested */
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.h b/drivers/gpu/drm/nouveau/nouveau_pm.h
index 4a9838ddacec..8ac02cdd03a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.h
@@ -52,6 +52,11 @@ void *nv04_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
 			u32 id, int khz);
 void nv04_pm_clock_set(struct drm_device *, void *);
 
+/* nv40_pm.c */
+int nv40_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
+void *nv40_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *);
+void nv40_pm_clocks_set(struct drm_device *, void *);
+
 /* nv50_pm.c */
 int nv50_pm_clock_get(struct drm_device *, u32 id);
 void *nv50_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
@@ -59,10 +64,12 @@ void *nv50_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
 void nv50_pm_clock_set(struct drm_device *, void *);
 
 /* nva3_pm.c */
-int nva3_pm_clock_get(struct drm_device *, u32 id);
-void *nva3_pm_clock_pre(struct drm_device *, struct nouveau_pm_level *,
-			u32 id, int khz);
-void nva3_pm_clock_set(struct drm_device *, void *);
+int nva3_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
+void *nva3_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *);
+void nva3_pm_clocks_set(struct drm_device *, void *);
+
+/* nvc0_pm.c */
+int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *);
 
 /* nouveau_temp.c */
 void nouveau_temp_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index f18cdfc3400f..43a96b99e180 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -826,9 +826,12 @@
 #define NV50_PDISPLAY_SOR_DPMS_STATE_ACTIVE                          0x00030000
 #define NV50_PDISPLAY_SOR_DPMS_STATE_BLANKED                         0x00080000
 #define NV50_PDISPLAY_SOR_DPMS_STATE_WAIT                            0x10000000
-#define NV50_PDISPLAY_SOR_BACKLIGHT                                  0x0061c084
-#define NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE                           0x80000000
-#define NV50_PDISPLAY_SOR_BACKLIGHT_LEVEL                            0x00000fff
+#define NV50_PDISP_SOR_PWM_DIV(i)                     (0x0061c080 + (i) * 0x800)
+#define NV50_PDISP_SOR_PWM_CTL(i)                     (0x0061c084 + (i) * 0x800)
+#define NV50_PDISP_SOR_PWM_CTL_NEW                                   0x80000000
+#define NVA3_PDISP_SOR_PWM_CTL_UNK                                   0x40000000
+#define NV50_PDISP_SOR_PWM_CTL_VAL                                   0x000007ff
+#define NVA3_PDISP_SOR_PWM_CTL_VAL                                   0x00ffffff
 #define NV50_SOR_DP_CTRL(i, l)           (0x0061c10c + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_CTRL_ENABLED                                     0x00000001
 #define NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED                      0x00004000
@@ -843,7 +846,7 @@
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_2                          0x02000000
 #define NV50_SOR_DP_UNK118(i, l)         (0x0061c118 + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_UNK120(i, l)         (0x0061c120 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK128(i, l)         (0x0061c128 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_SCFG(i, l)           (0x0061c128 + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_UNK130(i, l)         (0x0061c130 + (i) * 0x800 + (l) * 0x80)
 
 #define NV50_PDISPLAY_USER(i)                        ((i) * 0x1000 + 0x00640000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 2706cb3d871a..b75258a9fe44 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -12,8 +12,8 @@ struct nouveau_sgdma_be {
 	struct drm_device *dev;
 
 	dma_addr_t *pages;
-	bool *ttm_alloced;
 	unsigned nr_pages;
+	bool unmap_pages;
 
 	u64 offset;
 	bool bound;
@@ -26,43 +26,28 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_device *dev = nvbe->dev;
+	int i;
 
 	NV_DEBUG(nvbe->dev, "num_pages = %ld\n", num_pages);
 
-	if (nvbe->pages)
-		return -EINVAL;
-
-	nvbe->pages = kmalloc(sizeof(dma_addr_t) * num_pages, GFP_KERNEL);
-	if (!nvbe->pages)
-		return -ENOMEM;
+	nvbe->pages = dma_addrs;
+	nvbe->nr_pages = num_pages;
+	nvbe->unmap_pages = true;
 
-	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
-	if (!nvbe->ttm_alloced) {
-		kfree(nvbe->pages);
-		nvbe->pages = NULL;
-		return -ENOMEM;
+	/* this code path isn't called and is incorrect anyways */
+	if (0) { /* dma_addrs[0] != DMA_ERROR_CODE) { */
+		nvbe->unmap_pages = false;
+		return 0;
 	}
 
-	nvbe->nr_pages = 0;
-	while (num_pages--) {
-		/* this code path isn't called and is incorrect anyways */
-		if (0) { /*dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE)*/
-			nvbe->pages[nvbe->nr_pages] =
-					dma_addrs[nvbe->nr_pages];
-		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
-		} else {
-			nvbe->pages[nvbe->nr_pages] =
-				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
-				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-			if (pci_dma_mapping_error(dev->pdev,
-						  nvbe->pages[nvbe->nr_pages])) {
-				be->func->clear(be);
-				return -EFAULT;
-			}
-			nvbe->ttm_alloced[nvbe->nr_pages] = false;
+	for (i = 0; i < num_pages; i++) {
+		nvbe->pages[i] = pci_map_page(dev->pdev, pages[i], 0,
+					      PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+		if (pci_dma_mapping_error(dev->pdev, nvbe->pages[i])) {
+			nvbe->nr_pages = --i;
+			be->func->clear(be);
+			return -EFAULT;
 		}
-
-		nvbe->nr_pages++;
 	}
 
 	return 0;
@@ -72,25 +57,16 @@ static void
 nouveau_sgdma_clear(struct ttm_backend *be)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
-	struct drm_device *dev;
-
-	if (nvbe && nvbe->pages) {
-		dev = nvbe->dev;
-		NV_DEBUG(dev, "\n");
+	struct drm_device *dev = nvbe->dev;
 
-		if (nvbe->bound)
-			be->func->unbind(be);
+	if (nvbe->bound)
+		be->func->unbind(be);
 
+	if (nvbe->unmap_pages) {
 		while (nvbe->nr_pages--) {
-			if (!nvbe->ttm_alloced[nvbe->nr_pages])
-				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 		}
-		kfree(nvbe->pages);
-		kfree(nvbe->ttm_alloced);
-		nvbe->pages = NULL;
-		nvbe->ttm_alloced = NULL;
-		nvbe->nr_pages = 0;
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 10656e430b44..82478e0998e5 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -286,9 +286,9 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->gpio.get		= nv10_gpio_get;
 		engine->gpio.set		= nv10_gpio_set;
 		engine->gpio.irq_enable		= NULL;
-		engine->pm.clock_get		= nv04_pm_clock_get;
-		engine->pm.clock_pre		= nv04_pm_clock_pre;
-		engine->pm.clock_set		= nv04_pm_clock_set;
+		engine->pm.clocks_get		= nv40_pm_clocks_get;
+		engine->pm.clocks_pre		= nv40_pm_clocks_pre;
+		engine->pm.clocks_set		= nv40_pm_clocks_set;
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->pm.temp_get		= nv40_temp_get;
@@ -299,7 +299,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 	case 0x50:
 	case 0x80: /* gotta love NVIDIA's consistency.. */
 	case 0x90:
-	case 0xA0:
+	case 0xa0:
 		engine->instmem.init		= nv50_instmem_init;
 		engine->instmem.takedown	= nv50_instmem_takedown;
 		engine->instmem.suspend		= nv50_instmem_suspend;
@@ -359,9 +359,9 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 			engine->pm.clock_set	= nv50_pm_clock_set;
 			break;
 		default:
-			engine->pm.clock_get	= nva3_pm_clock_get;
-			engine->pm.clock_pre	= nva3_pm_clock_pre;
-			engine->pm.clock_set	= nva3_pm_clock_set;
+			engine->pm.clocks_get	= nva3_pm_clocks_get;
+			engine->pm.clocks_pre	= nva3_pm_clocks_pre;
+			engine->pm.clocks_set	= nva3_pm_clocks_set;
 			break;
 		}
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
@@ -376,7 +376,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nv50_vram_flags_valid;
 		break;
-	case 0xC0:
+	case 0xc0:
 		engine->instmem.init		= nvc0_instmem_init;
 		engine->instmem.takedown	= nvc0_instmem_takedown;
 		engine->instmem.suspend		= nvc0_instmem_suspend;
@@ -422,12 +422,73 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->vram.put		= nv50_vram_del;
 		engine->vram.flags_valid	= nvc0_vram_flags_valid;
 		engine->pm.temp_get		= nv84_temp_get;
+		engine->pm.clocks_get		= nvc0_pm_clocks_get;
+		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
+		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
+		break;
+	case 0xd0:
+		engine->instmem.init		= nvc0_instmem_init;
+		engine->instmem.takedown	= nvc0_instmem_takedown;
+		engine->instmem.suspend		= nvc0_instmem_suspend;
+		engine->instmem.resume		= nvc0_instmem_resume;
+		engine->instmem.get		= nv50_instmem_get;
+		engine->instmem.put		= nv50_instmem_put;
+		engine->instmem.map		= nv50_instmem_map;
+		engine->instmem.unmap		= nv50_instmem_unmap;
+		engine->instmem.flush		= nv84_instmem_flush;
+		engine->mc.init			= nv50_mc_init;
+		engine->mc.takedown		= nv50_mc_takedown;
+		engine->timer.init		= nv04_timer_init;
+		engine->timer.read		= nv04_timer_read;
+		engine->timer.takedown		= nv04_timer_takedown;
+		engine->fb.init			= nvc0_fb_init;
+		engine->fb.takedown		= nvc0_fb_takedown;
+		engine->fifo.channels		= 128;
+		engine->fifo.init		= nvc0_fifo_init;
+		engine->fifo.takedown		= nvc0_fifo_takedown;
+		engine->fifo.disable		= nvc0_fifo_disable;
+		engine->fifo.enable		= nvc0_fifo_enable;
+		engine->fifo.reassign		= nvc0_fifo_reassign;
+		engine->fifo.channel_id		= nvc0_fifo_channel_id;
+		engine->fifo.create_context	= nvc0_fifo_create_context;
+		engine->fifo.destroy_context	= nvc0_fifo_destroy_context;
+		engine->fifo.load_context	= nvc0_fifo_load_context;
+		engine->fifo.unload_context	= nvc0_fifo_unload_context;
+		engine->display.early_init	= nouveau_stub_init;
+		engine->display.late_takedown	= nouveau_stub_takedown;
+		engine->display.create		= nvd0_display_create;
+		engine->display.init		= nvd0_display_init;
+		engine->display.destroy		= nvd0_display_destroy;
+		engine->gpio.init		= nv50_gpio_init;
+		engine->gpio.takedown		= nouveau_stub_takedown;
+		engine->gpio.get		= nvd0_gpio_get;
+		engine->gpio.set		= nvd0_gpio_set;
+		engine->gpio.irq_register	= nv50_gpio_irq_register;
+		engine->gpio.irq_unregister	= nv50_gpio_irq_unregister;
+		engine->gpio.irq_enable		= nv50_gpio_irq_enable;
+		engine->vram.init		= nvc0_vram_init;
+		engine->vram.takedown		= nv50_vram_fini;
+		engine->vram.get		= nvc0_vram_new;
+		engine->vram.put		= nv50_vram_del;
+		engine->vram.flags_valid	= nvc0_vram_flags_valid;
+		engine->pm.clocks_get		= nvc0_pm_clocks_get;
+		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
+		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		break;
 	default:
 		NV_ERROR(dev, "NV%02x unsupported\n", dev_priv->chipset);
 		return 1;
 	}
 
+	/* headless mode */
+	if (nouveau_modeset == 2) {
+		engine->display.early_init = nouveau_stub_init;
+		engine->display.late_takedown = nouveau_stub_takedown;
+		engine->display.create = nouveau_stub_init;
+		engine->display.init = nouveau_stub_init;
+		engine->display.destroy = nouveau_stub_takedown;
+	}
+
 	return 0;
 }
 
@@ -449,21 +510,6 @@ nouveau_vga_set_decode(void *priv, bool state)
 		return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
 }
 
-static int
-nouveau_card_init_channel(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int ret;
-
-	ret = nouveau_channel_alloc(dev, &dev_priv->channel, NULL,
-				    NvDmaFB, NvDmaTT);
-	if (ret)
-		return ret;
-
-	mutex_unlock(&dev_priv->channel->mutex);
-	return 0;
-}
-
 static void nouveau_switcheroo_set_state(struct pci_dev *pdev,
 					 enum vga_switcheroo_state state)
 {
@@ -630,8 +676,11 @@ nouveau_card_init(struct drm_device *dev)
 			break;
 		}
 
-		if (dev_priv->card_type == NV_40)
-			nv40_mpeg_create(dev);
+		if (dev_priv->card_type == NV_40 ||
+		    dev_priv->chipset == 0x31 ||
+		    dev_priv->chipset == 0x34 ||
+		    dev_priv->chipset == 0x36)
+			nv31_mpeg_create(dev);
 		else
 		if (dev_priv->card_type == NV_50 &&
 		    (dev_priv->chipset < 0x98 || dev_priv->chipset == 0xa0))
@@ -651,41 +700,69 @@ nouveau_card_init(struct drm_device *dev)
 			goto out_engine;
 	}
 
-	ret = engine->display.create(dev);
+	ret = nouveau_irq_init(dev);
 	if (ret)
 		goto out_fifo;
 
-	ret = drm_vblank_init(dev, nv_two_heads(dev) ? 2 : 1);
-	if (ret)
-		goto out_vblank;
+	/* initialise general modesetting */
+	drm_mode_config_init(dev);
+	drm_mode_create_scaling_mode_property(dev);
+	drm_mode_create_dithering_property(dev);
+	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
+	dev->mode_config.fb_base = pci_resource_start(dev->pdev, 1);
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	if (dev_priv->card_type < NV_10) {
+		dev->mode_config.max_width = 2048;
+		dev->mode_config.max_height = 2048;
+	} else
+	if (dev_priv->card_type < NV_50) {
+		dev->mode_config.max_width = 4096;
+		dev->mode_config.max_height = 4096;
+	} else {
+		dev->mode_config.max_width = 8192;
+		dev->mode_config.max_height = 8192;
+	}
 
-	ret = nouveau_irq_init(dev);
+	ret = engine->display.create(dev);
 	if (ret)
-		goto out_vblank;
+		goto out_irq;
 
-	/* what about PVIDEO/PCRTC/PRAMDAC etc? */
+	nouveau_backlight_init(dev);
 
 	if (dev_priv->eng[NVOBJ_ENGINE_GR]) {
 		ret = nouveau_fence_init(dev);
 		if (ret)
-			goto out_irq;
+			goto out_disp;
 
-		ret = nouveau_card_init_channel(dev);
+		ret = nouveau_channel_alloc(dev, &dev_priv->channel, NULL,
+					    NvDmaFB, NvDmaTT);
 		if (ret)
 			goto out_fence;
+
+		mutex_unlock(&dev_priv->channel->mutex);
+	}
+
+	if (dev->mode_config.num_crtc) {
+		ret = drm_vblank_init(dev, dev->mode_config.num_crtc);
+		if (ret)
+			goto out_chan;
+
+		nouveau_fbcon_init(dev);
+		drm_kms_helper_poll_init(dev);
 	}
 
-	nouveau_fbcon_init(dev);
-	drm_kms_helper_poll_init(dev);
 	return 0;
 
+out_chan:
+	nouveau_channel_put_unlocked(&dev_priv->channel);
 out_fence:
 	nouveau_fence_fini(dev);
+out_disp:
+	nouveau_backlight_exit(dev);
+	engine->display.destroy(dev);
 out_irq:
 	nouveau_irq_fini(dev);
-out_vblank:
-	drm_vblank_cleanup(dev);
-	engine->display.destroy(dev);
 out_fifo:
 	if (!dev_priv->noaccel)
 		engine->fifo.takedown(dev);
@@ -732,15 +809,20 @@ static void nouveau_card_takedown(struct drm_device *dev)
 	struct nouveau_engine *engine = &dev_priv->engine;
 	int e;
 
-	drm_kms_helper_poll_fini(dev);
-	nouveau_fbcon_fini(dev);
+	if (dev->mode_config.num_crtc) {
+		drm_kms_helper_poll_fini(dev);
+		nouveau_fbcon_fini(dev);
+		drm_vblank_cleanup(dev);
+	}
 
 	if (dev_priv->channel) {
 		nouveau_channel_put_unlocked(&dev_priv->channel);
 		nouveau_fence_fini(dev);
 	}
 
+	nouveau_backlight_exit(dev);
 	engine->display.destroy(dev);
+	drm_mode_config_cleanup(dev);
 
 	if (!dev_priv->noaccel) {
 		engine->fifo.takedown(dev);
@@ -774,7 +856,6 @@ static void nouveau_card_takedown(struct drm_device *dev)
 	engine->vram.takedown(dev);
 
 	nouveau_irq_fini(dev);
-	drm_vblank_cleanup(dev);
 
 	nouveau_pm_fini(dev);
 	nouveau_bios_takedown(dev);
@@ -907,7 +988,7 @@ static int nouveau_remove_conflicting_drivers(struct drm_device *dev)
 int nouveau_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_nouveau_private *dev_priv;
-	uint32_t reg0;
+	uint32_t reg0, strap;
 	resource_size_t mmio_start_offs;
 	int ret;
 
@@ -951,13 +1032,11 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 
 	/* Time to determine the card architecture */
 	reg0 = nv_rd32(dev, NV03_PMC_BOOT_0);
-	dev_priv->stepping = 0; /* XXX: add stepping for pre-NV10? */
 
 	/* We're dealing with >=NV10 */
 	if ((reg0 & 0x0f000000) > 0) {
 		/* Bit 27-20 contain the architecture in hex */
 		dev_priv->chipset = (reg0 & 0xff00000) >> 20;
-		dev_priv->stepping = (reg0 & 0xff);
 	/* NV04 or NV05 */
 	} else if ((reg0 & 0xff00fff0) == 0x20004000) {
 		if (reg0 & 0x00f00000)
@@ -987,6 +1066,9 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	case 0xc0:
 		dev_priv->card_type = NV_C0;
 		break;
+	case 0xd0:
+		dev_priv->card_type = NV_D0;
+		break;
 	default:
 		NV_INFO(dev, "Unsupported chipset 0x%08x\n", reg0);
 		ret = -EINVAL;
@@ -996,6 +1078,23 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	NV_INFO(dev, "Detected an NV%2x generation card (0x%08x)\n",
 		dev_priv->card_type, reg0);
 
+	/* determine frequency of timing crystal */
+	strap = nv_rd32(dev, 0x101000);
+	if ( dev_priv->chipset < 0x17 ||
+	    (dev_priv->chipset >= 0x20 && dev_priv->chipset <= 0x25))
+		strap &= 0x00000040;
+	else
+		strap &= 0x00400040;
+
+	switch (strap) {
+	case 0x00000000: dev_priv->crystal = 13500; break;
+	case 0x00000040: dev_priv->crystal = 14318; break;
+	case 0x00400000: dev_priv->crystal = 27000; break;
+	case 0x00400040: dev_priv->crystal = 25000; break;
+	}
+
+	NV_DEBUG(dev, "crystal freq: %dKHz\n", dev_priv->crystal);
+
 	/* Determine whether we'll attempt acceleration or not, some
 	 * cards are disabled by default here due to them being known
 	 * non-functional, or never been tested due to lack of hw.
@@ -1030,7 +1129,7 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 			ioremap(pci_resource_start(dev->pdev, ramin_bar),
 				dev_priv->ramin_size);
 		if (!dev_priv->ramin) {
-			NV_ERROR(dev, "Failed to PRAMIN BAR");
+			NV_ERROR(dev, "Failed to map PRAMIN BAR\n");
 			ret = -ENOMEM;
 			goto err_mmio;
 		}
@@ -1130,7 +1229,7 @@ int nouveau_ioctl_getparam(struct drm_device *dev, void *data,
 		getparam->value = 1;
 		break;
 	case NOUVEAU_GETPARAM_HAS_PAGEFLIP:
-		getparam->value = 1;
+		getparam->value = dev_priv->card_type < NV_D0;
 		break;
 	case NOUVEAU_GETPARAM_GRAPH_UNITS:
 		/* NV40 and NV50 versions are quite different, but register
@@ -1198,6 +1297,23 @@ nouveau_wait_ne(struct drm_device *dev, uint64_t timeout,
 	return false;
 }
 
+/* Wait until cond(data) == true, up until timeout has hit */
+bool
+nouveau_wait_cb(struct drm_device *dev, u64 timeout,
+		bool (*cond)(void *), void *data)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+	u64 start = ptimer->read(dev);
+
+	do {
+		if (cond(data) == true)
+			return true;
+	} while (ptimer->read(dev) - start < timeout);
+
+	return false;
+}
+
 /* Waits for PGRAPH to go completely idle */
 bool nouveau_wait_for_idle(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.c b/drivers/gpu/drm/nouveau/nouveau_vm.c
index 244fd38fdb84..ef0832b29ad2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.c
@@ -172,9 +172,9 @@ nouveau_vm_unmap_pgt(struct nouveau_vm *vm, int big, u32 fpde, u32 lpde)
 			vm->map_pgt(vpgd->obj, pde, vpgt->obj);
 		}
 
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		nouveau_gpuobj_ref(NULL, &pgt);
-		mutex_lock(&vm->mm->mutex);
+		mutex_lock(&vm->mm.mutex);
 	}
 }
 
@@ -191,18 +191,18 @@ nouveau_vm_map_pgt(struct nouveau_vm *vm, u32 pde, u32 type)
 	pgt_size  = (1 << (vm->pgt_bits + 12)) >> type;
 	pgt_size *= 8;
 
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 	ret = nouveau_gpuobj_new(vm->dev, NULL, pgt_size, 0x1000,
 				 NVOBJ_FLAG_ZERO_ALLOC, &pgt);
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	if (unlikely(ret))
 		return ret;
 
 	/* someone beat us to filling the PDE while we didn't have the lock */
 	if (unlikely(vpgt->refcount[big]++)) {
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		nouveau_gpuobj_ref(NULL, &pgt);
-		mutex_lock(&vm->mm->mutex);
+		mutex_lock(&vm->mm.mutex);
 		return 0;
 	}
 
@@ -223,10 +223,10 @@ nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift,
 	u32 fpde, lpde, pde;
 	int ret;
 
-	mutex_lock(&vm->mm->mutex);
-	ret = nouveau_mm_get(vm->mm, page_shift, msize, 0, align, &vma->node);
+	mutex_lock(&vm->mm.mutex);
+	ret = nouveau_mm_get(&vm->mm, page_shift, msize, 0, align, &vma->node);
 	if (unlikely(ret != 0)) {
-		mutex_unlock(&vm->mm->mutex);
+		mutex_unlock(&vm->mm.mutex);
 		return ret;
 	}
 
@@ -245,13 +245,13 @@ nouveau_vm_get(struct nouveau_vm *vm, u64 size, u32 page_shift,
 		if (ret) {
 			if (pde != fpde)
 				nouveau_vm_unmap_pgt(vm, big, fpde, pde - 1);
-			nouveau_mm_put(vm->mm, vma->node);
-			mutex_unlock(&vm->mm->mutex);
+			nouveau_mm_put(&vm->mm, vma->node);
+			mutex_unlock(&vm->mm.mutex);
 			vma->node = NULL;
 			return ret;
 		}
 	}
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 
 	vma->vm     = vm;
 	vma->offset = (u64)vma->node->offset << 12;
@@ -270,11 +270,11 @@ nouveau_vm_put(struct nouveau_vma *vma)
 	fpde = (vma->node->offset >> vm->pgt_bits);
 	lpde = (vma->node->offset + vma->node->length - 1) >> vm->pgt_bits;
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	nouveau_vm_unmap_pgt(vm, vma->node->type != vm->spg_shift, fpde, lpde);
-	nouveau_mm_put(vm->mm, vma->node);
+	nouveau_mm_put(&vm->mm, vma->node);
 	vma->node = NULL;
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 }
 
 int
@@ -306,7 +306,7 @@ nouveau_vm_new(struct drm_device *dev, u64 offset, u64 length, u64 mm_offset,
 			block = length;
 
 	} else
-	if (dev_priv->card_type == NV_C0) {
+	if (dev_priv->card_type >= NV_C0) {
 		vm->map_pgt = nvc0_vm_map_pgt;
 		vm->map = nvc0_vm_map;
 		vm->map_sg = nvc0_vm_map_sg;
@@ -360,11 +360,11 @@ nouveau_vm_link(struct nouveau_vm *vm, struct nouveau_gpuobj *pgd)
 
 	nouveau_gpuobj_ref(pgd, &vpgd->obj);
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	for (i = vm->fpde; i <= vm->lpde; i++)
 		vm->map_pgt(pgd, i, vm->pgt[i - vm->fpde].obj);
 	list_add(&vpgd->head, &vm->pgd_list);
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 	return 0;
 }
 
@@ -377,7 +377,7 @@ nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *mpgd)
 	if (!mpgd)
 		return;
 
-	mutex_lock(&vm->mm->mutex);
+	mutex_lock(&vm->mm.mutex);
 	list_for_each_entry_safe(vpgd, tmp, &vm->pgd_list, head) {
 		if (vpgd->obj == mpgd) {
 			pgd = vpgd->obj;
@@ -386,7 +386,7 @@ nouveau_vm_unlink(struct nouveau_vm *vm, struct nouveau_gpuobj *mpgd)
 			break;
 		}
 	}
-	mutex_unlock(&vm->mm->mutex);
+	mutex_unlock(&vm->mm.mutex);
 
 	nouveau_gpuobj_ref(NULL, &pgd);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h
index 579ca8cc223c..6ce995f7797e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.h
@@ -51,7 +51,7 @@ struct nouveau_vma {
 
 struct nouveau_vm {
 	struct drm_device *dev;
-	struct nouveau_mm *mm;
+	struct nouveau_mm mm;
 	int refcount;
 
 	struct list_head pgd_list;
diff --git a/drivers/gpu/drm/nouveau/nouveau_volt.c b/drivers/gpu/drm/nouveau/nouveau_volt.c
index 75e872741d92..86d03e15735d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_volt.c
+++ b/drivers/gpu/drm/nouveau/nouveau_volt.c
@@ -27,7 +27,7 @@
 #include "nouveau_drv.h"
 #include "nouveau_pm.h"
 
-static const enum dcb_gpio_tag vidtag[] = { 0x04, 0x05, 0x06, 0x1a };
+static const enum dcb_gpio_tag vidtag[] = { 0x04, 0x05, 0x06, 0x1a, 0x73 };
 static int nr_vidtag = sizeof(vidtag) / sizeof(vidtag[0]);
 
 int
@@ -170,6 +170,13 @@ nouveau_volt_init(struct drm_device *dev)
 		 */
 		vidshift  = 2;
 		break;
+	case 0x40:
+		headerlen = volt[1];
+		recordlen = volt[2];
+		entries   = volt[3]; /* not a clue what the entries are for.. */
+		vidmask   = volt[11]; /* guess.. */
+		vidshift  = 0;
+		break;
 	default:
 		NV_WARN(dev, "voltage table 0x%02x unknown\n", volt[0]);
 		return;
@@ -197,16 +204,37 @@ nouveau_volt_init(struct drm_device *dev)
 	}
 
 	/* parse vbios entries into common format */
-	voltage->level = kcalloc(entries, sizeof(*voltage->level), GFP_KERNEL);
-	if (!voltage->level)
-		return;
+	voltage->version = volt[0];
+	if (voltage->version < 0x40) {
+		voltage->nr_level = entries;
+		voltage->level =
+			kcalloc(entries, sizeof(*voltage->level), GFP_KERNEL);
+		if (!voltage->level)
+			return;
 
-	entry = volt + headerlen;
-	for (i = 0; i < entries; i++, entry += recordlen) {
-		voltage->level[i].voltage = entry[0];
-		voltage->level[i].vid     = entry[1] >> vidshift;
+		entry = volt + headerlen;
+		for (i = 0; i < entries; i++, entry += recordlen) {
+			voltage->level[i].voltage = entry[0] * 10000;
+			voltage->level[i].vid     = entry[1] >> vidshift;
+		}
+	} else {
+		u32 volt_uv = ROM32(volt[4]);
+		s16 step_uv = ROM16(volt[8]);
+		u8 vid;
+
+		voltage->nr_level = voltage->vid_mask + 1;
+		voltage->level = kcalloc(voltage->nr_level,
+					 sizeof(*voltage->level), GFP_KERNEL);
+		if (!voltage->level)
+			return;
+
+		for (vid = 0; vid <= voltage->vid_mask; vid++) {
+			voltage->level[vid].voltage = volt_uv;
+			voltage->level[vid].vid = vid;
+			volt_uv += step_uv;
+		}
 	}
-	voltage->nr_level  = entries;
+
 	voltage->supported = true;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_display.c b/drivers/gpu/drm/nouveau/nv04_display.c
index 1715e1464b7d..6bd8518d7b2e 100644
--- a/drivers/gpu/drm/nouveau/nv04_display.c
+++ b/drivers/gpu/drm/nouveau/nv04_display.c
@@ -126,27 +126,6 @@ nv04_display_create(struct drm_device *dev)
 
 	nouveau_hw_save_vga_fonts(dev, 1);
 
-	drm_mode_config_init(dev);
-	drm_mode_create_scaling_mode_property(dev);
-	drm_mode_create_dithering_property(dev);
-
-	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
-
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-	switch (dev_priv->card_type) {
-	case NV_04:
-		dev->mode_config.max_width = 2048;
-		dev->mode_config.max_height = 2048;
-		break;
-	default:
-		dev->mode_config.max_width = 4096;
-		dev->mode_config.max_height = 4096;
-		break;
-	}
-
-	dev->mode_config.fb_base = dev_priv->fb_phys;
-
 	nv04_crtc_create(dev, 0);
 	if (nv_two_heads(dev))
 		nv04_crtc_create(dev, 1);
@@ -235,8 +214,6 @@ nv04_display_destroy(struct drm_device *dev)
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head)
 		crtc->funcs->restore(crtc);
 
-	drm_mode_config_cleanup(dev);
-
 	nouveau_hw_save_vga_fonts(dev, 0);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_pm.c b/drivers/gpu/drm/nouveau/nv04_pm.c
index eb1c70dd82ed..9ae92a87b8cc 100644
--- a/drivers/gpu/drm/nouveau/nv04_pm.c
+++ b/drivers/gpu/drm/nouveau/nv04_pm.c
@@ -68,6 +68,7 @@ void
 nv04_pm_clock_set(struct drm_device *dev, void *pre_state)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
 	struct nv04_pm_state *state = pre_state;
 	u32 reg = state->pll.reg;
 
@@ -85,6 +86,9 @@ nv04_pm_clock_set(struct drm_device *dev, void *pre_state)
 		nv_mask(dev, 0x1002c0, 0, 1 << 8);
 	}
 
+	if (reg == NV_PRAMDAC_NVPLL_COEFF)
+		ptimer->init(dev);
+
 	kfree(state);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv04_timer.c b/drivers/gpu/drm/nouveau/nv04_timer.c
index 1d09ddd57399..263301b809dd 100644
--- a/drivers/gpu/drm/nouveau/nv04_timer.c
+++ b/drivers/gpu/drm/nouveau/nv04_timer.c
@@ -6,43 +6,75 @@
 int
 nv04_timer_init(struct drm_device *dev)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 m, n, d;
+
 	nv_wr32(dev, NV04_PTIMER_INTR_EN_0, 0x00000000);
 	nv_wr32(dev, NV04_PTIMER_INTR_0, 0xFFFFFFFF);
 
-	/* Just use the pre-existing values when possible for now; these regs
-	 * are not written in nv (driver writer missed a /4 on the address), and
-	 * writing 8 and 3 to the correct regs breaks the timings on the LVDS
-	 * hardware sequencing microcode.
-	 * A correct solution (involving calculations with the GPU PLL) can
-	 * be done when kernel modesetting lands
-	 */
-	if (!nv_rd32(dev, NV04_PTIMER_NUMERATOR) ||
-				!nv_rd32(dev, NV04_PTIMER_DENOMINATOR)) {
-		nv_wr32(dev, NV04_PTIMER_NUMERATOR, 0x00000008);
-		nv_wr32(dev, NV04_PTIMER_DENOMINATOR, 0x00000003);
+	/* aim for 31.25MHz, which gives us nanosecond timestamps */
+	d = 1000000 / 32;
+
+	/* determine base clock for timer source */
+	if (dev_priv->chipset < 0x40) {
+		n = dev_priv->engine.pm.clock_get(dev, PLL_CORE);
+	} else
+	if (dev_priv->chipset == 0x40) {
+		/*XXX: figure this out */
+		n = 0;
+	} else {
+		n = dev_priv->crystal;
+		m = 1;
+		while (n < (d * 2)) {
+			n += (n / m);
+			m++;
+		}
+
+		nv_wr32(dev, 0x009220, m - 1);
+	}
+
+	if (!n) {
+		NV_WARN(dev, "PTIMER: unknown input clock freq\n");
+		if (!nv_rd32(dev, NV04_PTIMER_NUMERATOR) ||
+		    !nv_rd32(dev, NV04_PTIMER_DENOMINATOR)) {
+			nv_wr32(dev, NV04_PTIMER_NUMERATOR, 1);
+			nv_wr32(dev, NV04_PTIMER_DENOMINATOR, 1);
+		}
+		return 0;
+	}
+
+	/* reduce ratio to acceptable values */
+	while (((n % 5) == 0) && ((d % 5) == 0)) {
+		n /= 5;
+		d /= 5;
 	}
 
+	while (((n % 2) == 0) && ((d % 2) == 0)) {
+		n /= 2;
+		d /= 2;
+	}
+
+	while (n > 0xffff || d > 0xffff) {
+		n >>= 1;
+		d >>= 1;
+	}
+
+	nv_wr32(dev, NV04_PTIMER_NUMERATOR, n);
+	nv_wr32(dev, NV04_PTIMER_DENOMINATOR, d);
 	return 0;
 }
 
-uint64_t
+u64
 nv04_timer_read(struct drm_device *dev)
 {
-	uint32_t low;
-	/* From kmmio dumps on nv28 this looks like how the blob does this.
-	 * It reads the high dword twice, before and after.
-	 * The only explanation seems to be that the 64-bit timer counter
-	 * advances between high and low dword reads and may corrupt the
-	 * result. Not confirmed.
-	 */
-	uint32_t high2 = nv_rd32(dev, NV04_PTIMER_TIME_1);
-	uint32_t high1;
+	u32 hi, lo;
+
 	do {
-		high1 = high2;
-		low = nv_rd32(dev, NV04_PTIMER_TIME_0);
-		high2 = nv_rd32(dev, NV04_PTIMER_TIME_1);
-	} while (high1 != high2);
-	return (((uint64_t)high2) << 32) | (uint64_t)low;
+		hi = nv_rd32(dev, NV04_PTIMER_TIME_1);
+		lo = nv_rd32(dev, NV04_PTIMER_TIME_0);
+	} while (hi != nv_rd32(dev, NV04_PTIMER_TIME_1));
+
+	return ((u64)hi << 32 | lo);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nv40_mpeg.c b/drivers/gpu/drm/nouveau/nv31_mpeg.c
index ad03a0e1fc7d..6f06a0713f00 100644
--- a/drivers/gpu/drm/nouveau/nv40_mpeg.c
+++ b/drivers/gpu/drm/nouveau/nv31_mpeg.c
@@ -26,10 +26,32 @@
 #include "nouveau_drv.h"
 #include "nouveau_ramht.h"
 
-struct nv40_mpeg_engine {
+struct nv31_mpeg_engine {
 	struct nouveau_exec_engine base;
+	atomic_t refcount;
 };
 
+
+static int
+nv31_mpeg_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv31_mpeg_engine *pmpeg = nv_engine(chan->dev, engine);
+
+	if (!atomic_add_unless(&pmpeg->refcount, 1, 1))
+		return -EBUSY;
+
+	chan->engctx[engine] = (void *)0xdeadcafe;
+	return 0;
+}
+
+static void
+nv31_mpeg_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nv31_mpeg_engine *pmpeg = nv_engine(chan->dev, engine);
+	atomic_dec(&pmpeg->refcount);
+	chan->engctx[engine] = NULL;
+}
+
 static int
 nv40_mpeg_context_new(struct nouveau_channel *chan, int engine)
 {
@@ -81,7 +103,7 @@ nv40_mpeg_context_del(struct nouveau_channel *chan, int engine)
 }
 
 static int
-nv40_mpeg_object_new(struct nouveau_channel *chan, int engine,
+nv31_mpeg_object_new(struct nouveau_channel *chan, int engine,
 		      u32 handle, u16 class)
 {
 	struct drm_device *dev = chan->dev;
@@ -103,10 +125,10 @@ nv40_mpeg_object_new(struct nouveau_channel *chan, int engine,
 }
 
 static int
-nv40_mpeg_init(struct drm_device *dev, int engine)
+nv31_mpeg_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+	struct nv31_mpeg_engine *pmpeg = nv_engine(dev, engine);
 	int i;
 
 	/* VPE init */
@@ -121,7 +143,7 @@ nv40_mpeg_init(struct drm_device *dev, int engine)
 	/* PMPEG init */
 	nv_wr32(dev, 0x00b32c, 0x00000000);
 	nv_wr32(dev, 0x00b314, 0x00000100);
-	nv_wr32(dev, 0x00b220, 0x00000044);
+	nv_wr32(dev, 0x00b220, nv44_graph_class(dev) ? 0x00000044 : 0x00000031);
 	nv_wr32(dev, 0x00b300, 0x02001ec1);
 	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
 
@@ -137,7 +159,7 @@ nv40_mpeg_init(struct drm_device *dev, int engine)
 }
 
 static int
-nv40_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
+nv31_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
 {
 	/*XXX: context save? */
 	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
@@ -146,7 +168,7 @@ nv40_mpeg_fini(struct drm_device *dev, int engine, bool suspend)
 }
 
 static int
-nv40_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+nv31_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
 {
 	struct drm_device *dev = chan->dev;
 	u32 inst = data << 4;
@@ -184,13 +206,17 @@ nv40_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
 }
 
 static int
-nv40_mpeg_isr_chid(struct drm_device *dev, u32 inst)
+nv31_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ctx;
 	unsigned long flags;
 	int i;
 
+	/* hardcode drm channel id on nv3x, so swmthd lookup works */
+	if (dev_priv->card_type < NV_40)
+		return 0;
+
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
 	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
 		if (!dev_priv->channels.ptr[i])
@@ -205,7 +231,7 @@ nv40_mpeg_isr_chid(struct drm_device *dev, u32 inst)
 }
 
 static void
-nv40_vpe_set_tile_region(struct drm_device *dev, int i)
+nv31_vpe_set_tile_region(struct drm_device *dev, int i)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
@@ -216,10 +242,10 @@ nv40_vpe_set_tile_region(struct drm_device *dev, int i)
 }
 
 static void
-nv40_mpeg_isr(struct drm_device *dev)
+nv31_mpeg_isr(struct drm_device *dev)
 {
 	u32 inst = (nv_rd32(dev, 0x00b318) & 0x000fffff) << 4;
-	u32 chid = nv40_mpeg_isr_chid(dev, inst);
+	u32 chid = nv31_mpeg_isr_chid(dev, inst);
 	u32 stat = nv_rd32(dev, 0x00b100);
 	u32 type = nv_rd32(dev, 0x00b230);
 	u32 mthd = nv_rd32(dev, 0x00b234);
@@ -249,10 +275,10 @@ nv40_mpeg_isr(struct drm_device *dev)
 }
 
 static void
-nv40_vpe_isr(struct drm_device *dev)
+nv31_vpe_isr(struct drm_device *dev)
 {
 	if (nv_rd32(dev, 0x00b100))
-		nv40_mpeg_isr(dev);
+		nv31_mpeg_isr(dev);
 
 	if (nv_rd32(dev, 0x00b800)) {
 		u32 stat = nv_rd32(dev, 0x00b800);
@@ -262,9 +288,9 @@ nv40_vpe_isr(struct drm_device *dev)
 }
 
 static void
-nv40_mpeg_destroy(struct drm_device *dev, int engine)
+nv31_mpeg_destroy(struct drm_device *dev, int engine)
 {
-	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+	struct nv31_mpeg_engine *pmpeg = nv_engine(dev, engine);
 
 	nouveau_irq_unregister(dev, 0);
 
@@ -273,34 +299,41 @@ nv40_mpeg_destroy(struct drm_device *dev, int engine)
 }
 
 int
-nv40_mpeg_create(struct drm_device *dev)
+nv31_mpeg_create(struct drm_device *dev)
 {
-	struct nv40_mpeg_engine *pmpeg;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv31_mpeg_engine *pmpeg;
 
 	pmpeg = kzalloc(sizeof(*pmpeg), GFP_KERNEL);
 	if (!pmpeg)
 		return -ENOMEM;
-
-	pmpeg->base.destroy = nv40_mpeg_destroy;
-	pmpeg->base.init = nv40_mpeg_init;
-	pmpeg->base.fini = nv40_mpeg_fini;
-	pmpeg->base.context_new = nv40_mpeg_context_new;
-	pmpeg->base.context_del = nv40_mpeg_context_del;
-	pmpeg->base.object_new = nv40_mpeg_object_new;
+	atomic_set(&pmpeg->refcount, 0);
+
+	pmpeg->base.destroy = nv31_mpeg_destroy;
+	pmpeg->base.init = nv31_mpeg_init;
+	pmpeg->base.fini = nv31_mpeg_fini;
+	if (dev_priv->card_type < NV_40) {
+		pmpeg->base.context_new = nv31_mpeg_context_new;
+		pmpeg->base.context_del = nv31_mpeg_context_del;
+	} else {
+		pmpeg->base.context_new = nv40_mpeg_context_new;
+		pmpeg->base.context_del = nv40_mpeg_context_del;
+	}
+	pmpeg->base.object_new = nv31_mpeg_object_new;
 
 	/* ISR vector, PMC_ENABLE bit,  and TILE regs are shared between
 	 * all VPE engines, for this driver's purposes the PMPEG engine
 	 * will be treated as the "master" and handle the global VPE
 	 * bits too
 	 */
-	pmpeg->base.set_tile_region = nv40_vpe_set_tile_region;
-	nouveau_irq_register(dev, 0, nv40_vpe_isr);
+	pmpeg->base.set_tile_region = nv31_vpe_set_tile_region;
+	nouveau_irq_register(dev, 0, nv31_vpe_isr);
 
 	NVOBJ_ENGINE_ADD(dev, MPEG, &pmpeg->base);
 	NVOBJ_CLASS(dev, 0x3174, MPEG);
-	NVOBJ_MTHD (dev, 0x3174, 0x0190, nv40_mpeg_mthd_dma);
-	NVOBJ_MTHD (dev, 0x3174, 0x01a0, nv40_mpeg_mthd_dma);
-	NVOBJ_MTHD (dev, 0x3174, 0x01b0, nv40_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x0190, nv31_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01a0, nv31_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01b0, nv31_mpeg_mthd_dma);
 
 #if 0
 	NVOBJ_ENGINE_ADD(dev, ME, &pme->base);
diff --git a/drivers/gpu/drm/nouveau/nv40_pm.c b/drivers/gpu/drm/nouveau/nv40_pm.c
new file mode 100644
index 000000000000..bbc0b9c7e1f7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv40_pm.c
@@ -0,0 +1,338 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_bios.h"
+#include "nouveau_pm.h"
+#include "nouveau_hw.h"
+
+#define min2(a,b) ((a) < (b) ? (a) : (b))
+
+static u32
+read_pll_1(struct drm_device *dev, u32 reg)
+{
+	u32 ctrl = nv_rd32(dev, reg + 0x00);
+	int P = (ctrl & 0x00070000) >> 16;
+	int N = (ctrl & 0x0000ff00) >> 8;
+	int M = (ctrl & 0x000000ff) >> 0;
+	u32 ref = 27000, clk = 0;
+
+	if (ctrl & 0x80000000)
+		clk = ref * N / M;
+
+	return clk >> P;
+}
+
+static u32
+read_pll_2(struct drm_device *dev, u32 reg)
+{
+	u32 ctrl = nv_rd32(dev, reg + 0x00);
+	u32 coef = nv_rd32(dev, reg + 0x04);
+	int N2 = (coef & 0xff000000) >> 24;
+	int M2 = (coef & 0x00ff0000) >> 16;
+	int N1 = (coef & 0x0000ff00) >> 8;
+	int M1 = (coef & 0x000000ff) >> 0;
+	int P = (ctrl & 0x00070000) >> 16;
+	u32 ref = 27000, clk = 0;
+
+	if (ctrl & 0x80000000)
+		clk = ref * N1 / M1;
+
+	if (!(ctrl & 0x00000100)) {
+		if (ctrl & 0x40000000)
+			clk = clk * N2 / M2;
+	}
+
+	return clk >> P;
+}
+
+static u32
+read_clk(struct drm_device *dev, u32 src)
+{
+	switch (src) {
+	case 3:
+		return read_pll_2(dev, 0x004000);
+	case 2:
+		return read_pll_1(dev, 0x004008);
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int
+nv40_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	u32 ctrl = nv_rd32(dev, 0x00c040);
+
+	perflvl->core   = read_clk(dev, (ctrl & 0x00000003) >> 0);
+	perflvl->shader = read_clk(dev, (ctrl & 0x00000030) >> 4);
+	perflvl->memory = read_pll_2(dev, 0x4020);
+	return 0;
+}
+
+struct nv40_pm_state {
+	u32 ctrl;
+	u32 npll_ctrl;
+	u32 npll_coef;
+	u32 spll;
+	u32 mpll_ctrl;
+	u32 mpll_coef;
+};
+
+static int
+nv40_calc_pll(struct drm_device *dev, u32 reg, struct pll_lims *pll,
+	      u32 clk, int *N1, int *M1, int *N2, int *M2, int *log2P)
+{
+	struct nouveau_pll_vals coef;
+	int ret;
+
+	ret = get_pll_limits(dev, reg, pll);
+	if (ret)
+		return ret;
+
+	if (clk < pll->vco1.maxfreq)
+		pll->vco2.maxfreq = 0;
+
+	ret = nouveau_calc_pll_mnp(dev, pll, clk, &coef);
+	if (ret == 0)
+		return -ERANGE;
+
+	*N1 = coef.N1;
+	*M1 = coef.M1;
+	if (N2 && M2) {
+		if (pll->vco2.maxfreq) {
+			*N2 = coef.N2;
+			*M2 = coef.M2;
+		} else {
+			*N2 = 1;
+			*M2 = 1;
+		}
+	}
+	*log2P = coef.log2P;
+	return 0;
+}
+
+void *
+nv40_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	struct nv40_pm_state *info;
+	struct pll_lims pll;
+	int N1, N2, M1, M2, log2P;
+	int ret;
+
+	info = kmalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	/* core/geometric clock */
+	ret = nv40_calc_pll(dev, 0x004000, &pll, perflvl->core,
+			    &N1, &M1, &N2, &M2, &log2P);
+	if (ret < 0)
+		goto out;
+
+	if (N2 == M2) {
+		info->npll_ctrl = 0x80000100 | (log2P << 16);
+		info->npll_coef = (N1 << 8) | M1;
+	} else {
+		info->npll_ctrl = 0xc0000000 | (log2P << 16);
+		info->npll_coef = (N2 << 24) | (M2 << 16) | (N1 << 8) | M1;
+	}
+
+	/* use the second PLL for shader/rop clock, if it differs from core */
+	if (perflvl->shader && perflvl->shader != perflvl->core) {
+		ret = nv40_calc_pll(dev, 0x004008, &pll, perflvl->shader,
+				    &N1, &M1, NULL, NULL, &log2P);
+		if (ret < 0)
+			goto out;
+
+		info->spll = 0xc0000000 | (log2P << 16) | (N1 << 8) | M1;
+		info->ctrl = 0x00000223;
+	} else {
+		info->spll = 0x00000000;
+		info->ctrl = 0x00000333;
+	}
+
+	/* memory clock */
+	ret = nv40_calc_pll(dev, 0x004020, &pll, perflvl->memory,
+			    &N1, &M1, &N2, &M2, &log2P);
+	if (ret < 0)
+		goto out;
+
+	info->mpll_ctrl  = 0x80000000 | (log2P << 16);
+	info->mpll_ctrl |= min2(pll.log2p_bias + log2P, pll.max_log2p) << 20;
+	if (N2 == M2) {
+		info->mpll_ctrl |= 0x00000100;
+		info->mpll_coef  = (N1 << 8) | M1;
+	} else {
+		info->mpll_ctrl |= 0x40000000;
+		info->mpll_coef  = (N2 << 24) | (M2 << 16) | (N1 << 8) | M1;
+	}
+
+out:
+	if (ret < 0) {
+		kfree(info);
+		info = ERR_PTR(ret);
+	}
+	return info;
+}
+
+static bool
+nv40_pm_gr_idle(void *data)
+{
+	struct drm_device *dev = data;
+
+	if ((nv_rd32(dev, 0x400760) & 0x000000f0) >> 4 !=
+	    (nv_rd32(dev, 0x400760) & 0x0000000f))
+		return false;
+
+	if (nv_rd32(dev, 0x400700))
+		return false;
+
+	return true;
+}
+
+void
+nv40_pm_clocks_set(struct drm_device *dev, void *pre_state)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv40_pm_state *info = pre_state;
+	unsigned long flags;
+	struct bit_entry M;
+	u32 crtc_mask = 0;
+	u8 sr1[2];
+	int i;
+
+	/* determine which CRTCs are active, fetch VGA_SR1 for each */
+	for (i = 0; i < 2; i++) {
+		u32 vbl = nv_rd32(dev, 0x600808 + (i * 0x2000));
+		u32 cnt = 0;
+		do {
+			if (vbl != nv_rd32(dev, 0x600808 + (i * 0x2000))) {
+				nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+				sr1[i] = nv_rd08(dev, 0x0c03c5 + (i * 0x2000));
+				if (!(sr1[i] & 0x20))
+					crtc_mask |= (1 << i);
+				break;
+			}
+			udelay(1);
+		} while (cnt++ < 32);
+	}
+
+	/* halt and idle engines */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000000);
+	if (!nv_wait(dev, 0x002500, 0x00000010, 0x00000000))
+		goto resume;
+	nv_mask(dev, 0x003220, 0x00000001, 0x00000000);
+	if (!nv_wait(dev, 0x003220, 0x00000010, 0x00000000))
+		goto resume;
+	nv_mask(dev, 0x003200, 0x00000001, 0x00000000);
+	nv04_fifo_cache_pull(dev, false);
+
+	if (!nv_wait_cb(dev, nv40_pm_gr_idle, dev))
+		goto resume;
+
+	/* set engine clocks */
+	nv_mask(dev, 0x00c040, 0x00000333, 0x00000000);
+	nv_wr32(dev, 0x004004, info->npll_coef);
+	nv_mask(dev, 0x004000, 0xc0070100, info->npll_ctrl);
+	nv_mask(dev, 0x004008, 0xc007ffff, info->spll);
+	mdelay(5);
+	nv_mask(dev, 0x00c040, 0x00000333, info->ctrl);
+
+	/* wait for vblank start on active crtcs, disable memory access */
+	for (i = 0; i < 2; i++) {
+		if (!(crtc_mask & (1 << i)))
+			continue;
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00000000);
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00010000);
+		nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+		nv_wr08(dev, 0x0c03c5 + (i * 0x2000), sr1[i] | 0x20);
+	}
+
+	/* prepare ram for reclocking */
+	nv_wr32(dev, 0x1002d4, 0x00000001); /* precharge */
+	nv_wr32(dev, 0x1002d0, 0x00000001); /* refresh */
+	nv_wr32(dev, 0x1002d0, 0x00000001); /* refresh */
+	nv_mask(dev, 0x100210, 0x80000000, 0x00000000); /* no auto refresh */
+	nv_wr32(dev, 0x1002dc, 0x00000001); /* enable self-refresh */
+
+	/* change the PLL of each memory partition */
+	nv_mask(dev, 0x00c040, 0x0000c000, 0x00000000);
+	switch (dev_priv->chipset) {
+	case 0x40:
+	case 0x45:
+	case 0x41:
+	case 0x42:
+	case 0x47:
+		nv_mask(dev, 0x004044, 0xc0771100, info->mpll_ctrl);
+		nv_mask(dev, 0x00402c, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x004048, info->mpll_coef);
+		nv_wr32(dev, 0x004030, info->mpll_coef);
+	case 0x43:
+	case 0x49:
+	case 0x4b:
+		nv_mask(dev, 0x004038, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x00403c, info->mpll_coef);
+	default:
+		nv_mask(dev, 0x004020, 0xc0771100, info->mpll_ctrl);
+		nv_wr32(dev, 0x004024, info->mpll_coef);
+		break;
+	}
+	udelay(100);
+	nv_mask(dev, 0x00c040, 0x0000c000, 0x0000c000);
+
+	/* re-enable normal operation of memory controller */
+	nv_wr32(dev, 0x1002dc, 0x00000000);
+	nv_mask(dev, 0x100210, 0x80000000, 0x80000000);
+	udelay(100);
+
+	/* execute memory reset script from vbios */
+	if (!bit_table(dev, 'M', &M))
+		nouveau_bios_init_exec(dev, ROM16(M.data[0]));
+
+	/* make sure we're in vblank (hopefully the same one as before), and
+	 * then re-enable crtc memory access
+	 */
+	for (i = 0; i < 2; i++) {
+		if (!(crtc_mask & (1 << i)))
+			continue;
+		nv_wait(dev, 0x600808 + (i * 0x2000), 0x00010000, 0x00010000);
+		nv_wr08(dev, 0x0c03c4 + (i * 0x2000), 0x01);
+		nv_wr08(dev, 0x0c03c5 + (i * 0x2000), sr1[i]);
+	}
+
+	/* resume engines */
+resume:
+	nv_wr32(dev, 0x003250, 0x00000001);
+	nv_mask(dev, 0x003220, 0x00000001, 0x00000001);
+	nv_wr32(dev, 0x003200, 0x00000001);
+	nv_wr32(dev, 0x002500, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	kfree(info);
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 5d989073ba6e..882080e0b4f5 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -329,8 +329,6 @@ nv50_crtc_destroy(struct drm_crtc *crtc)
 
 	drm_crtc_cleanup(&nv_crtc->base);
 
-	nv50_cursor_fini(nv_crtc);
-
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
diff --git a/drivers/gpu/drm/nouveau/nv50_cursor.c b/drivers/gpu/drm/nouveau/nv50_cursor.c
index 9752c35bb84b..adfc9b607a50 100644
--- a/drivers/gpu/drm/nouveau/nv50_cursor.c
+++ b/drivers/gpu/drm/nouveau/nv50_cursor.c
@@ -137,21 +137,3 @@ nv50_cursor_init(struct nouveau_crtc *nv_crtc)
 	nv_crtc->cursor.show = nv50_cursor_show;
 	return 0;
 }
-
-void
-nv50_cursor_fini(struct nouveau_crtc *nv_crtc)
-{
-	struct drm_device *dev = nv_crtc->base.dev;
-	int idx = nv_crtc->index;
-
-	NV_DEBUG_KMS(dev, "\n");
-
-	nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx), 0);
-	if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx),
-		     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
-		NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
-		NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
-			 nv_rd32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(idx)));
-	}
-}
-
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index db1a5f4b711d..d23ca00e7d62 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -247,6 +247,16 @@ static int nv50_display_disable(struct drm_device *dev)
 		}
 	}
 
+	for (i = 0; i < 2; i++) {
+		nv_wr32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i), 0);
+		if (!nv_wait(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i),
+			     NV50_PDISPLAY_CURSOR_CURSOR_CTRL2_STATUS, 0)) {
+			NV_ERROR(dev, "timeout: CURSOR_CTRL2_STATUS == 0\n");
+			NV_ERROR(dev, "CURSOR_CTRL2 = 0x%08x\n",
+				 nv_rd32(dev, NV50_PDISPLAY_CURSOR_CURSOR_CTRL2(i)));
+		}
+	}
+
 	nv50_evo_fini(dev);
 
 	for (i = 0; i < 3; i++) {
@@ -286,23 +296,6 @@ int nv50_display_create(struct drm_device *dev)
 		return -ENOMEM;
 	dev_priv->engine.display.priv = priv;
 
-	/* init basic kernel modesetting */
-	drm_mode_config_init(dev);
-
-	/* Initialise some optional connector properties. */
-	drm_mode_create_scaling_mode_property(dev);
-	drm_mode_create_dithering_property(dev);
-
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-
-	dev->mode_config.funcs = (void *)&nouveau_mode_config_funcs;
-
-	dev->mode_config.max_width = 8192;
-	dev->mode_config.max_height = 8192;
-
-	dev->mode_config.fb_base = dev_priv->fb_phys;
-
 	/* Create CRTC objects */
 	for (i = 0; i < 2; i++)
 		nv50_crtc_create(dev, i);
@@ -364,8 +357,6 @@ nv50_display_destroy(struct drm_device *dev)
 
 	NV_DEBUG_KMS(dev, "\n");
 
-	drm_mode_config_cleanup(dev);
-
 	nv50_display_disable(dev);
 	nouveau_irq_unregister(dev, 26);
 	kfree(disp);
@@ -698,7 +689,7 @@ nv50_display_unk10_handler(struct drm_device *dev)
 		struct dcb_entry *dcb = &dev_priv->vbios.dcb.entry[i];
 
 		if (dcb->type == type && (dcb->or & (1 << or))) {
-			nouveau_bios_run_display_table(dev, dcb, 0, -1);
+			nouveau_bios_run_display_table(dev, 0, -1, dcb, -1);
 			disp->irq.dcb = dcb;
 			goto ack;
 		}
@@ -711,37 +702,6 @@ ack:
 }
 
 static void
-nv50_display_unk20_dp_hack(struct drm_device *dev, struct dcb_entry *dcb)
-{
-	int or = ffs(dcb->or) - 1, link = !(dcb->dpconf.sor.link & 1);
-	struct drm_encoder *encoder;
-	uint32_t tmp, unk0 = 0, unk1 = 0;
-
-	if (dcb->type != OUTPUT_DP)
-		return;
-
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-
-		if (nv_encoder->dcb == dcb) {
-			unk0 = nv_encoder->dp.unk0;
-			unk1 = nv_encoder->dp.unk1;
-			break;
-		}
-	}
-
-	if (unk0 || unk1) {
-		tmp  = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-		tmp &= 0xfffffe03;
-		nv_wr32(dev, NV50_SOR_DP_CTRL(or, link), tmp | unk0);
-
-		tmp  = nv_rd32(dev, NV50_SOR_DP_UNK128(or, link));
-		tmp &= 0xfef080c0;
-		nv_wr32(dev, NV50_SOR_DP_UNK128(or, link), tmp | unk1);
-	}
-}
-
-static void
 nv50_display_unk20_handler(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -753,7 +713,7 @@ nv50_display_unk20_handler(struct drm_device *dev)
 	NV_DEBUG_KMS(dev, "0x610030: 0x%08x\n", unk30);
 	dcb = disp->irq.dcb;
 	if (dcb) {
-		nouveau_bios_run_display_table(dev, dcb, 0, -2);
+		nouveau_bios_run_display_table(dev, 0, -2, dcb, -1);
 		disp->irq.dcb = NULL;
 	}
 
@@ -837,9 +797,15 @@ nv50_display_unk20_handler(struct drm_device *dev)
 	}
 
 	script = nv50_display_script_select(dev, dcb, mc, pclk);
-	nouveau_bios_run_display_table(dev, dcb, script, pclk);
+	nouveau_bios_run_display_table(dev, script, pclk, dcb, -1);
 
-	nv50_display_unk20_dp_hack(dev, dcb);
+	if (type == OUTPUT_DP) {
+		int link = !(dcb->dpconf.sor.link & 1);
+		if ((mc & 0x000f0000) == 0x00020000)
+			nouveau_dp_tu_update(dev, or, link, pclk, 18);
+		else
+			nouveau_dp_tu_update(dev, or, link, pclk, 24);
+	}
 
 	if (dcb->type != OUTPUT_ANALOG) {
 		tmp = nv_rd32(dev, NV50_PDISPLAY_SOR_CLK_CTRL2(or));
@@ -904,7 +870,7 @@ nv50_display_unk40_handler(struct drm_device *dev)
 	if (!dcb)
 		goto ack;
 
-	nouveau_bios_run_display_table(dev, dcb, script, -pclk);
+	nouveau_bios_run_display_table(dev, script, -pclk, dcb, -1);
 	nv50_display_unk40_dp_set_tmds(dev, dcb);
 
 ack:
diff --git a/drivers/gpu/drm/nouveau/nv50_gpio.c b/drivers/gpu/drm/nouveau/nv50_gpio.c
index d4f4206dad7e..793a5ccca121 100644
--- a/drivers/gpu/drm/nouveau/nv50_gpio.c
+++ b/drivers/gpu/drm/nouveau/nv50_gpio.c
@@ -98,6 +98,37 @@ nv50_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state)
 }
 
 int
+nvd0_gpio_get(struct drm_device *dev, enum dcb_gpio_tag tag)
+{
+	struct dcb_gpio_entry *gpio;
+	u32 v;
+
+	gpio = nouveau_bios_gpio_entry(dev, tag);
+	if (!gpio)
+		return -ENOENT;
+
+	v  = nv_rd32(dev, 0x00d610 + (gpio->line * 4));
+	v &= 0x00004000;
+	return (!!v == (gpio->state[1] & 1));
+}
+
+int
+nvd0_gpio_set(struct drm_device *dev, enum dcb_gpio_tag tag, int state)
+{
+	struct dcb_gpio_entry *gpio;
+	u32 v;
+
+	gpio = nouveau_bios_gpio_entry(dev, tag);
+	if (!gpio)
+		return -ENOENT;
+
+	v = gpio->state[state] ^ 2;
+
+	nv_mask(dev, 0x00d610 + (gpio->line * 4), 0x00003000, v << 12);
+	return 0;
+}
+
+int
 nv50_gpio_irq_register(struct drm_device *dev, enum dcb_gpio_tag tag,
 		       void (*handler)(void *, int), void *data)
 {
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index d43c46caa76e..8c979b31ff61 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -120,70 +120,62 @@ nv50_graph_unload_context(struct drm_device *dev)
 	return 0;
 }
 
-static void
-nv50_graph_init_reset(struct drm_device *dev)
-{
-	uint32_t pmc_e = NV_PMC_ENABLE_PGRAPH | (1 << 21);
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) & ~pmc_e);
-	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |  pmc_e);
-}
-
-static void
-nv50_graph_init_intr(struct drm_device *dev)
-{
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV03_PGRAPH_INTR, 0xffffffff);
-	nv_wr32(dev, 0x400138, 0xffffffff);
-	nv_wr32(dev, NV40_PGRAPH_INTR_EN, 0xffffffff);
-}
-
-static void
-nv50_graph_init_regs__nv(struct drm_device *dev)
+static int
+nv50_graph_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t units = nv_rd32(dev, 0x1540);
+	struct nv50_graph_engine *pgraph = nv_engine(dev, engine);
+	u32 units = nv_rd32(dev, 0x001540);
 	int i;
 
 	NV_DEBUG(dev, "\n");
 
+	/* master reset */
+	nv_mask(dev, 0x000200, 0x00200100, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00200100, 0x00200100);
+	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
+
+	/* reset/enable traps and interrupts */
 	nv_wr32(dev, 0x400804, 0xc0000000);
 	nv_wr32(dev, 0x406800, 0xc0000000);
 	nv_wr32(dev, 0x400c04, 0xc0000000);
 	nv_wr32(dev, 0x401800, 0xc0000000);
 	nv_wr32(dev, 0x405018, 0xc0000000);
 	nv_wr32(dev, 0x402000, 0xc0000000);
-
 	for (i = 0; i < 16; i++) {
-		if (units & 1 << i) {
-			if (dev_priv->chipset < 0xa0) {
-				nv_wr32(dev, 0x408900 + (i << 12), 0xc0000000);
-				nv_wr32(dev, 0x408e08 + (i << 12), 0xc0000000);
-				nv_wr32(dev, 0x408314 + (i << 12), 0xc0000000);
-			} else {
-				nv_wr32(dev, 0x408600 + (i << 11), 0xc0000000);
-				nv_wr32(dev, 0x408708 + (i << 11), 0xc0000000);
-				nv_wr32(dev, 0x40831c + (i << 11), 0xc0000000);
-			}
+		if (!(units & (1 << i)))
+			continue;
+
+		if (dev_priv->chipset < 0xa0) {
+			nv_wr32(dev, 0x408900 + (i << 12), 0xc0000000);
+			nv_wr32(dev, 0x408e08 + (i << 12), 0xc0000000);
+			nv_wr32(dev, 0x408314 + (i << 12), 0xc0000000);
+		} else {
+			nv_wr32(dev, 0x408600 + (i << 11), 0xc0000000);
+			nv_wr32(dev, 0x408708 + (i << 11), 0xc0000000);
+			nv_wr32(dev, 0x40831c + (i << 11), 0xc0000000);
 		}
 	}
 
 	nv_wr32(dev, 0x400108, 0xffffffff);
-
-	nv_wr32(dev, 0x400824, 0x00004000);
+	nv_wr32(dev, 0x400138, 0xffffffff);
+	nv_wr32(dev, 0x400100, 0xffffffff);
+	nv_wr32(dev, 0x40013c, 0xffffffff);
 	nv_wr32(dev, 0x400500, 0x00010001);
-}
-
-static void
-nv50_graph_init_zcull(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i;
-
-	NV_DEBUG(dev, "\n");
 
+	/* upload context program, initialise ctxctl defaults */
+	nv_wr32(dev, 0x400324, 0x00000000);
+	for (i = 0; i < pgraph->ctxprog_size; i++)
+		nv_wr32(dev, 0x400328, pgraph->ctxprog[i]);
+	nv_wr32(dev, 0x400824, 0x00000000);
+	nv_wr32(dev, 0x400828, 0x00000000);
+	nv_wr32(dev, 0x40082c, 0x00000000);
+	nv_wr32(dev, 0x400830, 0x00000000);
+	nv_wr32(dev, 0x400724, 0x00000000);
+	nv_wr32(dev, 0x40032c, 0x00000000);
+	nv_wr32(dev, 0x400320, 4);	/* CTXCTL_CMD = NEWCTXDMA */
+
+	/* some unknown zcull magic */
 	switch (dev_priv->chipset & 0xf0) {
 	case 0x50:
 	case 0x80:
@@ -212,43 +204,7 @@ nv50_graph_init_zcull(struct drm_device *dev)
 		nv_wr32(dev, 0x402c28 + (i * 8), 0x00000000);
 		nv_wr32(dev, 0x402c2c + (i * 8), 0x00000000);
 	}
-}
-
-static int
-nv50_graph_init_ctxctl(struct drm_device *dev)
-{
-	struct nv50_graph_engine *pgraph = nv_engine(dev, NVOBJ_ENGINE_GR);
-	int i;
-
-	NV_DEBUG(dev, "\n");
-
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
-	for (i = 0; i < pgraph->ctxprog_size; i++)
-		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, pgraph->ctxprog[i]);
-
-	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
-	nv_wr32(dev, 0x400320, 4);
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0);
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, 0);
-	return 0;
-}
-
-static int
-nv50_graph_init(struct drm_device *dev, int engine)
-{
-	int ret;
-
-	NV_DEBUG(dev, "\n");
-
-	nv50_graph_init_reset(dev);
-	nv50_graph_init_regs__nv(dev);
-	nv50_graph_init_zcull(dev);
-
-	ret = nv50_graph_init_ctxctl(dev);
-	if (ret)
-		return ret;
 
-	nv50_graph_init_intr(dev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
index de9abff12b90..d05c2c3b2444 100644
--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -40,6 +40,12 @@
 #define CP_FLAG_UNK0B                 ((0 * 32) + 0xb)
 #define CP_FLAG_UNK0B_CLEAR           0
 #define CP_FLAG_UNK0B_SET             1
+#define CP_FLAG_XFER_SWITCH           ((0 * 32) + 0xe)
+#define CP_FLAG_XFER_SWITCH_DISABLE   0
+#define CP_FLAG_XFER_SWITCH_ENABLE    1
+#define CP_FLAG_STATE                 ((0 * 32) + 0x1c)
+#define CP_FLAG_STATE_STOPPED         0
+#define CP_FLAG_STATE_RUNNING         1
 #define CP_FLAG_UNK1D                 ((0 * 32) + 0x1d)
 #define CP_FLAG_UNK1D_CLEAR           0
 #define CP_FLAG_UNK1D_SET             1
@@ -194,6 +200,9 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
 				   "the devs.\n");
 		return -ENOSYS;
 	}
+
+	cp_set (ctx, STATE, RUNNING);
+	cp_set (ctx, XFER_SWITCH, ENABLE);
 	/* decide whether we're loading/unloading the context */
 	cp_bra (ctx, AUTO_SAVE, PENDING, cp_setup_save);
 	cp_bra (ctx, USER_SAVE, PENDING, cp_setup_save);
@@ -260,6 +269,8 @@ nv50_grctx_init(struct nouveau_grctx *ctx)
 	cp_name(ctx, cp_exit);
 	cp_set (ctx, USER_SAVE, NOT_PENDING);
 	cp_set (ctx, USER_LOAD, NOT_PENDING);
+	cp_set (ctx, XFER_SWITCH, DISABLE);
+	cp_set (ctx, STATE, STOPPED);
 	cp_out (ctx, CP_END);
 	ctx->ctxvals_pos += 0x400; /* padding... no idea why you need it */
 
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 8a2810011bda..3d5a86b98282 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -115,15 +115,15 @@ nv50_pm_clock_set(struct drm_device *dev, void *pre_state)
 	    BIT_M.version == 1 && BIT_M.length >= 0x0b) {
 		script = ROM16(BIT_M.data[0x05]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 		script = ROM16(BIT_M.data[0x07]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 		script = ROM16(BIT_M.data[0x09]);
 		if (script)
-			nouveau_bios_run_init_table(dev, script, NULL);
+			nouveau_bios_run_init_table(dev, script, NULL, -1);
 
-		nouveau_bios_run_init_table(dev, perflvl->memscript, NULL);
+		nouveau_bios_run_init_table(dev, perflvl->memscript, NULL, -1);
 	}
 
 	if (state->type == PLL_MEMORY) {
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index ffe8b483b7b0..2633aa8554eb 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -124,7 +124,7 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
 		if (mode == DRM_MODE_DPMS_ON) {
 			u8 status = DP_SET_POWER_D0;
 			nouveau_dp_auxch(auxch, 8, DP_SET_POWER, &status, 1);
-			nouveau_dp_link_train(encoder);
+			nouveau_dp_link_train(encoder, nv_encoder->dp.datarate);
 		} else {
 			u8 status = DP_SET_POWER_D3;
 			nouveau_dp_auxch(auxch, 8, DP_SET_POWER, &status, 1);
@@ -187,14 +187,13 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_crtc *crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
 	uint32_t mode_ctl = 0;
 	int ret;
 
 	NV_DEBUG_KMS(dev, "or %d type %d -> crtc %d\n",
 		     nv_encoder->or, nv_encoder->dcb->type, crtc->index);
 
-	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
-
 	switch (nv_encoder->dcb->type) {
 	case OUTPUT_TMDS:
 		if (nv_encoder->dcb->sorconf.link & 1) {
@@ -206,7 +205,15 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 			mode_ctl = 0x0200;
 		break;
 	case OUTPUT_DP:
-		mode_ctl |= (nv_encoder->dp.mc_unknown << 16);
+		nv_connector = nouveau_encoder_connector_get(nv_encoder);
+		if (nv_connector && nv_connector->base.display_info.bpc == 6) {
+			nv_encoder->dp.datarate = crtc->mode->clock * 18 / 8;
+			mode_ctl |= 0x00020000;
+		} else {
+			nv_encoder->dp.datarate = crtc->mode->clock * 24 / 8;
+			mode_ctl |= 0x00050000;
+		}
+
 		if (nv_encoder->dcb->sorconf.link & 1)
 			mode_ctl |= 0x00000800;
 		else
@@ -227,6 +234,8 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
 		mode_ctl |= NV50_EVO_SOR_MODE_CTRL_NVSYNC;
 
+	nv50_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+
 	ret = RING_SPACE(evo, 2);
 	if (ret) {
 		NV_ERROR(dev, "no space while connecting SOR\n");
@@ -313,31 +322,6 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_entry *entry)
 	encoder->possible_crtcs = entry->heads;
 	encoder->possible_clones = 0;
 
-	if (nv_encoder->dcb->type == OUTPUT_DP) {
-		int or = nv_encoder->or, link = !(entry->dpconf.sor.link & 1);
-		uint32_t tmp;
-
-		tmp = nv_rd32(dev, 0x61c700 + (or * 0x800));
-		if (!tmp)
-			tmp = nv_rd32(dev, 0x610798 + (or * 8));
-
-		switch ((tmp & 0x00000f00) >> 8) {
-		case 8:
-		case 9:
-			nv_encoder->dp.mc_unknown = (tmp & 0x000f0000) >> 16;
-			tmp = nv_rd32(dev, NV50_SOR_DP_CTRL(or, link));
-			nv_encoder->dp.unk0 = tmp & 0x000001fc;
-			tmp = nv_rd32(dev, NV50_SOR_DP_UNK128(or, link));
-			nv_encoder->dp.unk1 = tmp & 0x010f7f3f;
-			break;
-		default:
-			break;
-		}
-
-		if (!nv_encoder->dp.mc_unknown)
-			nv_encoder->dp.mc_unknown = 5;
-	}
-
 	drm_mode_connector_attach_encoder(connector, encoder);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_vram.c b/drivers/gpu/drm/nouveau/nv50_vram.c
index af32daecd1ed..9da23838e63e 100644
--- a/drivers/gpu/drm/nouveau/nv50_vram.c
+++ b/drivers/gpu/drm/nouveau/nv50_vram.c
@@ -51,7 +51,7 @@ void
 nv50_vram_del(struct drm_device *dev, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *this;
 	struct nouveau_mem *mem;
 
@@ -82,7 +82,7 @@ nv50_vram_new(struct drm_device *dev, u64 size, u32 align, u32 size_nc,
 	      u32 memtype, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index e4b2b9e934b2..618c144b7a30 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -27,178 +27,316 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 
-/* This is actually a lot more complex than it appears here, but hopefully
- * this should be able to deal with what the VBIOS leaves for us..
- *
- * If not, well, I'll jump off that bridge when I come to it.
- */
+static u32 read_clk(struct drm_device *, int, bool);
+static u32 read_pll(struct drm_device *, int, u32);
 
-struct nva3_pm_state {
-	enum pll_types type;
-	u32 src0;
-	u32 src1;
-	u32 ctrl;
-	u32 coef;
-	u32 old_pnm;
-	u32 new_pnm;
-	u32 new_div;
-};
+static u32
+read_vco(struct drm_device *dev, int clk)
+{
+	u32 sctl = nv_rd32(dev, 0x4120 + (clk * 4));
+	if ((sctl & 0x00000030) != 0x00000030)
+		return read_pll(dev, 0x41, 0x00e820);
+	return read_pll(dev, 0x42, 0x00e8a0);
+}
 
-static int
-nva3_pm_pll_offset(u32 id)
+static u32
+read_clk(struct drm_device *dev, int clk, bool ignore_en)
 {
-	static const u32 pll_map[] = {
-		0x00, PLL_CORE,
-		0x01, PLL_SHADER,
-		0x02, PLL_MEMORY,
-		0x00, 0x00
-	};
-	const u32 *map = pll_map;
-
-	while (map[1]) {
-		if (id == map[1])
-			return map[0];
-		map += 2;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	u32 sctl, sdiv, sclk;
+
+	/* refclk for the 0xe8xx plls is a fixed frequency */
+	if (clk >= 0x40) {
+		if (dev_priv->chipset == 0xaf) {
+			/* no joke.. seriously.. sigh.. */
+			return nv_rd32(dev, 0x00471c) * 1000;
+		}
+
+		return dev_priv->crystal;
 	}
 
-	return -ENOENT;
+	sctl = nv_rd32(dev, 0x4120 + (clk * 4));
+	if (!ignore_en && !(sctl & 0x00000100))
+		return 0;
+
+	switch (sctl & 0x00003000) {
+	case 0x00000000:
+		return dev_priv->crystal;
+	case 0x00002000:
+		if (sctl & 0x00000040)
+			return 108000;
+		return 100000;
+	case 0x00003000:
+		sclk = read_vco(dev, clk);
+		sdiv = ((sctl & 0x003f0000) >> 16) + 2;
+		return (sclk * 2) / sdiv;
+	default:
+		return 0;
+	}
 }
 
-int
-nva3_pm_clock_get(struct drm_device *dev, u32 id)
+static u32
+read_pll(struct drm_device *dev, int clk, u32 pll)
+{
+	u32 ctrl = nv_rd32(dev, pll + 0);
+	u32 sclk = 0, P = 1, N = 1, M = 1;
+
+	if (!(ctrl & 0x00000008)) {
+		if (ctrl & 0x00000001) {
+			u32 coef = nv_rd32(dev, pll + 4);
+			M = (coef & 0x000000ff) >> 0;
+			N = (coef & 0x0000ff00) >> 8;
+			P = (coef & 0x003f0000) >> 16;
+
+			/* no post-divider on these.. */
+			if ((pll & 0x00ff00) == 0x00e800)
+				P = 1;
+
+			sclk = read_clk(dev, 0x00 + clk, false);
+		}
+	} else {
+		sclk = read_clk(dev, 0x10 + clk, false);
+	}
+
+	return sclk * N / (M * P);
+}
+
+struct creg {
+	u32 clk;
+	u32 pll;
+};
+
+static int
+calc_clk(struct drm_device *dev, int clk, u32 pll, u32 khz, struct creg *reg)
 {
-	u32 src0, src1, ctrl, coef;
-	struct pll_lims pll;
-	int ret, off;
-	int P, N, M;
+	struct pll_lims limits;
+	u32 oclk, sclk, sdiv;
+	int P, N, M, diff;
+	int ret;
+
+	reg->pll = 0;
+	reg->clk = 0;
+	if (!khz) {
+		NV_DEBUG(dev, "no clock for 0x%04x/0x%02x\n", pll, clk);
+		return 0;
+	}
 
-	ret = get_pll_limits(dev, id, &pll);
+	switch (khz) {
+	case 27000:
+		reg->clk = 0x00000100;
+		return khz;
+	case 100000:
+		reg->clk = 0x00002100;
+		return khz;
+	case 108000:
+		reg->clk = 0x00002140;
+		return khz;
+	default:
+		sclk = read_vco(dev, clk);
+		sdiv = min((sclk * 2) / (khz - 2999), (u32)65);
+		/* if the clock has a PLL attached, and we can get a within
+		 * [-2, 3) MHz of a divider, we'll disable the PLL and use
+		 * the divider instead.
+		 *
+		 * divider can go as low as 2, limited here because NVIDIA
+		 * and the VBIOS on my NVA8 seem to prefer using the PLL
+		 * for 810MHz - is there a good reason?
+		 */
+		if (sdiv > 4) {
+			oclk = (sclk * 2) / sdiv;
+			diff = khz - oclk;
+			if (!pll || (diff >= -2000 && diff < 3000)) {
+				reg->clk = (((sdiv - 2) << 16) | 0x00003100);
+				return oclk;
+			}
+		}
+
+		if (!pll) {
+			NV_ERROR(dev, "bad freq %02x: %d %d\n", clk, khz, sclk);
+			return -ERANGE;
+		}
+
+		break;
+	}
+
+	ret = get_pll_limits(dev, pll, &limits);
 	if (ret)
 		return ret;
 
-	off = nva3_pm_pll_offset(id);
-	if (off < 0)
-		return off;
+	limits.refclk = read_clk(dev, clk - 0x10, true);
+	if (!limits.refclk)
+		return -EINVAL;
+
+	ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
+	if (ret >= 0) {
+		reg->clk = nv_rd32(dev, 0x4120 + (clk * 4));
+		reg->pll = (P << 16) | (N << 8) | M;
+	}
+	return ret;
+}
+
+static void
+prog_pll(struct drm_device *dev, int clk, u32 pll, struct creg *reg)
+{
+	const u32 src0 = 0x004120 + (clk * 4);
+	const u32 src1 = 0x004160 + (clk * 4);
+	const u32 ctrl = pll + 0;
+	const u32 coef = pll + 4;
+	u32 cntl;
+
+	if (!reg->clk && !reg->pll) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
+	}
 
-	src0 = nv_rd32(dev, 0x4120 + (off * 4));
-	src1 = nv_rd32(dev, 0x4160 + (off * 4));
-	ctrl = nv_rd32(dev, pll.reg + 0);
-	coef = nv_rd32(dev, pll.reg + 4);
-	NV_DEBUG(dev, "PLL %02x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
-		      id, src0, src1, ctrl, coef);
+	cntl = nv_rd32(dev, ctrl) & 0xfffffff2;
+	if (reg->pll) {
+		nv_mask(dev, src0, 0x00000101, 0x00000101);
+		nv_wr32(dev, coef, reg->pll);
+		nv_wr32(dev, ctrl, cntl | 0x00000015);
+		nv_mask(dev, src1, 0x00000100, 0x00000000);
+		nv_mask(dev, src1, 0x00000001, 0x00000000);
+	} else {
+		nv_mask(dev, src1, 0x003f3141, 0x00000101 | reg->clk);
+		nv_wr32(dev, ctrl, cntl | 0x0000001d);
+		nv_mask(dev, ctrl, 0x00000001, 0x00000000);
+		nv_mask(dev, src0, 0x00000100, 0x00000000);
+		nv_mask(dev, src0, 0x00000001, 0x00000000);
+	}
+}
 
-	if (ctrl & 0x00000008) {
-		u32 div = ((src1 & 0x003c0000) >> 18) + 1;
-		return (pll.refclk * 2) / div;
+static void
+prog_clk(struct drm_device *dev, int clk, struct creg *reg)
+{
+	if (!reg->clk) {
+		NV_DEBUG(dev, "no clock for %02x\n", clk);
+		return;
 	}
 
-	P = (coef & 0x003f0000) >> 16;
-	N = (coef & 0x0000ff00) >> 8;
-	M = (coef & 0x000000ff);
-	return pll.refclk * N / M / P;
+	nv_mask(dev, 0x004120 + (clk * 4), 0x003f3141, 0x00000101 | reg->clk);
+}
+
+int
+nva3_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	perflvl->core   = read_pll(dev, 0x00, 0x4200);
+	perflvl->shader = read_pll(dev, 0x01, 0x4220);
+	perflvl->memory = read_pll(dev, 0x02, 0x4000);
+	perflvl->unka0  = read_clk(dev, 0x20, false);
+	perflvl->vdec   = read_clk(dev, 0x21, false);
+	perflvl->daemon = read_clk(dev, 0x25, false);
+	perflvl->copy   = perflvl->core;
+	return 0;
 }
 
+struct nva3_pm_state {
+	struct creg nclk;
+	struct creg sclk;
+	struct creg mclk;
+	struct creg vdec;
+	struct creg unka0;
+};
+
 void *
-nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
-		  u32 id, int khz)
+nva3_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 {
-	struct nva3_pm_state *pll;
-	struct pll_lims limits;
-	int N, M, P, diff;
-	int ret, off;
+	struct nva3_pm_state *info;
+	int ret;
 
-	ret = get_pll_limits(dev, id, &limits);
+	info = kzalloc(sizeof(*info), GFP_KERNEL);
+	if (!info)
+		return ERR_PTR(-ENOMEM);
+
+	ret = calc_clk(dev, 0x10, 0x4200, perflvl->core, &info->nclk);
 	if (ret < 0)
-		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
+		goto out;
 
-	off = nva3_pm_pll_offset(id);
-	if (id < 0)
-		return ERR_PTR(-EINVAL);
+	ret = calc_clk(dev, 0x11, 0x4220, perflvl->shader, &info->sclk);
+	if (ret < 0)
+		goto out;
 
+	ret = calc_clk(dev, 0x12, 0x4000, perflvl->memory, &info->mclk);
+	if (ret < 0)
+		goto out;
 
-	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
-	if (!pll)
-		return ERR_PTR(-ENOMEM);
-	pll->type = id;
-	pll->src0 = 0x004120 + (off * 4);
-	pll->src1 = 0x004160 + (off * 4);
-	pll->ctrl = limits.reg + 0;
-	pll->coef = limits.reg + 4;
-
-	/* If target clock is within [-2, 3) MHz of a divisor, we'll
-	 * use that instead of calculating MNP values
-	 */
-	pll->new_div = min((limits.refclk * 2) / (khz - 2999), 16);
-	if (pll->new_div) {
-		diff = khz - ((limits.refclk * 2) / pll->new_div);
-		if (diff < -2000 || diff >= 3000)
-			pll->new_div = 0;
-	}
+	ret = calc_clk(dev, 0x20, 0x0000, perflvl->unka0, &info->unka0);
+	if (ret < 0)
+		goto out;
 
-	if (!pll->new_div) {
-		ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
-		if (ret < 0)
-			return ERR_PTR(ret);
+	ret = calc_clk(dev, 0x21, 0x0000, perflvl->vdec, &info->vdec);
+	if (ret < 0)
+		goto out;
 
-		pll->new_pnm = (P << 16) | (N << 8) | M;
-		pll->new_div = 2 - 1;
-	} else {
-		pll->new_pnm = 0;
-		pll->new_div--;
+out:
+	if (ret < 0) {
+		kfree(info);
+		info = ERR_PTR(ret);
 	}
+	return info;
+}
+
+static bool
+nva3_pm_grcp_idle(void *data)
+{
+	struct drm_device *dev = data;
 
-	if ((nv_rd32(dev, pll->src1) & 0x00000101) != 0x00000101)
-		pll->old_pnm = nv_rd32(dev, pll->coef);
-	return pll;
+	if (!(nv_rd32(dev, 0x400304) & 0x00000001))
+		return true;
+	if (nv_rd32(dev, 0x400308) == 0x0050001c)
+		return true;
+	return false;
 }
 
 void
-nva3_pm_clock_set(struct drm_device *dev, void *pre_state)
+nva3_pm_clocks_set(struct drm_device *dev, void *pre_state)
 {
-	struct nva3_pm_state *pll = pre_state;
-	u32 ctrl = 0;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nva3_pm_state *info = pre_state;
+	unsigned long flags;
 
-	/* For the memory clock, NVIDIA will build a "script" describing
-	 * the reclocking process and ask PDAEMON to execute it.
-	 */
-	if (pll->type == PLL_MEMORY) {
-		nv_wr32(dev, 0x100210, 0);
-		nv_wr32(dev, 0x1002dc, 1);
-		nv_wr32(dev, 0x004018, 0x00001000);
-		ctrl = 0x18000100;
+	/* prevent any new grctx switches from starting */
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wr32(dev, 0x400324, 0x00000000);
+	nv_wr32(dev, 0x400328, 0x0050001c); /* wait flag 0x1c */
+	/* wait for any pending grctx switches to complete */
+	if (!nv_wait_cb(dev, nva3_pm_grcp_idle, dev)) {
+		NV_ERROR(dev, "pm: ctxprog didn't go idle\n");
+		goto cleanup;
 	}
-
-	if (pll->old_pnm || !pll->new_pnm) {
-		nv_mask(dev, pll->src1, 0x003c0101, 0x00000101 |
-						    (pll->new_div << 18));
-		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
-		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
+	/* freeze PFIFO */
+	nv_mask(dev, 0x002504, 0x00000001, 0x00000001);
+	if (!nv_wait(dev, 0x002504, 0x00000010, 0x00000010)) {
+		NV_ERROR(dev, "pm: fifo didn't go idle\n");
+		goto cleanup;
 	}
 
-	if (pll->new_pnm) {
-		nv_mask(dev, pll->src0, 0x00000101, 0x00000101);
-		nv_wr32(dev, pll->coef, pll->new_pnm);
-		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
-		nv_mask(dev, pll->ctrl, 0x00000010, 0x00000000);
-		nv_mask(dev, pll->ctrl, 0x00020010, 0x00020010);
-		nv_wr32(dev, pll->ctrl, 0x00010015 | ctrl);
-		nv_mask(dev, pll->src1, 0x00000100, 0x00000000);
-		nv_mask(dev, pll->src1, 0x00000001, 0x00000000);
-		if (pll->type == PLL_MEMORY)
-			nv_wr32(dev, 0x4018, 0x10005000);
-	} else {
-		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
-		nv_mask(dev, pll->src0, 0x00000100, 0x00000000);
-		nv_mask(dev, pll->src0, 0x00000001, 0x00000000);
-		if (pll->type == PLL_MEMORY)
-			nv_wr32(dev, 0x4018, 0x1000d000);
-	}
+	prog_pll(dev, 0x00, 0x004200, &info->nclk);
+	prog_pll(dev, 0x01, 0x004220, &info->sclk);
+	prog_clk(dev, 0x20, &info->unka0);
+	prog_clk(dev, 0x21, &info->vdec);
 
-	if (pll->type == PLL_MEMORY) {
+	if (info->mclk.clk || info->mclk.pll) {
+		nv_wr32(dev, 0x100210, 0);
+		nv_wr32(dev, 0x1002dc, 1);
+		nv_wr32(dev, 0x004018, 0x00001000);
+		prog_pll(dev, 0x02, 0x004000, &info->mclk);
+		if (nv_rd32(dev, 0x4000) & 0x00000008)
+			nv_wr32(dev, 0x004018, 0x1000d000);
+		else
+			nv_wr32(dev, 0x004018, 0x10005000);
 		nv_wr32(dev, 0x1002dc, 0);
 		nv_wr32(dev, 0x100210, 0x80000000);
 	}
 
-	kfree(pll);
+cleanup:
+	/* unfreeze PFIFO */
+	nv_mask(dev, 0x002504, 0x00000001, 0x00000000);
+	/* restore ctxprog to normal */
+	nv_wr32(dev, 0x400324, 0x00000000);
+	nv_wr32(dev, 0x400328, 0x0070009c); /* set flag 0x1c */
+	/* unblock it if necessary */
+	if (nv_rd32(dev, 0x400308) == 0x0050001c)
+		nv_mask(dev, 0x400824, 0x10000000, 0x10000000);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+	kfree(info);
 }
-
diff --git a/drivers/gpu/drm/nouveau/nvc0_fb.c b/drivers/gpu/drm/nouveau/nvc0_fb.c
index 08e6b118f021..5bf55038fd92 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fb.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fb.c
@@ -32,6 +32,30 @@ struct nvc0_fb_priv {
 	dma_addr_t r100c10;
 };
 
+static inline void
+nvc0_mfb_subp_isr(struct drm_device *dev, int unit, int subp)
+{
+	u32 subp_base = 0x141000 + (unit * 0x2000) + (subp * 0x400);
+	u32 stat = nv_rd32(dev, subp_base + 0x020);
+
+	if (stat) {
+		NV_INFO(dev, "PMFB%d_SUBP%d: 0x%08x\n", unit, subp, stat);
+		nv_wr32(dev, subp_base + 0x020, stat);
+	}
+}
+
+static void
+nvc0_mfb_isr(struct drm_device *dev)
+{
+	u32 units = nv_rd32(dev, 0x00017c);
+	while (units) {
+		u32 subp, unit = ffs(units) - 1;
+		for (subp = 0; subp < 2; subp++)
+			nvc0_mfb_subp_isr(dev, unit, subp);
+		units &= ~(1 << unit);
+	}
+}
+
 static void
 nvc0_fb_destroy(struct drm_device *dev)
 {
@@ -39,6 +63,8 @@ nvc0_fb_destroy(struct drm_device *dev)
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	struct nvc0_fb_priv *priv = pfb->priv;
 
+	nouveau_irq_unregister(dev, 25);
+
 	if (priv->r100c10_page) {
 		pci_unmap_page(dev->pdev, priv->r100c10, PAGE_SIZE,
 			       PCI_DMA_BIDIRECTIONAL);
@@ -74,6 +100,7 @@ nvc0_fb_create(struct drm_device *dev)
 		return -EFAULT;
 	}
 
+	nouveau_irq_register(dev, 25, nvc0_mfb_isr);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index 6f9f341c3e86..dcbe0d5d0241 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -322,7 +322,7 @@ nvc0_fifo_init(struct drm_device *dev)
 	}
 
 	/* PSUBFIFO[n] */
-	for (i = 0; i < 3; i++) {
+	for (i = 0; i < priv->spoon_nr; i++) {
 		nv_mask(dev, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
 		nv_wr32(dev, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
 		nv_wr32(dev, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTR_EN */
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index 5b2f6f420468..4b8d0b3f7d2b 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -390,7 +390,7 @@ nvc0_graph_init_gpc_0(struct drm_device *dev)
 	}
 
 	nv_wr32(dev, GPC_BCAST(0x1bd4), magicgpc918);
-	nv_wr32(dev, GPC_BCAST(0x08ac), priv->rop_nr);
+	nv_wr32(dev, GPC_BCAST(0x08ac), nv_rd32(dev, 0x100800));
 }
 
 static void
@@ -700,22 +700,6 @@ nvc0_graph_isr(struct drm_device *dev)
 	nv_wr32(dev, 0x400500, 0x00010001);
 }
 
-static void
-nvc0_runk140_isr(struct drm_device *dev)
-{
-	u32 units = nv_rd32(dev, 0x00017c) & 0x1f;
-
-	while (units) {
-		u32 unit = ffs(units) - 1;
-		u32 reg = 0x140000 + unit * 0x2000;
-		u32 st0 = nv_mask(dev, reg + 0x1020, 0, 0);
-		u32 st1 = nv_mask(dev, reg + 0x1420, 0, 0);
-
-		NV_DEBUG(dev, "PRUNK140: %d 0x%08x 0x%08x\n", unit, st0, st1);
-		units &= ~(1 << unit);
-	}
-}
-
 static int
 nvc0_graph_create_fw(struct drm_device *dev, const char *fwname,
 		     struct nvc0_graph_fuc *fuc)
@@ -764,7 +748,6 @@ nvc0_graph_destroy(struct drm_device *dev, int engine)
 	}
 
 	nouveau_irq_unregister(dev, 12);
-	nouveau_irq_unregister(dev, 25);
 
 	nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
 	nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
@@ -803,7 +786,6 @@ nvc0_graph_create(struct drm_device *dev)
 
 	NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
 	nouveau_irq_register(dev, 12, nvc0_graph_isr);
-	nouveau_irq_register(dev, 25, nvc0_runk140_isr);
 
 	if (nouveau_ctxfw) {
 		NV_INFO(dev, "PGRAPH: using external firmware\n");
@@ -864,6 +846,9 @@ nvc0_graph_create(struct drm_device *dev)
 	case 0xce: /* 4/4/0/0, 4 */
 		priv->magic_not_rop_nr = 0x03;
 		break;
+	case 0xcf: /* 4/0/0/0, 3 */
+		priv->magic_not_rop_nr = 0x03;
+		break;
 	}
 
 	if (!priv->magic_not_rop_nr) {
@@ -889,20 +874,3 @@ error:
 	nvc0_graph_destroy(dev, NVOBJ_ENGINE_GR);
 	return ret;
 }
-
-MODULE_FIRMWARE("nouveau/nvc0_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc0_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc0_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc0_fuc41ad");
-MODULE_FIRMWARE("nouveau/nvc3_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc3_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc3_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc3_fuc41ad");
-MODULE_FIRMWARE("nouveau/nvc4_fuc409c");
-MODULE_FIRMWARE("nouveau/nvc4_fuc409d");
-MODULE_FIRMWARE("nouveau/nvc4_fuc41ac");
-MODULE_FIRMWARE("nouveau/nvc4_fuc41ad");
-MODULE_FIRMWARE("nouveau/fuc409c");
-MODULE_FIRMWARE("nouveau/fuc409d");
-MODULE_FIRMWARE("nouveau/fuc41ac");
-MODULE_FIRMWARE("nouveau/fuc41ad");
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.h b/drivers/gpu/drm/nouveau/nvc0_graph.h
index 55689e997286..636fe9812f79 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.h
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.h
@@ -82,6 +82,7 @@ nvc0_graph_class(struct drm_device *dev)
 	case 0xc3:
 	case 0xc4:
 	case 0xce: /* guess, mmio trace shows only 0x9097 state */
+	case 0xcf: /* guess, mmio trace shows only 0x9097 state */
 		return 0x9097;
 	case 0xc1:
 		return 0x9197;
diff --git a/drivers/gpu/drm/nouveau/nvc0_grctx.c b/drivers/gpu/drm/nouveau/nvc0_grctx.c
index 31018eaf5279..dd0e6a736b3b 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grctx.c
+++ b/drivers/gpu/drm/nouveau/nvc0_grctx.c
@@ -1678,7 +1678,10 @@ nvc0_grctx_generate_tp(struct drm_device *dev)
 	nv_wr32(dev, 0x419c04, 0x00000006);
 	nv_wr32(dev, 0x419c08, 0x00000002);
 	nv_wr32(dev, 0x419c20, 0x00000000);
-	nv_wr32(dev, 0x419cb0, 0x00060048); //XXX: 0xce 0x00020048
+	if (chipset == 0xce || chipset == 0xcf)
+		nv_wr32(dev, 0x419cb0, 0x00020048);
+	else
+		nv_wr32(dev, 0x419cb0, 0x00060048);
 	nv_wr32(dev, 0x419ce8, 0x00000000);
 	nv_wr32(dev, 0x419cf4, 0x00000183);
 	nv_wr32(dev, 0x419d20, chipset != 0xc1 ? 0x02180000 : 0x12180000);
@@ -1783,11 +1786,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 	nv_wr32(dev, 0x40587c, 0x00000000);
 
 	if (1) {
-		const u8 chipset_tp_max[] = { 16, 4, 0, 4, 8, 0, 0, 0,
-					      16, 0, 0, 0, 0, 0, 8, 0 };
-		u8 max = chipset_tp_max[dev_priv->chipset & 0x0f];
-		u8 tpnr[GPC_MAX];
-		u8 data[TP_MAX];
+		u8 tpnr[GPC_MAX], data[TP_MAX];
 
 		memcpy(tpnr, priv->tp_nr, sizeof(priv->tp_nr));
 		memset(data, 0x1f, sizeof(data));
@@ -1801,7 +1800,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 			data[tp] = gpc;
 		}
 
-		for (i = 0; i < max / 4; i++)
+		for (i = 0; i < 4; i++)
 			nv_wr32(dev, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
index 0ec2add72a76..06f5e26d1e0f 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc
@@ -77,6 +77,11 @@ chipsets:
 .b16 nvc0_gpc_mmio_tail
 .b16 nvc0_tpc_mmio_head
 .b16 nvc3_tpc_mmio_tail
+.b8  0xcf 0 0 0
+.b16 nvc0_gpc_mmio_head
+.b16 nvc0_gpc_mmio_tail
+.b16 nvc0_tpc_mmio_head
+.b16 nvcf_tpc_mmio_tail
 .b8  0 0 0 0
 
 // GPC mmio lists
@@ -134,8 +139,9 @@ mmctx_data(0x000750, 2)
 nvc0_tpc_mmio_tail:
 mmctx_data(0x000758, 1)
 mmctx_data(0x0002c4, 1)
-mmctx_data(0x0004bc, 1)
 mmctx_data(0x0006e0, 1)
+nvcf_tpc_mmio_tail:
+mmctx_data(0x0004bc, 1)
 nvc3_tpc_mmio_tail:
 mmctx_data(0x000544, 1)
 nvc1_tpc_mmio_tail:
diff --git a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
index 1896c898f5ba..6f820324480e 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_grgpc.fuc.h
@@ -25,23 +25,26 @@ uint32_t nvc0_grgpc_data[] = {
 	0x00000000,
 	0x00000000,
 	0x000000c0,
-	0x011000b0,
-	0x01640114,
+	0x011c00bc,
+	0x01700120,
 	0x000000c1,
-	0x011400b0,
-	0x01780114,
+	0x012000bc,
+	0x01840120,
 	0x000000c3,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
 	0x000000c4,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
 	0x000000c8,
-	0x011000b0,
-	0x01640114,
+	0x011c00bc,
+	0x01700120,
 	0x000000ce,
-	0x011000b0,
-	0x01740114,
+	0x011c00bc,
+	0x01800120,
+	0x000000cf,
+	0x011c00bc,
+	0x017c0120,
 	0x00000000,
 	0x00000380,
 	0x14000400,
@@ -90,8 +93,8 @@ uint32_t nvc0_grgpc_data[] = {
 	0x04000750,
 	0x00000758,
 	0x000002c4,
-	0x000004bc,
 	0x000006e0,
+	0x000004bc,
 	0x00000544,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
index a1a599124cf4..e4f8c7e89ddd 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc
@@ -56,6 +56,9 @@ chipsets:
 .b8  0xce 0 0 0
 .b16 nvc0_hub_mmio_head
 .b16 nvc0_hub_mmio_tail
+.b8  0xcf 0 0 0
+.b16 nvc0_hub_mmio_head
+.b16 nvc0_hub_mmio_tail
 .b8  0 0 0 0
 
 nvc0_hub_mmio_head:
diff --git a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
index b3b541b6d044..241d3263f1e5 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_grhub.fuc.h
@@ -23,17 +23,19 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x000000c0,
-	0x012c0090,
+	0x01340098,
 	0x000000c1,
-	0x01300090,
+	0x01380098,
 	0x000000c3,
-	0x012c0090,
+	0x01340098,
 	0x000000c4,
-	0x012c0090,
+	0x01340098,
 	0x000000c8,
-	0x012c0090,
+	0x01340098,
 	0x000000ce,
-	0x012c0090,
+	0x01340098,
+	0x000000cf,
+	0x01340098,
 	0x00000000,
 	0x0417e91c,
 	0x04400204,
@@ -190,8 +192,6 @@ uint32_t nvc0_grhub_data[] = {
 	0x00000000,
 	0x00000000,
 	0x00000000,
-	0x00000000,
-	0x00000000,
 };
 
 uint32_t nvc0_grhub_code[] = {
diff --git a/drivers/gpu/drm/nouveau/nvc0_pm.c b/drivers/gpu/drm/nouveau/nvc0_pm.c
new file mode 100644
index 000000000000..929aded35cb5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_pm.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_bios.h"
+#include "nouveau_pm.h"
+
+static u32 read_div(struct drm_device *, int, u32, u32);
+static u32 read_pll(struct drm_device *, u32);
+
+static u32
+read_vco(struct drm_device *dev, u32 dsrc)
+{
+	u32 ssrc = nv_rd32(dev, dsrc);
+	if (!(ssrc & 0x00000100))
+		return read_pll(dev, 0x00e800);
+	return read_pll(dev, 0x00e820);
+}
+
+static u32
+read_pll(struct drm_device *dev, u32 pll)
+{
+	u32 ctrl = nv_rd32(dev, pll + 0);
+	u32 coef = nv_rd32(dev, pll + 4);
+	u32 P = (coef & 0x003f0000) >> 16;
+	u32 N = (coef & 0x0000ff00) >> 8;
+	u32 M = (coef & 0x000000ff) >> 0;
+	u32 sclk, doff;
+
+	if (!(ctrl & 0x00000001))
+		return 0;
+
+	switch (pll & 0xfff000) {
+	case 0x00e000:
+		sclk = 27000;
+		P = 1;
+		break;
+	case 0x137000:
+		doff = (pll - 0x137000) / 0x20;
+		sclk = read_div(dev, doff, 0x137120, 0x137140);
+		break;
+	case 0x132000:
+		switch (pll) {
+		case 0x132000:
+			sclk = read_pll(dev, 0x132020);
+			break;
+		case 0x132020:
+			sclk = read_div(dev, 0, 0x137320, 0x137330);
+			break;
+		default:
+			return 0;
+		}
+		break;
+	default:
+		return 0;
+	}
+
+	return sclk * N / M / P;
+}
+
+static u32
+read_div(struct drm_device *dev, int doff, u32 dsrc, u32 dctl)
+{
+	u32 ssrc = nv_rd32(dev, dsrc + (doff * 4));
+	u32 sctl = nv_rd32(dev, dctl + (doff * 4));
+
+	switch (ssrc & 0x00000003) {
+	case 0:
+		if ((ssrc & 0x00030000) != 0x00030000)
+			return 27000;
+		return 108000;
+	case 2:
+		return 100000;
+	case 3:
+		if (sctl & 0x80000000) {
+			u32 sclk = read_vco(dev, dsrc + (doff * 4));
+			u32 sdiv = (sctl & 0x0000003f) + 2;
+			return (sclk * 2) / sdiv;
+		}
+
+		return read_vco(dev, dsrc + (doff * 4));
+	default:
+		return 0;
+	}
+}
+
+static u32
+read_mem(struct drm_device *dev)
+{
+	u32 ssel = nv_rd32(dev, 0x1373f0);
+	if (ssel & 0x00000001)
+		return read_div(dev, 0, 0x137300, 0x137310);
+	return read_pll(dev, 0x132000);
+}
+
+static u32
+read_clk(struct drm_device *dev, int clk)
+{
+	u32 sctl = nv_rd32(dev, 0x137250 + (clk * 4));
+	u32 ssel = nv_rd32(dev, 0x137100);
+	u32 sclk, sdiv;
+
+	if (ssel & (1 << clk)) {
+		if (clk < 7)
+			sclk = read_pll(dev, 0x137000 + (clk * 0x20));
+		else
+			sclk = read_pll(dev, 0x1370e0);
+		sdiv = ((sctl & 0x00003f00) >> 8) + 2;
+	} else {
+		sclk = read_div(dev, clk, 0x137160, 0x1371d0);
+		sdiv = ((sctl & 0x0000003f) >> 0) + 2;
+	}
+
+	if (sctl & 0x80000000)
+		return (sclk * 2) / sdiv;
+	return sclk;
+}
+
+int
+nvc0_pm_clocks_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
+{
+	perflvl->shader = read_clk(dev, 0x00);
+	perflvl->core   = perflvl->shader / 2;
+	perflvl->memory = read_mem(dev);
+	perflvl->rop    = read_clk(dev, 0x01);
+	perflvl->hub07  = read_clk(dev, 0x02);
+	perflvl->hub06  = read_clk(dev, 0x07);
+	perflvl->hub01  = read_clk(dev, 0x08);
+	perflvl->copy   = read_clk(dev, 0x09);
+	perflvl->daemon = read_clk(dev, 0x0c);
+	perflvl->vdec   = read_clk(dev, 0x0e);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_vram.c b/drivers/gpu/drm/nouveau/nvc0_vram.c
index e45a24d84e98..edbfe9360ae2 100644
--- a/drivers/gpu/drm/nouveau/nvc0_vram.c
+++ b/drivers/gpu/drm/nouveau/nvc0_vram.c
@@ -61,7 +61,7 @@ nvc0_vram_new(struct drm_device *dev, u64 size, u32 align, u32 ncmin,
 	      u32 type, struct nouveau_mem **pmem)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_mm *mm = dev_priv->engine.vram.mm;
+	struct nouveau_mm *mm = &dev_priv->engine.vram.mm;
 	struct nouveau_mm_node *r;
 	struct nouveau_mem *mem;
 	int ret;
@@ -106,12 +106,50 @@ nvc0_vram_init(struct drm_device *dev)
 	struct nouveau_vram_engine *vram = &dev_priv->engine.vram;
 	const u32 rsvd_head = ( 256 * 1024) >> 12; /* vga memory */
 	const u32 rsvd_tail = (1024 * 1024) >> 12; /* vbios etc */
-	u32 length;
+	u32 parts = nv_rd32(dev, 0x121c74);
+	u32 bsize = nv_rd32(dev, 0x10f20c);
+	u32 offset, length;
+	bool uniform = true;
+	int ret, i;
 
-	dev_priv->vram_size  = nv_rd32(dev, 0x10f20c) << 20;
-	dev_priv->vram_size *= nv_rd32(dev, 0x121c74);
+	NV_DEBUG(dev, "0x100800: 0x%08x\n", nv_rd32(dev, 0x100800));
+	NV_DEBUG(dev, "parts 0x%08x bcast_mem_amount 0x%08x\n", parts, bsize);
 
-	length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+	/* read amount of vram attached to each memory controller */
+	for (i = 0; i < parts; i++) {
+		u32 psize = nv_rd32(dev, 0x11020c + (i * 0x1000));
+		if (psize != bsize) {
+			if (psize < bsize)
+				bsize = psize;
+			uniform = false;
+		}
+
+		NV_DEBUG(dev, "%d: mem_amount 0x%08x\n", i, psize);
+
+		dev_priv->vram_size += (u64)psize << 20;
+	}
+
+	/* if all controllers have the same amount attached, there's no holes */
+	if (uniform) {
+		offset = rsvd_head;
+		length = (dev_priv->vram_size >> 12) - rsvd_head - rsvd_tail;
+		return nouveau_mm_init(&vram->mm, offset, length, 1);
+	}
 
-	return nouveau_mm_init(&vram->mm, rsvd_head, length, 1);
+	/* otherwise, address lowest common amount from 0GiB */
+	ret = nouveau_mm_init(&vram->mm, rsvd_head, (bsize << 8) * parts, 1);
+	if (ret)
+		return ret;
+
+	/* and the rest starting from (8GiB + common_size) */
+	offset = (0x0200000000ULL >> 12) + (bsize << 8);
+	length = (dev_priv->vram_size >> 12) - (bsize << 8) - rsvd_tail;
+
+	ret = nouveau_mm_init(&vram->mm, offset, length, 0);
+	if (ret) {
+		nouveau_mm_fini(&vram->mm);
+		return ret;
+	}
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c
new file mode 100644
index 000000000000..23d63b4b3d77
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvd0_display.c
@@ -0,0 +1,1473 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/dma-mapping.h>
+
+#include "drmP.h"
+#include "drm_crtc_helper.h"
+
+#include "nouveau_drv.h"
+#include "nouveau_connector.h"
+#include "nouveau_encoder.h"
+#include "nouveau_crtc.h"
+#include "nouveau_dma.h"
+#include "nouveau_fb.h"
+#include "nv50_display.h"
+
+struct nvd0_display {
+	struct nouveau_gpuobj *mem;
+	struct {
+		dma_addr_t handle;
+		u32 *ptr;
+	} evo[1];
+
+	struct tasklet_struct tasklet;
+	u32 modeset;
+};
+
+static struct nvd0_display *
+nvd0_display(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	return dev_priv->engine.display.priv;
+}
+
+static inline int
+evo_icmd(struct drm_device *dev, int id, u32 mthd, u32 data)
+{
+	int ret = 0;
+	nv_mask(dev, 0x610700 + (id * 0x10), 0x00000001, 0x00000001);
+	nv_wr32(dev, 0x610704 + (id * 0x10), data);
+	nv_mask(dev, 0x610704 + (id * 0x10), 0x80000ffc, 0x80000000 | mthd);
+	if (!nv_wait(dev, 0x610704 + (id * 0x10), 0x80000000, 0x00000000))
+		ret = -EBUSY;
+	nv_mask(dev, 0x610700 + (id * 0x10), 0x00000001, 0x00000000);
+	return ret;
+}
+
+static u32 *
+evo_wait(struct drm_device *dev, int id, int nr)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 put = nv_rd32(dev, 0x640000 + (id * 0x1000)) / 4;
+
+	if (put + nr >= (PAGE_SIZE / 4)) {
+		disp->evo[id].ptr[put] = 0x20000000;
+
+		nv_wr32(dev, 0x640000 + (id * 0x1000), 0x00000000);
+		if (!nv_wait(dev, 0x640004 + (id * 0x1000), ~0, 0x00000000)) {
+			NV_ERROR(dev, "evo %d dma stalled\n", id);
+			return NULL;
+		}
+
+		put = 0;
+	}
+
+	return disp->evo[id].ptr + put;
+}
+
+static void
+evo_kick(u32 *push, struct drm_device *dev, int id)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	nv_wr32(dev, 0x640000 + (id * 0x1000), (push - disp->evo[id].ptr) << 2);
+}
+
+#define evo_mthd(p,m,s) *((p)++) = (((s) << 18) | (m))
+#define evo_data(p,d)   *((p)++) = (d)
+
+static struct drm_crtc *
+nvd0_display_crtc_get(struct drm_encoder *encoder)
+{
+	return nouveau_encoder(encoder)->crtc;
+}
+
+/******************************************************************************
+ * CRTC
+ *****************************************************************************/
+static int
+nvd0_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool on, bool update)
+{
+	struct drm_device *dev = nv_crtc->base.dev;
+	u32 *push, mode;
+
+	mode = 0x00000000;
+	if (on) {
+		/* 0x11: 6bpc dynamic 2x2
+		 * 0x13: 8bpc dynamic 2x2
+		 * 0x19: 6bpc static 2x2
+		 * 0x1b: 8bpc static 2x2
+		 * 0x21: 6bpc temporal
+		 * 0x23: 8bpc temporal
+		 */
+		mode = 0x00000011;
+	}
+
+	push = evo_wait(dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0490 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, mode);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, dev, 0);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_set_scale(struct nouveau_crtc *nv_crtc, int type, bool update)
+{
+	struct drm_display_mode *mode = &nv_crtc->base.mode;
+	struct drm_device *dev = nv_crtc->base.dev;
+	struct nouveau_connector *nv_connector;
+	u32 *push, outX, outY;
+
+	outX = mode->hdisplay;
+	outY = mode->vdisplay;
+
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	if (nv_connector && nv_connector->native_mode) {
+		struct drm_display_mode *native = nv_connector->native_mode;
+		u32 xratio = (native->hdisplay << 19) / mode->hdisplay;
+		u32 yratio = (native->vdisplay << 19) / mode->vdisplay;
+
+		switch (type) {
+		case DRM_MODE_SCALE_ASPECT:
+			if (xratio > yratio) {
+				outX = (mode->hdisplay * yratio) >> 19;
+				outY = (mode->vdisplay * yratio) >> 19;
+			} else {
+				outX = (mode->hdisplay * xratio) >> 19;
+				outY = (mode->vdisplay * xratio) >> 19;
+			}
+			break;
+		case DRM_MODE_SCALE_FULLSCREEN:
+			outX = native->hdisplay;
+			outY = native->vdisplay;
+			break;
+		default:
+			break;
+		}
+	}
+
+	push = evo_wait(dev, 0, 16);
+	if (push) {
+		evo_mthd(push, 0x04c0 + (nv_crtc->index * 0x300), 3);
+		evo_data(push, (outY << 16) | outX);
+		evo_data(push, (outY << 16) | outX);
+		evo_data(push, (outY << 16) | outX);
+		evo_mthd(push, 0x0494 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x04b8 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, (mode->vdisplay << 16) | mode->hdisplay);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, dev, 0);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_set_image(struct nouveau_crtc *nv_crtc, struct drm_framebuffer *fb,
+		    int x, int y, bool update)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(fb);
+	u32 *push;
+
+	push = evo_wait(fb->dev, 0, 16);
+	if (push) {
+		evo_mthd(push, 0x0460 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, nvfb->nvbo->bo.offset >> 8);
+		evo_mthd(push, 0x0468 + (nv_crtc->index * 0x300), 4);
+		evo_data(push, (fb->height << 16) | fb->width);
+		evo_data(push, nvfb->r_pitch);
+		evo_data(push, nvfb->r_format);
+		evo_data(push, nvfb->r_dma);
+		evo_mthd(push, 0x04b0 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, (y << 16) | x);
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+		evo_kick(push, fb->dev, 0);
+	}
+
+	nv_crtc->fb.tile_flags = nvfb->r_dma;
+	return 0;
+}
+
+static void
+nvd0_crtc_cursor_show(struct nouveau_crtc *nv_crtc, bool show, bool update)
+{
+	struct drm_device *dev = nv_crtc->base.dev;
+	u32 *push = evo_wait(dev, 0, 16);
+	if (push) {
+		if (show) {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 2);
+			evo_data(push, 0x85000000);
+			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, NvEvoVRAM);
+		} else {
+			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x05000000);
+			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_data(push, 0x00000000);
+		}
+
+		if (update) {
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+		}
+
+		evo_kick(push, dev, 0);
+	}
+}
+
+static void
+nvd0_crtc_dpms(struct drm_crtc *crtc, int mode)
+{
+}
+
+static void
+nvd0_crtc_prepare(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 *push;
+
+	push = evo_wait(crtc->dev, 0, 2);
+	if (push) {
+		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x03000000);
+		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nvd0_crtc_cursor_show(nv_crtc, false, false);
+}
+
+static void
+nvd0_crtc_commit(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 *push;
+
+	push = evo_wait(crtc->dev, 0, 32);
+	if (push) {
+		evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, nv_crtc->fb.tile_flags);
+		evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 4);
+		evo_data(push, 0x83000000);
+		evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+		evo_data(push, 0x00000000);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, NvEvoVRAM);
+		evo_mthd(push, 0x0430 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0xffffff00);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nvd0_crtc_cursor_show(nv_crtc, nv_crtc->cursor.visible, true);
+}
+
+static bool
+nvd0_crtc_mode_fixup(struct drm_crtc *crtc, struct drm_display_mode *mode,
+		     struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static int
+nvd0_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+{
+	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->fb);
+	int ret;
+
+	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM);
+	if (ret)
+		return ret;
+
+	if (old_fb) {
+		nvfb = nouveau_framebuffer(old_fb);
+		nouveau_bo_unpin(nvfb->nvbo);
+	}
+
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
+		   struct drm_display_mode *mode, int x, int y,
+		   struct drm_framebuffer *old_fb)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct nouveau_connector *nv_connector;
+	u32 htotal = mode->htotal;
+	u32 vtotal = mode->vtotal;
+	u32 hsyncw = mode->hsync_end - mode->hsync_start - 1;
+	u32 vsyncw = mode->vsync_end - mode->vsync_start - 1;
+	u32 hfrntp = mode->hsync_start - mode->hdisplay;
+	u32 vfrntp = mode->vsync_start - mode->vdisplay;
+	u32 hbackp = mode->htotal - mode->hsync_end;
+	u32 vbackp = mode->vtotal - mode->vsync_end;
+	u32 hss2be = hsyncw + hbackp;
+	u32 vss2be = vsyncw + vbackp;
+	u32 hss2de = htotal - hfrntp;
+	u32 vss2de = vtotal - vfrntp;
+	u32 syncs, *push;
+	int ret;
+
+	syncs = 0x00000001;
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		syncs |= 0x00000008;
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		syncs |= 0x00000010;
+
+	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	push = evo_wait(crtc->dev, 0, 64);
+	if (push) {
+		evo_mthd(push, 0x0410 + (nv_crtc->index * 0x300), 5);
+		evo_data(push, 0x00000000);
+		evo_data(push, (vtotal << 16) | htotal);
+		evo_data(push, (vsyncw << 16) | hsyncw);
+		evo_data(push, (vss2be << 16) | hss2be);
+		evo_data(push, (vss2de << 16) | hss2de);
+		evo_mthd(push, 0x042c + (nv_crtc->index * 0x300), 1);
+		evo_data(push, 0x00000000); /* ??? */
+		evo_mthd(push, 0x0450 + (nv_crtc->index * 0x300), 3);
+		evo_data(push, mode->clock * 1000);
+		evo_data(push, 0x00200000); /* ??? */
+		evo_data(push, mode->clock * 1000);
+		evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 1);
+		evo_data(push, syncs);
+		evo_kick(push, crtc->dev, 0);
+	}
+
+	nv_connector = nouveau_crtc_connector_get(nv_crtc);
+	nvd0_crtc_set_dither(nv_crtc, nv_connector->use_dithering, false);
+	nvd0_crtc_set_scale(nv_crtc, nv_connector->scaling_mode, false);
+	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, false);
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
+			struct drm_framebuffer *old_fb)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	int ret;
+
+	if (!crtc->fb) {
+		NV_DEBUG_KMS(crtc->dev, "No FB bound\n");
+		return 0;
+	}
+
+	ret = nvd0_crtc_swap_fbs(crtc, old_fb);
+	if (ret)
+		return ret;
+
+	nvd0_crtc_set_image(nv_crtc, crtc->fb, x, y, true);
+	return 0;
+}
+
+static int
+nvd0_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb, int x, int y,
+			       enum mode_set_atomic state)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	nvd0_crtc_set_image(nv_crtc, fb, x, y, true);
+	return 0;
+}
+
+static void
+nvd0_crtc_lut_load(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	void __iomem *lut = nvbo_kmap_obj_iovirtual(nv_crtc->lut.nvbo);
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		writew(0x6000 + (nv_crtc->lut.r[i] >> 2), lut + (i * 0x20) + 0);
+		writew(0x6000 + (nv_crtc->lut.g[i] >> 2), lut + (i * 0x20) + 2);
+		writew(0x6000 + (nv_crtc->lut.b[i] >> 2), lut + (i * 0x20) + 4);
+	}
+}
+
+static int
+nvd0_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
+		     uint32_t handle, uint32_t width, uint32_t height)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	struct drm_device *dev = crtc->dev;
+	struct drm_gem_object *gem;
+	struct nouveau_bo *nvbo;
+	bool visible = (handle != 0);
+	int i, ret = 0;
+
+	if (visible) {
+		if (width != 64 || height != 64)
+			return -EINVAL;
+
+		gem = drm_gem_object_lookup(dev, file_priv, handle);
+		if (unlikely(!gem))
+			return -ENOENT;
+		nvbo = nouveau_gem_object(gem);
+
+		ret = nouveau_bo_map(nvbo);
+		if (ret == 0) {
+			for (i = 0; i < 64 * 64; i++) {
+				u32 v = nouveau_bo_rd32(nvbo, i);
+				nouveau_bo_wr32(nv_crtc->cursor.nvbo, i, v);
+			}
+			nouveau_bo_unmap(nvbo);
+		}
+
+		drm_gem_object_unreference_unlocked(gem);
+	}
+
+	if (visible != nv_crtc->cursor.visible) {
+		nvd0_crtc_cursor_show(nv_crtc, visible, true);
+		nv_crtc->cursor.visible = visible;
+	}
+
+	return ret;
+}
+
+static int
+nvd0_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	const u32 data = (y << 16) | x;
+
+	nv_wr32(crtc->dev, 0x64d084 + (nv_crtc->index * 0x1000), data);
+	nv_wr32(crtc->dev, 0x64d080 + (nv_crtc->index * 0x1000), 0x00000000);
+	return 0;
+}
+
+static void
+nvd0_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+		    uint32_t start, uint32_t size)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	u32 end = max(start + size, (u32)256);
+	u32 i;
+
+	for (i = start; i < end; i++) {
+		nv_crtc->lut.r[i] = r[i];
+		nv_crtc->lut.g[i] = g[i];
+		nv_crtc->lut.b[i] = b[i];
+	}
+
+	nvd0_crtc_lut_load(crtc);
+}
+
+static void
+nvd0_crtc_destroy(struct drm_crtc *crtc)
+{
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
+	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	nouveau_bo_unmap(nv_crtc->lut.nvbo);
+	nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+	drm_crtc_cleanup(crtc);
+	kfree(crtc);
+}
+
+static const struct drm_crtc_helper_funcs nvd0_crtc_hfunc = {
+	.dpms = nvd0_crtc_dpms,
+	.prepare = nvd0_crtc_prepare,
+	.commit = nvd0_crtc_commit,
+	.mode_fixup = nvd0_crtc_mode_fixup,
+	.mode_set = nvd0_crtc_mode_set,
+	.mode_set_base = nvd0_crtc_mode_set_base,
+	.mode_set_base_atomic = nvd0_crtc_mode_set_base_atomic,
+	.load_lut = nvd0_crtc_lut_load,
+};
+
+static const struct drm_crtc_funcs nvd0_crtc_func = {
+	.cursor_set = nvd0_crtc_cursor_set,
+	.cursor_move = nvd0_crtc_cursor_move,
+	.gamma_set = nvd0_crtc_gamma_set,
+	.set_config = drm_crtc_helper_set_config,
+	.destroy = nvd0_crtc_destroy,
+};
+
+static void
+nvd0_cursor_set_pos(struct nouveau_crtc *nv_crtc, int x, int y)
+{
+}
+
+static void
+nvd0_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
+{
+}
+
+static int
+nvd0_crtc_create(struct drm_device *dev, int index)
+{
+	struct nouveau_crtc *nv_crtc;
+	struct drm_crtc *crtc;
+	int ret, i;
+
+	nv_crtc = kzalloc(sizeof(*nv_crtc), GFP_KERNEL);
+	if (!nv_crtc)
+		return -ENOMEM;
+
+	nv_crtc->index = index;
+	nv_crtc->set_dither = nvd0_crtc_set_dither;
+	nv_crtc->set_scale = nvd0_crtc_set_scale;
+	nv_crtc->cursor.set_offset = nvd0_cursor_set_offset;
+	nv_crtc->cursor.set_pos = nvd0_cursor_set_pos;
+	for (i = 0; i < 256; i++) {
+		nv_crtc->lut.r[i] = i << 8;
+		nv_crtc->lut.g[i] = i << 8;
+		nv_crtc->lut.b[i] = i << 8;
+	}
+
+	crtc = &nv_crtc->base;
+	drm_crtc_init(dev, crtc, &nvd0_crtc_func);
+	drm_crtc_helper_add(crtc, &nvd0_crtc_hfunc);
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	ret = nouveau_bo_new(dev, 64 * 64 * 4, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, &nv_crtc->cursor.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret)
+			ret = nouveau_bo_map(nv_crtc->cursor.nvbo);
+		if (ret)
+			nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+			     0, 0x0000, &nv_crtc->lut.nvbo);
+	if (!ret) {
+		ret = nouveau_bo_pin(nv_crtc->lut.nvbo, TTM_PL_FLAG_VRAM);
+		if (!ret)
+			ret = nouveau_bo_map(nv_crtc->lut.nvbo);
+		if (ret)
+			nouveau_bo_ref(NULL, &nv_crtc->lut.nvbo);
+	}
+
+	if (ret)
+		goto out;
+
+	nvd0_crtc_lut_load(crtc);
+
+out:
+	if (ret)
+		nvd0_crtc_destroy(crtc);
+	return ret;
+}
+
+/******************************************************************************
+ * DAC
+ *****************************************************************************/
+static void
+nvd0_dac_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	int or = nv_encoder->or;
+	u32 dpms_ctrl;
+
+	dpms_ctrl = 0x80000000;
+	if (mode == DRM_MODE_DPMS_STANDBY || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000001;
+	if (mode == DRM_MODE_DPMS_SUSPEND || mode == DRM_MODE_DPMS_OFF)
+		dpms_ctrl |= 0x00000004;
+
+	nv_wait(dev, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_mask(dev, 0x61a004 + (or * 0x0800), 0xc000007f, dpms_ctrl);
+	nv_wait(dev, 0x61a004 + (or * 0x0800), 0x80000000, 0x00000000);
+}
+
+static bool
+nvd0_dac_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
+	}
+
+	return true;
+}
+
+static void
+nvd0_dac_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_dac_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		  struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	u32 *push;
+
+	nvd0_dac_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(encoder->dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0180 + (nv_encoder->or * 0x20), 2);
+		evo_data(push, 1 << nv_crtc->index);
+		evo_data(push, 0x00ff);
+		evo_kick(push, encoder->dev, 0);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nvd0_dac_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		nvd0_crtc_prepare(nv_encoder->crtc);
+
+		push = evo_wait(dev, 0, 4);
+		if (push) {
+			evo_mthd(push, 0x0180 + (nv_encoder->or * 0x20), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, dev, 0);
+		}
+
+		nv_encoder->crtc = NULL;
+	}
+}
+
+static enum drm_connector_status
+nvd0_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	enum drm_connector_status status = connector_status_disconnected;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	int or = nv_encoder->or;
+	u32 load;
+
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x00100000);
+	udelay(9500);
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x80000000);
+
+	load = nv_rd32(dev, 0x61a00c + (or * 0x800));
+	if ((load & 0x38000000) == 0x38000000)
+		status = connector_status_connected;
+
+	nv_wr32(dev, 0x61a00c + (or * 0x800), 0x00000000);
+	return status;
+}
+
+static void
+nvd0_dac_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nvd0_dac_hfunc = {
+	.dpms = nvd0_dac_dpms,
+	.mode_fixup = nvd0_dac_mode_fixup,
+	.prepare = nvd0_dac_prepare,
+	.commit = nvd0_dac_commit,
+	.mode_set = nvd0_dac_mode_set,
+	.disable = nvd0_dac_disconnect,
+	.get_crtc = nvd0_display_crtc_get,
+	.detect = nvd0_dac_detect
+};
+
+static const struct drm_encoder_funcs nvd0_dac_func = {
+	.destroy = nvd0_dac_destroy,
+};
+
+static int
+nvd0_dac_create(struct drm_connector *connector, struct dcb_entry *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nvd0_dac_func, DRM_MODE_ENCODER_DAC);
+	drm_encoder_helper_add(encoder, &nvd0_dac_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * SOR
+ *****************************************************************************/
+static void
+nvd0_sor_dpms(struct drm_encoder *encoder, int mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	struct drm_encoder *partner;
+	int or = nv_encoder->or;
+	u32 dpms_ctrl;
+
+	nv_encoder->last_dpms = mode;
+
+	list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_partner = nouveau_encoder(partner);
+
+		if (partner->encoder_type != DRM_MODE_ENCODER_TMDS)
+			continue;
+
+		if (nv_partner != nv_encoder &&
+		    nv_partner->dcb->or == nv_encoder->or) {
+			if (nv_partner->last_dpms == DRM_MODE_DPMS_ON)
+				return;
+			break;
+		}
+	}
+
+	dpms_ctrl  = (mode == DRM_MODE_DPMS_ON);
+	dpms_ctrl |= 0x80000000;
+
+	nv_wait(dev, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_mask(dev, 0x61c004 + (or * 0x0800), 0x80000001, dpms_ctrl);
+	nv_wait(dev, 0x61c004 + (or * 0x0800), 0x80000000, 0x00000000);
+	nv_wait(dev, 0x61c030 + (or * 0x0800), 0x10000000, 0x00000000);
+}
+
+static bool
+nvd0_sor_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *mode,
+		    struct drm_display_mode *adjusted_mode)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_connector *nv_connector;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	if (nv_connector && nv_connector->native_mode) {
+		if (nv_connector->scaling_mode != DRM_MODE_SCALE_NONE) {
+			int id = adjusted_mode->base.id;
+			*adjusted_mode = *nv_connector->native_mode;
+			adjusted_mode->base.id = id;
+		}
+	}
+
+	return true;
+}
+
+static void
+nvd0_sor_prepare(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_sor_commit(struct drm_encoder *encoder)
+{
+}
+
+static void
+nvd0_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
+		  struct drm_display_mode *mode)
+{
+	struct drm_nouveau_private *dev_priv = encoder->dev->dev_private;
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct nouveau_connector *nv_connector;
+	struct nvbios *bios = &dev_priv->vbios;
+	u32 mode_ctrl = (1 << nv_crtc->index);
+	u32 *push, or_config;
+
+	nv_connector = nouveau_encoder_connector_get(nv_encoder);
+	switch (nv_encoder->dcb->type) {
+	case OUTPUT_TMDS:
+		if (nv_encoder->dcb->sorconf.link & 1) {
+			if (mode->clock < 165000)
+				mode_ctrl |= 0x00000100;
+			else
+				mode_ctrl |= 0x00000500;
+		} else {
+			mode_ctrl |= 0x00000200;
+		}
+
+		or_config = (mode_ctrl & 0x00000f00) >> 8;
+		if (mode->clock >= 165000)
+			or_config |= 0x0100;
+		break;
+	case OUTPUT_LVDS:
+		or_config = (mode_ctrl & 0x00000f00) >> 8;
+		if (bios->fp_no_ddc) {
+			if (bios->fp.dual_link)
+				or_config |= 0x0100;
+			if (bios->fp.if_is_24bit)
+				or_config |= 0x0200;
+		} else {
+			if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG) {
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					or_config |= 0x0100;
+			} else
+			if (mode->clock >= bios->fp.duallink_transition_clk) {
+				or_config |= 0x0100;
+			}
+
+			if (or_config & 0x0100) {
+				if (bios->fp.strapless_is_24bit & 2)
+					or_config |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					or_config |= 0x0200;
+			}
+
+			if (nv_connector->base.display_info.bpc == 8)
+				or_config |= 0x0200;
+
+		}
+		break;
+	default:
+		BUG_ON(1);
+		break;
+	}
+
+	nvd0_sor_dpms(encoder, DRM_MODE_DPMS_ON);
+
+	push = evo_wait(encoder->dev, 0, 4);
+	if (push) {
+		evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 2);
+		evo_data(push, mode_ctrl);
+		evo_data(push, or_config);
+		evo_kick(push, encoder->dev, 0);
+	}
+
+	nv_encoder->crtc = encoder->crtc;
+}
+
+static void
+nvd0_sor_disconnect(struct drm_encoder *encoder)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_device *dev = encoder->dev;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		nvd0_crtc_prepare(nv_encoder->crtc);
+
+		push = evo_wait(dev, 0, 4);
+		if (push) {
+			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x0080, 1);
+			evo_data(push, 0x00000000);
+			evo_kick(push, dev, 0);
+		}
+
+		nv_encoder->crtc = NULL;
+		nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+	}
+}
+
+static void
+nvd0_sor_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+	kfree(encoder);
+}
+
+static const struct drm_encoder_helper_funcs nvd0_sor_hfunc = {
+	.dpms = nvd0_sor_dpms,
+	.mode_fixup = nvd0_sor_mode_fixup,
+	.prepare = nvd0_sor_prepare,
+	.commit = nvd0_sor_commit,
+	.mode_set = nvd0_sor_mode_set,
+	.disable = nvd0_sor_disconnect,
+	.get_crtc = nvd0_display_crtc_get,
+};
+
+static const struct drm_encoder_funcs nvd0_sor_func = {
+	.destroy = nvd0_sor_destroy,
+};
+
+static int
+nvd0_sor_create(struct drm_connector *connector, struct dcb_entry *dcbe)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+
+	nv_encoder = kzalloc(sizeof(*nv_encoder), GFP_KERNEL);
+	if (!nv_encoder)
+		return -ENOMEM;
+	nv_encoder->dcb = dcbe;
+	nv_encoder->or = ffs(dcbe->or) - 1;
+	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(dev, encoder, &nvd0_sor_func, DRM_MODE_ENCODER_TMDS);
+	drm_encoder_helper_add(encoder, &nvd0_sor_hfunc);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	return 0;
+}
+
+/******************************************************************************
+ * IRQ
+ *****************************************************************************/
+static struct dcb_entry *
+lookup_dcb(struct drm_device *dev, int id, u32 mc)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int type, or, i;
+
+	if (id < 4) {
+		type = OUTPUT_ANALOG;
+		or   = id;
+	} else {
+		switch (mc & 0x00000f00) {
+		case 0x00000000: type = OUTPUT_LVDS; break;
+		case 0x00000100: type = OUTPUT_TMDS; break;
+		case 0x00000200: type = OUTPUT_TMDS; break;
+		case 0x00000500: type = OUTPUT_TMDS; break;
+		default:
+			NV_ERROR(dev, "PDISP: unknown SOR mc 0x%08x\n", mc);
+			return NULL;
+		}
+
+		or = id - 4;
+	}
+
+	for (i = 0; i < dev_priv->vbios.dcb.entries; i++) {
+		struct dcb_entry *dcb = &dev_priv->vbios.dcb.entry[i];
+		if (dcb->type == type && (dcb->or & (1 << or)))
+			return dcb;
+	}
+
+	NV_ERROR(dev, "PDISP: DCB for %d/0x%08x not found\n", id, mc);
+	return NULL;
+}
+
+static void
+nvd0_display_unk1_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(dev, 0x640180 + (i * 0x20));
+		if (!(mcc & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcc);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, 0x0000, -1, dcb, crtc);
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk2_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	u32 or, tmp, pclk;
+	int i;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcc = nv_rd32(dev, 0x640180 + (i * 0x20));
+		if (!(mcc & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcc);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, 0x0000, -2, dcb, crtc);
+	}
+
+	pclk = nv_rd32(dev, 0x660450 + (crtc * 0x300)) / 1000;
+	if (mask & 0x00010000) {
+		nv50_crtc_set_clock(dev, crtc, pclk);
+	}
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(dev, 0x660180 + (i * 0x20));
+		u32 cfg = nv_rd32(dev, 0x660184 + (i * 0x20));
+		if (!(mcp & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcp);
+		if (!dcb)
+			continue;
+		or = ffs(dcb->or) - 1;
+
+		nouveau_bios_run_display_table(dev, cfg, pclk, dcb, crtc);
+
+		nv_wr32(dev, 0x612200 + (crtc * 0x800), 0x00000000);
+		switch (dcb->type) {
+		case OUTPUT_ANALOG:
+			nv_wr32(dev, 0x612280 + (or * 0x800), 0x00000000);
+			break;
+		case OUTPUT_TMDS:
+		case OUTPUT_LVDS:
+			if (cfg & 0x00000100)
+				tmp = 0x00000101;
+			else
+				tmp = 0x00000000;
+
+			nv_mask(dev, 0x612300 + (or * 0x800), 0x00000707, tmp);
+			break;
+		default:
+			break;
+		}
+
+		break;
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_unk4_handler(struct drm_device *dev, u32 crtc, u32 mask)
+{
+	struct dcb_entry *dcb;
+	int pclk, i;
+
+	pclk = nv_rd32(dev, 0x660450 + (crtc * 0x300)) / 1000;
+
+	for (i = 0; mask && i < 8; i++) {
+		u32 mcp = nv_rd32(dev, 0x660180 + (i * 0x20));
+		u32 cfg = nv_rd32(dev, 0x660184 + (i * 0x20));
+		if (!(mcp & (1 << crtc)))
+			continue;
+
+		dcb = lookup_dcb(dev, i, mcp);
+		if (!dcb)
+			continue;
+
+		nouveau_bios_run_display_table(dev, cfg, -pclk, dcb, crtc);
+	}
+
+	nv_wr32(dev, 0x6101d4, 0x00000000);
+	nv_wr32(dev, 0x6109d4, 0x00000000);
+	nv_wr32(dev, 0x6101d0, 0x80000000);
+}
+
+static void
+nvd0_display_bh(unsigned long data)
+{
+	struct drm_device *dev = (struct drm_device *)data;
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 mask, crtc;
+	int i;
+
+	if (drm_debug & (DRM_UT_DRIVER | DRM_UT_KMS)) {
+		NV_INFO(dev, "PDISP: modeset req %d\n", disp->modeset);
+		NV_INFO(dev, " STAT: 0x%08x 0x%08x 0x%08x\n",
+			 nv_rd32(dev, 0x6101d0),
+			 nv_rd32(dev, 0x6101d4), nv_rd32(dev, 0x6109d4));
+		for (i = 0; i < 8; i++) {
+			NV_INFO(dev, " %s%d: 0x%08x 0x%08x\n",
+				i < 4 ? "DAC" : "SOR", i,
+				nv_rd32(dev, 0x640180 + (i * 0x20)),
+				nv_rd32(dev, 0x660180 + (i * 0x20)));
+		}
+	}
+
+	mask = nv_rd32(dev, 0x6101d4);
+	crtc = 0;
+	if (!mask) {
+		mask = nv_rd32(dev, 0x6109d4);
+		crtc = 1;
+	}
+
+	if (disp->modeset & 0x00000001)
+		nvd0_display_unk1_handler(dev, crtc, mask);
+	if (disp->modeset & 0x00000002)
+		nvd0_display_unk2_handler(dev, crtc, mask);
+	if (disp->modeset & 0x00000004)
+		nvd0_display_unk4_handler(dev, crtc, mask);
+}
+
+static void
+nvd0_display_intr(struct drm_device *dev)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 intr = nv_rd32(dev, 0x610088);
+
+	if (intr & 0x00000002) {
+		u32 stat = nv_rd32(dev, 0x61009c);
+		int chid = ffs(stat) - 1;
+		if (chid >= 0) {
+			u32 mthd = nv_rd32(dev, 0x6101f0 + (chid * 12));
+			u32 data = nv_rd32(dev, 0x6101f4 + (chid * 12));
+			u32 unkn = nv_rd32(dev, 0x6101f8 + (chid * 12));
+
+			NV_INFO(dev, "EvoCh: chid %d mthd 0x%04x data 0x%08x "
+				     "0x%08x 0x%08x\n",
+				chid, (mthd & 0x0000ffc), data, mthd, unkn);
+			nv_wr32(dev, 0x61009c, (1 << chid));
+			nv_wr32(dev, 0x6101f0 + (chid * 12), 0x90000000);
+		}
+
+		intr &= ~0x00000002;
+	}
+
+	if (intr & 0x00100000) {
+		u32 stat = nv_rd32(dev, 0x6100ac);
+
+		if (stat & 0x00000007) {
+			disp->modeset = stat;
+			tasklet_schedule(&disp->tasklet);
+
+			nv_wr32(dev, 0x6100ac, (stat & 0x00000007));
+			stat &= ~0x00000007;
+		}
+
+		if (stat) {
+			NV_INFO(dev, "PDISP: unknown intr24 0x%08x\n", stat);
+			nv_wr32(dev, 0x6100ac, stat);
+		}
+
+		intr &= ~0x00100000;
+	}
+
+	if (intr & 0x01000000) {
+		u32 stat = nv_rd32(dev, 0x6100bc);
+		nv_wr32(dev, 0x6100bc, stat);
+		intr &= ~0x01000000;
+	}
+
+	if (intr & 0x02000000) {
+		u32 stat = nv_rd32(dev, 0x6108bc);
+		nv_wr32(dev, 0x6108bc, stat);
+		intr &= ~0x02000000;
+	}
+
+	if (intr)
+		NV_INFO(dev, "PDISP: unknown intr 0x%08x\n", intr);
+}
+
+/******************************************************************************
+ * Init
+ *****************************************************************************/
+static void
+nvd0_display_fini(struct drm_device *dev)
+{
+	int i;
+
+	/* fini cursors */
+	for (i = 14; i >= 13; i--) {
+		if (!(nv_rd32(dev, 0x610490 + (i * 0x10)) & 0x00000001))
+			continue;
+
+		nv_mask(dev, 0x610490 + (i * 0x10), 0x00000001, 0x00000000);
+		nv_wait(dev, 0x610490 + (i * 0x10), 0x00010000, 0x00000000);
+		nv_mask(dev, 0x610090, 1 << i, 0x00000000);
+		nv_mask(dev, 0x6100a0, 1 << i, 0x00000000);
+	}
+
+	/* fini master */
+	if (nv_rd32(dev, 0x610490) & 0x00000010) {
+		nv_mask(dev, 0x610490, 0x00000010, 0x00000000);
+		nv_mask(dev, 0x610490, 0x00000003, 0x00000000);
+		nv_wait(dev, 0x610490, 0x80000000, 0x00000000);
+		nv_mask(dev, 0x610090, 0x00000001, 0x00000000);
+		nv_mask(dev, 0x6100a0, 0x00000001, 0x00000000);
+	}
+}
+
+int
+nvd0_display_init(struct drm_device *dev)
+{
+	struct nvd0_display *disp = nvd0_display(dev);
+	u32 *push;
+	int i;
+
+	if (nv_rd32(dev, 0x6100ac) & 0x00000100) {
+		nv_wr32(dev, 0x6100ac, 0x00000100);
+		nv_mask(dev, 0x6194e8, 0x00000001, 0x00000000);
+		if (!nv_wait(dev, 0x6194e8, 0x00000002, 0x00000000)) {
+			NV_ERROR(dev, "PDISP: 0x6194e8 0x%08x\n",
+				 nv_rd32(dev, 0x6194e8));
+			return -EBUSY;
+		}
+	}
+
+	/* nfi what these are exactly, i do know that SOR_MODE_CTRL won't
+	 * work at all unless you do the SOR part below.
+	 */
+	for (i = 0; i < 3; i++) {
+		u32 dac = nv_rd32(dev, 0x61a000 + (i * 0x800));
+		nv_wr32(dev, 0x6101c0 + (i * 0x800), dac);
+	}
+
+	for (i = 0; i < 4; i++) {
+		u32 sor = nv_rd32(dev, 0x61c000 + (i * 0x800));
+		nv_wr32(dev, 0x6301c4 + (i * 0x800), sor);
+	}
+
+	for (i = 0; i < 2; i++) {
+		u32 crtc0 = nv_rd32(dev, 0x616104 + (i * 0x800));
+		u32 crtc1 = nv_rd32(dev, 0x616108 + (i * 0x800));
+		u32 crtc2 = nv_rd32(dev, 0x61610c + (i * 0x800));
+		nv_wr32(dev, 0x6101b4 + (i * 0x800), crtc0);
+		nv_wr32(dev, 0x6101b8 + (i * 0x800), crtc1);
+		nv_wr32(dev, 0x6101bc + (i * 0x800), crtc2);
+	}
+
+	/* point at our hash table / objects, enable interrupts */
+	nv_wr32(dev, 0x610010, (disp->mem->vinst >> 8) | 9);
+	nv_mask(dev, 0x6100b0, 0x00000307, 0x00000307);
+
+	/* init master */
+	nv_wr32(dev, 0x610494, (disp->evo[0].handle >> 8) | 3);
+	nv_wr32(dev, 0x610498, 0x00010000);
+	nv_wr32(dev, 0x61049c, 0x00000001);
+	nv_mask(dev, 0x610490, 0x00000010, 0x00000010);
+	nv_wr32(dev, 0x640000, 0x00000000);
+	nv_wr32(dev, 0x610490, 0x01000013);
+	if (!nv_wait(dev, 0x610490, 0x80000000, 0x00000000)) {
+		NV_ERROR(dev, "PDISP: master 0x%08x\n",
+			 nv_rd32(dev, 0x610490));
+		return -EBUSY;
+	}
+	nv_mask(dev, 0x610090, 0x00000001, 0x00000001);
+	nv_mask(dev, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* init cursors */
+	for (i = 13; i <= 14; i++) {
+		nv_wr32(dev, 0x610490 + (i * 0x10), 0x00000001);
+		if (!nv_wait(dev, 0x610490 + (i * 0x10), 0x00010000, 0x00010000)) {
+			NV_ERROR(dev, "PDISP: curs%d 0x%08x\n", i,
+				 nv_rd32(dev, 0x610490 + (i * 0x10)));
+			return -EBUSY;
+		}
+
+		nv_mask(dev, 0x610090, 1 << i, 1 << i);
+		nv_mask(dev, 0x6100a0, 1 << i, 1 << i);
+	}
+
+	push = evo_wait(dev, 0, 32);
+	if (!push)
+		return -EBUSY;
+	evo_mthd(push, 0x0088, 1);
+	evo_data(push, NvEvoSync);
+	evo_mthd(push, 0x0084, 1);
+	evo_data(push, 0x00000000);
+	evo_mthd(push, 0x0084, 1);
+	evo_data(push, 0x80000000);
+	evo_mthd(push, 0x008c, 1);
+	evo_data(push, 0x00000000);
+	evo_kick(push, dev, 0);
+
+	return 0;
+}
+
+void
+nvd0_display_destroy(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvd0_display *disp = nvd0_display(dev);
+	struct pci_dev *pdev = dev->pdev;
+
+	nvd0_display_fini(dev);
+
+	pci_free_consistent(pdev, PAGE_SIZE, disp->evo[0].ptr, disp->evo[0].handle);
+	nouveau_gpuobj_ref(NULL, &disp->mem);
+	nouveau_irq_unregister(dev, 26);
+
+	dev_priv->engine.display.priv = NULL;
+	kfree(disp);
+}
+
+int
+nvd0_display_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
+	struct dcb_table *dcb = &dev_priv->vbios.dcb;
+	struct drm_connector *connector, *tmp;
+	struct pci_dev *pdev = dev->pdev;
+	struct nvd0_display *disp;
+	struct dcb_entry *dcbe;
+	int ret, i;
+
+	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
+	if (!disp)
+		return -ENOMEM;
+	dev_priv->engine.display.priv = disp;
+
+	/* create crtc objects to represent the hw heads */
+	for (i = 0; i < 2; i++) {
+		ret = nvd0_crtc_create(dev, i);
+		if (ret)
+			goto out;
+	}
+
+	/* create encoder/connector objects based on VBIOS DCB table */
+	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
+		connector = nouveau_connector_create(dev, dcbe->connector);
+		if (IS_ERR(connector))
+			continue;
+
+		if (dcbe->location != DCB_LOC_ON_CHIP) {
+			NV_WARN(dev, "skipping off-chip encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
+		}
+
+		switch (dcbe->type) {
+		case OUTPUT_TMDS:
+		case OUTPUT_LVDS:
+			nvd0_sor_create(connector, dcbe);
+			break;
+		case OUTPUT_ANALOG:
+			nvd0_dac_create(connector, dcbe);
+			break;
+		default:
+			NV_WARN(dev, "skipping unsupported encoder %d/%d\n",
+				dcbe->type, ffs(dcbe->or) - 1);
+			continue;
+		}
+	}
+
+	/* cull any connectors we created that don't have an encoder */
+	list_for_each_entry_safe(connector, tmp, &dev->mode_config.connector_list, head) {
+		if (connector->encoder_ids[0])
+			continue;
+
+		NV_WARN(dev, "%s has no encoders, removing\n",
+			drm_get_connector_name(connector));
+		connector->funcs->destroy(connector);
+	}
+
+	/* setup interrupt handling */
+	tasklet_init(&disp->tasklet, nvd0_display_bh, (unsigned long)dev);
+	nouveau_irq_register(dev, 26, nvd0_display_intr);
+
+	/* hash table and dma objects for the memory areas we care about */
+	ret = nouveau_gpuobj_new(dev, NULL, 0x4000, 0x10000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &disp->mem);
+	if (ret)
+		goto out;
+
+	nv_wo32(disp->mem, 0x1000, 0x00000049);
+	nv_wo32(disp->mem, 0x1004, (disp->mem->vinst + 0x2000) >> 8);
+	nv_wo32(disp->mem, 0x1008, (disp->mem->vinst + 0x2fff) >> 8);
+	nv_wo32(disp->mem, 0x100c, 0x00000000);
+	nv_wo32(disp->mem, 0x1010, 0x00000000);
+	nv_wo32(disp->mem, 0x1014, 0x00000000);
+	nv_wo32(disp->mem, 0x0000, NvEvoSync);
+	nv_wo32(disp->mem, 0x0004, (0x1000 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1020, 0x00000049);
+	nv_wo32(disp->mem, 0x1024, 0x00000000);
+	nv_wo32(disp->mem, 0x1028, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x102c, 0x00000000);
+	nv_wo32(disp->mem, 0x1030, 0x00000000);
+	nv_wo32(disp->mem, 0x1034, 0x00000000);
+	nv_wo32(disp->mem, 0x0008, NvEvoVRAM);
+	nv_wo32(disp->mem, 0x000c, (0x1020 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1040, 0x00000009);
+	nv_wo32(disp->mem, 0x1044, 0x00000000);
+	nv_wo32(disp->mem, 0x1048, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x104c, 0x00000000);
+	nv_wo32(disp->mem, 0x1050, 0x00000000);
+	nv_wo32(disp->mem, 0x1054, 0x00000000);
+	nv_wo32(disp->mem, 0x0010, NvEvoVRAM_LP);
+	nv_wo32(disp->mem, 0x0014, (0x1040 << 9) | 0x00000001);
+
+	nv_wo32(disp->mem, 0x1060, 0x0fe00009);
+	nv_wo32(disp->mem, 0x1064, 0x00000000);
+	nv_wo32(disp->mem, 0x1068, (dev_priv->vram_size - 1) >> 8);
+	nv_wo32(disp->mem, 0x106c, 0x00000000);
+	nv_wo32(disp->mem, 0x1070, 0x00000000);
+	nv_wo32(disp->mem, 0x1074, 0x00000000);
+	nv_wo32(disp->mem, 0x0018, NvEvoFB32);
+	nv_wo32(disp->mem, 0x001c, (0x1060 << 9) | 0x00000001);
+
+	pinstmem->flush(dev);
+
+	/* push buffers for evo channels */
+	disp->evo[0].ptr =
+		pci_alloc_consistent(pdev, PAGE_SIZE, &disp->evo[0].handle);
+	if (!disp->evo[0].ptr) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = nvd0_display_init(dev);
+	if (ret)
+		goto out;
+
+out:
+	if (ret)
+		nvd0_display_destroy(dev);
+	return ret;
+}
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index c4ffa14fb2f4..ed406e8404a3 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -39,7 +39,7 @@
 
 static void evergreen_gpu_init(struct radeon_device *rdev);
 void evergreen_fini(struct radeon_device *rdev);
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev)
 {
@@ -935,6 +935,9 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(VM_CONTEXT1_CNTL, 0);
 
 	evergreen_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
@@ -2586,7 +2589,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -2697,7 +2700,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
 	r600_rlc_stop(rdev);
 }
 
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
@@ -3003,8 +3006,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	int r;
 
 	/* enable pcie gen2 link */
-	if (!ASIC_IS_DCE5(rdev))
-		evergreen_pcie_gen2_enable(rdev);
+	evergreen_pcie_gen2_enable(rdev);
 
 	if (ASIC_IS_DCE5(rdev)) {
 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
@@ -3041,7 +3043,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -3107,45 +3109,14 @@ int evergreen_resume(struct radeon_device *rdev)
 
 int evergreen_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
+	r600_blit_suspend(rdev);
 
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-
-	return 0;
-}
-
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset,
-			uint64_t dst_offset,
-			unsigned num_gpu_pages,
-			struct radeon_fence *fence)
-{
-	int r;
-
-	mutex_lock(&rdev->r600_blit.mutex);
-	rdev->r600_blit.vb_ib = NULL;
-	r = evergreen_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
-	if (r) {
-		if (rdev->r600_blit.vb_ib)
-			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-		mutex_unlock(&rdev->r600_blit.mutex);
-		return r;
-	}
-	evergreen_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
-	evergreen_blit_done_copy(rdev, fence);
-	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
@@ -3257,7 +3228,7 @@ int evergreen_init(struct radeon_device *rdev)
 
 void evergreen_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
@@ -3273,7 +3244,7 @@ void evergreen_fini(struct radeon_device *rdev)
 	rdev->bios = NULL;
 }
 
-static void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
+void evergreen_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, speed_cntl;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb251858e72..dcf11bbc06d9 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -56,7 +56,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -67,7 +69,7 @@ set_render_target(struct radeon_device *rdev, int format,
 	radeon_ring_write(rdev, slice);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, cb_color_info);
-	radeon_ring_write(rdev, (1 << 4));
+	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
@@ -133,12 +135,16 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	u32 sq_vtx_constant_word2, sq_vtx_constant_word3;
 
 	/* high addr, stride */
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |= SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 	/* xyzw swizzles */
-	sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);
+	sq_vtx_constant_word3 = SQ_VTCX_SEL_X(SQ_SEL_X) |
+		SQ_VTCX_SEL_Y(SQ_SEL_Y) |
+		SQ_VTCX_SEL_Z(SQ_SEL_Z) |
+		SQ_VTCX_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0x580);
@@ -149,7 +155,7 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
 	radeon_ring_write(rdev, 0);
-	radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);
+	radeon_ring_write(rdev, S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_BUFFER));
 
 	if ((rdev->family == CHIP_CEDAR) ||
 	    (rdev->family == CHIP_PALM) ||
@@ -176,14 +182,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0); /* 2D */
+	sq_tex_resource_word0 = TEX_DIM(SQ_TEX_DIM_2D);
 	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
 				  ((w - 1) << 18));
-	sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
+	sq_tex_resource_word1 = ((h - 1) << 0) |
+				TEX_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	/* xyzw swizzles */
-	sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);
+	sq_tex_resource_word4 = TEX_DST_SEL_X(SQ_SEL_X) |
+				TEX_DST_SEL_Y(SQ_SEL_Y) |
+				TEX_DST_SEL_Z(SQ_SEL_Z) |
+				TEX_DST_SEL_W(SQ_SEL_W);
 
-	sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);
+	sq_tex_resource_word7 = format |
+		S__SQ_CONSTANT_TYPE(SQ_TEX_VTX_VALID_TEXTURE);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
 	radeon_ring_write(rdev, 0);
@@ -584,31 +595,6 @@ set_default_state(struct radeon_device *rdev)
 
 }
 
-static inline uint32_t i2f(uint32_t input)
-{
-	u32 result, i, exponent, fraction;
-
-	if ((input & 0x3fff) == 0)
-		result = 0; /* 0 is a special case */
-	else {
-		exponent = 140; /* exponent biased by 127; */
-		fraction = (input & 0x3fff) << 10; /* cheat and only
-						      handle numbers below 2^^15 */
-		for (i = 0; i < 14; i++) {
-			if (fraction & 0x800000)
-				break;
-			else {
-				fraction = fraction << 1; /* keep
-							     shifting left until top bit = 1 */
-				exponent = exponent - 1;
-			}
-		}
-		result = exponent << 23 | (fraction & 0x7fffff); /* mask
-								    off top bit; assumed 1 */
-	}
-	return result;
-}
-
 int evergreen_blit_init(struct radeon_device *rdev)
 {
 	u32 obj_size;
@@ -617,6 +603,24 @@ int evergreen_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 55; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 74;
+
+	rdev->r600_blit.max_dim = 16384;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -712,277 +716,3 @@ done:
 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
 	return 0;
 }
-
-void evergreen_blit_fini(struct radeon_device *rdev)
-{
-	int r;
-
-	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
-	if (rdev->r600_blit.shader_obj == NULL)
-		return;
-	/* If we can't reserve the bo, unref should be enough to destroy
-	 * it when it becomes idle.
-	 */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (!r) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
-	radeon_bo_unref(&rdev->r600_blit.shader_obj);
-}
-
-static int evergreen_vb_ib_get(struct radeon_device *rdev)
-{
-	int r;
-	r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
-	if (r) {
-		DRM_ERROR("failed to get IB for vertex buffer\n");
-		return r;
-	}
-
-	rdev->r600_blit.vb_total = 64*1024;
-	rdev->r600_blit.vb_used = 0;
-	return 0;
-}
-
-static void evergreen_vb_ib_put(struct radeon_device *rdev)
-{
-	radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
-	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
-}
-
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
-{
-	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits + fence emit possible */
-	int dwords_per_loop = 74, num_loops;
-
-	r = evergreen_vb_ib_get(rdev);
-	if (r)
-		return r;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192 * 4;
-
-	max_size = 8192 * line_size;
-
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
-	/* calculate number of loops correctly */
-	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 55; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
-	r = radeon_ring_lock(rdev, ring_size);
-	if (r)
-		return r;
-
-	set_default_state(rdev); /* 36 */
-	set_shaders(rdev); /* 16 */
-	return 0;
-}
-
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
-{
-	int r;
-
-	if (rdev->r600_blit.vb_ib)
-		evergreen_vb_ib_put(rdev);
-
-	if (fence)
-		r = radeon_fence_emit(rdev, fence);
-
-	radeon_ring_unlock_commit(rdev);
-}
-
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes)
-{
-	int max_bytes;
-	u64 vb_gpu_addr;
-	u32 *vb;
-
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
-	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
-
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
-
-			/* 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 10 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 17 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 15 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 74 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
-	}
-}
-
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index a134790903d3..7fdfa8ea7570 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 	track->db_s_write_bo = NULL;
 }
 
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
-	/* XXX fill in */
-	return 0;
-}
-
 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 {
 	struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 }
 
 /**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser:		parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_packet p3reloc;
-	int r;
-
-	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
-	if (r) {
-		return 0;
-	}
-	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
-		return 0;
-	}
-	return 1;
-}
-
-/**
  * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
  * @parser:		parser structure holding parsing context.
  *
@@ -414,7 +386,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -990,7 +962,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 						   struct radeon_bo *texture,
 						   struct radeon_bo *mipmap)
 {
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 7363d9dec909..b937c49054d9 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -941,11 +941,15 @@
 #define	CB_COLOR0_SLICE					0x28c68
 #define	CB_COLOR0_VIEW					0x28c6c
 #define	CB_COLOR0_INFO					0x28c70
+#	define CB_FORMAT(x)				((x) << 2)
 #       define CB_ARRAY_MODE(x)                         ((x) << 8)
 #       define ARRAY_LINEAR_GENERAL                     0
 #       define ARRAY_LINEAR_ALIGNED                     1
 #       define ARRAY_1D_TILED_THIN1                     2
 #       define ARRAY_2D_TILED_THIN1                     4
+#	define CB_SOURCE_FORMAT(x)			((x) << 24)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define	CB_COLOR0_ATTRIB				0x28c74
 #define	CB_COLOR0_DIM					0x28c78
 /* only CB0-7 blocks have these regs */
@@ -1107,15 +1111,53 @@
 #define	CB_COLOR7_CLEAR_WORD3				0x28e3c
 
 #define SQ_TEX_RESOURCE_WORD0_0                         0x30000
+#	define TEX_DIM(x)				((x) << 0)
+#	define SQ_TEX_DIM_1D				0
+#	define SQ_TEX_DIM_2D				1
+#	define SQ_TEX_DIM_3D				2
+#	define SQ_TEX_DIM_CUBEMAP			3
+#	define SQ_TEX_DIM_1D_ARRAY			4
+#	define SQ_TEX_DIM_2D_ARRAY			5
+#	define SQ_TEX_DIM_2D_MSAA			6
+#	define SQ_TEX_DIM_2D_ARRAY_MSAA			7
 #define SQ_TEX_RESOURCE_WORD1_0                         0x30004
 #       define TEX_ARRAY_MODE(x)                        ((x) << 28)
 #define SQ_TEX_RESOURCE_WORD2_0                         0x30008
 #define SQ_TEX_RESOURCE_WORD3_0                         0x3000C
 #define SQ_TEX_RESOURCE_WORD4_0                         0x30010
+#	define TEX_DST_SEL_X(x)				((x) << 16)
+#	define TEX_DST_SEL_Y(x)				((x) << 19)
+#	define TEX_DST_SEL_Z(x)				((x) << 22)
+#	define TEX_DST_SEL_W(x)				((x) << 25)
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define SQ_TEX_RESOURCE_WORD5_0                         0x30014
 #define SQ_TEX_RESOURCE_WORD6_0                         0x30018
 #define SQ_TEX_RESOURCE_WORD7_0                         0x3001c
 
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000C
+#	define SQ_VTCX_SEL_X(x)				((x) << 3)
+#	define SQ_VTCX_SEL_Y(x)				((x) << 6)
+#	define SQ_VTCX_SEL_Z(x)				((x) << 9)
+#	define SQ_VTCX_SEL_W(x)				((x) << 12)
+#define SQ_VTX_CONSTANT_WORD4_0				0x30010
+#define SQ_VTX_CONSTANT_WORD5_0                         0x30014
+#define SQ_VTX_CONSTANT_WORD6_0                         0x30018
+#define SQ_VTX_CONSTANT_WORD7_0                         0x3001c
+
 /* cayman 3D regs */
 #define CAYMAN_VGT_OFFCHIP_LDS_BASE			0x89B0
 #define CAYMAN_DB_EQAA					0x28804
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 8c79ca97753d..556b7bc3418b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -40,6 +40,7 @@ extern void evergreen_mc_program(struct radeon_device *rdev);
 extern void evergreen_irq_suspend(struct radeon_device *rdev);
 extern int evergreen_mc_init(struct radeon_device *rdev);
 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
+extern void evergreen_pcie_gen2_enable(struct radeon_device *rdev);
 
 #define EVERGREEN_PFP_UCODE_SIZE 1120
 #define EVERGREEN_PM4_UCODE_SIZE 1376
@@ -967,6 +968,9 @@ int cayman_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(VM_CONTEXT1_CNTL, 0);
 
 	cayman_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
@@ -1341,6 +1345,9 @@ static int cayman_startup(struct radeon_device *rdev)
 {
 	int r;
 
+	/* enable pcie gen2 link */
+	evergreen_pcie_gen2_enable(rdev);
+
 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->rlc_fw || !rdev->mc_fw) {
 		r = ni_init_microcode(rdev);
 		if (r) {
@@ -1362,7 +1369,7 @@ static int cayman_startup(struct radeon_device *rdev)
 
 	r = evergreen_blit_init(rdev);
 	if (r) {
-		evergreen_blit_fini(rdev);
+		r600_blit_fini(rdev);
 		rdev->asic->copy = NULL;
 		dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r);
 	}
@@ -1423,21 +1430,13 @@ int cayman_resume(struct radeon_device *rdev)
 
 int cayman_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	/* FIXME: we should wait for ring to be empty */
 	cayman_cp_enable(rdev, false);
 	rdev->cp.ready = false;
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
-
-	/* unpin shaders bo */
-	r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-	if (likely(r == 0)) {
-		radeon_bo_unpin(rdev->r600_blit.shader_obj);
-		radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-	}
+	r600_blit_suspend(rdev);
 
 	return 0;
 }
@@ -1550,7 +1549,7 @@ int cayman_init(struct radeon_device *rdev)
 
 void cayman_fini(struct radeon_device *rdev)
 {
-	evergreen_blit_fini(rdev);
+	r600_blit_fini(rdev);
 	cayman_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7fcdbbbf2979..8f8b8fa14357 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  */
 
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg)
+{
+	int r;
+	u32 tile_flags = 0;
+	u32 tmp;
+	struct radeon_cs_reloc *reloc;
+	u32 value;
+
+	r = r100_cs_packet_next_reloc(p, &reloc);
+	if (r) {
+		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+			  idx, reg);
+		r100_cs_dump_packet(p, pkt);
+		return r;
+	}
+	value = radeon_get_ib_value(p, idx);
+	tmp = value & 0x003fffff;
+	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+		tile_flags |= RADEON_DST_TILE_MACRO;
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+		if (reg == RADEON_SRC_PITCH_OFFSET) {
+			DRM_ERROR("Cannot src blit from microtiled surface\n");
+			r100_cs_dump_packet(p, pkt);
+			return -EINVAL;
+		}
+		tile_flags |= RADEON_DST_TILE_MICRO;
+	}
+
+	tmp |= tile_flags;
+	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+	return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx)
+{
+	unsigned c, i;
+	struct radeon_cs_reloc *reloc;
+	struct r100_cs_track *track;
+	int r = 0;
+	volatile uint32_t *ib;
+	u32 idx_value;
+
+	ib = p->ib->ptr;
+	track = (struct r100_cs_track *)p->track;
+	c = radeon_get_ib_value(p, idx++) & 0x1F;
+	if (c > 16) {
+	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+		      pkt->opcode);
+	    r100_cs_dump_packet(p, pkt);
+	    return -EINVAL;
+	}
+	track->num_arrays = c;
+	for (i = 0; i < (c - 1); i+=2, idx+=3) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize &= 0x7F;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 1].robj = reloc->robj;
+		track->arrays[i + 1].esize = idx_value >> 24;
+		track->arrays[i + 1].esize &= 0x7F;
+	}
+	if (c & 1) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].esize &= 0x7F;
+	}
+	return r;
+}
+
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
@@ -513,6 +615,9 @@ int r100_pci_gart_enable(struct radeon_device *rdev)
 	tmp = RREG32(RADEON_AIC_CNTL) | RADEON_PCIGART_TRANSLATE_EN;
 	WREG32(RADEON_AIC_CNTL, tmp);
 	r100_pci_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
@@ -588,7 +693,7 @@ void r100_irq_disable(struct radeon_device *rdev)
 	WREG32(R_000044_GEN_INT_STATUS, tmp);
 }
 
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
 {
 	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 	uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -3147,7 +3252,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 	}
 }
 
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
 {
 	DRM_ERROR("pitch                      %d\n", t->pitch);
 	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
@@ -3965,3 +4070,43 @@ int r100_init(struct radeon_device *rdev)
 	}
 	return 0;
 }
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < rdev->rmmio_size)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < rdev->rmmio_size)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+	if (reg < rdev->rio_mem_size)
+		return ioread32(rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	if (reg < rdev->rio_mem_size)
+		iowrite32(v, rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc5d4bd..6a603b378adb 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		       struct radeon_cs_packet *pkt,
 		       unsigned idx, unsigned reg);
 
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
-					  struct radeon_cs_packet *pkt,
-					  unsigned idx,
-					  unsigned reg)
-{
-	int r;
-	u32 tile_flags = 0;
-	u32 tmp;
-	struct radeon_cs_reloc *reloc;
-	u32 value;
-
-	r = r100_cs_packet_next_reloc(p, &reloc);
-	if (r) {
-		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-			  idx, reg);
-		r100_cs_dump_packet(p, pkt);
-		return r;
-	}
-	value = radeon_get_ib_value(p, idx);
-	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-		tile_flags |= RADEON_DST_TILE_MACRO;
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-		if (reg == RADEON_SRC_PITCH_OFFSET) {
-			DRM_ERROR("Cannot src blit from microtiled surface\n");
-			r100_cs_dump_packet(p, pkt);
-			return -EINVAL;
-		}
-		tile_flags |= RADEON_DST_TILE_MICRO;
-	}
-
-	tmp |= tile_flags;
-	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
-	return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
-					   struct radeon_cs_packet *pkt,
-					   int idx)
-{
-	unsigned c, i;
-	struct radeon_cs_reloc *reloc;
-	struct r100_cs_track *track;
-	int r = 0;
-	volatile uint32_t *ib;
-	u32 idx_value;
-
-	ib = p->ib->ptr;
-	track = (struct r100_cs_track *)p->track;
-	c = radeon_get_ib_value(p, idx++) & 0x1F;
-	if (c > 16) {
-	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
-		      pkt->opcode);
-	    r100_cs_dump_packet(p, pkt);
-	    return -EINVAL;
-	}
-	track->num_arrays = c;
-	for (i = 0; i < (c - 1); i+=2, idx+=3) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize &= 0x7F;
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 1].robj = reloc->robj;
-		track->arrays[i + 1].esize = idx_value >> 24;
-		track->arrays[i + 1].esize &= 0x7F;
-	}
-	if (c & 1) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-					  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].esize &= 0x7F;
-	}
-	return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 55a7f190027e..33f2b68c680b 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -144,8 +144,9 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
 	tmp |= RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
 	WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
 	rv370_pcie_gart_tlb_flush(rdev);
-	DRM_INFO("PCIE GART of %uM enabled (table at 0x%08X).\n",
-		 (unsigned)(rdev->mc.gtt_size >> 20), table_addr);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742e4140..1fe98b421c9b 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 /**
  * Emit the sequence to pacify R300.
  */
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
 {
 	uint32_t cache_z, cache_3d, cache_2d;
 	RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 720dd99163f8..12470b090ddf 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -993,6 +993,9 @@ int r600_pcie_gart_enable(struct radeon_device *rdev)
 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
 	r600_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
@@ -2362,19 +2365,33 @@ int r600_copy_blit(struct radeon_device *rdev,
 
 	mutex_lock(&rdev->r600_blit.mutex);
 	rdev->r600_blit.vb_ib = NULL;
-	r = r600_blit_prepare_copy(rdev, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
+	r = r600_blit_prepare_copy(rdev, num_gpu_pages);
 	if (r) {
 		if (rdev->r600_blit.vb_ib)
 			radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 		mutex_unlock(&rdev->r600_blit.mutex);
 		return r;
 	}
-	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages * RADEON_GPU_PAGE_SIZE);
+	r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages);
 	r600_blit_done_copy(rdev, fence);
 	mutex_unlock(&rdev->r600_blit.mutex);
 	return 0;
 }
 
+void r600_blit_suspend(struct radeon_device *rdev)
+{
+	int r;
+
+	/* unpin shaders bo */
+	if (rdev->r600_blit.shader_obj) {
+		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
+		if (!r) {
+			radeon_bo_unpin(rdev->r600_blit.shader_obj);
+			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
+		}
+	}
+}
+
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
 			 uint32_t tiling_flags, uint32_t pitch,
 			 uint32_t offset, uint32_t obj_size)
@@ -2494,8 +2511,6 @@ int r600_resume(struct radeon_device *rdev)
 
 int r600_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r600_cp_stop(rdev);
@@ -2503,14 +2518,8 @@ int r600_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (!r) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
@@ -3137,7 +3146,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -3238,7 +3247,7 @@ void r600_irq_disable(struct radeon_device *rdev)
 	r600_disable_interrupt_state(rdev);
 }
 
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f1043448d25..3c031a48205d 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
-static inline void
+static void
 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
 {
 	u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 		    u32 sync_type, u32 size, u64 mc_addr)
 {
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 set_shaders(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
 			    R600_SH_ACTION_ENA, 512, gpu_addr);
 }
 
-static inline void
+static void
 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 {
 	uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 				    R600_VC_ACTION_ENA, 48, gpu_addr);
 }
 
-static inline void
+static void
 set_tex_resource(drm_radeon_private_t *dev_priv,
 		 int format, int w, int h, int pitch, u64 gpu_addr)
 {
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
 
 }
 
-static inline void
+static void
 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 {
 	RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 draw_auto(drm_radeon_private_t *dev_priv)
 {
 	RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
 	COMMIT_RING();
 }
 
-static inline void
+static void
 set_default_state(drm_radeon_private_t *dev_priv)
 {
 	int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
 }
 
 
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
 	return 0;
 }
 
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 }
 
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	return (((char *)dev->agp_buffer_map->handle +
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3f8cb6..c4cf1308d4a1 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -42,6 +42,9 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
+#define RECT_UNIT_H           32
+#define RECT_UNIT_W           (RADEON_GPU_PAGE_SIZE / 4 / RECT_UNIT_H)
+
 /* emits 21 on rv770+, 23 on r600 */
 static void
 set_render_target(struct radeon_device *rdev, int format,
@@ -54,7 +57,9 @@ set_render_target(struct radeon_device *rdev, int format,
 	if (h < 8)
 		h = 8;
 
-	cb_color_info = ((format << 2) | (1 << 27) | (1 << 8));
+	cb_color_info = CB_FORMAT(format) |
+		CB_SOURCE_FORMAT(CB_SF_EXPORT_NORM) |
+		CB_ARRAY_MODE(ARRAY_1D_TILED_THIN1);
 	pitch = (w / 8) - 1;
 	slice = ((w * h) / 64) - 1;
 
@@ -164,9 +169,10 @@ set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
 {
 	u32 sq_vtx_constant_word2;
 
-	sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
+	sq_vtx_constant_word2 = SQ_VTXC_BASE_ADDR_HI(upper_32_bits(gpu_addr) & 0xff) |
+		SQ_VTXC_STRIDE(16);
 #ifdef __BIG_ENDIAN
-	sq_vtx_constant_word2 |= (2 << 30);
+	sq_vtx_constant_word2 |=  SQ_VTXC_ENDIAN_SWAP(SQ_ENDIAN_8IN32);
 #endif
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
@@ -202,18 +208,19 @@ set_tex_resource(struct radeon_device *rdev,
 	if (h < 1)
 		h = 1;
 
-	sq_tex_resource_word0 = (1 << 0) | (1 << 3);
-	sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 8) |
-				  ((w - 1) << 19));
+	sq_tex_resource_word0 = S_038000_DIM(V_038000_SQ_TEX_DIM_2D) |
+		S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
+	sq_tex_resource_word0 |= S_038000_PITCH((pitch >> 3) - 1) |
+		S_038000_TEX_WIDTH(w - 1);
 
-	sq_tex_resource_word1 = (format << 26);
-	sq_tex_resource_word1 |= ((h - 1) << 0);
+	sq_tex_resource_word1 = S_038004_DATA_FORMAT(format);
+	sq_tex_resource_word1 |= S_038004_TEX_HEIGHT(h - 1);
 
-	sq_tex_resource_word4 = ((1 << 14) |
-				 (0 << 16) |
-				 (1 << 19) |
-				 (2 << 22) |
-				 (3 << 25));
+	sq_tex_resource_word4 = S_038010_REQUEST_SIZE(1) |
+		S_038010_DST_SEL_X(SQ_SEL_X) |
+		S_038010_DST_SEL_Y(SQ_SEL_Y) |
+		S_038010_DST_SEL_Z(SQ_SEL_Z) |
+		S_038010_DST_SEL_W(SQ_SEL_W);
 
 	radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 7));
 	radeon_ring_write(rdev, 0);
@@ -450,7 +457,7 @@ set_default_state(struct radeon_device *rdev)
 	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -483,6 +490,27 @@ int r600_blit_init(struct radeon_device *rdev)
 	u32 packet2s[16];
 	int num_packet2s = 0;
 
+	rdev->r600_blit.primitives.set_render_target = set_render_target;
+	rdev->r600_blit.primitives.cp_set_surface_sync = cp_set_surface_sync;
+	rdev->r600_blit.primitives.set_shaders = set_shaders;
+	rdev->r600_blit.primitives.set_vtx_resource = set_vtx_resource;
+	rdev->r600_blit.primitives.set_tex_resource = set_tex_resource;
+	rdev->r600_blit.primitives.set_scissors = set_scissors;
+	rdev->r600_blit.primitives.draw_auto = draw_auto;
+	rdev->r600_blit.primitives.set_default_state = set_default_state;
+
+	rdev->r600_blit.ring_size_common = 40; /* shaders + def state */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for VB IB */
+	rdev->r600_blit.ring_size_common += 5; /* done copy */
+	rdev->r600_blit.ring_size_common += 10; /* fence emit for done copy */
+
+	rdev->r600_blit.ring_size_per_loop = 76;
+	/* set_render_target emits 2 extra dwords on rv6xx */
+	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
+		rdev->r600_blit.ring_size_per_loop += 2;
+
+	rdev->r600_blit.max_dim = 8192;
+
 	/* pin copy shader into vram if already initialized */
 	if (rdev->r600_blit.shader_obj)
 		goto done;
@@ -600,47 +628,80 @@ static void r600_vb_ib_put(struct radeon_device *rdev)
 	radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
 }
 
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
+static unsigned r600_blit_create_rect(unsigned num_gpu_pages,
+				      int *width, int *height, int max_dim)
+{
+	unsigned max_pages;
+	unsigned pages = num_gpu_pages;
+	int w, h;
+
+	if (num_gpu_pages == 0) {
+		/* not supposed to be called with no pages, but just in case */
+		h = 0;
+		w = 0;
+		pages = 0;
+		WARN_ON(1);
+	} else {
+		int rect_order = 2;
+		h = RECT_UNIT_H;
+		while (num_gpu_pages / rect_order) {
+			h *= 2;
+			rect_order *= 4;
+			if (h >= max_dim) {
+				h = max_dim;
+				break;
+			}
+		}
+		max_pages = (max_dim * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		if (pages > max_pages)
+			pages = max_pages;
+		w = (pages * RECT_UNIT_W * RECT_UNIT_H) / h;
+		w = (w / RECT_UNIT_W) * RECT_UNIT_W;
+		pages = (w * h) / (RECT_UNIT_W * RECT_UNIT_H);
+		BUG_ON(pages == 0);
+	}
+
+
+	DRM_DEBUG("blit_rectangle: h=%d, w=%d, pages=%d\n", h, w, pages);
+
+	/* return width and height only of the caller wants it */
+	if (height)
+		*height = h;
+	if (width)
+		*width = w;
+
+	return pages;
+}
+
+
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages)
 {
 	int r;
-	int ring_size, line_size;
-	int max_size;
-	/* loops of emits 64 + fence emit possible */
-	int dwords_per_loop = 76, num_loops;
+	int ring_size;
+	int num_loops = 0;
+	int dwords_per_loop = rdev->r600_blit.ring_size_per_loop;
 
 	r = r600_vb_ib_get(rdev);
 	if (r)
 		return r;
 
-	/* set_render_target emits 2 extra dwords on rv6xx */
-	if (rdev->family > CHIP_R600 && rdev->family < CHIP_RV770)
-		dwords_per_loop += 2;
-
-	/* 8 bpp vs 32 bpp for xfer unit */
-	if (size_bytes & 3)
-		line_size = 8192;
-	else
-		line_size = 8192*4;
-
-	max_size = 8192 * line_size;
+	/* num loops */
+	while (num_gpu_pages) {
+		num_gpu_pages -=
+			r600_blit_create_rect(num_gpu_pages, NULL, NULL,
+					      rdev->r600_blit.max_dim);
+		num_loops++;
+	}
 
-	/* major loops cover the max size transfer */
-	num_loops = ((size_bytes + max_size) / max_size);
-	/* minor loops cover the extra non aligned bits */
-	num_loops += ((size_bytes % line_size) ? 1 : 0);
 	/* calculate number of loops correctly */
 	ring_size = num_loops * dwords_per_loop;
-	/* set default  + shaders */
-	ring_size += 40; /* shaders + def state */
-	ring_size += 10; /* fence emit for VB IB */
-	ring_size += 5; /* done copy */
-	ring_size += 10; /* fence emit for done copy */
+	ring_size += rdev->r600_blit.ring_size_common;
 	r = radeon_ring_lock(rdev, ring_size);
 	if (r)
 		return r;
 
-	set_default_state(rdev); /* 14 */
-	set_shaders(rdev); /* 26 */
+	rdev->r600_blit.primitives.set_default_state(rdev);
+	rdev->r600_blit.primitives.set_shaders(rdev);
 	return 0;
 }
 
@@ -659,182 +720,64 @@ void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
 
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes)
+			unsigned num_gpu_pages)
 {
-	int max_bytes;
 	u64 vb_gpu_addr;
 	u32 *vb;
 
-	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
-		  size_bytes, rdev->r600_blit.vb_used);
+	DRM_DEBUG("emitting copy %16llx %16llx %d %d\n",
+		  src_gpu_addr, dst_gpu_addr,
+		  num_gpu_pages, rdev->r600_blit.vb_used);
 	vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
-	if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
-		max_bytes = 8192;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = src_gpu_addr & 255;
-			int dst_x = dst_gpu_addr & 255;
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
-
-			vb[0] = i2f(dst_x);
-			vb[1] = 0;
-			vb[2] = i2f(src_x);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f(dst_x + cur_size);
-			vb[9] = i2f(h);
-			vb[10] = i2f(src_x + cur_size);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8,
-					 src_x + cur_size, h, src_x + cur_size,
-					 src_gpu_addr);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
 
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8,
-					  dst_x + cur_size, h,
-					  dst_gpu_addr);
+	while (num_gpu_pages) {
+		int w, h;
+		unsigned size_in_bytes;
+		unsigned pages_per_loop =
+			r600_blit_create_rect(num_gpu_pages, &w, &h,
+					      rdev->r600_blit.max_dim);
 
-			/* scissors 12 */
-			set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);
+		size_in_bytes = pages_per_loop * RADEON_GPU_PAGE_SIZE;
+		DRM_DEBUG("rectangle w=%d h=%d\n", w, h);
 
-			/* 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
+		if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
+			WARN_ON(1);
 		}
-	} else {
-		max_bytes = 8192 * 4;
-
-		while (size_bytes) {
-			int cur_size = size_bytes;
-			int src_x = (src_gpu_addr & 255);
-			int dst_x = (dst_gpu_addr & 255);
-			int h = 1;
-			src_gpu_addr = src_gpu_addr & ~255ULL;
-			dst_gpu_addr = dst_gpu_addr & ~255ULL;
-
-			if (!src_x && !dst_x) {
-				h = (cur_size / max_bytes);
-				if (h > 8192)
-					h = 8192;
-				if (h == 0)
-					h = 1;
-				else
-					cur_size = max_bytes;
-			} else {
-				if (cur_size > max_bytes)
-					cur_size = max_bytes;
-				if (cur_size > (max_bytes - dst_x))
-					cur_size = (max_bytes - dst_x);
-				if (cur_size > (max_bytes - src_x))
-					cur_size = (max_bytes - src_x);
-			}
-
-			if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
-				WARN_ON(1);
-			}
 
-			vb[0] = i2f(dst_x / 4);
-			vb[1] = 0;
-			vb[2] = i2f(src_x / 4);
-			vb[3] = 0;
-
-			vb[4] = i2f(dst_x / 4);
-			vb[5] = i2f(h);
-			vb[6] = i2f(src_x / 4);
-			vb[7] = i2f(h);
-
-			vb[8] = i2f((dst_x + cur_size) / 4);
-			vb[9] = i2f(h);
-			vb[10] = i2f((src_x + cur_size) / 4);
-			vb[11] = i2f(h);
-
-			/* src 9 */
-			set_tex_resource(rdev, FMT_8_8_8_8,
-					 (src_x + cur_size) / 4,
-					 h, (src_x + cur_size) / 4,
-					 src_gpu_addr);
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);
-
-			/* dst 23 */
-			set_render_target(rdev, COLOR_8_8_8_8,
-					  (dst_x + cur_size) / 4, h,
-					  dst_gpu_addr);
-
-			/* scissors 12  */
-			set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);
-
-			/* Vertex buffer setup 14 */
-			vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
-			set_vtx_resource(rdev, vb_gpu_addr);
-
-			/* draw 10 */
-			draw_auto(rdev);
-
-			/* 5 */
-			cp_set_surface_sync(rdev,
-					    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
-					    cur_size * h, dst_gpu_addr);
-
-			/* 78 ring dwords per loop */
-			vb += 12;
-			rdev->r600_blit.vb_used += 12 * 4;
-
-			src_gpu_addr += cur_size * h;
-			dst_gpu_addr += cur_size * h;
-			size_bytes -= cur_size * h;
-		}
+		vb[0] = 0;
+		vb[1] = 0;
+		vb[2] = 0;
+		vb[3] = 0;
+
+		vb[4] = 0;
+		vb[5] = i2f(h);
+		vb[6] = 0;
+		vb[7] = i2f(h);
+
+		vb[8] = i2f(w);
+		vb[9] = i2f(h);
+		vb[10] = i2f(w);
+		vb[11] = i2f(h);
+
+		rdev->r600_blit.primitives.set_tex_resource(rdev, FMT_8_8_8_8,
+							    w, h, w, src_gpu_addr);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+							       PACKET3_TC_ACTION_ENA,
+							       size_in_bytes, src_gpu_addr);
+		rdev->r600_blit.primitives.set_render_target(rdev, COLOR_8_8_8_8,
+							     w, h, dst_gpu_addr);
+		rdev->r600_blit.primitives.set_scissors(rdev, 0, 0, w, h);
+		vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
+		rdev->r600_blit.primitives.set_vtx_resource(rdev, vb_gpu_addr);
+		rdev->r600_blit.primitives.draw_auto(rdev);
+		rdev->r600_blit.primitives.cp_set_surface_sync(rdev,
+				    PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
+				    size_in_bytes, dst_gpu_addr);
+
+		vb += 12;
+		rdev->r600_blit.vb_used += 4*12;
+		src_gpu_addr += size_in_bytes;
+		dst_gpu_addr += size_in_bytes;
+		num_gpu_pages -= pages_per_loop;
 	}
 }
-
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cf83aa05a684..0a2e023c1557 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = {
 	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
-static inline bool fmt_is_valid_color(u32 format)
+static bool fmt_is_valid_color(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format)
 	return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 	return false;
 }
 
-static inline int fmt_get_blocksize(u32 format)
+static int fmt_get_blocksize(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return 0;
@@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format)
 	return color_formats_table[format].blocksize;
 }
 
-static inline int fmt_get_nblocksx(u32 format, u32 w)
+static int fmt_get_nblocksx(u32 format, u32 w)
 {
 	unsigned bw;
 
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w)
 	return (w + bw - 1) / bw;
 }
 
-static inline int fmt_get_nblocksy(u32 format, u32 h)
+static int fmt_get_nblocksy(u32 format, u32 h)
 {
 	unsigned bh;
 
@@ -223,25 +223,6 @@ static inline int fmt_get_nblocksy(u32 format, u32 h)
 	return (h + bh - 1) / bh;
 }
 
-static inline int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- 	unsigned res;
-
-	if (format >= ARRAY_SIZE(color_formats_table))
-		goto fail;
-
-	res = color_formats_table[format].blocksize;
-	if (res == 0)
-		goto fail;
-
-	*bpe = res;
-	return 0;
-
-fail:
-	*bpe = 16;
-	return -EINVAL;
-}
-
 struct array_mode_checker {
 	int array_mode;
 	u32 group_size;
@@ -252,7 +233,7 @@ struct array_mode_checker {
 };
 
 /* returns alignment in pixels for pitch/height/depth and bytes for base */
-static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+static int r600_get_array_mode_alignment(struct array_mode_checker *values,
 						u32 *pitch_align,
 						u32 *height_align,
 						u32 *depth_align,
@@ -331,7 +312,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
 	track->db_depth_control = 0xFFFFFFFF;
 }
 
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	struct r600_cs_track *track = p->track;
 	u32 slice_tile_max, size, tmp;
@@ -737,7 +718,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
  * Check next packet is relocation packet3, do bo validation and compute
  * GPU offset using the provided start.
  **/
-static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_packet p3reloc;
 	int r;
@@ -911,7 +892,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -1215,7 +1196,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
 	return 0;
 }
 
-static inline unsigned mip_minify(unsigned size, unsigned level)
+static unsigned mip_minify(unsigned size, unsigned level)
 {
 	unsigned val;
 
@@ -1285,7 +1266,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 					      struct radeon_bo *texture,
 					      struct radeon_bo *mipmap,
 					      u64 base_offset,
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 0245ae6c204e..bfe1b5d92afe 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -79,6 +79,11 @@
 #define CB_COLOR0_SIZE                                  0x28060
 #define CB_COLOR0_VIEW                                  0x28080
 #define CB_COLOR0_INFO                                  0x280a0
+#	define CB_FORMAT(x)				((x) << 2)
+#       define CB_ARRAY_MODE(x)                         ((x) << 8)
+#	define CB_SOURCE_FORMAT(x)			((x) << 27)
+#	define CB_SF_EXPORT_FULL			0
+#	define CB_SF_EXPORT_NORM			1
 #define CB_COLOR0_TILE                                  0x280c0
 #define CB_COLOR0_FRAG                                  0x280e0
 #define CB_COLOR0_MASK                                  0x28100
@@ -417,6 +422,17 @@
 #define	SQ_PGM_START_VS					0x28858
 #define SQ_PGM_RESOURCES_VS                             0x28868
 #define SQ_PGM_CF_OFFSET_VS                             0x288d0
+
+#define SQ_VTX_CONSTANT_WORD0_0				0x30000
+#define SQ_VTX_CONSTANT_WORD1_0				0x30004
+#define SQ_VTX_CONSTANT_WORD2_0				0x30008
+#	define SQ_VTXC_BASE_ADDR_HI(x)			((x) << 0)
+#	define SQ_VTXC_STRIDE(x)			((x) << 8)
+#	define SQ_VTXC_ENDIAN_SWAP(x)			((x) << 30)
+#	define SQ_ENDIAN_NONE				0
+#	define SQ_ENDIAN_8IN16				1
+#	define SQ_ENDIAN_8IN32				2
+#define SQ_VTX_CONSTANT_WORD3_0				0x3000c
 #define	SQ_VTX_CONSTANT_WORD6_0				0x38018
 #define		S__SQ_VTX_CONSTANT_TYPE(x)			(((x) & 3) << 30)
 #define		G__SQ_VTX_CONSTANT_TYPE(x)			(((x) >> 30) & 3)
@@ -1352,6 +1368,12 @@
 #define   S_038010_DST_SEL_W(x)                        (((x) & 0x7) << 25)
 #define   G_038010_DST_SEL_W(x)                        (((x) >> 25) & 0x7)
 #define   C_038010_DST_SEL_W                           0xF1FFFFFF
+#	define SQ_SEL_X					0
+#	define SQ_SEL_Y					1
+#	define SQ_SEL_Z					2
+#	define SQ_SEL_W					3
+#	define SQ_SEL_0					4
+#	define SQ_SEL_1					5
 #define   S_038010_BASE_LEVEL(x)                       (((x) & 0xF) << 28)
 #define   G_038010_BASE_LEVEL(x)                       (((x) >> 28) & 0xF)
 #define   C_038010_BASE_LEVEL                          0x0FFFFFFF
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1e056b35b29..e3170c794c1d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -102,7 +102,7 @@ extern int radeon_pcie_gen2;
 #define RADEON_FENCE_JIFFIES_TIMEOUT	(HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
 #define RADEON_IB_POOL_SIZE		16
-#define RADEON_DEBUGFS_MAX_NUM_FILES	32
+#define RADEON_DEBUGFS_MAX_COMPONENTS	32
 #define RADEONFB_CONN_LIMIT		4
 #define RADEON_BIOS_NUM_SCRATCH		8
 
@@ -523,9 +523,30 @@ struct r600_ih {
 	bool                    enabled;
 };
 
+struct r600_blit_cp_primitives {
+	void (*set_render_target)(struct radeon_device *rdev, int format,
+				  int w, int h, u64 gpu_addr);
+	void (*cp_set_surface_sync)(struct radeon_device *rdev,
+				    u32 sync_type, u32 size,
+				    u64 mc_addr);
+	void (*set_shaders)(struct radeon_device *rdev);
+	void (*set_vtx_resource)(struct radeon_device *rdev, u64 gpu_addr);
+	void (*set_tex_resource)(struct radeon_device *rdev,
+				 int format, int w, int h, int pitch,
+				 u64 gpu_addr);
+	void (*set_scissors)(struct radeon_device *rdev, int x1, int y1,
+			     int x2, int y2);
+	void (*draw_auto)(struct radeon_device *rdev);
+	void (*set_default_state)(struct radeon_device *rdev);
+};
+
 struct r600_blit {
 	struct mutex		mutex;
 	struct radeon_bo	*shader_obj;
+	struct r600_blit_cp_primitives primitives;
+	int max_dim;
+	int ring_size_common;
+	int ring_size_per_loop;
 	u64 shader_gpu_addr;
 	u32 vs_offset, ps_offset;
 	u32 state_offset;
@@ -534,6 +555,8 @@ struct r600_blit {
 	struct radeon_ib *vb_ib;
 };
 
+void r600_blit_suspend(struct radeon_device *rdev);
+
 int radeon_ib_get(struct radeon_device *rdev, struct radeon_ib **ib);
 void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib);
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
@@ -601,32 +624,7 @@ struct radeon_cs_parser {
 
 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
 extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
 
 struct radeon_cs_packet {
 	unsigned	idx;
@@ -869,7 +867,7 @@ struct radeon_pm {
 /*
  * Benchmarking
  */
-void radeon_benchmark(struct radeon_device *rdev);
+void radeon_benchmark(struct radeon_device *rdev, int test_number);
 
 
 /*
@@ -1252,45 +1250,10 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-	if (reg < rdev->rmmio_size)
-		return readl((rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		return readl((rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-	if (reg < rdev->rmmio_size)
-		writel(v, (rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		writel(v, (rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
-	if (reg < rdev->rio_mem_size)
-		return ioread32(rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	if (reg < rdev->rio_mem_size)
-		iowrite32(v, rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 /*
  * Cast helper
@@ -1413,19 +1376,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
+
+#if DRM_DEBUG_CODE == 0
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
-#if DRM_DEBUG_CODE
-	if (rdev->cp.count_dw <= 0) {
-		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
-	}
-#endif
 	rdev->cp.ring[rdev->cp.wptr++] = v;
 	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	rdev->cp.count_dw--;
 	rdev->cp.ring_free_dw--;
 }
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
 
 /*
  * ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index df8218bb83a6..e2944566ffea 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -765,9 +765,9 @@ static struct radeon_asic evergreen_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -812,9 +812,9 @@ static struct radeon_asic sumo_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = NULL,
@@ -859,9 +859,9 @@ static struct radeon_asic btc_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
@@ -906,9 +906,9 @@ static struct radeon_asic cayman_asic = {
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
-	.copy_blit = &evergreen_copy_blit,
+	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
-	.copy = &evergreen_copy_blit,
+	.copy = &r600_copy_blit,
 	.get_engine_clock = &radeon_atom_get_engine_clock,
 	.set_engine_clock = &radeon_atom_set_engine_clock,
 	.get_memory_clock = &radeon_atom_get_memory_clock,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3dedaa07aac1..85f14f0337e4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -364,11 +364,11 @@ void r600_hdmi_init(struct drm_encoder *encoder);
 int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder);
 void r600_hdmi_update_audio_settings(struct drm_encoder *encoder);
 /* r600 blit */
-int r600_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
+int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages);
 void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
 void r600_kms_blit_copy(struct radeon_device *rdev,
 			u64 src_gpu_addr, u64 dst_gpu_addr,
-			int size_bytes);
+			unsigned num_gpu_pages);
 
 /*
  * rv770,rv730,rv710,rv740
@@ -401,9 +401,6 @@ bool evergreen_gpu_is_lockup(struct radeon_device *rdev);
 int evergreen_asic_reset(struct radeon_device *rdev);
 void evergreen_bandwidth_update(struct radeon_device *rdev);
 void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
-int evergreen_copy_blit(struct radeon_device *rdev,
-			uint64_t src_offset, uint64_t dst_offset,
-			unsigned num_gpu_pages, struct radeon_fence *fence);
 void evergreen_hpd_init(struct radeon_device *rdev);
 void evergreen_hpd_fini(struct radeon_device *rdev);
 bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd);
@@ -421,13 +418,6 @@ extern u32 evergreen_page_flip(struct radeon_device *rdev, int crtc, u64 crtc_ba
 extern void evergreen_post_page_flip(struct radeon_device *rdev, int crtc);
 void evergreen_disable_interrupt_state(struct radeon_device *rdev);
 int evergreen_blit_init(struct radeon_device *rdev);
-void evergreen_blit_fini(struct radeon_device *rdev);
-/* evergreen blit */
-int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes);
-void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence);
-void evergreen_kms_blit_copy(struct radeon_device *rdev,
-			     u64 src_gpu_addr, u64 dst_gpu_addr,
-			     int size_bytes);
 
 /*
  * cayman
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b61584cdb..08d0b94332e6 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
 	struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
 };
 
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
 							       uint8_t id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
 	}
 }
 
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
 							u8 id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c
index 10191d9372d8..5cafc90de7f8 100644
--- a/drivers/gpu/drm/radeon/radeon_benchmark.c
+++ b/drivers/gpu/drm/radeon/radeon_benchmark.c
@@ -26,21 +26,81 @@
 #include "radeon_reg.h"
 #include "radeon.h"
 
-void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
-			   unsigned sdomain, unsigned ddomain)
+#define RADEON_BENCHMARK_COPY_BLIT 1
+#define RADEON_BENCHMARK_COPY_DMA  0
+
+#define RADEON_BENCHMARK_ITERATIONS 1024
+#define RADEON_BENCHMARK_COMMON_MODES_N 17
+
+static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size,
+				    uint64_t saddr, uint64_t daddr,
+				    int flag, int n)
+{
+	unsigned long start_jiffies;
+	unsigned long end_jiffies;
+	struct radeon_fence *fence = NULL;
+	int i, r;
+
+	start_jiffies = jiffies;
+	for (i = 0; i < n; i++) {
+		r = radeon_fence_create(rdev, &fence);
+		if (r)
+			return r;
+
+		switch (flag) {
+		case RADEON_BENCHMARK_COPY_DMA:
+			r = radeon_copy_dma(rdev, saddr, daddr,
+					    size / RADEON_GPU_PAGE_SIZE,
+					    fence);
+			break;
+		case RADEON_BENCHMARK_COPY_BLIT:
+			r = radeon_copy_blit(rdev, saddr, daddr,
+					     size / RADEON_GPU_PAGE_SIZE,
+					     fence);
+			break;
+		default:
+			DRM_ERROR("Unknown copy method\n");
+			r = -EINVAL;
+		}
+		if (r)
+			goto exit_do_move;
+		r = radeon_fence_wait(fence, false);
+		if (r)
+			goto exit_do_move;
+		radeon_fence_unref(&fence);
+	}
+	end_jiffies = jiffies;
+	r = jiffies_to_msecs(end_jiffies - start_jiffies);
+
+exit_do_move:
+	if (fence)
+		radeon_fence_unref(&fence);
+	return r;
+}
+
+
+static void radeon_benchmark_log_results(int n, unsigned size,
+					 unsigned int time,
+					 unsigned sdomain, unsigned ddomain,
+					 char *kind)
+{
+	unsigned int throughput = (n * (size >> 10)) / time;
+	DRM_INFO("radeon: %s %u bo moves of %u kB from"
+		 " %d to %d in %u ms, throughput: %u Mb/s or %u MB/s\n",
+		 kind, n, size >> 10, sdomain, ddomain, time,
+		 throughput * 8, throughput);
+}
+
+static void radeon_benchmark_move(struct radeon_device *rdev, unsigned size,
+				  unsigned sdomain, unsigned ddomain)
 {
 	struct radeon_bo *dobj = NULL;
 	struct radeon_bo *sobj = NULL;
-	struct radeon_fence *fence = NULL;
 	uint64_t saddr, daddr;
-	unsigned long start_jiffies;
-	unsigned long end_jiffies;
-	unsigned long time;
-	unsigned i, n, size;
-	int r;
+	int r, n;
+	unsigned int time;
 
-	size = bsize;
-	n = 1024;
+	n = RADEON_BENCHMARK_ITERATIONS;
 	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, sdomain, &sobj);
 	if (r) {
 		goto out_cleanup;
@@ -67,65 +127,26 @@ void radeon_benchmark_move(struct radeon_device *rdev, unsigned bsize,
 	}
 
 	/* r100 doesn't have dma engine so skip the test */
-	if (rdev->asic->copy_dma) {
-
-		start_jiffies = jiffies;
-		for (i = 0; i < n; i++) {
-			r = radeon_fence_create(rdev, &fence);
-			if (r) {
-				goto out_cleanup;
-			}
-
-			r = radeon_copy_dma(rdev, saddr, daddr,
-					size / RADEON_GPU_PAGE_SIZE, fence);
-
-			if (r) {
-				goto out_cleanup;
-			}
-			r = radeon_fence_wait(fence, false);
-			if (r) {
-				goto out_cleanup;
-			}
-			radeon_fence_unref(&fence);
-		}
-		end_jiffies = jiffies;
-		time = end_jiffies - start_jiffies;
-		time = jiffies_to_msecs(time);
-		if (time > 0) {
-			i = ((n * size) >> 10) / time;
-			printk(KERN_INFO "radeon: dma %u bo moves of %ukb from"
-					" %d to %d in %lums (%ukb/ms %ukb/s %uM/s)\n",
-					n, size >> 10,
-					sdomain, ddomain, time,
-					i, i * 1000, (i * 1000) / 1024);
-		}
-	}
-
-	start_jiffies = jiffies;
-	for (i = 0; i < n; i++) {
-		r = radeon_fence_create(rdev, &fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, fence);
-		if (r) {
-			goto out_cleanup;
-		}
-		r = radeon_fence_wait(fence, false);
-		if (r) {
+	/* also, VRAM-to-VRAM test doesn't make much sense for DMA */
+	/* skip it as well if domains are the same */
+	if ((rdev->asic->copy_dma) && (sdomain != ddomain)) {
+		time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+						RADEON_BENCHMARK_COPY_DMA, n);
+		if (time < 0)
 			goto out_cleanup;
-		}
-		radeon_fence_unref(&fence);
-	}
-	end_jiffies = jiffies;
-	time = end_jiffies - start_jiffies;
-	time = jiffies_to_msecs(time);
-	if (time > 0) {
-		i = ((n * size) >> 10) / time;
-		printk(KERN_INFO "radeon: blit %u bo moves of %ukb from %d to %d"
-		       " in %lums (%ukb/ms %ukb/s %uM/s)\n", n, size >> 10,
-		       sdomain, ddomain, time, i, i * 1000, (i * 1000) / 1024);
+		if (time > 0)
+			radeon_benchmark_log_results(n, size, time,
+						     sdomain, ddomain, "dma");
 	}
+
+	time = radeon_benchmark_do_move(rdev, size, saddr, daddr,
+					RADEON_BENCHMARK_COPY_BLIT, n);
+	if (time < 0)
+		goto out_cleanup;
+	if (time > 0)
+		radeon_benchmark_log_results(n, size, time,
+					     sdomain, ddomain, "blit");
+
 out_cleanup:
 	if (sobj) {
 		r = radeon_bo_reserve(sobj, false);
@@ -143,18 +164,92 @@ out_cleanup:
 		}
 		radeon_bo_unref(&dobj);
 	}
-	if (fence) {
-		radeon_fence_unref(&fence);
-	}
+
 	if (r) {
-		printk(KERN_WARNING "Error while benchmarking BO move.\n");
+		DRM_ERROR("Error while benchmarking BO move.\n");
 	}
 }
 
-void radeon_benchmark(struct radeon_device *rdev)
+void radeon_benchmark(struct radeon_device *rdev, int test_number)
 {
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
-			      RADEON_GEM_DOMAIN_VRAM);
-	radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
-			      RADEON_GEM_DOMAIN_GTT);
+	int i;
+	int common_modes[RADEON_BENCHMARK_COMMON_MODES_N] = {
+		640 * 480 * 4,
+		720 * 480 * 4,
+		800 * 600 * 4,
+		848 * 480 * 4,
+		1024 * 768 * 4,
+		1152 * 768 * 4,
+		1280 * 720 * 4,
+		1280 * 800 * 4,
+		1280 * 854 * 4,
+		1280 * 960 * 4,
+		1280 * 1024 * 4,
+		1440 * 900 * 4,
+		1400 * 1050 * 4,
+		1680 * 1050 * 4,
+		1600 * 1200 * 4,
+		1920 * 1080 * 4,
+		1920 * 1200 * 4
+	};
+
+	switch (test_number) {
+	case 1:
+		/* simple test, VRAM to GTT and GTT to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_GTT,
+				      RADEON_GEM_DOMAIN_VRAM);
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 2:
+		/* simple test, VRAM to VRAM */
+		radeon_benchmark_move(rdev, 1024*1024, RADEON_GEM_DOMAIN_VRAM,
+				      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 3:
+		/* GTT to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 4:
+		/* VRAM to GTT, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 5:
+		/* VRAM to VRAM, buffer size sweep, powers of 2 */
+		for (i = 1; i <= 65536; i <<= 1)
+			radeon_benchmark_move(rdev, i*1024,
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 6:
+		/* GTT to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_GTT,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+	case 7:
+		/* VRAM to GTT, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_GTT);
+		break;
+	case 8:
+		/* VRAM to VRAM, buffer size sweep, common modes */
+		for (i = 1; i < RADEON_BENCHMARK_COMMON_MODES_N; i++)
+			radeon_benchmark_move(rdev, common_modes[i],
+					      RADEON_GEM_DOMAIN_VRAM,
+					      RADEON_GEM_DOMAIN_VRAM);
+		break;
+
+	default:
+		DRM_ERROR("Unknown benchmark\n");
+	}
 }
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 63675241c7ff..8bf83c4b4147 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -620,8 +620,8 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
 		i2c.y_data_mask = 0x80;
 	} else {
 		/* default masks for ddc pads */
-		i2c.mask_clk_mask = RADEON_GPIO_EN_1;
-		i2c.mask_data_mask = RADEON_GPIO_EN_0;
+		i2c.mask_clk_mask = RADEON_GPIO_MASK_1;
+		i2c.mask_data_mask = RADEON_GPIO_MASK_0;
 		i2c.a_clk_mask = RADEON_GPIO_A_1;
 		i2c.a_data_mask = RADEON_GPIO_A_0;
 		i2c.en_clk_mask = RADEON_GPIO_EN_1;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 449c3d8c6836..dec6cbe6a0a6 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -724,6 +724,7 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -750,12 +751,21 @@ radeon_vga_detect(struct drm_connector *connector, bool force)
 	} else {
 
 		/* if we aren't forcing don't do destructive polling */
-		if (!force)
-			return connector->status;
+		if (!force) {
+			/* only return the previous status if we last
+			 * detected a monitor via load.
+			 */
+			if (radeon_connector->detected_by_load)
+				return connector->status;
+			else
+				return ret;
+		}
 
 		if (radeon_connector->dac_load_detect && encoder) {
 			encoder_funcs = encoder->helper_private;
 			ret = encoder_funcs->detect(encoder, connector);
+			if (ret == connector_status_connected)
+				radeon_connector->detected_by_load = true;
 		}
 	}
 
@@ -897,6 +907,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 		dret = radeon_ddc_probe(radeon_connector,
 					radeon_connector->requires_extended_probe);
 	if (dret) {
+		radeon_connector->detected_by_load = false;
 		if (radeon_connector->edid) {
 			kfree(radeon_connector->edid);
 			radeon_connector->edid = NULL;
@@ -959,8 +970,18 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 	if ((ret == connector_status_connected) && (radeon_connector->use_digital == true))
 		goto out;
 
+	/* DVI-D and HDMI-A are digital only */
+	if ((connector->connector_type == DRM_MODE_CONNECTOR_DVID) ||
+	    (connector->connector_type == DRM_MODE_CONNECTOR_HDMIA))
+		goto out;
+
+	/* if we aren't forcing don't do destructive polling */
 	if (!force) {
-		ret = connector->status;
+		/* only return the previous status if we last
+		 * detected a monitor via load.
+		 */
+		if (radeon_connector->detected_by_load)
+			ret = connector->status;
 		goto out;
 	}
 
@@ -984,6 +1005,7 @@ radeon_dvi_detect(struct drm_connector *connector, bool force)
 					ret = encoder_funcs->detect(encoder, connector);
 					if (ret == connector_status_connected) {
 						radeon_connector->use_digital = false;
+						radeon_connector->detected_by_load = true;
 					}
 				}
 				break;
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index b51e15725c6e..c33bc914d93d 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -750,14 +750,15 @@ int radeon_device_init(struct radeon_device *rdev,
 
 	/* set DMA mask + need_dma32 flags.
 	 * PCIE - can handle 40-bits.
-	 * IGP - can handle 40-bits (in theory)
+	 * IGP - can handle 40-bits
 	 * AGP - generally dma32 is safest
-	 * PCI - only dma32
+	 * PCI - dma32 for legacy pci gart, 40 bits on newer asics
 	 */
 	rdev->need_dma32 = false;
 	if (rdev->flags & RADEON_IS_AGP)
 		rdev->need_dma32 = true;
-	if (rdev->flags & RADEON_IS_PCI)
+	if ((rdev->flags & RADEON_IS_PCI) &&
+	    (rdev->family < CHIP_RS400))
 		rdev->need_dma32 = true;
 
 	dma_bits = rdev->need_dma32 ? 32 : 40;
@@ -817,7 +818,7 @@ int radeon_device_init(struct radeon_device *rdev,
 		radeon_test_moves(rdev);
 	}
 	if (radeon_benchmarking) {
-		radeon_benchmark(rdev);
+		radeon_benchmark(rdev, radeon_benchmarking);
 	}
 	return 0;
 }
@@ -981,7 +982,7 @@ struct radeon_debugfs {
 	struct drm_info_list	*files;
 	unsigned		num_files;
 };
-static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_NUM_FILES];
+static struct radeon_debugfs _radeon_debugfs[RADEON_DEBUGFS_MAX_COMPONENTS];
 static unsigned _radeon_debugfs_count = 0;
 
 int radeon_debugfs_add_files(struct radeon_device *rdev,
@@ -996,14 +997,17 @@ int radeon_debugfs_add_files(struct radeon_device *rdev,
 			return 0;
 		}
 	}
-	if ((_radeon_debugfs_count + nfiles) > RADEON_DEBUGFS_MAX_NUM_FILES) {
-		DRM_ERROR("Reached maximum number of debugfs files.\n");
-		DRM_ERROR("Report so we increase RADEON_DEBUGFS_MAX_NUM_FILES.\n");
+
+	i = _radeon_debugfs_count + 1;
+	if (i > RADEON_DEBUGFS_MAX_COMPONENTS) {
+		DRM_ERROR("Reached maximum number of debugfs components.\n");
+		DRM_ERROR("Report so we increase "
+		          "RADEON_DEBUGFS_MAX_COMPONENTS.\n");
 		return -EINVAL;
 	}
 	_radeon_debugfs[_radeon_debugfs_count].files = files;
 	_radeon_debugfs[_radeon_debugfs_count].num_files = nfiles;
-	_radeon_debugfs_count++;
+	_radeon_debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_create_files(files, nfiles,
 				 rdev->ddev->control->debugfs_root,
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3475a09f946b..76ec0e9ed8ae 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -263,7 +263,7 @@ retry:
 		 */
 		if (seq == rdev->fence_drv.last_seq && radeon_gpu_is_lockup(rdev)) {
 			/* good news we believe it's a lockup */
-			WARN(1, "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
+			printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
 			     fence->seq, seq);
 			/* FIXME: what should we do ? marking everyone
 			 * as signaled for now
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index a533f52fd163..fdc3a9a54bf8 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -142,7 +142,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	u64 page_base;
 
 	if (!rdev->gart.ready) {
-		WARN(1, "trying to unbind memory to unitialized GART !\n");
+		WARN(1, "trying to unbind memory from uninitialized GART !\n");
 		return;
 	}
 	t = offset / RADEON_GPU_PAGE_SIZE;
@@ -174,7 +174,7 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	int i, j;
 
 	if (!rdev->gart.ready) {
-		WARN(1, "trying to bind memory to unitialized GART !\n");
+		WARN(1, "trying to bind memory to uninitialized GART !\n");
 		return -EINVAL;
 	}
 	t = offset / RADEON_GPU_PAGE_SIZE;
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 6c111c1fa3f9..02cb7da4124d 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -81,8 +81,9 @@ bool radeon_ddc_probe(struct radeon_connector *radeon_connector, bool requires_e
 
 /* bit banging i2c */
 
-static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
+static int pre_xfer(struct i2c_adapter *i2c_adap)
 {
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
 	struct radeon_device *rdev = i2c->dev->dev_private;
 	struct radeon_i2c_bus_rec *rec = &i2c->rec;
 	uint32_t temp;
@@ -137,19 +138,30 @@ static void radeon_i2c_do_lock(struct radeon_i2c_chan *i2c, int lock_state)
 	WREG32(rec->en_data_reg, temp);
 
 	/* mask the gpio pins for software use */
-	temp = RREG32(rec->mask_clk_reg);
-	if (lock_state)
-		temp |= rec->mask_clk_mask;
-	else
-		temp &= ~rec->mask_clk_mask;
+	temp = RREG32(rec->mask_clk_reg) | rec->mask_clk_mask;
 	WREG32(rec->mask_clk_reg, temp);
 	temp = RREG32(rec->mask_clk_reg);
 
+	temp = RREG32(rec->mask_data_reg) | rec->mask_data_mask;
+	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
-	if (lock_state)
-		temp |= rec->mask_data_mask;
-	else
-		temp &= ~rec->mask_data_mask;
+
+	return 0;
+}
+
+static void post_xfer(struct i2c_adapter *i2c_adap)
+{
+	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
+	struct radeon_device *rdev = i2c->dev->dev_private;
+	struct radeon_i2c_bus_rec *rec = &i2c->rec;
+	uint32_t temp;
+
+	/* unmask the gpio pins for software use */
+	temp = RREG32(rec->mask_clk_reg) & ~rec->mask_clk_mask;
+	WREG32(rec->mask_clk_reg, temp);
+	temp = RREG32(rec->mask_clk_reg);
+
+	temp = RREG32(rec->mask_data_reg) & ~rec->mask_data_mask;
 	WREG32(rec->mask_data_reg, temp);
 	temp = RREG32(rec->mask_data_reg);
 }
@@ -209,22 +221,6 @@ static void set_data(void *i2c_priv, int data)
 	WREG32(rec->en_data_reg, val);
 }
 
-static int pre_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 1);
-
-	return 0;
-}
-
-static void post_xfer(struct i2c_adapter *i2c_adap)
-{
-	struct radeon_i2c_chan *i2c = i2c_get_adapdata(i2c_adap);
-
-	radeon_i2c_do_lock(i2c, 0);
-}
-
 /* hw i2c */
 
 static u32 radeon_get_i2c_prescale(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746bd51b7..00da38424dfc 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
 	}
 }
 
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
 {
 	u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
 	u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb428d09..b37ec0f1413a 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
 	*v_sync_strt_wid = tmp;
 }
 
-static inline int get_post_div(int value)
+static int get_post_div(int value)
 {
 	int post_div;
 	switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 68820f5f6303..ed0178f03235 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -447,6 +447,7 @@ struct radeon_connector {
 	struct edid *edid;
 	void *con_priv;
 	bool dac_load_detect;
+	bool detected_by_load; /* if the connection status was determined by load */
 	uint16_t connector_object_id;
 	struct radeon_hpd hpd;
 	struct radeon_router router;
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1b1b6e..1c851521f458 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	return 0;
 }
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0))
+		return r;
+	spin_lock(&bo->tbo.bdev->fence_lock);
+	if (mem_type)
+		*mem_type = bo->tbo.mem.mem_type;
+	if (bo->tbo.sync_obj)
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
+	ttm_bo_unreserve(&bo->tbo);
+	return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo:		bo structure
+ * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+		return r;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13628f2..b07f0f9b8627 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
 	return 0;
 }
 
-/**
- * radeon_bo_reserve - reserve bo
- * @bo:		bo structure
- * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0)) {
-		if (r != -ERESTARTSYS)
-			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
-		return r;
-	}
-	return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
 
 static inline void radeon_bo_unreserve(struct radeon_bo *bo)
 {
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return bo->tbo.addr_space_offset;
 }
 
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-					bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0))
-		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
-	if (mem_type)
-		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	ttm_bo_unreserve(&bo->tbo);
-	return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+			  bool no_wait);
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 				unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 08c0233db1b8..49d58202202c 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -35,6 +35,44 @@
 
 int radeon_debugfs_ib_init(struct radeon_device *rdev);
 
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	u32 pg_idx, pg_offset;
+	u32 idx_value = 0;
+	int new_page;
+
+	pg_idx = (idx * 4) / PAGE_SIZE;
+	pg_offset = (idx * 4) % PAGE_SIZE;
+
+	if (ibc->kpage_idx[0] == pg_idx)
+		return ibc->kpage[0][pg_offset/4];
+	if (ibc->kpage_idx[1] == pg_idx)
+		return ibc->kpage[1][pg_offset/4];
+
+	new_page = radeon_cs_update_pages(p, pg_idx);
+	if (new_page < 0) {
+		p->parser_error = new_page;
+		return 0;
+	}
+
+	idx_value = ibc->kpage[new_page][pg_offset/4];
+	return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (rdev->cp.count_dw <= 0) {
+		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+	}
+#endif
+	rdev->cp.ring[rdev->cp.wptr++] = v;
+	rdev->cp.wptr &= rdev->cp.ptr_mask;
+	rdev->cp.count_dw--;
+	rdev->cp.ring_free_dw--;
+}
+
 void radeon_ib_bogus_cleanup(struct radeon_device *rdev)
 {
 	struct radeon_ib *ib, *n;
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea73b7c5..e8422ae7fe74 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 	return 0;
 }
 
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
-						     dev_priv,
-						     struct drm_file *file_priv,
-						     drm_radeon_kcmd_buffer_t *
-						     cmdbuf,
-						     unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+					  dev_priv,
+					  struct drm_file *file_priv,
+					  drm_radeon_kcmd_buffer_t *
+					  cmdbuf,
+					  unsigned int *cmdsz)
 {
 	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
  * CP hardware state programming functions
  */
 
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
-					     struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+				  struct drm_clip_rect * box)
 {
 	RING_LOCALS;
 
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index aa6a66eeb4ec..89a6e1ecea8d 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -182,6 +182,9 @@ int rs400_gart_enable(struct radeon_device *rdev)
 	/* Enable gart */
 	WREG32_MC(RS480_AGP_ADDRESS_SPACE_SIZE, (RS480_GART_EN | size_reg));
 	rs400_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index 4b5d0e6974a8..9320dd6404f6 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -484,6 +484,9 @@ static int rs600_gart_enable(struct radeon_device *rdev)
 	tmp = RREG32_MC(R_000009_MC_CNTL1);
 	WREG32_MC(R_000009_MC_CNTL1, (tmp | S_000009_ENABLE_PAGE_TABLES(1)));
 	rs600_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b13c2eedc321..87cc1feee3ac 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -161,6 +161,9 @@ int rv770_pcie_gart_enable(struct radeon_device *rdev)
 		WREG32(VM_CONTEXT0_CNTL + (i * 4), 0);
 
 	r600_pcie_gart_tlb_flush(rdev);
+	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
+		 (unsigned)(rdev->mc.gtt_size >> 20),
+		 (unsigned long long)rdev->gart.table_addr);
 	rdev->gart.ready = true;
 	return 0;
 }
@@ -1184,8 +1187,6 @@ int rv770_resume(struct radeon_device *rdev)
 
 int rv770_suspend(struct radeon_device *rdev)
 {
-	int r;
-
 	r600_audio_fini(rdev);
 	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
@@ -1193,14 +1194,8 @@ int rv770_suspend(struct radeon_device *rdev)
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	rv770_pcie_gart_disable(rdev);
-	/* unpin shaders bo */
-	if (rdev->r600_blit.shader_obj) {
-		r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
-		if (likely(r == 0)) {
-			radeon_bo_unpin(rdev->r600_blit.shader_obj);
-			radeon_bo_unreserve(rdev->r600_blit.shader_obj);
-		}
-	}
+	r600_blit_suspend(rdev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index ef06194c5aa6..617b64678fc6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1293,6 +1293,7 @@ int ttm_bo_create(struct ttm_bo_device *bdev,
 
 	return ret;
 }
+EXPORT_SYMBOL(ttm_bo_create);
 
 static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 					unsigned mem_type, bool allow_errors)
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 30ad13344f7b..794ff67c5701 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -7,7 +7,8 @@ config DRM_VMWGFX
 	select FB_CFB_IMAGEBLIT
 	select DRM_TTM
 	help
-	  KMS enabled DRM driver for SVGA2 virtual hardware.
-
-	  If unsure say n. The compiled module will be
-	  called vmwgfx.ko
+	  Choose this option if you would like to run 3D acceleration
+	  in a VMware virtual machine.
+	  This is a KMS enabled DRM driver for the VMware SVGA2
+	  virtual hardware.
+	  The compiled module will be called "vmwgfx.ko".
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index c9281a1b1d3b..586869c8c11f 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -4,6 +4,7 @@ ccflags-y := -Iinclude/drm
 vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
 	    vmwgfx_fb.o vmwgfx_ioctl.o vmwgfx_resource.o vmwgfx_buffer.o \
 	    vmwgfx_fifo.o vmwgfx_irq.o vmwgfx_ldu.o vmwgfx_ttm_glue.o \
-	    vmwgfx_overlay.o vmwgfx_fence.o vmwgfx_gmrid_manager.o
+	    vmwgfx_overlay.o vmwgfx_marker.o vmwgfx_gmrid_manager.o \
+	    vmwgfx_fence.o vmwgfx_dmabuf.o vmwgfx_scrn.o
 
 obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
index 77cb45331000..d0e085ee8249 100644
--- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
@@ -57,7 +57,8 @@ typedef enum {
    SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
    SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
    SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
-   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS65_B1,
+   SVGA3D_HWVERSION_WS8_B1    = SVGA3D_MAKE_HWVERSION(2, 1),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS8_B1,
 } SVGA3dHardwareVersion;
 
 /*
@@ -67,7 +68,8 @@ typedef enum {
 typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
 #define SVGA3D_NUM_CLIPPLANES                   6
 #define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  8
-
+#define SVGA3D_MAX_CONTEXT_IDS                  256
+#define SVGA3D_MAX_SURFACE_IDS                  (32 * 1024)
 
 /*
  * Surface formats.
@@ -79,76 +81,91 @@ typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
  */
 
 typedef enum SVGA3dSurfaceFormat {
-   SVGA3D_FORMAT_INVALID = 0,
+   SVGA3D_FORMAT_INVALID               = 0,
 
-   SVGA3D_X8R8G8B8       = 1,
-   SVGA3D_A8R8G8B8       = 2,
+   SVGA3D_X8R8G8B8                     = 1,
+   SVGA3D_A8R8G8B8                     = 2,
 
-   SVGA3D_R5G6B5         = 3,
-   SVGA3D_X1R5G5B5       = 4,
-   SVGA3D_A1R5G5B5       = 5,
-   SVGA3D_A4R4G4B4       = 6,
+   SVGA3D_R5G6B5                       = 3,
+   SVGA3D_X1R5G5B5                     = 4,
+   SVGA3D_A1R5G5B5                     = 5,
+   SVGA3D_A4R4G4B4                     = 6,
 
-   SVGA3D_Z_D32          = 7,
-   SVGA3D_Z_D16          = 8,
-   SVGA3D_Z_D24S8        = 9,
-   SVGA3D_Z_D15S1        = 10,
+   SVGA3D_Z_D32                        = 7,
+   SVGA3D_Z_D16                        = 8,
+   SVGA3D_Z_D24S8                      = 9,
+   SVGA3D_Z_D15S1                      = 10,
 
-   SVGA3D_LUMINANCE8            = 11,
-   SVGA3D_LUMINANCE4_ALPHA4     = 12,
-   SVGA3D_LUMINANCE16           = 13,
-   SVGA3D_LUMINANCE8_ALPHA8     = 14,
+   SVGA3D_LUMINANCE8                   = 11,
+   SVGA3D_LUMINANCE4_ALPHA4            = 12,
+   SVGA3D_LUMINANCE16                  = 13,
+   SVGA3D_LUMINANCE8_ALPHA8            = 14,
 
-   SVGA3D_DXT1           = 15,
-   SVGA3D_DXT2           = 16,
-   SVGA3D_DXT3           = 17,
-   SVGA3D_DXT4           = 18,
-   SVGA3D_DXT5           = 19,
+   SVGA3D_DXT1                         = 15,
+   SVGA3D_DXT2                         = 16,
+   SVGA3D_DXT3                         = 17,
+   SVGA3D_DXT4                         = 18,
+   SVGA3D_DXT5                         = 19,
 
-   SVGA3D_BUMPU8V8       = 20,
-   SVGA3D_BUMPL6V5U5     = 21,
-   SVGA3D_BUMPX8L8V8U8   = 22,
-   SVGA3D_BUMPL8V8U8     = 23,
+   SVGA3D_BUMPU8V8                     = 20,
+   SVGA3D_BUMPL6V5U5                   = 21,
+   SVGA3D_BUMPX8L8V8U8                 = 22,
+   SVGA3D_BUMPL8V8U8                   = 23,
 
-   SVGA3D_ARGB_S10E5     = 24,   /* 16-bit floating-point ARGB */
-   SVGA3D_ARGB_S23E8     = 25,   /* 32-bit floating-point ARGB */
+   SVGA3D_ARGB_S10E5                   = 24,   /* 16-bit floating-point ARGB */
+   SVGA3D_ARGB_S23E8                   = 25,   /* 32-bit floating-point ARGB */
 
-   SVGA3D_A2R10G10B10    = 26,
+   SVGA3D_A2R10G10B10                  = 26,
 
    /* signed formats */
-   SVGA3D_V8U8           = 27,
-   SVGA3D_Q8W8V8U8       = 28,
-   SVGA3D_CxV8U8         = 29,
+   SVGA3D_V8U8                         = 27,
+   SVGA3D_Q8W8V8U8                     = 28,
+   SVGA3D_CxV8U8                       = 29,
 
    /* mixed formats */
-   SVGA3D_X8L8V8U8       = 30,
-   SVGA3D_A2W10V10U10    = 31,
+   SVGA3D_X8L8V8U8                     = 30,
+   SVGA3D_A2W10V10U10                  = 31,
 
-   SVGA3D_ALPHA8         = 32,
+   SVGA3D_ALPHA8                       = 32,
 
    /* Single- and dual-component floating point formats */
-   SVGA3D_R_S10E5        = 33,
-   SVGA3D_R_S23E8        = 34,
-   SVGA3D_RG_S10E5       = 35,
-   SVGA3D_RG_S23E8       = 36,
+   SVGA3D_R_S10E5                      = 33,
+   SVGA3D_R_S23E8                      = 34,
+   SVGA3D_RG_S10E5                     = 35,
+   SVGA3D_RG_S23E8                     = 36,
 
    /*
     * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
     * the most efficient format to use when creating new surfaces
     * expressly for index or vertex data.
     */
-   SVGA3D_BUFFER         = 37,
 
-   SVGA3D_Z_D24X8        = 38,
+   SVGA3D_BUFFER                       = 37,
+
+   SVGA3D_Z_D24X8                      = 38,
 
-   SVGA3D_V16U16         = 39,
+   SVGA3D_V16U16                       = 39,
 
-   SVGA3D_G16R16         = 40,
-   SVGA3D_A16B16G16R16   = 41,
+   SVGA3D_G16R16                       = 40,
+   SVGA3D_A16B16G16R16                 = 41,
 
    /* Packed Video formats */
-   SVGA3D_UYVY           = 42,
-   SVGA3D_YUY2           = 43,
+   SVGA3D_UYVY                         = 42,
+   SVGA3D_YUY2                         = 43,
+
+   /* Planar video formats */
+   SVGA3D_NV12                         = 44,
+
+   /* Video format with alpha */
+   SVGA3D_AYUV                         = 45,
+
+   SVGA3D_BC4_UNORM                    = 108,
+   SVGA3D_BC5_UNORM                    = 111,
+
+   /* Advanced D3D9 depth formats. */
+   SVGA3D_Z_DF16                       = 118,
+   SVGA3D_Z_DF24                       = 119,
+   SVGA3D_Z_D24S8_INT                  = 120,
 
    SVGA3D_FORMAT_MAX
 } SVGA3dSurfaceFormat;
@@ -414,10 +431,20 @@ typedef enum {
    SVGA3D_RS_SRCBLENDALPHA             = 94,    /* SVGA3dBlendOp */
    SVGA3D_RS_DSTBLENDALPHA             = 95,    /* SVGA3dBlendOp */
    SVGA3D_RS_BLENDEQUATIONALPHA        = 96,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_TRANSPARENCYANTIALIAS     = 97,    /* SVGA3dTransparencyAntialiasType */
+   SVGA3D_RS_LINEAA                    = 98,    /* SVGA3dBool */
+   SVGA3D_RS_LINEWIDTH                 = 99,    /* float */
    SVGA3D_RS_MAX
 } SVGA3dRenderStateName;
 
 typedef enum {
+   SVGA3D_TRANSPARENCYANTIALIAS_NORMAL            = 0,
+   SVGA3D_TRANSPARENCYANTIALIAS_ALPHATOCOVERAGE   = 1,
+   SVGA3D_TRANSPARENCYANTIALIAS_SUPERSAMPLE       = 2,
+   SVGA3D_TRANSPARENCYANTIALIAS_MAX
+} SVGA3dTransparencyAntialiasType;
+
+typedef enum {
    SVGA3D_VERTEXMATERIAL_NONE     = 0,    /* Use the value in the current material */
    SVGA3D_VERTEXMATERIAL_DIFFUSE  = 1,    /* Use the value in the diffuse component */
    SVGA3D_VERTEXMATERIAL_SPECULAR = 2,    /* Use the value in the specular component */
@@ -728,10 +755,10 @@ typedef enum {
    SVGA3D_TEX_FILTER_NEAREST        = 1,
    SVGA3D_TEX_FILTER_LINEAR         = 2,
    SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
-   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
-   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
-   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, /* Deprecated, not implemented */
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, /* Not currently implemented */
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, /* Not currently implemented */
    SVGA3D_TEX_FILTER_MAX
 } SVGA3dTextureFilter;
 
@@ -799,19 +826,19 @@ typedef enum {
 
 typedef enum {
    SVGA3D_DECLUSAGE_POSITION     = 0,
-   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
-   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
-   SVGA3D_DECLUSAGE_NORMAL,            //  3
-   SVGA3D_DECLUSAGE_PSIZE,             //  4
-   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
-   SVGA3D_DECLUSAGE_TANGENT,           //  6
-   SVGA3D_DECLUSAGE_BINORMAL,          //  7
-   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
-   SVGA3D_DECLUSAGE_POSITIONT,         //  9
-   SVGA3D_DECLUSAGE_COLOR,             // 10
-   SVGA3D_DECLUSAGE_FOG,               // 11
-   SVGA3D_DECLUSAGE_DEPTH,             // 12
-   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,       /*  1 */
+   SVGA3D_DECLUSAGE_BLENDINDICES,      /*  2 */
+   SVGA3D_DECLUSAGE_NORMAL,            /*  3 */
+   SVGA3D_DECLUSAGE_PSIZE,             /*  4 */
+   SVGA3D_DECLUSAGE_TEXCOORD,          /*  5 */
+   SVGA3D_DECLUSAGE_TANGENT,           /*  6 */
+   SVGA3D_DECLUSAGE_BINORMAL,          /*  7 */
+   SVGA3D_DECLUSAGE_TESSFACTOR,        /*  8 */
+   SVGA3D_DECLUSAGE_POSITIONT,         /*  9 */
+   SVGA3D_DECLUSAGE_COLOR,             /* 10 */
+   SVGA3D_DECLUSAGE_FOG,               /* 11 */
+   SVGA3D_DECLUSAGE_DEPTH,             /* 12 */
+   SVGA3D_DECLUSAGE_SAMPLE,            /* 13 */
    SVGA3D_DECLUSAGE_MAX
 } SVGA3dDeclUsage;
 
@@ -819,10 +846,10 @@ typedef enum {
    SVGA3D_DECLMETHOD_DEFAULT     = 0,
    SVGA3D_DECLMETHOD_PARTIALU,
    SVGA3D_DECLMETHOD_PARTIALV,
-   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_CROSSUV,          /* Normal */
    SVGA3D_DECLMETHOD_UV,
-   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
-   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+   SVGA3D_DECLMETHOD_LOOKUP,           /* Lookup a displacement map */
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, /* Lookup a pre-sampled displacement map */
 } SVGA3dDeclMethod;
 
 typedef enum {
@@ -930,7 +957,6 @@ typedef enum {
 } SVGA3dCubeFace;
 
 typedef enum {
-   SVGA3D_SHADERTYPE_COMPILED_DX8               = 0,
    SVGA3D_SHADERTYPE_VS                         = 1,
    SVGA3D_SHADERTYPE_PS                         = 2,
    SVGA3D_SHADERTYPE_MAX
@@ -968,12 +994,18 @@ typedef enum {
 } SVGA3dTransferType;
 
 /*
- * The maximum number vertex arrays we're guaranteed to support in
+ * The maximum number of vertex arrays we're guaranteed to support in
  * SVGA_3D_CMD_DRAWPRIMITIVES.
  */
 #define SVGA3D_MAX_VERTEX_ARRAYS   32
 
 /*
+ * The maximum number of primitive ranges we're guaranteed to support
+ * in SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_DRAW_PRIMITIVE_RANGES 32
+
+/*
  * Identifiers for commands in the command FIFO.
  *
  * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
@@ -990,7 +1022,7 @@ typedef enum {
 #define SVGA_3D_CMD_LEGACY_BASE            1000
 #define SVGA_3D_CMD_BASE                   1040
 
-#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0
+#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0     /* Deprecated */
 #define SVGA_3D_CMD_SURFACE_DESTROY        SVGA_3D_CMD_BASE + 1
 #define SVGA_3D_CMD_SURFACE_COPY           SVGA_3D_CMD_BASE + 2
 #define SVGA_3D_CMD_SURFACE_STRETCHBLT     SVGA_3D_CMD_BASE + 3
@@ -1008,7 +1040,7 @@ typedef enum {
 #define SVGA_3D_CMD_SETVIEWPORT            SVGA_3D_CMD_BASE + 15
 #define SVGA_3D_CMD_SETCLIPPLANE           SVGA_3D_CMD_BASE + 16
 #define SVGA_3D_CMD_CLEAR                  SVGA_3D_CMD_BASE + 17
-#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    // Deprecated
+#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    /* Deprecated */
 #define SVGA_3D_CMD_SHADER_DEFINE          SVGA_3D_CMD_BASE + 19
 #define SVGA_3D_CMD_SHADER_DESTROY         SVGA_3D_CMD_BASE + 20
 #define SVGA_3D_CMD_SET_SHADER             SVGA_3D_CMD_BASE + 21
@@ -1018,9 +1050,13 @@ typedef enum {
 #define SVGA_3D_CMD_BEGIN_QUERY            SVGA_3D_CMD_BASE + 25
 #define SVGA_3D_CMD_END_QUERY              SVGA_3D_CMD_BASE + 26
 #define SVGA_3D_CMD_WAIT_FOR_QUERY         SVGA_3D_CMD_BASE + 27
-#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    // Deprecated
+#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    /* Deprecated */
 #define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29
-#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_SURFACE_DEFINE_V2      SVGA_3D_CMD_BASE + 30
+#define SVGA_3D_CMD_GENERATE_MIPMAPS       SVGA_3D_CMD_BASE + 31
+#define SVGA_3D_CMD_ACTIVATE_SURFACE       SVGA_3D_CMD_BASE + 40
+#define SVGA_3D_CMD_DEACTIVATE_SURFACE     SVGA_3D_CMD_BASE + 41
+#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 42
 
 #define SVGA_3D_CMD_FUTURE_MAX             2000
 
@@ -1031,9 +1067,9 @@ typedef enum {
 typedef struct {
    union {
       struct {
-         uint16  function;       // SVGA3dFogFunction
-         uint8   type;           // SVGA3dFogType
-         uint8   base;           // SVGA3dFogBase
+         uint16  function;       /* SVGA3dFogFunction */
+         uint8   type;           /* SVGA3dFogType */
+         uint8   base;           /* SVGA3dFogBase */
       };
       uint32     uintValue;
    };
@@ -1109,6 +1145,8 @@ typedef enum {
    SVGA3D_SURFACE_HINT_RENDERTARGET    = (1 << 6),
    SVGA3D_SURFACE_HINT_DEPTHSTENCIL    = (1 << 7),
    SVGA3D_SURFACE_HINT_WRITEONLY       = (1 << 8),
+   SVGA3D_SURFACE_MASKABLE_ANTIALIAS   = (1 << 9),
+   SVGA3D_SURFACE_AUTOGENMIPMAPS       = (1 << 10),
 } SVGA3dSurfaceFlags;
 
 typedef
@@ -1121,6 +1159,12 @@ struct {
    uint32                      sid;
    SVGA3dSurfaceFlags          surfaceFlags;
    SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
    SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
    /*
     * Followed by an SVGA3dSize structure for each mip level in each face.
@@ -1135,6 +1179,31 @@ struct {
 
 typedef
 struct {
+   uint32                      sid;
+   SVGA3dSurfaceFlags          surfaceFlags;
+   SVGA3dSurfaceFormat         format;
+   /*
+    * If surfaceFlags has SVGA3D_SURFACE_CUBEMAP bit set, all SVGA3dSurfaceFace
+    * structures must have the same value of numMipLevels field.
+    * Otherwise, all but the first SVGA3dSurfaceFace structures must have the
+    * numMipLevels set to 0.
+    */
+   SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
+   uint32                      multisampleCount;
+   SVGA3dTextureFilter         autogenFilter;
+   /*
+    * Followed by an SVGA3dSize structure for each mip level in each face.
+    *
+    * A note on surface sizes: Sizes are always specified in pixels,
+    * even if the true surface size is not a multiple of the minimum
+    * block size of the surface's format. For example, a 3x3x1 DXT1
+    * compressed texture would actually be stored as a 4x4x1 image in
+    * memory.
+    */
+} SVGA3dCmdDefineSurface_v2;     /* SVGA_3D_CMD_SURFACE_DEFINE_V2 */
+
+typedef
+struct {
    uint32               sid;
 } SVGA3dCmdDestroySurface;      /* SVGA_3D_CMD_SURFACE_DESTROY */
 
@@ -1474,10 +1543,12 @@ struct {
     * SVGA3dCmdDrawPrimitives structure. In order,
     * they are:
     *
-    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
-    * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls', but no more than
+    *    SVGA3D_MAX_VERTEX_ARRAYS;
+    * 2. SVGA3dPrimitiveRange, quantity 'numRanges', but no more than
+    *    SVGA3D_MAX_DRAW_PRIMITIVE_RANGES;
     * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
-    *    the frequency divisor for this the corresponding vertex decl)
+    *    the frequency divisor for the corresponding vertex decl).
     */
 } SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
 
@@ -1671,6 +1742,12 @@ struct {
    /* Clipping: zero or more SVGASignedRects follow */
 } SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
 
+typedef
+struct {
+   uint32               sid;
+   SVGA3dTextureFilter  filter;
+} SVGA3dCmdGenerateMipmaps;             /* SVGA_3D_CMD_GENERATE_MIPMAPS */
+
 
 /*
  * Capability query index.
@@ -1774,6 +1851,32 @@ typedef enum {
    SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
    SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
    SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+   SVGA3D_DEVCAP_MULTISAMPLE_NONMASKABLESAMPLES    = 70,
+   SVGA3D_DEVCAP_MULTISAMPLE_MASKABLESAMPLES       = 71,
+   SVGA3D_DEVCAP_ALPHATOCOVERAGE                   = 72,
+   SVGA3D_DEVCAP_SUPERSAMPLE                       = 73,
+   SVGA3D_DEVCAP_AUTOGENMIPMAPS                    = 74,
+   SVGA3D_DEVCAP_SURFACEFMT_NV12                   = 75,
+   SVGA3D_DEVCAP_SURFACEFMT_AYUV                   = 76,
+
+   /*
+    * This is the maximum number of SVGA context IDs that the guest
+    * can define using SVGA_3D_CMD_CONTEXT_DEFINE.
+    */
+   SVGA3D_DEVCAP_MAX_CONTEXT_IDS                   = 77,
+
+   /*
+    * This is the maximum number of SVGA surface IDs that the guest
+    * can define using SVGA_3D_CMD_SURFACE_DEFINE*.
+    */
+   SVGA3D_DEVCAP_MAX_SURFACE_IDS                   = 78,
+
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF16                 = 79,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_DF24                 = 80,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8_INT            = 81,
+
+   SVGA3D_DEVCAP_SURFACEFMT_BC4_UNORM              = 82,
+   SVGA3D_DEVCAP_SURFACEFMT_BC5_UNORM              = 83,
 
    /*
     * Don't add new caps into the previous section; the values in this
diff --git a/drivers/gpu/drm/vmwgfx/svga_escape.h b/drivers/gpu/drm/vmwgfx/svga_escape.h
index 7b85e9b8c854..8e8d9682e018 100644
--- a/drivers/gpu/drm/vmwgfx/svga_escape.h
+++ b/drivers/gpu/drm/vmwgfx/svga_escape.h
@@ -75,7 +75,7 @@
  */
 
 #define SVGA_ESCAPE_VMWARE_HINT               0x00030000
-#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  // Deprecated
+#define SVGA_ESCAPE_VMWARE_HINT_FULLSCREEN    0x00030001  /* Deprecated */
 
 typedef
 struct {
diff --git a/drivers/gpu/drm/vmwgfx/svga_overlay.h b/drivers/gpu/drm/vmwgfx/svga_overlay.h
index f753d73c14b4..f38416fcb046 100644
--- a/drivers/gpu/drm/vmwgfx/svga_overlay.h
+++ b/drivers/gpu/drm/vmwgfx/svga_overlay.h
@@ -38,9 +38,9 @@
  * Video formats we support
  */
 
-#define VMWARE_FOURCC_YV12 0x32315659 // 'Y' 'V' '1' '2'
-#define VMWARE_FOURCC_YUY2 0x32595559 // 'Y' 'U' 'Y' '2'
-#define VMWARE_FOURCC_UYVY 0x59565955 // 'U' 'Y' 'V' 'Y'
+#define VMWARE_FOURCC_YV12 0x32315659 /* 'Y' 'V' '1' '2' */
+#define VMWARE_FOURCC_YUY2 0x32595559 /* 'Y' 'U' 'Y' '2' */
+#define VMWARE_FOURCC_UYVY 0x59565955 /* 'U' 'Y' 'V' 'Y' */
 
 typedef enum {
    SVGA_OVERLAY_FORMAT_INVALID = 0,
@@ -68,7 +68,7 @@ struct SVGAEscapeVideoSetRegs {
       uint32 streamId;
    } header;
 
-   // May include zero or more items.
+   /* May include zero or more items. */
    struct {
       uint32 registerId;
       uint32 value;
@@ -134,12 +134,12 @@ struct {
  */
 
 static inline bool
-VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
-                         uint32 *width,                     // IN / OUT
-                         uint32 *height,                    // IN / OUT
-                         uint32 *size,                      // OUT
-                         uint32 *pitches,                   // OUT (optional)
-                         uint32 *offsets)                   // OUT (optional)
+VMwareVideoGetAttributes(const SVGAOverlayFormat format,    /* IN */
+                         uint32 *width,                     /* IN / OUT */
+                         uint32 *height,                    /* IN / OUT */
+                         uint32 *size,                      /* OUT */
+                         uint32 *pitches,                   /* OUT (optional) */
+                         uint32 *offsets)                   /* OUT (optional) */
 {
     int tmp;
 
@@ -198,4 +198,4 @@ VMwareVideoGetAttributes(const SVGAOverlayFormat format,    // IN
     return true;
 }
 
-#endif // _SVGA_OVERLAY_H_
+#endif /* _SVGA_OVERLAY_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/svga_reg.h b/drivers/gpu/drm/vmwgfx/svga_reg.h
index 1b96c2ec07dd..01f63cb49678 100644
--- a/drivers/gpu/drm/vmwgfx/svga_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga_reg.h
@@ -39,6 +39,15 @@
 #define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
 
 /*
+ * SVGA_REG_ENABLE bit definitions.
+ */
+#define SVGA_REG_ENABLE_DISABLE     0
+#define SVGA_REG_ENABLE_ENABLE      1
+#define SVGA_REG_ENABLE_HIDE        2
+#define SVGA_REG_ENABLE_ENABLE_HIDE (SVGA_REG_ENABLE_ENABLE |\
+				     SVGA_REG_ENABLE_HIDE)
+
+/*
  * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
  * cursor bypass mode. This is still supported, but no new guest
  * drivers should use it.
@@ -158,7 +167,9 @@ enum {
    SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,
 
    SVGA_REG_TRACES = 45,            /* Enable trace-based updates even when FIFO is on */
-   SVGA_REG_TOP = 46,               /* Must be 1 more than the last register */
+   SVGA_REG_GMRS_MAX_PAGES = 46,    /* Maximum number of 4KB pages for all GMRs */
+   SVGA_REG_MEMORY_SIZE = 47,       /* Total dedicated device memory excluding FIFO */
+   SVGA_REG_TOP = 48,               /* Must be 1 more than the last register */
 
    SVGA_PALETTE_BASE = 1024,        /* Base of SVGA color map */
    /* Next 768 (== 256*3) registers exist for colormap */
@@ -265,7 +276,7 @@ enum {
  * possible.
  */
 #define SVGA_GMR_NULL         ((uint32) -1)
-#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  // Guest Framebuffer (GFB)
+#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  /* Guest Framebuffer (GFB) */
 
 typedef
 struct SVGAGuestMemDescriptor {
@@ -306,13 +317,35 @@ struct SVGAGMRImageFormat {
       struct {
          uint32 bitsPerPixel : 8;
          uint32 colorDepth   : 8;
-         uint32 reserved     : 16;  // Must be zero
+         uint32 reserved     : 16;  /* Must be zero */
       };
 
       uint32 value;
    };
 } SVGAGMRImageFormat;
 
+typedef
+struct SVGAGuestImage {
+   SVGAGuestPtr         ptr;
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGAGuestImage;
+
 /*
  * SVGAColorBGRX --
  *
@@ -328,7 +361,7 @@ struct SVGAColorBGRX {
          uint32 b : 8;
          uint32 g : 8;
          uint32 r : 8;
-         uint32 x : 8;  // Unused
+         uint32 x : 8;  /* Unused */
       };
 
       uint32 value;
@@ -370,23 +403,34 @@ struct SVGASignedPoint {
  *  Note the holes in the bitfield. Missing bits have been deprecated,
  *  and must not be reused. Those capabilities will never be reported
  *  by new versions of the SVGA device.
+ *
+ * SVGA_CAP_GMR2 --
+ *    Provides asynchronous commands to define and remap guest memory
+ *    regions.  Adds device registers SVGA_REG_GMRS_MAX_PAGES and
+ *    SVGA_REG_MEMORY_SIZE.
+ *
+ * SVGA_CAP_SCREEN_OBJECT_2 --
+ *    Allow screen object support, and require backing stores from the
+ *    guest for each screen object.
  */
 
 #define SVGA_CAP_NONE               0x00000000
 #define SVGA_CAP_RECT_COPY          0x00000002
 #define SVGA_CAP_CURSOR             0x00000020
-#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
-#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040   /* Legacy (Use Cursor Bypass 3 instead) */
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   /* Legacy (Use Cursor Bypass 3 instead) */
 #define SVGA_CAP_8BIT_EMULATION     0x00000100
 #define SVGA_CAP_ALPHA_CURSOR       0x00000200
 #define SVGA_CAP_3D                 0x00004000
 #define SVGA_CAP_EXTENDED_FIFO      0x00008000
-#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_MULTIMON           0x00010000   /* Legacy multi-monitor support */
 #define SVGA_CAP_PITCHLOCK          0x00020000
 #define SVGA_CAP_IRQMASK            0x00040000
-#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   /* Legacy multi-monitor support */
 #define SVGA_CAP_GMR                0x00100000
 #define SVGA_CAP_TRACES             0x00200000
+#define SVGA_CAP_GMR2               0x00400000
+#define SVGA_CAP_SCREEN_OBJECT_2    0x00800000
 
 
 /*
@@ -431,7 +475,7 @@ enum {
 
    SVGA_FIFO_CAPABILITIES = 4,
    SVGA_FIFO_FLAGS,
-   // Valid with SVGA_FIFO_CAP_FENCE:
+   /* Valid with SVGA_FIFO_CAP_FENCE: */
    SVGA_FIFO_FENCE,
 
    /*
@@ -444,33 +488,47 @@ enum {
     * extended FIFO.
     */
 
-   // Valid if exists (i.e. if extended FIFO enabled):
+   /* Valid if exists (i.e. if extended FIFO enabled): */
    SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
-   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   /* Valid with SVGA_FIFO_CAP_PITCHLOCK: */
    SVGA_FIFO_PITCHLOCK,
 
-   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   /* Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3: */
    SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
    SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
    SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
    SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
    SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
 
-   // Valid with SVGA_FIFO_CAP_RESERVE:
+   /* Valid with SVGA_FIFO_CAP_RESERVE: */
    SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
 
    /*
-    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2:
     *
     * By default this is SVGA_ID_INVALID, to indicate that the cursor
     * coordinates are specified relative to the virtual root. If this
     * is set to a specific screen ID, cursor position is reinterpreted
-    * as a signed offset relative to that screen's origin. This is the
-    * only way to place the cursor on a non-rooted screen.
+    * as a signed offset relative to that screen's origin.
     */
    SVGA_FIFO_CURSOR_SCREEN_ID,
 
    /*
+    * Valid with SVGA_FIFO_CAP_DEAD
+    *
+    * An arbitrary value written by the host, drivers should not use it.
+    */
+   SVGA_FIFO_DEAD,
+
+   /*
+    * Valid with SVGA_FIFO_CAP_3D_HWVERSION_REVISED:
+    *
+    * Contains 3D HWVERSION (see SVGA3dHardwareVersion in svga3d_reg.h)
+    * on platforms that can enforce graphics resource limits.
+    */
+   SVGA_FIFO_3D_HWVERSION_REVISED,
+
+   /*
     * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
     * registers, but this must be done carefully and with judicious use of
     * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
@@ -508,7 +566,7 @@ enum {
     * sets SVGA_FIFO_MIN high enough to leave room for them.
     */
 
-   // Valid if register exists:
+   /* Valid if register exists: */
    SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
    SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
    SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
@@ -709,6 +767,37 @@ enum {
  *
  *       - When a screen is resized, either using Screen Object commands or
  *         legacy multimon registers, its contents are preserved.
+ *
+ * SVGA_FIFO_CAP_GMR2 --
+ *
+ *    Provides new commands to define and remap guest memory regions (GMR).
+ *
+ *    New 2D commands:
+ *       DEFINE_GMR2, REMAP_GMR2.
+ *
+ * SVGA_FIFO_CAP_3D_HWVERSION_REVISED --
+ *
+ *    Indicates new register SVGA_FIFO_3D_HWVERSION_REVISED exists.
+ *    This register may replace SVGA_FIFO_3D_HWVERSION on platforms
+ *    that enforce graphics resource limits.  This allows the platform
+ *    to clear SVGA_FIFO_3D_HWVERSION and disable 3D in legacy guest
+ *    drivers that do not limit their resources.
+ *
+ *    Note this is an alias to SVGA_FIFO_CAP_GMR2 because these indicators
+ *    are codependent (and thus we use a single capability bit).
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT_2 --
+ *
+ *    Modifies the DEFINE_SCREEN command to include a guest provided
+ *    backing store in GMR memory and the bytesPerLine for the backing
+ *    store.  This capability requires the use of a backing store when
+ *    creating screen objects.  However if SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    is present then backing stores are optional.
+ *
+ * SVGA_FIFO_CAP_DEAD --
+ *
+ *    Drivers should not use this cap bit.  This cap bit can not be
+ *    reused since some hosts already expose it.
  */
 
 #define SVGA_FIFO_CAP_NONE                  0
@@ -720,6 +809,10 @@ enum {
 #define SVGA_FIFO_CAP_ESCAPE            (1<<5)
 #define SVGA_FIFO_CAP_RESERVE           (1<<6)
 #define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+#define SVGA_FIFO_CAP_GMR2              (1<<8)
+#define SVGA_FIFO_CAP_3D_HWVERSION_REVISED  SVGA_FIFO_CAP_GMR2
+#define SVGA_FIFO_CAP_SCREEN_OBJECT_2   (1<<9)
+#define SVGA_FIFO_CAP_DEAD              (1<<10)
 
 
 /*
@@ -730,7 +823,7 @@ enum {
 
 #define SVGA_FIFO_FLAG_NONE                 0
 #define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
-#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) /* Internal use only */
 
 /*
  * FIFO reservation sentinel value
@@ -763,22 +856,22 @@ enum {
    SVGA_VIDEO_DATA_OFFSET,
    SVGA_VIDEO_FORMAT,
    SVGA_VIDEO_COLORKEY,
-   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_SIZE,          /* Deprecated */
    SVGA_VIDEO_WIDTH,
    SVGA_VIDEO_HEIGHT,
    SVGA_VIDEO_SRC_X,
    SVGA_VIDEO_SRC_Y,
    SVGA_VIDEO_SRC_WIDTH,
    SVGA_VIDEO_SRC_HEIGHT,
-   SVGA_VIDEO_DST_X,         // Signed int32
-   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_X,         /* Signed int32 */
+   SVGA_VIDEO_DST_Y,         /* Signed int32 */
    SVGA_VIDEO_DST_WIDTH,
    SVGA_VIDEO_DST_HEIGHT,
    SVGA_VIDEO_PITCH_1,
    SVGA_VIDEO_PITCH_2,
    SVGA_VIDEO_PITCH_3,
-   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
-   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_DATA_GMRID,    /* Optional, defaults to SVGA_GMR_FRAMEBUFFER */
+   SVGA_VIDEO_DST_SCREEN_ID, /* Optional, defaults to virtual coords (SVGA_ID_INVALID) */
    SVGA_VIDEO_NUM_REGS
 };
 
@@ -829,15 +922,51 @@ typedef struct SVGAOverlayUnit {
  *    compatibility. New flags can be added, and the struct may grow,
  *    but existing fields must retain their meaning.
  *
+ *    Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2 are required fields of
+ *    a SVGAGuestPtr that is used to back the screen contents.  This
+ *    memory must come from the GFB.  The guest is not allowed to
+ *    access the memory and doing so will have undefined results.  The
+ *    backing store is required to be page aligned and the size is
+ *    padded to the next page boundry.  The number of pages is:
+ *       (bytesPerLine * size.width * 4 + PAGE_SIZE - 1) / PAGE_SIZE
+ *
+ *    The pitch in the backingStore is required to be at least large
+ *    enough to hold a 32bbp scanline.  It is recommended that the
+ *    driver pad bytesPerLine for a potential performance win.
+ *
+ *    The cloneCount field is treated as a hint from the guest that
+ *    the user wants this display to be cloned, countCount times.  A
+ *    value of zero means no cloning should happen.
+ */
+
+#define SVGA_SCREEN_MUST_BE_SET     (1 << 0) /* Must be set or results undefined */
+#define SVGA_SCREEN_HAS_ROOT SVGA_SCREEN_MUST_BE_SET /* Deprecated */
+#define SVGA_SCREEN_IS_PRIMARY      (1 << 1) /* Guest considers this screen to be 'primary' */
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2) /* Guest is running a fullscreen app here */
+
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When the screen is
+ * deactivated the base layer is defined to lose all contents and
+ * become black.  When a screen is deactivated the backing store is
+ * optional.  When set backingPtr and bytesPerLine will be ignored.
  */
+#define SVGA_SCREEN_DEACTIVATE  (1 << 3)
 
-#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
-#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
-#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+/*
+ * Added with SVGA_FIFO_CAP_SCREEN_OBJECT_2.  When this flag is set
+ * the screen contents will be outputted as all black to the user
+ * though the base layer contents is preserved.  The screen base layer
+ * can still be read and written to like normal though the no visible
+ * effect will be seen by the user.  When the flag is changed the
+ * screen will be blanked or redrawn to the current contents as needed
+ * without any extra commands from the driver.  This flag only has an
+ * effect when the screen is not deactivated.
+ */
+#define SVGA_SCREEN_BLANKING (1 << 4)
 
 typedef
 struct SVGAScreenObject {
-   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 structSize;   /* sizeof(SVGAScreenObject) */
    uint32 id;
    uint32 flags;
    struct {
@@ -847,7 +976,14 @@ struct SVGAScreenObject {
    struct {
       int32 x;
       int32 y;
-   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+   } root;
+
+   /*
+    * Added and required by SVGA_FIFO_CAP_SCREEN_OBJECT_2, optional
+    * with SVGA_FIFO_CAP_SCREEN_OBJECT.
+    */
+   SVGAGuestImage backingStore;
+   uint32 cloneCount;
 } SVGAScreenObject;
 
 
@@ -885,6 +1021,8 @@ typedef enum {
    SVGA_CMD_BLIT_SCREEN_TO_GMRFB  = 38,
    SVGA_CMD_ANNOTATION_FILL       = 39,
    SVGA_CMD_ANNOTATION_COPY       = 40,
+   SVGA_CMD_DEFINE_GMR2           = 41,
+   SVGA_CMD_REMAP_GMR2            = 42,
    SVGA_CMD_MAX
 } SVGAFifoCmdId;
 
@@ -920,7 +1058,7 @@ typedef enum {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdate {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -939,7 +1077,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdRectCopy {
    uint32 srcX;
    uint32 srcY;
    uint32 destX;
@@ -963,14 +1101,14 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
    uint32 height;
-   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
-   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 andMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
+   uint32 xorMaskDepth;   /* Value must be 1 or equal to BITS_PER_PIXEL */
    /*
     * Followed by scanline data for AND mask, then XOR mask.
     * Each scanline is padded to a 32-bit boundary.
@@ -992,8 +1130,8 @@ struct {
  */
 
 typedef
-struct {
-   uint32 id;             // Reserved, must be zero.
+struct SVGAFifoCmdDefineAlphaCursor {
+   uint32 id;             /* Reserved, must be zero. */
    uint32 hotspotX;
    uint32 hotspotY;
    uint32 width;
@@ -1015,7 +1153,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdUpdateVerbose {
    uint32 x;
    uint32 y;
    uint32 width;
@@ -1040,13 +1178,13 @@ struct {
 #define  SVGA_ROP_COPY                    0x03
 
 typedef
-struct {
-   uint32 color;     // In the same format as the GFB
+struct SVGAFifoCmdFrontRopFill {
+   uint32 color;     /* In the same format as the GFB */
    uint32 x;
    uint32 y;
    uint32 width;
    uint32 height;
-   uint32 rop;       // Must be SVGA_ROP_COPY
+   uint32 rop;       /* Must be SVGA_ROP_COPY */
 } SVGAFifoCmdFrontRopFill;
 
 
@@ -1083,7 +1221,7 @@ struct {
  */
 
 typedef
-struct {
+struct SVGAFifoCmdEscape {
    uint32 nsid;
    uint32 size;
    /* followed by 'size' bytes of data */
@@ -1113,12 +1251,12 @@ struct {
  *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
 struct {
-   SVGAScreenObject screen;   // Variable-length according to version
+   SVGAScreenObject screen;   /* Variable-length according to version */
 } SVGAFifoCmdDefineScreen;
 
 
@@ -1129,7 +1267,7 @@ struct {
  *    re-use.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1182,7 +1320,7 @@ struct {
  *    GMRFB.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1219,7 +1357,7 @@ struct {
  *    SVGA_CMD_ANNOTATION_* commands for details.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1267,7 +1405,7 @@ struct {
  *    the time any subsequent FENCE commands are reached.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1302,7 +1440,7 @@ struct {
  *    user's display is being remoted over a network connection.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1334,7 +1472,7 @@ struct {
  *    undefined.
  *
  * Availability:
- *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT or SVGA_FIFO_CAP_SCREEN_OBJECT_2
  */
 
 typedef
@@ -1343,4 +1481,72 @@ struct {
    uint32           srcScreenId;
 } SVGAFifoCmdAnnotationCopy;
 
+
+/*
+ * SVGA_CMD_DEFINE_GMR2 --
+ *
+ *    Define guest memory region v2.  See the description of GMRs above.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef
+struct {
+   uint32 gmrId;
+   uint32 numPages;
+} SVGAFifoCmdDefineGMR2;
+
+
+/*
+ * SVGA_CMD_REMAP_GMR2 --
+ *
+ *    Remap guest memory region v2.  See the description of GMRs above.
+ *
+ *    This command allows guest to modify a portion of an existing GMR by
+ *    invalidating it or reassigning it to different guest physical pages.
+ *    The pages are identified by physical page number (PPN).  The pages
+ *    are assumed to be pinned and valid for DMA operations.
+ *
+ *    Description of command flags:
+ *
+ *    SVGA_REMAP_GMR2_VIA_GMR: If enabled, references a PPN list in a GMR.
+ *       The PPN list must not overlap with the remap region (this can be
+ *       handled trivially by referencing a separate GMR).  If flag is
+ *       disabled, PPN list is appended to SVGARemapGMR command.
+ *
+ *    SVGA_REMAP_GMR2_PPN64: If set, PPN list is in PPN64 format, otherwise
+ *       it is in PPN32 format.
+ *
+ *    SVGA_REMAP_GMR2_SINGLE_PPN: If set, PPN list contains a single entry.
+ *       A single PPN can be used to invalidate a portion of a GMR or
+ *       map it to to a single guest scratch page.
+ *
+ * Availability:
+ *    SVGA_CAP_GMR2
+ */
+
+typedef enum {
+   SVGA_REMAP_GMR2_PPN32         = 0,
+   SVGA_REMAP_GMR2_VIA_GMR       = (1 << 0),
+   SVGA_REMAP_GMR2_PPN64         = (1 << 1),
+   SVGA_REMAP_GMR2_SINGLE_PPN    = (1 << 2),
+} SVGARemapGMR2Flags;
+
+typedef
+struct {
+   uint32 gmrId;
+   SVGARemapGMR2Flags flags;
+   uint32 offsetPages; /* offset in pages to begin remap */
+   uint32 numPages; /* number of pages to remap */
+   /*
+    * Followed by additional data depending on SVGARemapGMR2Flags.
+    *
+    * If flag SVGA_REMAP_GMR2_VIA_GMR is set, single SVGAGuestPtr follows.
+    * Otherwise an array of page descriptors in PPN32 or PPN64 format
+    * (according to flag SVGA_REMAP_GMR2_PPN64) follows.  If flag
+    * SVGA_REMAP_GMR2_SINGLE_PPN is set, array contains a single entry.
+    */
+} SVGAFifoCmdRemapGMR2;
+
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 87e43e0733bf..5a72ed908232 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -42,6 +42,10 @@ static uint32_t sys_placement_flags = TTM_PL_FLAG_SYSTEM |
 static uint32_t gmr_placement_flags = VMW_PL_FLAG_GMR |
 	TTM_PL_FLAG_CACHED;
 
+static uint32_t gmr_ne_placement_flags = VMW_PL_FLAG_GMR |
+	TTM_PL_FLAG_CACHED |
+	TTM_PL_FLAG_NO_EVICT;
+
 struct ttm_placement vmw_vram_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -56,6 +60,11 @@ static uint32_t vram_gmr_placement_flags[] = {
 	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
 };
 
+static uint32_t gmr_vram_placement_flags[] = {
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED
+};
+
 struct ttm_placement vmw_vram_gmr_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -65,6 +74,20 @@ struct ttm_placement vmw_vram_gmr_placement = {
 	.busy_placement = &gmr_placement_flags
 };
 
+static uint32_t vram_gmr_ne_placement_flags[] = {
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED | TTM_PL_FLAG_NO_EVICT
+};
+
+struct ttm_placement vmw_vram_gmr_ne_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 2,
+	.placement = vram_gmr_ne_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &gmr_ne_placement_flags
+};
+
 struct ttm_placement vmw_vram_sys_placement = {
 	.fpfn = 0,
 	.lpfn = 0,
@@ -92,6 +115,30 @@ struct ttm_placement vmw_sys_placement = {
 	.busy_placement = &sys_placement_flags
 };
 
+static uint32_t evictable_placement_flags[] = {
+	TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED,
+	TTM_PL_FLAG_VRAM | TTM_PL_FLAG_CACHED,
+	VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED
+};
+
+struct ttm_placement vmw_evictable_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 3,
+	.placement = evictable_placement_flags,
+	.num_busy_placement = 1,
+	.busy_placement = &sys_placement_flags
+};
+
+struct ttm_placement vmw_srf_placement = {
+	.fpfn = 0,
+	.lpfn = 0,
+	.num_placement = 1,
+	.num_busy_placement = 2,
+	.placement = &gmr_placement_flags,
+	.busy_placement = gmr_vram_placement_flags
+};
+
 struct vmw_ttm_backend {
 	struct ttm_backend backend;
 	struct page **pages;
@@ -274,39 +321,39 @@ static int vmw_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 
 static void *vmw_sync_obj_ref(void *sync_obj)
 {
-	return sync_obj;
+
+	return (void *)
+		vmw_fence_obj_reference((struct vmw_fence_obj *) sync_obj);
 }
 
 static void vmw_sync_obj_unref(void **sync_obj)
 {
-	*sync_obj = NULL;
+	vmw_fence_obj_unreference((struct vmw_fence_obj **) sync_obj);
 }
 
 static int vmw_sync_obj_flush(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-
-	mutex_lock(&dev_priv->hw_mutex);
-	vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
-	mutex_unlock(&dev_priv->hw_mutex);
+	vmw_fence_obj_flush((struct vmw_fence_obj *) sync_obj);
 	return 0;
 }
 
 static bool vmw_sync_obj_signaled(void *sync_obj, void *sync_arg)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
+	return	vmw_fence_obj_signaled((struct vmw_fence_obj *) sync_obj,
+				       (uint32_t) flags);
 
-	return vmw_fence_signaled(dev_priv, sequence);
 }
 
 static int vmw_sync_obj_wait(void *sync_obj, void *sync_arg,
 			     bool lazy, bool interruptible)
 {
-	struct vmw_private *dev_priv = (struct vmw_private *)sync_arg;
-	uint32_t sequence = (unsigned long) sync_obj;
+	unsigned long flags = (unsigned long) sync_arg;
 
-	return vmw_wait_fence(dev_priv, false, sequence, false, 3*HZ);
+	return vmw_fence_obj_wait((struct vmw_fence_obj *) sync_obj,
+				  (uint32_t) flags,
+				  lazy, interruptible,
+				  VMW_FENCE_WAIT_TIMEOUT);
 }
 
 struct ttm_bo_driver vmw_bo_driver = {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
new file mode 100644
index 000000000000..3fa884db08ab
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -0,0 +1,322 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "ttm/ttm_placement.h"
+
+#include "drmP.h"
+#include "vmwgfx_drv.h"
+
+
+/**
+ * vmw_dmabuf_to_placement - Validate a buffer to placement.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo to avoid failures.
+ *
+ * Returns
+ *  -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_placement(struct vmw_private *dev_priv,
+			    struct vmw_dma_buffer *buf,
+			    struct ttm_placement *placement,
+			    bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+	ttm_bo_unreserve(bo);
+
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram_or_gmr - Move a buffer to vram or gmr.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement *placement;
+	int ret;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err;
+
+	/**
+	 * Put BO in VRAM if there is space, otherwise as a GMR.
+	 * If there is no space in VRAM and GMR ids are all used up,
+	 * start evicting GMRs to make room. If the DMA buffer can't be
+	 * used as a GMR, this will return -ENOMEM.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_gmr_ne_placement;
+	else
+		placement = &vmw_vram_gmr_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+	if (likely(ret == 0) || ret == -ERESTARTSYS)
+		goto err_unreserve;
+
+
+	/**
+	 * If that failed, try VRAM again, this time evicting
+	 * previous contents.
+	 */
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	ret = ttm_bo_validate(bo, placement, interruptible, false, false);
+
+err_unreserve:
+	ttm_bo_unreserve(bo);
+err:
+	ttm_write_unlock(&vmaster->lock);
+	return ret;
+}
+
+/**
+ * vmw_dmabuf_to_vram - Move a buffer to vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+		       struct vmw_dma_buffer *buf,
+		       bool pin, bool interruptible)
+{
+	struct ttm_placement *placement;
+
+	if (pin)
+		placement = &vmw_vram_ne_placement;
+	else
+		placement = &vmw_vram_placement;
+
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       placement,
+				       interruptible);
+}
+
+/**
+ * vmw_dmabuf_to_start_of_vram - Move a buffer to start of vram.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ * Flushes and unpins the query bo if @pin == true to avoid failures.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to move.
+ * @pin:  Pin buffer in vram if true.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_to_start_of_vram(struct vmw_private *dev_priv,
+				struct vmw_dma_buffer *buf,
+				bool pin, bool interruptible)
+{
+	struct vmw_master *vmaster = dev_priv->active_master;
+	struct ttm_buffer_object *bo = &buf->base;
+	struct ttm_placement placement;
+	int ret = 0;
+
+	if (pin)
+		placement = vmw_vram_ne_placement;
+	else
+		placement = vmw_vram_placement;
+	placement.lpfn = bo->num_pages;
+
+	ret = ttm_write_lock(&vmaster->lock, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (pin)
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
+
+	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
+	if (unlikely(ret != 0))
+		goto err_unlock;
+
+	/* Is this buffer already in vram but not at the start of it? */
+	if (bo->mem.mem_type == TTM_PL_VRAM &&
+	    bo->mem.start < bo->num_pages &&
+	    bo->mem.start > 0)
+		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
+				       false, false);
+
+	ret = ttm_bo_validate(bo, &placement, interruptible, false, false);
+
+	/* For some reason we didn't up at the start of vram */
+	WARN_ON(ret == 0 && bo->offset != 0);
+
+	ttm_bo_unreserve(bo);
+err_unlock:
+	ttm_write_unlock(&vmaster->lock);
+
+	return ret;
+}
+
+
+/**
+ * vmw_dmabuf_upin - Unpin the buffer given buffer, does not move the buffer.
+ *
+ * May only be called by the current master since it assumes that the
+ * master lock is the current master's lock.
+ * This function takes the master's lock in write mode.
+ *
+ * @dev_priv:  Driver private.
+ * @buf:  DMA buffer to unpin.
+ * @interruptible:  Use interruptible wait.
+ *
+ * Returns
+ * -ERESTARTSYS if interrupted by a signal.
+ */
+int vmw_dmabuf_unpin(struct vmw_private *dev_priv,
+		     struct vmw_dma_buffer *buf,
+		     bool interruptible)
+{
+	/*
+	 * We could in theory early out if the buffer is
+	 * unpinned but we need to lock and reserve the buffer
+	 * anyways so we don't gain much by that.
+	 */
+	return vmw_dmabuf_to_placement(dev_priv, buf,
+				       &vmw_evictable_placement,
+				       interruptible);
+}
+
+
+/**
+ * vmw_bo_get_guest_ptr - Get the guest ptr representing the current placement
+ * of a buffer.
+ *
+ * @bo: Pointer to a struct ttm_buffer_object. Must be pinned or reserved.
+ * @ptr: SVGAGuestPtr returning the result.
+ */
+void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *bo,
+			  SVGAGuestPtr *ptr)
+{
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		ptr->gmrId = SVGA_GMR_FRAMEBUFFER;
+		ptr->offset = bo->offset;
+	} else {
+		ptr->gmrId = bo->mem.start;
+		ptr->offset = 0;
+	}
+}
+
+
+/**
+ * vmw_bo_pin - Pin or unpin a buffer object without moving it.
+ *
+ * @bo: The buffer object. Must be reserved, and present either in VRAM
+ * or GMR memory.
+ * @pin: Whether to pin or unpin.
+ *
+ */
+void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin)
+{
+	uint32_t pl_flags;
+	struct ttm_placement placement;
+	uint32_t old_mem_type = bo->mem.mem_type;
+	int ret;
+
+	BUG_ON(!atomic_read(&bo->reserved));
+	BUG_ON(old_mem_type != TTM_PL_VRAM &&
+	       old_mem_type != VMW_PL_FLAG_GMR);
+
+	pl_flags = TTM_PL_FLAG_VRAM | VMW_PL_FLAG_GMR | TTM_PL_FLAG_CACHED;
+	if (pin)
+		pl_flags |= TTM_PL_FLAG_NO_EVICT;
+
+	memset(&placement, 0, sizeof(placement));
+	placement.num_placement = 1;
+	placement.placement = &pl_flags;
+
+	ret = ttm_bo_validate(bo, &placement, false, true, true);
+
+	BUG_ON(ret != 0 || bo->mem.mem_type != old_mem_type);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 96949b93d920..13afddc1f034 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -82,16 +82,27 @@
 #define DRM_IOCTL_VMW_EXECBUF					\
 	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_EXECBUF,		\
 		struct drm_vmw_execbuf_arg)
-#define DRM_IOCTL_VMW_FIFO_DEBUG				\
-	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FIFO_DEBUG,		\
-		 struct drm_vmw_fifo_debug_arg)
+#define DRM_IOCTL_VMW_GET_3D_CAP				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_GET_3D_CAP,		\
+		 struct drm_vmw_get_3d_cap_arg)
 #define DRM_IOCTL_VMW_FENCE_WAIT				\
 	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_WAIT,		\
 		 struct drm_vmw_fence_wait_arg)
-#define DRM_IOCTL_VMW_UPDATE_LAYOUT				\
-	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_UPDATE_LAYOUT,	\
-		 struct drm_vmw_update_layout_arg)
-
+#define DRM_IOCTL_VMW_FENCE_SIGNALED				\
+	DRM_IOWR(DRM_COMMAND_BASE + DRM_VMW_FENCE_SIGNALED,	\
+		 struct drm_vmw_fence_signaled_arg)
+#define DRM_IOCTL_VMW_FENCE_UNREF				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_UNREF,		\
+		 struct drm_vmw_fence_arg)
+#define DRM_IOCTL_VMW_FENCE_EVENT				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_FENCE_EVENT,		\
+		 struct drm_vmw_fence_event_arg)
+#define DRM_IOCTL_VMW_PRESENT					\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT,		\
+		 struct drm_vmw_present_arg)
+#define DRM_IOCTL_VMW_PRESENT_READBACK				\
+	DRM_IOW(DRM_COMMAND_BASE + DRM_VMW_PRESENT_READBACK,	\
+		 struct drm_vmw_present_readback_arg)
 
 /**
  * The core DRM version of this macro doesn't account for
@@ -135,12 +146,25 @@ static struct drm_ioctl_desc vmw_ioctls[] = {
 		      DRM_AUTH | DRM_UNLOCKED),
 	VMW_IOCTL_DEF(VMW_EXECBUF, vmw_execbuf_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_FIFO_DEBUG, vmw_fifo_debug_ioctl,
-		      DRM_AUTH | DRM_ROOT_ONLY | DRM_MASTER | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_wait_ioctl,
+	VMW_IOCTL_DEF(VMW_FENCE_WAIT, vmw_fence_obj_wait_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_SIGNALED,
+		      vmw_fence_obj_signaled_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_UNREF, vmw_fence_obj_unref_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_FENCE_EVENT,
+		      vmw_fence_event_ioctl,
+		      DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_GET_3D_CAP, vmw_get_cap_3d_ioctl,
 		      DRM_AUTH | DRM_UNLOCKED),
-	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT, vmw_kms_update_layout_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW | DRM_UNLOCKED)
+
+	/* these allow direct access to the framebuffers mark as master only */
+	VMW_IOCTL_DEF(VMW_PRESENT, vmw_present_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
+	VMW_IOCTL_DEF(VMW_PRESENT_READBACK,
+		      vmw_present_readback_ioctl,
+		      DRM_MASTER | DRM_AUTH | DRM_UNLOCKED),
 };
 
 static struct pci_device_id vmw_pci_id_list[] = {
@@ -189,8 +213,78 @@ static void vmw_print_capabilities(uint32_t capabilities)
 		DRM_INFO("  GMR.\n");
 	if (capabilities & SVGA_CAP_TRACES)
 		DRM_INFO("  Traces.\n");
+	if (capabilities & SVGA_CAP_GMR2)
+		DRM_INFO("  GMR2.\n");
+	if (capabilities & SVGA_CAP_SCREEN_OBJECT_2)
+		DRM_INFO("  Screen Object 2.\n");
+}
+
+
+/**
+ * vmw_execbuf_prepare_dummy_query - Initialize a query result structure at
+ * the start of a buffer object.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function will idle the buffer using an uninterruptible wait, then
+ * map the first page and initialize a pending occlusion query result structure,
+ * Finally it will unmap the buffer.
+ *
+ * TODO: Since we're only mapping a single page, we should optimize the map
+ * to use kmap_atomic / iomap_atomic.
+ */
+static void vmw_dummy_query_bo_prepare(struct vmw_private *dev_priv)
+{
+	struct ttm_bo_kmap_obj map;
+	volatile SVGA3dQueryResult *result;
+	bool dummy;
+	int ret;
+	struct ttm_bo_device *bdev = &dev_priv->bdev;
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+
+	ttm_bo_reserve(bo, false, false, false, 0);
+	spin_lock(&bdev->fence_lock);
+	ret = ttm_bo_wait(bo, false, false, false);
+	spin_unlock(&bdev->fence_lock);
+	if (unlikely(ret != 0))
+		(void) vmw_fallback_wait(dev_priv, false, true, 0, false,
+					 10*HZ);
+
+	ret = ttm_bo_kmap(bo, 0, 1, &map);
+	if (likely(ret == 0)) {
+		result = ttm_kmap_obj_virtual(&map, &dummy);
+		result->totalSize = sizeof(*result);
+		result->state = SVGA3D_QUERYSTATE_PENDING;
+		result->result32 = 0xff;
+		ttm_bo_kunmap(&map);
+	} else
+		DRM_ERROR("Dummy query buffer map failed.\n");
+	ttm_bo_unreserve(bo);
+}
+
+
+/**
+ * vmw_dummy_query_bo_create - create a bo to hold a dummy query result
+ *
+ * @dev_priv: A device private structure.
+ *
+ * This function creates a small buffer object that holds the query
+ * result for dummy queries emitted as query barriers.
+ * No interruptible waits are done within this function.
+ *
+ * Returns an error if bo creation fails.
+ */
+static int vmw_dummy_query_bo_create(struct vmw_private *dev_priv)
+{
+	return ttm_bo_create(&dev_priv->bdev,
+			     PAGE_SIZE,
+			     ttm_bo_type_device,
+			     &vmw_vram_sys_placement,
+			     0, 0, false, NULL,
+			     &dev_priv->dummy_query_bo);
 }
 
+
 static int vmw_request_device(struct vmw_private *dev_priv)
 {
 	int ret;
@@ -200,16 +294,42 @@ static int vmw_request_device(struct vmw_private *dev_priv)
 		DRM_ERROR("Unable to initialize FIFO.\n");
 		return ret;
 	}
+	vmw_fence_fifo_up(dev_priv->fman);
+	ret = vmw_dummy_query_bo_create(dev_priv);
+	if (unlikely(ret != 0))
+		goto out_no_query_bo;
+	vmw_dummy_query_bo_prepare(dev_priv);
 
 	return 0;
+
+out_no_query_bo:
+	vmw_fence_fifo_down(dev_priv->fman);
+	vmw_fifo_release(dev_priv, &dev_priv->fifo);
+	return ret;
 }
 
 static void vmw_release_device(struct vmw_private *dev_priv)
 {
+	/*
+	 * Previous destructions should've released
+	 * the pinned bo.
+	 */
+
+	BUG_ON(dev_priv->pinned_bo != NULL);
+
+	ttm_bo_unref(&dev_priv->dummy_query_bo);
+	vmw_fence_fifo_down(dev_priv->fman);
 	vmw_fifo_release(dev_priv, &dev_priv->fifo);
 }
 
-int vmw_3d_resource_inc(struct vmw_private *dev_priv)
+/**
+ * Increase the 3d resource refcount.
+ * If the count was prevously zero, initialize the fifo, switching to svga
+ * mode. Note that the master holds a ref as well, and may request an
+ * explicit switch to svga mode if fb is not running, using @unhide_svga.
+ */
+int vmw_3d_resource_inc(struct vmw_private *dev_priv,
+			bool unhide_svga)
 {
 	int ret = 0;
 
@@ -218,19 +338,42 @@ int vmw_3d_resource_inc(struct vmw_private *dev_priv)
 		ret = vmw_request_device(dev_priv);
 		if (unlikely(ret != 0))
 			--dev_priv->num_3d_resources;
+	} else if (unhide_svga) {
+		mutex_lock(&dev_priv->hw_mutex);
+		vmw_write(dev_priv, SVGA_REG_ENABLE,
+			  vmw_read(dev_priv, SVGA_REG_ENABLE) &
+			  ~SVGA_REG_ENABLE_HIDE);
+		mutex_unlock(&dev_priv->hw_mutex);
 	}
+
 	mutex_unlock(&dev_priv->release_mutex);
 	return ret;
 }
 
-
-void vmw_3d_resource_dec(struct vmw_private *dev_priv)
+/**
+ * Decrease the 3d resource refcount.
+ * If the count reaches zero, disable the fifo, switching to vga mode.
+ * Note that the master holds a refcount as well, and may request an
+ * explicit switch to vga mode when it releases its refcount to account
+ * for the situation of an X server vt switch to VGA with 3d resources
+ * active.
+ */
+void vmw_3d_resource_dec(struct vmw_private *dev_priv,
+			 bool hide_svga)
 {
 	int32_t n3d;
 
 	mutex_lock(&dev_priv->release_mutex);
 	if (unlikely(--dev_priv->num_3d_resources == 0))
 		vmw_release_device(dev_priv);
+	else if (hide_svga) {
+		mutex_lock(&dev_priv->hw_mutex);
+		vmw_write(dev_priv, SVGA_REG_ENABLE,
+			  vmw_read(dev_priv, SVGA_REG_ENABLE) |
+			  SVGA_REG_ENABLE_HIDE);
+		mutex_unlock(&dev_priv->hw_mutex);
+	}
+
 	n3d = (int32_t) dev_priv->num_3d_resources;
 	mutex_unlock(&dev_priv->release_mutex);
 
@@ -252,7 +395,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->dev = dev;
 	dev_priv->vmw_chipset = chipset;
-	dev_priv->last_read_sequence = (uint32_t) -100;
+	dev_priv->last_read_seqno = (uint32_t) -100;
 	mutex_init(&dev_priv->hw_mutex);
 	mutex_init(&dev_priv->cmdbuf_mutex);
 	mutex_init(&dev_priv->release_mutex);
@@ -263,8 +406,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	mutex_init(&dev_priv->init_mutex);
 	init_waitqueue_head(&dev_priv->fence_queue);
 	init_waitqueue_head(&dev_priv->fifo_queue);
-	atomic_set(&dev_priv->fence_queue_waiters, 0);
+	dev_priv->fence_queue_waiters = 0;
 	atomic_set(&dev_priv->fifo_queue_waiters, 0);
+	INIT_LIST_HEAD(&dev_priv->surface_lru);
+	dev_priv->used_memory_size = 0;
 
 	dev_priv->io_start = pci_resource_start(dev->pdev, 0);
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
@@ -285,6 +430,10 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 
 	dev_priv->capabilities = vmw_read(dev_priv, SVGA_REG_CAPABILITIES);
 
+	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
+	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
+	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
+	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
 	if (dev_priv->capabilities & SVGA_CAP_GMR) {
 		dev_priv->max_gmr_descriptors =
 			vmw_read(dev_priv,
@@ -292,11 +441,19 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		dev_priv->max_gmr_ids =
 			vmw_read(dev_priv, SVGA_REG_GMR_MAX_IDS);
 	}
-
-	dev_priv->vram_size = vmw_read(dev_priv, SVGA_REG_VRAM_SIZE);
-	dev_priv->mmio_size = vmw_read(dev_priv, SVGA_REG_MEM_SIZE);
-	dev_priv->fb_max_width = vmw_read(dev_priv, SVGA_REG_MAX_WIDTH);
-	dev_priv->fb_max_height = vmw_read(dev_priv, SVGA_REG_MAX_HEIGHT);
+	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
+		dev_priv->max_gmr_pages =
+			vmw_read(dev_priv, SVGA_REG_GMRS_MAX_PAGES);
+		dev_priv->memory_size =
+			vmw_read(dev_priv, SVGA_REG_MEMORY_SIZE);
+		dev_priv->memory_size -= dev_priv->vram_size;
+	} else {
+		/*
+		 * An arbitrary limit of 512MiB on surface
+		 * memory. But all HWV8 hardware supports GMR2.
+		 */
+		dev_priv->memory_size = 512*1024*1024;
+	}
 
 	mutex_unlock(&dev_priv->hw_mutex);
 
@@ -308,6 +465,12 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		DRM_INFO("Max GMR descriptors is %u\n",
 			 (unsigned)dev_priv->max_gmr_descriptors);
 	}
+	if (dev_priv->capabilities & SVGA_CAP_GMR2) {
+		DRM_INFO("Max number of GMR pages is %u\n",
+			 (unsigned)dev_priv->max_gmr_pages);
+		DRM_INFO("Max dedicated hypervisor surface memory is %u kiB\n",
+			 (unsigned)dev_priv->memory_size / 1024);
+	}
 	DRM_INFO("VRAM at 0x%08x size is %u kiB\n",
 		 dev_priv->vram_start, dev_priv->vram_size / 1024);
 	DRM_INFO("MMIO at 0x%08x size is %u kiB\n",
@@ -394,22 +557,34 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			goto out_no_device;
 		}
 	}
+
+	dev_priv->fman = vmw_fence_manager_init(dev_priv);
+	if (unlikely(dev_priv->fman == NULL))
+		goto out_no_fman;
+
+	/* Need to start the fifo to check if we can do screen objects */
+	ret = vmw_3d_resource_inc(dev_priv, true);
+	if (unlikely(ret != 0))
+		goto out_no_fifo;
+	vmw_kms_save_vga(dev_priv);
+
+	/* Start kms and overlay systems, needs fifo. */
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
 	vmw_overlay_init(dev_priv);
+
+	/* 3D Depends on Screen Objects being used. */
+	DRM_INFO("Detected %sdevice 3D availability.\n",
+		 vmw_fifo_have_3d(dev_priv) ?
+		 "" : "no ");
+
+	/* We might be done with the fifo now */
 	if (dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv);
-		if (unlikely(ret != 0))
-			goto out_no_fifo;
-		vmw_kms_save_vga(dev_priv);
 		vmw_fb_init(dev_priv);
-		DRM_INFO("%s", vmw_fifo_have_3d(dev_priv) ?
-			 "Detected device 3D availability.\n" :
-			 "Detected no device 3D availability.\n");
 	} else {
-		DRM_INFO("Delayed 3D detection since we're not "
-			 "running the device in SVGA mode yet.\n");
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK) {
@@ -426,15 +601,19 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	return 0;
 
 out_no_irq:
-	if (dev_priv->enable_fb) {
+	if (dev_priv->enable_fb)
 		vmw_fb_close(dev_priv);
-		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
-	}
-out_no_fifo:
 	vmw_overlay_close(dev_priv);
 	vmw_kms_close(dev_priv);
 out_no_kms:
+	/* We still have a 3D resource reference held */
+	if (dev_priv->enable_fb) {
+		vmw_kms_restore_vga(dev_priv);
+		vmw_3d_resource_dec(dev_priv, false);
+	}
+out_no_fifo:
+	vmw_fence_manager_takedown(dev_priv->fman);
+out_no_fman:
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -467,15 +646,18 @@ static int vmw_driver_unload(struct drm_device *dev)
 
 	unregister_pm_notifier(&dev_priv->pm_nb);
 
+	if (dev_priv->ctx.cmd_bounce)
+		vfree(dev_priv->ctx.cmd_bounce);
 	if (dev_priv->capabilities & SVGA_CAP_IRQMASK)
 		drm_irq_uninstall(dev_priv->dev);
 	if (dev_priv->enable_fb) {
 		vmw_fb_close(dev_priv);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, false);
 	}
 	vmw_kms_close(dev_priv);
 	vmw_overlay_close(dev_priv);
+	vmw_fence_manager_takedown(dev_priv->fman);
 	if (dev_priv->stealth)
 		pci_release_region(dev->pdev, 2);
 	else
@@ -646,7 +828,7 @@ static int vmw_master_set(struct drm_device *dev,
 	int ret = 0;
 
 	if (!dev_priv->enable_fb) {
-		ret = vmw_3d_resource_inc(dev_priv);
+		ret = vmw_3d_resource_inc(dev_priv, true);
 		if (unlikely(ret != 0))
 			return ret;
 		vmw_kms_save_vga(dev_priv);
@@ -688,7 +870,7 @@ out_no_active_lock:
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 	return ret;
 }
@@ -709,7 +891,7 @@ static void vmw_master_drop(struct drm_device *dev,
 
 	vmw_fp->locked_master = drm_master_get(file_priv->master);
 	ret = ttm_vt_lock(&vmaster->lock, false, vmw_fp->tfile);
-	vmw_kms_idle_workqueues(vmaster);
+	vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 
 	if (unlikely((ret != 0))) {
 		DRM_ERROR("Unable to lock TTM at VT switch.\n");
@@ -726,7 +908,7 @@ static void vmw_master_drop(struct drm_device *dev,
 		vmw_write(dev_priv, SVGA_REG_TRACES, 1);
 		mutex_unlock(&dev_priv->hw_mutex);
 		vmw_kms_restore_vga(dev_priv);
-		vmw_3d_resource_dec(dev_priv);
+		vmw_3d_resource_dec(dev_priv, true);
 	}
 
 	dev_priv->active_master = &dev_priv->fbdev_master;
@@ -761,6 +943,7 @@ static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
 		 * This empties VRAM and unbinds all GMR bindings.
 		 * Buffer contents is moved to swappable memory.
 		 */
+		vmw_execbuf_release_pinned_bo(dev_priv, false, 0);
 		ttm_bo_swapout_all(&dev_priv->bdev);
 
 		break;
@@ -835,7 +1018,7 @@ static int vmw_pm_prepare(struct device *kdev)
 	 */
 	dev_priv->suspended = true;
 	if (dev_priv->enable_fb)
-		vmw_3d_resource_dec(dev_priv);
+			vmw_3d_resource_dec(dev_priv, true);
 
 	if (dev_priv->num_3d_resources != 0) {
 
@@ -843,7 +1026,7 @@ static int vmw_pm_prepare(struct device *kdev)
 			 "while 3D resources are active.\n");
 
 		if (dev_priv->enable_fb)
-			vmw_3d_resource_inc(dev_priv);
+			vmw_3d_resource_inc(dev_priv, true);
 		dev_priv->suspended = false;
 		return -EBUSY;
 	}
@@ -862,7 +1045,7 @@ static void vmw_pm_complete(struct device *kdev)
 	 * start fifo.
 	 */
 	if (dev_priv->enable_fb)
-		vmw_3d_resource_inc(dev_priv);
+			vmw_3d_resource_inc(dev_priv, false);
 
 	dev_priv->suspended = false;
 }
@@ -886,6 +1069,8 @@ static struct drm_driver driver = {
 	.irq_uninstall = vmw_irq_uninstall,
 	.irq_handler = vmw_irq_handler,
 	.get_vblank_counter = vmw_get_vblank_counter,
+	.enable_vblank = vmw_enable_vblank,
+	.disable_vblank = vmw_disable_vblank,
 	.reclaim_buffers_locked = NULL,
 	.ioctls = vmw_ioctls,
 	.num_ioctls = DRM_ARRAY_SIZE(vmw_ioctls),
@@ -902,7 +1087,8 @@ static struct drm_driver driver = {
 		 .release = drm_release,
 		 .unlocked_ioctl = vmw_unlocked_ioctl,
 		 .mmap = vmw_mmap,
-		 .poll = drm_poll,
+		 .poll = vmw_fops_poll,
+		 .read = vmw_fops_read,
 		 .fasync = drm_fasync,
 #if defined(CONFIG_COMPAT)
 		 .compat_ioctl = drm_compat_ioctl,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 10fc01f69c40..30589d0aecd9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -38,20 +38,27 @@
 #include "ttm/ttm_lock.h"
 #include "ttm/ttm_execbuf_util.h"
 #include "ttm/ttm_module.h"
+#include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20100927"
-#define VMWGFX_DRIVER_MAJOR 1
-#define VMWGFX_DRIVER_MINOR 4
+#define VMWGFX_DRIVER_DATE "20111008"
+#define VMWGFX_DRIVER_MAJOR 2
+#define VMWGFX_DRIVER_MINOR 2
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
 #define VMWGFX_MAX_RELOCATIONS 2048
-#define VMWGFX_MAX_GMRS 2048
+#define VMWGFX_MAX_VALIDATIONS 2048
 #define VMWGFX_MAX_DISPLAYS 16
+#define VMWGFX_CMD_BOUNCE_INIT_SIZE 32768
 
 #define VMW_PL_GMR TTM_PL_PRIV0
 #define VMW_PL_FLAG_GMR TTM_PL_FLAG_PRIV0
 
+#define VMW_RES_CONTEXT ttm_driver_type0
+#define VMW_RES_SURFACE ttm_driver_type1
+#define VMW_RES_STREAM ttm_driver_type2
+#define VMW_RES_FENCE ttm_driver_type3
+
 struct vmw_fpriv {
 	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
@@ -72,9 +79,11 @@ struct vmw_resource {
 	int id;
 	enum ttm_object_type res_type;
 	bool avail;
+	void (*remove_from_lists) (struct vmw_resource *res);
 	void (*hw_destroy) (struct vmw_resource *res);
 	void (*res_free) (struct vmw_resource *res);
-
+	struct list_head validate_head;
+	struct list_head query_head; /* Protected by the cmdbuf mutex */
 	/* TODO is a generic snooper needed? */
 #if 0
 	void (*snoop)(struct vmw_resource *res,
@@ -90,8 +99,12 @@ struct vmw_cursor_snooper {
 	uint32_t *image;
 };
 
+struct vmw_framebuffer;
+struct vmw_surface_offset;
+
 struct vmw_surface {
 	struct vmw_resource res;
+	struct list_head lru_head; /* Protected by the resource lock */
 	uint32_t flags;
 	uint32_t format;
 	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
@@ -102,9 +115,12 @@ struct vmw_surface {
 
 	/* TODO so far just a extra pointer */
 	struct vmw_cursor_snooper snooper;
+	struct ttm_buffer_object *backup;
+	struct vmw_surface_offset *offsets;
+	uint32_t backup_size;
 };
 
-struct vmw_fence_queue {
+struct vmw_marker_queue {
 	struct list_head head;
 	struct timespec lag;
 	struct timespec lag_time;
@@ -115,16 +131,12 @@ struct vmw_fifo_state {
 	unsigned long reserved_size;
 	__le32 *dynamic_buffer;
 	__le32 *static_buffer;
-	__le32 *last_buffer;
-	uint32_t last_data_size;
-	uint32_t last_buffer_size;
-	bool last_buffer_add;
 	unsigned long static_buffer_size;
 	bool using_bounce_buffer;
 	uint32_t capabilities;
 	struct mutex fifo_mutex;
 	struct rw_semaphore rwsem;
-	struct vmw_fence_queue fence_queue;
+	struct vmw_marker_queue marker_queue;
 };
 
 struct vmw_relocation {
@@ -136,6 +148,8 @@ struct vmw_sw_context{
 	struct ida bo_list;
 	uint32_t last_cid;
 	bool cid_valid;
+	bool kernel; /**< is the called made from the kernel */
+	struct vmw_resource *cur_ctx;
 	uint32_t last_sid;
 	uint32_t sid_translation;
 	bool sid_valid;
@@ -143,8 +157,16 @@ struct vmw_sw_context{
 	struct list_head validate_nodes;
 	struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS];
 	uint32_t cur_reloc;
-	struct ttm_validate_buffer val_bufs[VMWGFX_MAX_GMRS];
+	struct ttm_validate_buffer val_bufs[VMWGFX_MAX_VALIDATIONS];
 	uint32_t cur_val_buf;
+	uint32_t *cmd_bounce;
+	uint32_t cmd_bounce_size;
+	struct list_head resource_list;
+	uint32_t fence_flags;
+	struct list_head query_list;
+	struct ttm_buffer_object *cur_query_bo;
+	uint32_t cur_query_cid;
+	bool query_cid_valid;
 };
 
 struct vmw_legacy_display;
@@ -185,6 +207,8 @@ struct vmw_private {
 	uint32_t capabilities;
 	uint32_t max_gmr_descriptors;
 	uint32_t max_gmr_ids;
+	uint32_t max_gmr_pages;
+	uint32_t memory_size;
 	bool has_gmr;
 	struct mutex hw_mutex;
 
@@ -195,12 +219,7 @@ struct vmw_private {
 	struct vmw_vga_topology_state vga_save[VMWGFX_MAX_DISPLAYS];
 	uint32_t vga_width;
 	uint32_t vga_height;
-	uint32_t vga_depth;
 	uint32_t vga_bpp;
-	uint32_t vga_pseudo;
-	uint32_t vga_red_mask;
-	uint32_t vga_green_mask;
-	uint32_t vga_blue_mask;
 	uint32_t vga_bpl;
 	uint32_t vga_pitchlock;
 
@@ -212,6 +231,7 @@ struct vmw_private {
 
 	void *fb_info;
 	struct vmw_legacy_display *ldu_priv;
+	struct vmw_screen_object_display *sou_priv;
 	struct vmw_overlay *overlay_priv;
 
 	/*
@@ -240,13 +260,16 @@ struct vmw_private {
 	 * Fencing and IRQs.
 	 */
 
-	atomic_t fence_seq;
+	atomic_t marker_seq;
 	wait_queue_head_t fence_queue;
 	wait_queue_head_t fifo_queue;
-	atomic_t fence_queue_waiters;
+	int fence_queue_waiters; /* Protected by hw_mutex */
+	int goal_queue_waiters; /* Protected by hw_mutex */
 	atomic_t fifo_queue_waiters;
-	uint32_t last_read_sequence;
+	uint32_t last_read_seqno;
 	spinlock_t irq_lock;
+	struct vmw_fence_manager *fman;
+	uint32_t irq_mask;
 
 	/*
 	 * Device state
@@ -285,6 +308,26 @@ struct vmw_private {
 
 	struct mutex release_mutex;
 	uint32_t num_3d_resources;
+
+	/*
+	 * Query processing. These members
+	 * are protected by the cmdbuf mutex.
+	 */
+
+	struct ttm_buffer_object *dummy_query_bo;
+	struct ttm_buffer_object *pinned_bo;
+	uint32_t query_cid;
+	bool dummy_query_bo_pinned;
+
+	/*
+	 * Surface swapping. The "surface_lru" list is protected by the
+	 * resource lock in order to be able to destroy a surface and take
+	 * it off the lru atomically. "used_memory_size" is currently
+	 * protected by the cmdbuf mutex for simplicity.
+	 */
+
+	struct list_head surface_lru;
+	uint32_t used_memory_size;
 };
 
 static inline struct vmw_private *vmw_priv(struct drm_device *dev)
@@ -319,8 +362,8 @@ static inline uint32_t vmw_read(struct vmw_private *dev_priv,
 	return val;
 }
 
-int vmw_3d_resource_inc(struct vmw_private *dev_priv);
-void vmw_3d_resource_dec(struct vmw_private *dev_priv);
+int vmw_3d_resource_inc(struct vmw_private *dev_priv, bool unhide_svga);
+void vmw_3d_resource_dec(struct vmw_private *dev_priv, bool hide_svga);
 
 /**
  * GMR utilities - vmwgfx_gmr.c
@@ -345,7 +388,8 @@ extern int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
 extern int vmw_context_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
-			     int id);
+			     int id,
+			     struct vmw_resource **p_res);
 extern void vmw_surface_res_free(struct vmw_resource *res);
 extern int vmw_surface_init(struct vmw_private *dev_priv,
 			    struct vmw_surface *srf,
@@ -363,6 +407,8 @@ extern int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
 extern int vmw_surface_check(struct vmw_private *dev_priv,
 			     struct ttm_object_file *tfile,
 			     uint32_t handle, int *id);
+extern int vmw_surface_validate(struct vmw_private *dev_priv,
+				struct vmw_surface *srf);
 extern void vmw_dmabuf_bo_free(struct ttm_buffer_object *bo);
 extern int vmw_dmabuf_init(struct vmw_private *dev_priv,
 			   struct vmw_dma_buffer *vmw_bo,
@@ -378,10 +424,6 @@ extern uint32_t vmw_dmabuf_validate_node(struct ttm_buffer_object *bo,
 extern void vmw_dmabuf_validate_clear(struct ttm_buffer_object *bo);
 extern int vmw_user_dmabuf_lookup(struct ttm_object_file *tfile,
 				  uint32_t id, struct vmw_dma_buffer **out);
-extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				       struct vmw_dma_buffer *bo);
-extern int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *bo);
 extern int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 				  struct drm_file *file_priv);
 extern int vmw_stream_unref_ioctl(struct drm_device *dev, void *data,
@@ -390,7 +432,30 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 				  struct ttm_object_file *tfile,
 				  uint32_t *inout_id,
 				  struct vmw_resource **out);
+extern void vmw_resource_unreserve(struct list_head *list);
 
+/**
+ * DMA buffer helper routines - vmwgfx_dmabuf.c
+ */
+extern int vmw_dmabuf_to_placement(struct vmw_private *vmw_priv,
+				   struct vmw_dma_buffer *bo,
+				   struct ttm_placement *placement,
+				   bool interruptible);
+extern int vmw_dmabuf_to_vram(struct vmw_private *dev_priv,
+			      struct vmw_dma_buffer *buf,
+			      bool pin, bool interruptible);
+extern int vmw_dmabuf_to_vram_or_gmr(struct vmw_private *dev_priv,
+				     struct vmw_dma_buffer *buf,
+				     bool pin, bool interruptible);
+extern int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
+				       struct vmw_dma_buffer *bo,
+				       bool pin, bool interruptible);
+extern int vmw_dmabuf_unpin(struct vmw_private *vmw_priv,
+			    struct vmw_dma_buffer *bo,
+			    bool interruptible);
+extern void vmw_bo_get_guest_ptr(const struct ttm_buffer_object *buf,
+				 SVGAGuestPtr *ptr);
+extern void vmw_bo_pin(struct ttm_buffer_object *bo, bool pin);
 
 /**
  * Misc Ioctl functionality - vmwgfx_ioctl.c
@@ -398,8 +463,16 @@ extern int vmw_user_stream_lookup(struct vmw_private *dev_priv,
 
 extern int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *file_priv);
-extern int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data,
+extern int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
+extern int vmw_present_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+extern int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+				      struct drm_file *file_priv);
+extern unsigned int vmw_fops_poll(struct file *filp,
+				  struct poll_table_struct *wait);
+extern ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+			     size_t count, loff_t *offset);
 
 /**
  * Fifo utilities - vmwgfx_fifo.c
@@ -412,11 +485,12 @@ extern void vmw_fifo_release(struct vmw_private *dev_priv,
 extern void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes);
 extern void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes);
 extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
-			       uint32_t *sequence);
+			       uint32_t *seqno);
 extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
-extern int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
+extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+				     uint32_t cid);
 
 /**
  * TTM glue - vmwgfx_ttm_glue.c
@@ -434,7 +508,10 @@ extern struct ttm_placement vmw_vram_placement;
 extern struct ttm_placement vmw_vram_ne_placement;
 extern struct ttm_placement vmw_vram_sys_placement;
 extern struct ttm_placement vmw_vram_gmr_placement;
+extern struct ttm_placement vmw_vram_gmr_ne_placement;
 extern struct ttm_placement vmw_sys_placement;
+extern struct ttm_placement vmw_evictable_placement;
+extern struct ttm_placement vmw_srf_placement;
 extern struct ttm_bo_driver vmw_bo_driver;
 extern int vmw_dma_quiescent(struct drm_device *dev);
 
@@ -444,45 +521,70 @@ extern int vmw_dma_quiescent(struct drm_device *dev);
 
 extern int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv);
+extern int vmw_execbuf_process(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       void __user *user_commands,
+			       void *kernel_commands,
+			       uint32_t command_size,
+			       uint64_t throttle_us,
+			       struct drm_vmw_fence_rep __user
+			       *user_fence_rep);
+
+extern void
+vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+			      bool only_on_cid_match, uint32_t cid);
+
+extern int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+				      struct vmw_private *dev_priv,
+				      struct vmw_fence_obj **p_fence,
+				      uint32_t *p_handle);
+extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+					struct vmw_fpriv *vmw_fp,
+					int ret,
+					struct drm_vmw_fence_rep __user
+					*user_fence_rep,
+					struct vmw_fence_obj *fence,
+					uint32_t fence_handle);
 
 /**
  * IRQs and wating - vmwgfx_irq.c
  */
 
 extern irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS);
-extern int vmw_wait_fence(struct vmw_private *dev_priv, bool lazy,
-			  uint32_t sequence, bool interruptible,
-			  unsigned long timeout);
+extern int vmw_wait_seqno(struct vmw_private *dev_priv, bool lazy,
+			     uint32_t seqno, bool interruptible,
+			     unsigned long timeout);
 extern void vmw_irq_preinstall(struct drm_device *dev);
 extern int vmw_irq_postinstall(struct drm_device *dev);
 extern void vmw_irq_uninstall(struct drm_device *dev);
-extern bool vmw_fence_signaled(struct vmw_private *dev_priv,
-			       uint32_t sequence);
-extern int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
+extern bool vmw_seqno_passed(struct vmw_private *dev_priv,
+				uint32_t seqno);
 extern int vmw_fallback_wait(struct vmw_private *dev_priv,
 			     bool lazy,
 			     bool fifo_idle,
-			     uint32_t sequence,
+			     uint32_t seqno,
 			     bool interruptible,
 			     unsigned long timeout);
-extern void vmw_update_sequence(struct vmw_private *dev_priv,
+extern void vmw_update_seqno(struct vmw_private *dev_priv,
 				struct vmw_fifo_state *fifo_state);
-
+extern void vmw_seqno_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_seqno_waiter_remove(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_add(struct vmw_private *dev_priv);
+extern void vmw_goal_waiter_remove(struct vmw_private *dev_priv);
 
 /**
- * Rudimentary fence objects currently used only for throttling -
- * vmwgfx_fence.c
+ * Rudimentary fence-like objects currently used only for throttling -
+ * vmwgfx_marker.c
  */
 
-extern void vmw_fence_queue_init(struct vmw_fence_queue *queue);
-extern void vmw_fence_queue_takedown(struct vmw_fence_queue *queue);
-extern int vmw_fence_push(struct vmw_fence_queue *queue,
-			  uint32_t sequence);
-extern int vmw_fence_pull(struct vmw_fence_queue *queue,
-			  uint32_t signaled_sequence);
+extern void vmw_marker_queue_init(struct vmw_marker_queue *queue);
+extern void vmw_marker_queue_takedown(struct vmw_marker_queue *queue);
+extern int vmw_marker_push(struct vmw_marker_queue *queue,
+			  uint32_t seqno);
+extern int vmw_marker_pull(struct vmw_marker_queue *queue,
+			  uint32_t signaled_seqno);
 extern int vmw_wait_lag(struct vmw_private *dev_priv,
-			struct vmw_fence_queue *queue, uint32_t us);
+			struct vmw_marker_queue *queue, uint32_t us);
 
 /**
  * Kernel framebuffer - vmwgfx_fb.c
@@ -508,16 +610,29 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf,
 			  struct ttm_object_file *tfile,
 			  struct ttm_buffer_object *bo,
 			  SVGA3dCmdHeader *header);
-void vmw_kms_write_svga(struct vmw_private *vmw_priv,
-			unsigned width, unsigned height, unsigned pitch,
-			unsigned bbp, unsigned depth);
-int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv);
+int vmw_kms_write_svga(struct vmw_private *vmw_priv,
+		       unsigned width, unsigned height, unsigned pitch,
+		       unsigned bpp, unsigned depth);
 void vmw_kms_idle_workqueues(struct vmw_master *vmaster);
 bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
 				uint32_t pitch,
 				uint32_t height);
 u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc);
+int vmw_enable_vblank(struct drm_device *dev, int crtc);
+void vmw_disable_vblank(struct drm_device *dev, int crtc);
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid, int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips);
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips);
 
 /**
  * Overlay control - vmwgfx_overlay.c
@@ -576,4 +691,8 @@ static inline struct vmw_dma_buffer *vmw_dmabuf_reference(struct vmw_dma_buffer
 	return NULL;
 }
 
+static inline struct ttm_mem_global *vmw_mem_glob(struct vmw_private *dev_priv)
+{
+	return (struct ttm_mem_global *) dev_priv->mem_global_ref.object;
+}
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 41b95ed6dbcd..40932fbdac0f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,10 +44,71 @@ static int vmw_cmd_ok(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static void vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
+					  struct vmw_resource **p_res)
+{
+	struct vmw_resource *res = *p_res;
+
+	if (list_empty(&res->validate_head)) {
+		list_add_tail(&res->validate_head, &sw_context->resource_list);
+		*p_res = NULL;
+	} else
+		vmw_resource_unreference(p_res);
+}
+
+/**
+ * vmw_bo_to_validate_list - add a bo to a validate list
+ *
+ * @sw_context: The software context used for this command submission batch.
+ * @bo: The buffer object to add.
+ * @fence_flags: Fence flags to be or'ed with any other fence flags for
+ * this buffer on this submission batch.
+ * @p_val_node: If non-NULL Will be updated with the validate node number
+ * on return.
+ *
+ * Returns -EINVAL if the limit of number of buffer objects per command
+ * submission is reached.
+ */
+static int vmw_bo_to_validate_list(struct vmw_sw_context *sw_context,
+				   struct ttm_buffer_object *bo,
+				   uint32_t fence_flags,
+				   uint32_t *p_val_node)
+{
+	uint32_t val_node;
+	struct ttm_validate_buffer *val_buf;
+
+	val_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
+
+	if (unlikely(val_node >= VMWGFX_MAX_VALIDATIONS)) {
+		DRM_ERROR("Max number of DMA buffers per submission"
+			  " exceeded.\n");
+		return -EINVAL;
+	}
+
+	val_buf = &sw_context->val_bufs[val_node];
+	if (unlikely(val_node == sw_context->cur_val_buf)) {
+		val_buf->new_sync_obj_arg = NULL;
+		val_buf->bo = ttm_bo_reference(bo);
+		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
+		++sw_context->cur_val_buf;
+	}
+
+	val_buf->new_sync_obj_arg = (void *)
+		((unsigned long) val_buf->new_sync_obj_arg | fence_flags);
+	sw_context->fence_flags |= fence_flags;
+
+	if (p_val_node)
+		*p_val_node = val_node;
+
+	return 0;
+}
+
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
 {
+	struct vmw_resource *ctx;
+
 	struct vmw_cid_cmd {
 		SVGA3dCmdHeader header;
 		__le32 cid;
@@ -58,7 +119,8 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 	if (likely(sw_context->cid_valid && cmd->cid == sw_context->last_cid))
 		return 0;
 
-	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid);
+	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid,
+				&ctx);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use context %u\n",
 			  (unsigned) cmd->cid);
@@ -67,6 +129,8 @@ static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 
 	sw_context->last_cid = cmd->cid;
 	sw_context->cid_valid = true;
+	sw_context->cur_ctx = ctx;
+	vmw_resource_to_validate_list(sw_context, &ctx);
 
 	return 0;
 }
@@ -75,29 +139,45 @@ static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     uint32_t *sid)
 {
+	struct vmw_surface *srf;
+	int ret;
+	struct vmw_resource *res;
+
 	if (*sid == SVGA3D_INVALID_ID)
 		return 0;
 
-	if (unlikely((!sw_context->sid_valid  ||
-		      *sid != sw_context->last_sid))) {
-		int real_id;
-		int ret = vmw_surface_check(dev_priv, sw_context->tfile,
-					    *sid, &real_id);
+	if (likely((sw_context->sid_valid  &&
+		      *sid == sw_context->last_sid))) {
+		*sid = sw_context->sid_translation;
+		return 0;
+	}
 
-		if (unlikely(ret != 0)) {
-			DRM_ERROR("Could ot find or use surface 0x%08x "
-				  "address 0x%08lx\n",
-				  (unsigned int) *sid,
-				  (unsigned long) sid);
-			return ret;
-		}
+	ret = vmw_user_surface_lookup_handle(dev_priv,
+					     sw_context->tfile,
+					     *sid, &srf);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could ot find or use surface 0x%08x "
+			  "address 0x%08lx\n",
+			  (unsigned int) *sid,
+			  (unsigned long) sid);
+		return ret;
+	}
 
-		sw_context->last_sid = *sid;
-		sw_context->sid_valid = true;
-		*sid = real_id;
-		sw_context->sid_translation = real_id;
-	} else
-		*sid = sw_context->sid_translation;
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Could not validate surface.\n");
+		vmw_surface_unreference(&srf);
+		return ret;
+	}
+
+	sw_context->last_sid = *sid;
+	sw_context->sid_valid = true;
+	sw_context->sid_translation = srf->res.id;
+	*sid = sw_context->sid_translation;
+
+	res = &srf->res;
+	vmw_resource_to_validate_list(sw_context, &res);
 
 	return 0;
 }
@@ -166,6 +246,12 @@ static int vmw_cmd_blt_surf_screen_check(struct vmw_private *dev_priv,
 	} *cmd;
 
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.srcImage.sid);
 }
 
@@ -178,10 +264,179 @@ static int vmw_cmd_present_check(struct vmw_private *dev_priv,
 		SVGA3dCmdPresent body;
 	} *cmd;
 
+
 	cmd = container_of(header, struct vmw_sid_cmd, header);
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA3d command: %u.\n", cmd->header.id);
+		return -EPERM;
+	}
+
 	return vmw_cmd_sid_check(dev_priv, sw_context, &cmd->body.sid);
 }
 
+/**
+ * vmw_query_bo_switch_prepare - Prepare to switch pinned buffer for queries.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context for the next query.
+ * @new_query_bo: The new buffer holding query results.
+ * @sw_context: The software context used for this command submission.
+ *
+ * This function checks whether @new_query_bo is suitable for holding
+ * query results, and if another buffer currently is pinned for query
+ * results. If so, the function prepares the state of @sw_context for
+ * switching pinned buffers after successful submission of the current
+ * command batch. It also checks whether we're using a new query context.
+ * In that case, it makes sure we emit a query barrier for the old
+ * context before the current query buffer is fenced.
+ */
+static int vmw_query_bo_switch_prepare(struct vmw_private *dev_priv,
+				       uint32_t cid,
+				       struct ttm_buffer_object *new_query_bo,
+				       struct vmw_sw_context *sw_context)
+{
+	int ret;
+	bool add_cid = false;
+	uint32_t cid_to_add;
+
+	if (unlikely(new_query_bo != sw_context->cur_query_bo)) {
+
+		if (unlikely(new_query_bo->num_pages > 4)) {
+			DRM_ERROR("Query buffer too large.\n");
+			return -EINVAL;
+		}
+
+		if (unlikely(sw_context->cur_query_bo != NULL)) {
+			BUG_ON(!sw_context->query_cid_valid);
+			add_cid = true;
+			cid_to_add = sw_context->cur_query_cid;
+			ret = vmw_bo_to_validate_list(sw_context,
+						      sw_context->cur_query_bo,
+						      DRM_VMW_FENCE_FLAG_EXEC,
+						      NULL);
+			if (unlikely(ret != 0))
+				return ret;
+		}
+		sw_context->cur_query_bo = new_query_bo;
+
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+
+	}
+
+	if (unlikely(cid != sw_context->cur_query_cid &&
+		     sw_context->query_cid_valid)) {
+		add_cid = true;
+		cid_to_add = sw_context->cur_query_cid;
+	}
+
+	sw_context->cur_query_cid = cid;
+	sw_context->query_cid_valid = true;
+
+	if (add_cid) {
+		struct vmw_resource *ctx = sw_context->cur_ctx;
+
+		if (list_empty(&ctx->query_head))
+			list_add_tail(&ctx->query_head,
+				      &sw_context->query_list);
+		ret = vmw_bo_to_validate_list(sw_context,
+					      dev_priv->dummy_query_bo,
+					      DRM_VMW_FENCE_FLAG_EXEC,
+					      NULL);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+	return 0;
+}
+
+
+/**
+ * vmw_query_bo_switch_commit - Finalize switching pinned query buffer
+ *
+ * @dev_priv: The device private structure.
+ * @sw_context: The software context used for this command submission batch.
+ *
+ * This function will check if we're switching query buffers, and will then,
+ * if no other query waits are issued this command submission batch,
+ * issue a dummy occlusion query wait used as a query barrier. When the fence
+ * object following that query wait has signaled, we are sure that all
+ * preseding queries have finished, and the old query buffer can be unpinned.
+ * However, since both the new query buffer and the old one are fenced with
+ * that fence, we can do an asynchronus unpin now, and be sure that the
+ * old query buffer won't be moved until the fence has signaled.
+ *
+ * As mentioned above, both the new - and old query buffers need to be fenced
+ * using a sequence emitted *after* calling this function.
+ */
+static void vmw_query_bo_switch_commit(struct vmw_private *dev_priv,
+				     struct vmw_sw_context *sw_context)
+{
+
+	struct vmw_resource *ctx, *next_ctx;
+	int ret;
+
+	/*
+	 * The validate list should still hold references to all
+	 * contexts here.
+	 */
+
+	list_for_each_entry_safe(ctx, next_ctx, &sw_context->query_list,
+				 query_head) {
+		list_del_init(&ctx->query_head);
+
+		BUG_ON(list_empty(&ctx->validate_head));
+
+		ret = vmw_fifo_emit_dummy_query(dev_priv, ctx->id);
+
+		if (unlikely(ret != 0))
+			DRM_ERROR("Out of fifo space for dummy query.\n");
+	}
+
+	if (dev_priv->pinned_bo != sw_context->cur_query_bo) {
+		if (dev_priv->pinned_bo) {
+			vmw_bo_pin(dev_priv->pinned_bo, false);
+			ttm_bo_unref(&dev_priv->pinned_bo);
+		}
+
+		vmw_bo_pin(sw_context->cur_query_bo, true);
+
+		/*
+		 * We pin also the dummy_query_bo buffer so that we
+		 * don't need to validate it when emitting
+		 * dummy queries in context destroy paths.
+		 */
+
+		vmw_bo_pin(dev_priv->dummy_query_bo, true);
+		dev_priv->dummy_query_bo_pinned = true;
+
+		dev_priv->query_cid = sw_context->cur_query_cid;
+		dev_priv->pinned_bo =
+			ttm_bo_reference(sw_context->cur_query_bo);
+	}
+}
+
+/**
+ * vmw_query_switch_backoff - clear query barrier list
+ * @sw_context: The sw context used for this submission batch.
+ *
+ * This function is used as part of an error path, where a previously
+ * set up list of query barriers needs to be cleared.
+ *
+ */
+static void vmw_query_switch_backoff(struct vmw_sw_context *sw_context)
+{
+	struct list_head *list, *next;
+
+	list_for_each_safe(list, next, &sw_context->query_list) {
+		list_del_init(list);
+	}
+}
+
 static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 				   struct vmw_sw_context *sw_context,
 				   SVGAGuestPtr *ptr,
@@ -191,8 +446,6 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	struct ttm_buffer_object *bo;
 	uint32_t handle = ptr->gmrId;
 	struct vmw_relocation *reloc;
-	uint32_t cur_validate_node;
-	struct ttm_validate_buffer *val_buf;
 	int ret;
 
 	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
@@ -212,22 +465,11 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	reloc = &sw_context->relocs[sw_context->cur_reloc++];
 	reloc->location = ptr;
 
-	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-	if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
-		DRM_ERROR("Max number of DMA buffers per submission"
-			  " exceeded.\n");
-		ret = -EINVAL;
+	ret = vmw_bo_to_validate_list(sw_context, bo, DRM_VMW_FENCE_FLAG_EXEC,
+				      &reloc->index);
+	if (unlikely(ret != 0))
 		goto out_no_reloc;
-	}
 
-	reloc->index = cur_validate_node;
-	if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
-		val_buf = &sw_context->val_bufs[cur_validate_node];
-		val_buf->bo = ttm_bo_reference(bo);
-		val_buf->new_sync_obj_arg = (void *) dev_priv;
-		list_add_tail(&val_buf->head, &sw_context->validate_nodes);
-		++sw_context->cur_val_buf;
-	}
 	*vmw_bo_p = vmw_bo;
 	return 0;
 
@@ -259,8 +501,11 @@ static int vmw_cmd_end_query(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		return ret;
 
+	ret = vmw_query_bo_switch_prepare(dev_priv, cmd->q.cid,
+					  &vmw_bo->base, sw_context);
+
 	vmw_dmabuf_unreference(&vmw_bo);
-	return 0;
+	return ret;
 }
 
 static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
@@ -273,6 +518,7 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		SVGA3dCmdWaitForQuery q;
 	} *cmd;
 	int ret;
+	struct vmw_resource *ctx;
 
 	cmd = container_of(header, struct vmw_query_cmd, header);
 	ret = vmw_cmd_cid_check(dev_priv, sw_context, header);
@@ -286,10 +532,19 @@ static int vmw_cmd_wait_query(struct vmw_private *dev_priv,
 		return ret;
 
 	vmw_dmabuf_unreference(&vmw_bo);
+
+	/*
+	 * This wait will act as a barrier for previous waits for this
+	 * context.
+	 */
+
+	ctx = sw_context->cur_ctx;
+	if (!list_empty(&ctx->query_head))
+		list_del_init(&ctx->query_head);
+
 	return 0;
 }
 
-
 static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		       struct vmw_sw_context *sw_context,
 		       SVGA3dCmdHeader *header)
@@ -302,6 +557,7 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		SVGA3dCmdSurfaceDMA dma;
 	} *cmd;
 	int ret;
+	struct vmw_resource *res;
 
 	cmd = container_of(header, struct vmw_dma_cmd, header);
 	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
@@ -318,18 +574,28 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 		goto out_no_reloc;
 	}
 
-	/**
+	ret = vmw_surface_validate(dev_priv, srf);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Culd not validate surface.\n");
+		goto out_no_validate;
+	}
+
+	/*
 	 * Patch command stream with device SID.
 	 */
-
 	cmd->dma.host.sid = srf->res.id;
 	vmw_kms_cursor_snoop(srf, sw_context->tfile, bo, header);
-	/**
-	 * FIXME: May deadlock here when called from the
-	 * command parsing code.
-	 */
-	vmw_surface_unreference(&srf);
 
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	res = &srf->res;
+	vmw_resource_to_validate_list(sw_context, &res);
+
+	return 0;
+
+out_no_validate:
+	vmw_surface_unreference(&srf);
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
 	return ret;
@@ -419,6 +685,71 @@ static int vmw_cmd_tex_state(struct vmw_private *dev_priv,
 	return 0;
 }
 
+static int vmw_cmd_check_define_gmrfb(struct vmw_private *dev_priv,
+				      struct vmw_sw_context *sw_context,
+				      void *buf)
+{
+	struct vmw_dma_buffer *vmw_bo;
+	int ret;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd = buf;
+
+	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
+				      &cmd->body.ptr,
+				      &vmw_bo);
+	if (unlikely(ret != 0))
+		return ret;
+
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	return ret;
+}
+
+static int vmw_cmd_check_not_3d(struct vmw_private *dev_priv,
+				struct vmw_sw_context *sw_context,
+				void *buf, uint32_t *size)
+{
+	uint32_t size_remaining = *size;
+	uint32_t cmd_id;
+
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	switch (cmd_id) {
+	case SVGA_CMD_UPDATE:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdUpdate);
+		break;
+	case SVGA_CMD_DEFINE_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdDefineGMRFB);
+		break;
+	case SVGA_CMD_BLIT_GMRFB_TO_SCREEN:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	case SVGA_CMD_BLIT_SCREEN_TO_GMRFB:
+		*size = sizeof(uint32_t) + sizeof(SVGAFifoCmdBlitGMRFBToScreen);
+		break;
+	default:
+		DRM_ERROR("Unsupported SVGA command: %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (*size > size_remaining) {
+		DRM_ERROR("Invalid SVGA command (size mismatch):"
+			  " %u.\n", cmd_id);
+		return -EINVAL;
+	}
+
+	if (unlikely(!sw_context->kernel)) {
+		DRM_ERROR("Kernel only SVGA command: %u.\n", cmd_id);
+		return -EPERM;
+	}
+
+	if (cmd_id == SVGA_CMD_DEFINE_GMRFB)
+		return vmw_cmd_check_define_gmrfb(dev_priv, sw_context, buf);
+
+	return 0;
+}
 
 typedef int (*vmw_cmd_func) (struct vmw_private *,
 			     struct vmw_sw_context *,
@@ -471,11 +802,11 @@ static int vmw_cmd_check(struct vmw_private *dev_priv,
 	SVGA3dCmdHeader *header = (SVGA3dCmdHeader *) buf;
 	int ret;
 
-	cmd_id = ((uint32_t *)buf)[0];
-	if (cmd_id == SVGA_CMD_UPDATE) {
-		*size = 5 << 2;
-		return 0;
-	}
+	cmd_id = le32_to_cpu(((uint32_t *)buf)[0]);
+	/* Handle any none 3D commands */
+	if (unlikely(cmd_id < SVGA_CMD_MAX))
+		return vmw_cmd_check_not_3d(dev_priv, sw_context, buf, size);
+
 
 	cmd_id = le32_to_cpu(header->id);
 	*size = le32_to_cpu(header->size) + sizeof(SVGA3dCmdHeader);
@@ -500,7 +831,8 @@ out_err:
 
 static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
-			     void *buf, uint32_t size)
+			     void *buf,
+			     uint32_t size)
 {
 	int32_t cur_size = size;
 	int ret;
@@ -550,7 +882,11 @@ static void vmw_apply_relocations(struct vmw_sw_context *sw_context)
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
 	struct ttm_validate_buffer *entry, *next;
+	struct vmw_resource *res, *res_next;
 
+	/*
+	 * Drop references to DMA buffers held during command submission.
+	 */
 	list_for_each_entry_safe(entry, next, &sw_context->validate_nodes,
 				 head) {
 		list_del(&entry->head);
@@ -559,6 +895,16 @@ static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 		sw_context->cur_val_buf--;
 	}
 	BUG_ON(sw_context->cur_val_buf != 0);
+
+	/*
+	 * Drop references to resources held during command submission.
+	 */
+	vmw_resource_unreserve(&sw_context->resource_list);
+	list_for_each_entry_safe(res, res_next, &sw_context->resource_list,
+				 validate_head) {
+		list_del_init(&res->validate_head);
+		vmw_resource_unreference(&res);
+	}
 }
 
 static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
@@ -566,6 +912,16 @@ static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
 {
 	int ret;
 
+
+	/*
+	 * Don't validate pinned buffers.
+	 */
+
+	if (bo == dev_priv->pinned_bo ||
+	    (bo == dev_priv->dummy_query_bo &&
+	     dev_priv->dummy_query_bo_pinned))
+		return 0;
+
 	/**
 	 * Put BO in VRAM if there is space, otherwise as a GMR.
 	 * If there is no space in VRAM and GMR ids are all used up,
@@ -602,57 +958,208 @@ static int vmw_validate_buffers(struct vmw_private *dev_priv,
 	return 0;
 }
 
-int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
+static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
+				 uint32_t size)
+{
+	if (likely(sw_context->cmd_bounce_size >= size))
+		return 0;
+
+	if (sw_context->cmd_bounce_size == 0)
+		sw_context->cmd_bounce_size = VMWGFX_CMD_BOUNCE_INIT_SIZE;
+
+	while (sw_context->cmd_bounce_size < size) {
+		sw_context->cmd_bounce_size =
+			PAGE_ALIGN(sw_context->cmd_bounce_size +
+				   (sw_context->cmd_bounce_size >> 1));
+	}
+
+	if (sw_context->cmd_bounce != NULL)
+		vfree(sw_context->cmd_bounce);
+
+	sw_context->cmd_bounce = vmalloc(sw_context->cmd_bounce_size);
+
+	if (sw_context->cmd_bounce == NULL) {
+		DRM_ERROR("Failed to allocate command bounce buffer.\n");
+		sw_context->cmd_bounce_size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_execbuf_fence_commands - create and submit a command stream fence
+ *
+ * Creates a fence object and submits a command stream marker.
+ * If this fails for some reason, We sync the fifo and return NULL.
+ * It is then safe to fence buffers with a NULL pointer.
+ *
+ * If @p_handle is not NULL @file_priv must also not be NULL. Creates
+ * a userspace handle if @p_handle is not NULL, otherwise not.
+ */
+
+int vmw_execbuf_fence_commands(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_fence_obj **p_fence,
+			       uint32_t *p_handle)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
-	struct drm_vmw_fence_rep fence_rep;
-	struct drm_vmw_fence_rep __user *user_fence_rep;
-	int ret;
-	void *user_cmd;
-	void *cmd;
 	uint32_t sequence;
-	struct vmw_sw_context *sw_context = &dev_priv->ctx;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	int ret;
+	bool synced = false;
 
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
+	/* p_handle implies file_priv. */
+	BUG_ON(p_handle != NULL && file_priv == NULL);
 
-	ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex);
+	ret = vmw_fifo_send_fence(dev_priv, &sequence);
 	if (unlikely(ret != 0)) {
-		ret = -ERESTARTSYS;
-		goto out_no_cmd_mutex;
+		DRM_ERROR("Fence submission error. Syncing.\n");
+		synced = true;
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving fifo space for commands.\n");
-		ret = -ENOMEM;
-		goto out_unlock;
+	if (p_handle != NULL)
+		ret = vmw_user_fence_create(file_priv, dev_priv->fman,
+					    sequence,
+					    DRM_VMW_FENCE_FLAG_EXEC,
+					    p_fence, p_handle);
+	else
+		ret = vmw_fence_create(dev_priv->fman, sequence,
+				       DRM_VMW_FENCE_FLAG_EXEC,
+				       p_fence);
+
+	if (unlikely(ret != 0 && !synced)) {
+		(void) vmw_fallback_wait(dev_priv, false, false,
+					 sequence, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+		*p_fence = NULL;
 	}
 
-	user_cmd = (void __user *)(unsigned long)arg->commands;
-	ret = copy_from_user(cmd, user_cmd, arg->command_size);
+	return 0;
+}
 
-	if (unlikely(ret != 0)) {
-		ret = -EFAULT;
-		DRM_ERROR("Failed copying commands.\n");
-		goto out_commit;
+/**
+ * vmw_execbuf_copy_fence_user - copy fence object information to
+ * user-space.
+ *
+ * @dev_priv: Pointer to a vmw_private struct.
+ * @vmw_fp: Pointer to the struct vmw_fpriv representing the calling file.
+ * @ret: Return value from fence object creation.
+ * @user_fence_rep: User space address of a struct drm_vmw_fence_rep to
+ * which the information should be copied.
+ * @fence: Pointer to the fenc object.
+ * @fence_handle: User-space fence handle.
+ *
+ * This function copies fence information to user-space. If copying fails,
+ * The user-space struct drm_vmw_fence_rep::error member is hopefully
+ * left untouched, and if it's preloaded with an -EFAULT by user-space,
+ * the error will hopefully be detected.
+ * Also if copying fails, user-space will be unable to signal the fence
+ * object so we wait for it immediately, and then unreference the
+ * user-space reference.
+ */
+void
+vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv,
+			    struct vmw_fpriv *vmw_fp,
+			    int ret,
+			    struct drm_vmw_fence_rep __user *user_fence_rep,
+			    struct vmw_fence_obj *fence,
+			    uint32_t fence_handle)
+{
+	struct drm_vmw_fence_rep fence_rep;
+
+	if (user_fence_rep == NULL)
+		return;
+
+	memset(&fence_rep, 0, sizeof(fence_rep));
+
+	fence_rep.error = ret;
+	if (ret == 0) {
+		BUG_ON(fence == NULL);
+
+		fence_rep.handle = fence_handle;
+		fence_rep.seqno = fence->seqno;
+		vmw_update_seqno(dev_priv, &dev_priv->fifo);
+		fence_rep.passed_seqno = dev_priv->last_read_seqno;
 	}
 
+	/*
+	 * copy_to_user errors will be detected by user space not
+	 * seeing fence_rep::error filled in. Typically
+	 * user-space would have pre-set that member to -EFAULT.
+	 */
+	ret = copy_to_user(user_fence_rep, &fence_rep,
+			   sizeof(fence_rep));
+
+	/*
+	 * User-space lost the fence object. We need to sync
+	 * and unreference the handle.
+	 */
+	if (unlikely(ret != 0) && (fence_rep.error == 0)) {
+		ttm_ref_object_base_unref(vmw_fp->tfile,
+					  fence_handle, TTM_REF_USAGE);
+		DRM_ERROR("Fence copy error. Syncing.\n");
+		(void) vmw_fence_obj_wait(fence, fence->signal_mask,
+					  false, false,
+					  VMW_FENCE_WAIT_TIMEOUT);
+	}
+}
+
+int vmw_execbuf_process(struct drm_file *file_priv,
+			struct vmw_private *dev_priv,
+			void __user *user_commands,
+			void *kernel_commands,
+			uint32_t command_size,
+			uint64_t throttle_us,
+			struct drm_vmw_fence_rep __user *user_fence_rep)
+{
+	struct vmw_sw_context *sw_context = &dev_priv->ctx;
+	struct vmw_fence_obj *fence;
+	uint32_t handle;
+	void *cmd;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev_priv->cmdbuf_mutex);
+	if (unlikely(ret != 0))
+		return -ERESTARTSYS;
+
+	if (kernel_commands == NULL) {
+		sw_context->kernel = false;
+
+		ret = vmw_resize_cmd_bounce(sw_context, command_size);
+		if (unlikely(ret != 0))
+			goto out_unlock;
+
+
+		ret = copy_from_user(sw_context->cmd_bounce,
+				     user_commands, command_size);
+
+		if (unlikely(ret != 0)) {
+			ret = -EFAULT;
+			DRM_ERROR("Failed copying commands.\n");
+			goto out_unlock;
+		}
+		kernel_commands = sw_context->cmd_bounce;
+	} else
+		sw_context->kernel = true;
+
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
 	sw_context->cid_valid = false;
 	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
+	sw_context->fence_flags = 0;
+	INIT_LIST_HEAD(&sw_context->query_list);
+	INIT_LIST_HEAD(&sw_context->resource_list);
+	sw_context->cur_query_bo = dev_priv->pinned_bo;
+	sw_context->cur_query_cid = dev_priv->query_cid;
+	sw_context->query_cid_valid = (dev_priv->pinned_bo != NULL);
 
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
 
-	ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
+	ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands,
+				command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
+
 	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
@@ -663,57 +1170,206 @@ int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 
 	vmw_apply_relocations(sw_context);
 
-	if (arg->throttle_us) {
-		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.fence_queue,
-				   arg->throttle_us);
+	if (throttle_us) {
+		ret = vmw_wait_lag(dev_priv, &dev_priv->fifo.marker_queue,
+				   throttle_us);
 
 		if (unlikely(ret != 0))
-			goto out_err;
+			goto out_throttle;
 	}
 
-	vmw_fifo_commit(dev_priv, arg->command_size);
-
-	ret = vmw_fifo_send_fence(dev_priv, &sequence);
+	cmd = vmw_fifo_reserve(dev_priv, command_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving fifo space for commands.\n");
+		ret = -ENOMEM;
+		goto out_throttle;
+	}
 
-	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
-				    (void *)(unsigned long) sequence);
-	vmw_clear_validations(sw_context);
-	mutex_unlock(&dev_priv->cmdbuf_mutex);
+	memcpy(cmd, kernel_commands, command_size);
+	vmw_fifo_commit(dev_priv, command_size);
 
+	vmw_query_bo_switch_commit(dev_priv, sw_context);
+	ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+					 &fence,
+					 (user_fence_rep) ? &handle : NULL);
 	/*
 	 * This error is harmless, because if fence submission fails,
-	 * vmw_fifo_send_fence will sync.
+	 * vmw_fifo_send_fence will sync. The error will be propagated to
+	 * user-space in @fence_rep
 	 */
 
 	if (ret != 0)
 		DRM_ERROR("Fence submission error. Syncing.\n");
 
-	fence_rep.error = ret;
-	fence_rep.fence_seq = (uint64_t) sequence;
-	fence_rep.pad64 = 0;
-
-	user_fence_rep = (struct drm_vmw_fence_rep __user *)
-	    (unsigned long)arg->fence_rep;
+	ttm_eu_fence_buffer_objects(&sw_context->validate_nodes,
+				    (void *) fence);
 
-	/*
-	 * copy_to_user errors will be detected by user space not
-	 * seeing fence_rep::error filled in.
-	 */
+	vmw_clear_validations(sw_context);
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret,
+				    user_fence_rep, fence, handle);
 
-	ret = copy_to_user(user_fence_rep, &fence_rep, sizeof(fence_rep));
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
 
-	vmw_kms_cursor_post_execbuf(dev_priv);
-	ttm_read_unlock(&vmaster->lock);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
 	return 0;
+
 out_err:
 	vmw_free_relocations(sw_context);
+out_throttle:
+	vmw_query_switch_backoff(sw_context);
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
 	vmw_clear_validations(sw_context);
-out_commit:
-	vmw_fifo_commit(dev_priv, 0);
 out_unlock:
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
-out_no_cmd_mutex:
+	return ret;
+}
+
+/**
+ * vmw_execbuf_unpin_panic - Idle the fifo and unpin the query buffer.
+ *
+ * @dev_priv: The device private structure.
+ *
+ * This function is called to idle the fifo and unpin the query buffer
+ * if the normal way to do this hits an error, which should typically be
+ * extremely rare.
+ */
+static void vmw_execbuf_unpin_panic(struct vmw_private *dev_priv)
+{
+	DRM_ERROR("Can't unpin query buffer. Trying to recover.\n");
+
+	(void) vmw_fallback_wait(dev_priv, false, true, 0, false, 10*HZ);
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+}
+
+
+/**
+ * vmw_execbuf_release_pinned_bo - Flush queries and unpin the pinned
+ * query bo.
+ *
+ * @dev_priv: The device private structure.
+ * @only_on_cid_match: Only flush and unpin if the current active query cid
+ * matches @cid.
+ * @cid: Optional context id to match.
+ *
+ * This function should be used to unpin the pinned query bo, or
+ * as a query barrier when we need to make sure that all queries have
+ * finished before the next fifo command. (For example on hardware
+ * context destructions where the hardware may otherwise leak unfinished
+ * queries).
+ *
+ * This function does not return any failure codes, but make attempts
+ * to do safe unpinning in case of errors.
+ *
+ * The function will synchronize on the previous query barrier, and will
+ * thus not finish until that barrier has executed.
+ */
+void vmw_execbuf_release_pinned_bo(struct vmw_private *dev_priv,
+				   bool only_on_cid_match, uint32_t cid)
+{
+	int ret = 0;
+	struct list_head validate_list;
+	struct ttm_validate_buffer pinned_val, query_val;
+	struct vmw_fence_obj *fence;
+
+	mutex_lock(&dev_priv->cmdbuf_mutex);
+
+	if (dev_priv->pinned_bo == NULL)
+		goto out_unlock;
+
+	if (only_on_cid_match && cid != dev_priv->query_cid)
+		goto out_unlock;
+
+	INIT_LIST_HEAD(&validate_list);
+
+	pinned_val.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	pinned_val.bo = ttm_bo_reference(dev_priv->pinned_bo);
+	list_add_tail(&pinned_val.head, &validate_list);
+
+	query_val.new_sync_obj_arg = pinned_val.new_sync_obj_arg;
+	query_val.bo = ttm_bo_reference(dev_priv->dummy_query_bo);
+	list_add_tail(&query_val.head, &validate_list);
+
+	do {
+		ret = ttm_eu_reserve_buffers(&validate_list);
+	} while (ret == -ERESTARTSYS);
+
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_reserve;
+	}
+
+	ret = vmw_fifo_emit_dummy_query(dev_priv, dev_priv->query_cid);
+	if (unlikely(ret != 0)) {
+		vmw_execbuf_unpin_panic(dev_priv);
+		goto out_no_emit;
+	}
+
+	vmw_bo_pin(dev_priv->pinned_bo, false);
+	vmw_bo_pin(dev_priv->dummy_query_bo, false);
+	dev_priv->dummy_query_bo_pinned = false;
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv, &fence, NULL);
+	ttm_eu_fence_buffer_objects(&validate_list, (void *) fence);
+
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+
+out_unlock:
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+	return;
+
+out_no_emit:
+	ttm_eu_backoff_reservation(&validate_list);
+out_no_reserve:
+	ttm_bo_unref(&query_val.bo);
+	ttm_bo_unref(&pinned_val.bo);
+	ttm_bo_unref(&dev_priv->pinned_bo);
+	mutex_unlock(&dev_priv->cmdbuf_mutex);
+}
+
+
+int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_execbuf_arg *arg = (struct drm_vmw_execbuf_arg *)data;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	int ret;
+
+	/*
+	 * This will allow us to extend the ioctl argument while
+	 * maintaining backwards compatibility:
+	 * We take different code paths depending on the value of
+	 * arg->version.
+	 */
+
+	if (unlikely(arg->version != DRM_VMW_EXECBUF_VERSION)) {
+		DRM_ERROR("Incorrect execbuf version.\n");
+		DRM_ERROR("You're running outdated experimental "
+			  "vmwgfx user-space drivers.");
+		return -EINVAL;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv,
+				  (void __user *)(unsigned long)arg->commands,
+				  NULL, arg->command_size, arg->throttle_us,
+				  (void __user *)(unsigned long)arg->fence_rep);
+
+	if (unlikely(ret != 0))
+		goto out_unlock;
+
+	vmw_kms_cursor_post_execbuf(dev_priv);
+
+out_unlock:
 	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index bfab60c938ac..070797b7b03a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -158,10 +158,14 @@ static int vmw_fb_set_par(struct fb_info *info)
 {
 	struct vmw_fb_par *par = info->par;
 	struct vmw_private *vmw_priv = par->vmw_priv;
+	int ret;
+
+	ret = vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres,
+				 info->fix.line_length,
+				 par->bpp, par->depth);
+	if (ret)
+		return ret;
 
-	vmw_kms_write_svga(vmw_priv, info->var.xres, info->var.yres,
-			   info->fix.line_length,
-			   par->bpp, par->depth);
 	if (vmw_priv->capabilities & SVGA_CAP_DISPLAY_TOPOLOGY) {
 		/* TODO check if pitch and offset changes */
 		vmw_write(vmw_priv, SVGA_REG_NUM_GUEST_DISPLAYS, 1);
@@ -405,14 +409,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	struct fb_info *info;
 	unsigned initial_width, initial_height;
 	unsigned fb_width, fb_height;
-	unsigned fb_bbp, fb_depth, fb_offset, fb_pitch, fb_size;
+	unsigned fb_bpp, fb_depth, fb_offset, fb_pitch, fb_size;
 	int ret;
 
 	/* XXX These shouldn't be hardcoded. */
 	initial_width = 800;
 	initial_height = 600;
 
-	fb_bbp = 32;
+	fb_bpp = 32;
 	fb_depth = 24;
 
 	/* XXX As shouldn't these be as well. */
@@ -422,7 +426,7 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	initial_width = min(fb_width, initial_width);
 	initial_height = min(fb_height, initial_height);
 
-	fb_pitch = fb_width * fb_bbp / 8;
+	fb_pitch = fb_width * fb_bpp / 8;
 	fb_size = fb_pitch * fb_height;
 	fb_offset = vmw_read(vmw_priv, SVGA_REG_FB_OFFSET);
 
@@ -437,7 +441,7 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	par = info->par;
 	par->vmw_priv = vmw_priv;
 	par->depth = fb_depth;
-	par->bpp = fb_bbp;
+	par->bpp = fb_bpp;
 	par->vmalloc = NULL;
 	par->max_width = fb_width;
 	par->max_height = fb_height;
@@ -588,58 +592,6 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
 	return 0;
 }
 
-int vmw_dmabuf_from_vram(struct vmw_private *vmw_priv,
-			 struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	int ret = 0;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_validate(bo, &vmw_sys_placement, false, false, false);
-	ttm_bo_unreserve(bo);
-
-	return ret;
-}
-
-int vmw_dmabuf_to_start_of_vram(struct vmw_private *vmw_priv,
-				struct vmw_dma_buffer *vmw_bo)
-{
-	struct ttm_buffer_object *bo = &vmw_bo->base;
-	struct ttm_placement ne_placement = vmw_vram_ne_placement;
-	int ret = 0;
-
-	ne_placement.lpfn = bo->num_pages;
-
-	/* interuptable? */
-	ret = ttm_write_lock(&vmw_priv->active_master->lock, false);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, false, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err_unlock;
-
-	if (bo->mem.mem_type == TTM_PL_VRAM &&
-	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
-		(void) ttm_bo_validate(bo, &vmw_sys_placement, false,
-				       false, false);
-
-	ret = ttm_bo_validate(bo, &ne_placement, false, false, false);
-
-	/* Could probably bug on */
-	WARN_ON(bo->offset != 0);
-
-	ttm_bo_unreserve(bo);
-err_unlock:
-	ttm_write_unlock(&vmw_priv->active_master->lock);
-
-	return ret;
-}
-
 int vmw_fb_off(struct vmw_private *vmw_priv)
 {
 	struct fb_info *info;
@@ -661,7 +613,7 @@ int vmw_fb_off(struct vmw_private *vmw_priv)
 	par->bo_ptr = NULL;
 	ttm_bo_kunmap(&par->map);
 
-	vmw_dmabuf_from_vram(vmw_priv, par->vmw_bo);
+	vmw_dmabuf_unpin(vmw_priv, par->vmw_bo, false);
 
 	return 0;
 }
@@ -687,7 +639,7 @@ int vmw_fb_on(struct vmw_private *vmw_priv)
 	/* Make sure that all overlays are stoped when we take over */
 	vmw_overlay_stop_all(vmw_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo);
+	ret = vmw_dmabuf_to_start_of_vram(vmw_priv, par->vmw_bo, true, false);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("could not move buffer to start of VRAM\n");
 		goto err_no_buffer;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index 61eacc1b5ca3..15fb26088d68 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright (C) 2010 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -25,149 +25,1100 @@
  *
  **************************************************************************/
 
-
+#include "drmP.h"
 #include "vmwgfx_drv.h"
 
-struct vmw_fence {
-	struct list_head head;
-	uint32_t sequence;
-	struct timespec submitted;
+#define VMW_FENCE_WRAP (1 << 31)
+
+struct vmw_fence_manager {
+	int num_fence_objects;
+	struct vmw_private *dev_priv;
+	spinlock_t lock;
+	struct list_head fence_list;
+	struct work_struct work;
+	u32 user_fence_size;
+	u32 fence_size;
+	u32 event_fence_action_size;
+	bool fifo_down;
+	struct list_head cleanup_list;
+	uint32_t pending_actions[VMW_ACTION_MAX];
+	struct mutex goal_irq_mutex;
+	bool goal_irq_on; /* Protected by @goal_irq_mutex */
+	bool seqno_valid; /* Protected by @lock, and may not be set to true
+			     without the @goal_irq_mutex held. */
 };
 
-void vmw_fence_queue_init(struct vmw_fence_queue *queue)
+struct vmw_user_fence {
+	struct ttm_base_object base;
+	struct vmw_fence_obj fence;
+};
+
+/**
+ * struct vmw_event_fence_action - fence action that delivers a drm event.
+ *
+ * @e: A struct drm_pending_event that controls the event delivery.
+ * @action: A struct vmw_fence_action to hook up to a fence.
+ * @fence: A referenced pointer to the fence to keep it alive while @action
+ * hangs on it.
+ * @dev: Pointer to a struct drm_device so we can access the event stuff.
+ * @kref: Both @e and @action has destructors, so we need to refcount.
+ * @size: Size accounted for this object.
+ * @tv_sec: If non-null, the variable pointed to will be assigned
+ * current time tv_sec val when the fence signals.
+ * @tv_usec: Must be set if @tv_sec is set, and the variable pointed to will
+ * be assigned the current time tv_usec val when the fence signals.
+ */
+struct vmw_event_fence_action {
+	struct drm_pending_event e;
+	struct vmw_fence_action action;
+	struct vmw_fence_obj *fence;
+	struct drm_device *dev;
+	struct kref kref;
+	uint32_t size;
+	uint32_t *tv_sec;
+	uint32_t *tv_usec;
+};
+
+/**
+ * Note on fencing subsystem usage of irqs:
+ * Typically the vmw_fences_update function is called
+ *
+ * a) When a new fence seqno has been submitted by the fifo code.
+ * b) On-demand when we have waiters. Sleeping waiters will switch on the
+ * ANY_FENCE irq and call vmw_fences_update function each time an ANY_FENCE
+ * irq is received. When the last fence waiter is gone, that IRQ is masked
+ * away.
+ *
+ * In situations where there are no waiters and we don't submit any new fences,
+ * fence objects may not be signaled. This is perfectly OK, since there are
+ * no consumers of the signaled data, but that is NOT ok when there are fence
+ * actions attached to a fence. The fencing subsystem then makes use of the
+ * FENCE_GOAL irq and sets the fence goal seqno to that of the next fence
+ * which has an action attached, and each time vmw_fences_update is called,
+ * the subsystem makes sure the fence goal seqno is updated.
+ *
+ * The fence goal seqno irq is on as long as there are unsignaled fence
+ * objects with actions attached to them.
+ */
+
+static void vmw_fence_obj_destroy_locked(struct kref *kref)
+{
+	struct vmw_fence_obj *fence =
+		container_of(kref, struct vmw_fence_obj, kref);
+
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned int num_fences;
+
+	list_del_init(&fence->head);
+	num_fences = --fman->num_fence_objects;
+	spin_unlock_irq(&fman->lock);
+	if (fence->destroy)
+		fence->destroy(fence);
+	else
+		kfree(fence);
+
+	spin_lock_irq(&fman->lock);
+}
+
+
+/**
+ * Execute signal actions on fences recently signaled.
+ * This is done from a workqueue so we don't have to execute
+ * signal actions from atomic context.
+ */
+
+static void vmw_fence_work_func(struct work_struct *work)
 {
-	INIT_LIST_HEAD(&queue->head);
-	queue->lag = ns_to_timespec(0);
-	getrawmonotonic(&queue->lag_time);
-	spin_lock_init(&queue->lock);
+	struct vmw_fence_manager *fman =
+		container_of(work, struct vmw_fence_manager, work);
+	struct list_head list;
+	struct vmw_fence_action *action, *next_action;
+	bool seqno_valid;
+
+	do {
+		INIT_LIST_HEAD(&list);
+		mutex_lock(&fman->goal_irq_mutex);
+
+		spin_lock_irq(&fman->lock);
+		list_splice_init(&fman->cleanup_list, &list);
+		seqno_valid = fman->seqno_valid;
+		spin_unlock_irq(&fman->lock);
+
+		if (!seqno_valid && fman->goal_irq_on) {
+			fman->goal_irq_on = false;
+			vmw_goal_waiter_remove(fman->dev_priv);
+		}
+		mutex_unlock(&fman->goal_irq_mutex);
+
+		if (list_empty(&list))
+			return;
+
+		/*
+		 * At this point, only we should be able to manipulate the
+		 * list heads of the actions we have on the private list.
+		 * hence fman::lock not held.
+		 */
+
+		list_for_each_entry_safe(action, next_action, &list, head) {
+			list_del_init(&action->head);
+			if (action->cleanup)
+				action->cleanup(action);
+		}
+	} while (1);
 }
 
-void vmw_fence_queue_takedown(struct vmw_fence_queue *queue)
+struct vmw_fence_manager *vmw_fence_manager_init(struct vmw_private *dev_priv)
 {
-	struct vmw_fence *fence, *next;
+	struct vmw_fence_manager *fman = kzalloc(sizeof(*fman), GFP_KERNEL);
 
-	spin_lock(&queue->lock);
-	list_for_each_entry_safe(fence, next, &queue->head, head) {
-		kfree(fence);
-	}
-	spin_unlock(&queue->lock);
+	if (unlikely(fman == NULL))
+		return NULL;
+
+	fman->dev_priv = dev_priv;
+	spin_lock_init(&fman->lock);
+	INIT_LIST_HEAD(&fman->fence_list);
+	INIT_LIST_HEAD(&fman->cleanup_list);
+	INIT_WORK(&fman->work, &vmw_fence_work_func);
+	fman->fifo_down = true;
+	fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence));
+	fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj));
+	fman->event_fence_action_size =
+		ttm_round_pot(sizeof(struct vmw_event_fence_action));
+	mutex_init(&fman->goal_irq_mutex);
+
+	return fman;
 }
 
-int vmw_fence_push(struct vmw_fence_queue *queue,
-		   uint32_t sequence)
+void vmw_fence_manager_takedown(struct vmw_fence_manager *fman)
 {
-	struct vmw_fence *fence = kmalloc(sizeof(*fence), GFP_KERNEL);
+	unsigned long irq_flags;
+	bool lists_empty;
 
-	if (unlikely(!fence))
-		return -ENOMEM;
+	(void) cancel_work_sync(&fman->work);
 
-	fence->sequence = sequence;
-	getrawmonotonic(&fence->submitted);
-	spin_lock(&queue->lock);
-	list_add_tail(&fence->head, &queue->head);
-	spin_unlock(&queue->lock);
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	lists_empty = list_empty(&fman->fence_list) &&
+		list_empty(&fman->cleanup_list);
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
 
-	return 0;
+	BUG_ON(!lists_empty);
+	kfree(fman);
 }
 
-int vmw_fence_pull(struct vmw_fence_queue *queue,
-		   uint32_t signaled_sequence)
+static int vmw_fence_obj_init(struct vmw_fence_manager *fman,
+			      struct vmw_fence_obj *fence,
+			      u32 seqno,
+			      uint32_t mask,
+			      void (*destroy) (struct vmw_fence_obj *fence))
 {
-	struct vmw_fence *fence, *next;
-	struct timespec now;
-	bool updated = false;
+	unsigned long irq_flags;
+	unsigned int num_fences;
+	int ret = 0;
 
-	spin_lock(&queue->lock);
-	getrawmonotonic(&now);
+	fence->seqno = seqno;
+	INIT_LIST_HEAD(&fence->seq_passed_actions);
+	fence->fman = fman;
+	fence->signaled = 0;
+	fence->signal_mask = mask;
+	kref_init(&fence->kref);
+	fence->destroy = destroy;
+	init_waitqueue_head(&fence->queue);
 
-	if (list_empty(&queue->head)) {
-		queue->lag = ns_to_timespec(0);
-		queue->lag_time = now;
-		updated = true;
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	if (unlikely(fman->fifo_down)) {
+		ret = -EBUSY;
 		goto out_unlock;
 	}
+	list_add_tail(&fence->head, &fman->fence_list);
+	num_fences = ++fman->num_fence_objects;
 
-	list_for_each_entry_safe(fence, next, &queue->head, head) {
-		if (signaled_sequence - fence->sequence > (1 << 30))
-			continue;
+out_unlock:
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+	return ret;
 
-		queue->lag = timespec_sub(now, fence->submitted);
-		queue->lag_time = now;
-		updated = true;
-		list_del(&fence->head);
-		kfree(fence);
+}
+
+struct vmw_fence_obj *vmw_fence_obj_reference(struct vmw_fence_obj *fence)
+{
+	if (unlikely(fence == NULL))
+		return NULL;
+
+	kref_get(&fence->kref);
+	return fence;
+}
+
+/**
+ * vmw_fence_obj_unreference
+ *
+ * Note that this function may not be entered with disabled irqs since
+ * it may re-enable them in the destroy function.
+ *
+ */
+void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p)
+{
+	struct vmw_fence_obj *fence = *fence_p;
+	struct vmw_fence_manager *fman;
+
+	if (unlikely(fence == NULL))
+		return;
+
+	fman = fence->fman;
+	*fence_p = NULL;
+	spin_lock_irq(&fman->lock);
+	BUG_ON(atomic_read(&fence->kref.refcount) == 0);
+	kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	spin_unlock_irq(&fman->lock);
+}
+
+void vmw_fences_perform_actions(struct vmw_fence_manager *fman,
+				struct list_head *list)
+{
+	struct vmw_fence_action *action, *next_action;
+
+	list_for_each_entry_safe(action, next_action, list, head) {
+		list_del_init(&action->head);
+		fman->pending_actions[action->type]--;
+		if (action->seq_passed != NULL)
+			action->seq_passed(action);
+
+		/*
+		 * Add the cleanup action to the cleanup list so that
+		 * it will be performed by a worker task.
+		 */
+
+		list_add_tail(&action->head, &fman->cleanup_list);
 	}
+}
+
+/**
+ * vmw_fence_goal_new_locked - Figure out a new device fence goal
+ * seqno if needed.
+ *
+ * @fman: Pointer to a fence manager.
+ * @passed_seqno: The seqno the device currently signals as passed.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when we have a new passed_seqno, and
+ * we might need to update the fence goal. It checks to see whether
+ * the current fence goal has already passed, and, in that case,
+ * scans through all unsignaled fences to get the next fence object with an
+ * action attached, and sets the seqno of that fence as a new fence goal.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman,
+				      u32 passed_seqno)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+	struct vmw_fence_obj *fence;
+
+	if (likely(!fman->seqno_valid))
+		return false;
+
+	fifo_mem = fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(passed_seqno - goal_seqno >= VMW_FENCE_WRAP))
+		return false;
+
+	fman->seqno_valid = false;
+	list_for_each_entry(fence, &fman->fence_list, head) {
+		if (!list_empty(&fence->seq_passed_actions)) {
+			fman->seqno_valid = true;
+			iowrite32(fence->seqno,
+				  fifo_mem + SVGA_FIFO_FENCE_GOAL);
+			break;
+		}
+	}
+
+	return true;
+}
 
-out_unlock:
-	spin_unlock(&queue->lock);
 
-	return (updated) ? 0 : -EBUSY;
+/**
+ * vmw_fence_goal_check_locked - Replace the device fence goal seqno if
+ * needed.
+ *
+ * @fence: Pointer to a struct vmw_fence_obj the seqno of which should be
+ * considered as a device fence goal.
+ *
+ * This function should be called with the fence manager lock held.
+ * It is typically called when an action has been attached to a fence to
+ * check whether the seqno of that fence should be used for a fence
+ * goal interrupt. This is typically needed if the current fence goal is
+ * invalid, or has a higher seqno than that of the current fence object.
+ *
+ * returns true if the device goal seqno was updated. False otherwise.
+ */
+static bool vmw_fence_goal_check_locked(struct vmw_fence_obj *fence)
+{
+	u32 goal_seqno;
+	__le32 __iomem *fifo_mem;
+
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC)
+		return false;
+
+	fifo_mem = fence->fman->dev_priv->mmio_virt;
+	goal_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	if (likely(fence->fman->seqno_valid &&
+		   goal_seqno - fence->seqno < VMW_FENCE_WRAP))
+		return false;
+
+	iowrite32(fence->seqno, fifo_mem + SVGA_FIFO_FENCE_GOAL);
+	fence->fman->seqno_valid = true;
+
+	return true;
 }
 
-static struct timespec vmw_timespec_add(struct timespec t1,
-					struct timespec t2)
+void vmw_fences_update(struct vmw_fence_manager *fman)
 {
-	t1.tv_sec += t2.tv_sec;
-	t1.tv_nsec += t2.tv_nsec;
-	if (t1.tv_nsec >= 1000000000L) {
-		t1.tv_sec += 1;
-		t1.tv_nsec -= 1000000000L;
+	unsigned long flags;
+	struct vmw_fence_obj *fence, *next_fence;
+	struct list_head action_list;
+	bool needs_rerun;
+	uint32_t seqno, new_seqno;
+	__le32 __iomem *fifo_mem = fman->dev_priv->mmio_virt;
+
+	seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+rerun:
+	spin_lock_irqsave(&fman->lock, flags);
+	list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) {
+		if (seqno - fence->seqno < VMW_FENCE_WRAP) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		} else
+			break;
 	}
 
-	return t1;
+	needs_rerun = vmw_fence_goal_new_locked(fman, seqno);
+
+	if (!list_empty(&fman->cleanup_list))
+		(void) schedule_work(&fman->work);
+	spin_unlock_irqrestore(&fman->lock, flags);
+
+	/*
+	 * Rerun if the fence goal seqno was updated, and the
+	 * hardware might have raced with that update, so that
+	 * we missed a fence_goal irq.
+	 */
+
+	if (unlikely(needs_rerun)) {
+		new_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+		if (new_seqno != seqno) {
+			seqno = new_seqno;
+			goto rerun;
+		}
+	}
+}
+
+bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+			    uint32_t flags)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	uint32_t signaled;
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	flags &= fence->signal_mask;
+	if ((signaled & flags) == flags)
+		return 1;
+
+	if ((signaled & DRM_VMW_FENCE_FLAG_EXEC) == 0)
+		vmw_fences_update(fman);
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	signaled = fence->signaled;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	return ((signaled & flags) == flags);
 }
 
-static struct timespec vmw_fifo_lag(struct vmw_fence_queue *queue)
+int vmw_fence_obj_wait(struct vmw_fence_obj *fence,
+		       uint32_t flags, bool lazy,
+		       bool interruptible, unsigned long timeout)
 {
-	struct timespec now;
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
+	long ret;
+
+	if (likely(vmw_fence_obj_signaled(fence, flags)))
+		return 0;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	vmw_seqno_waiter_add(dev_priv);
+
+	if (interruptible)
+		ret = wait_event_interruptible_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+	else
+		ret = wait_event_timeout
+			(fence->queue,
+			 vmw_fence_obj_signaled(fence, flags),
+			 timeout);
+
+	vmw_seqno_waiter_remove(dev_priv);
+
+	if (unlikely(ret == 0))
+		ret = -EBUSY;
+	else if (likely(ret > 0))
+		ret = 0;
+
+	return ret;
+}
+
+void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
+{
+	struct vmw_private *dev_priv = fence->fman->dev_priv;
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+}
+
+static void vmw_fence_destroy(struct vmw_fence_obj *fence)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(fence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->fence_size);
+}
+
+int vmw_fence_create(struct vmw_fence_manager *fman,
+		     uint32_t seqno,
+		     uint32_t mask,
+		     struct vmw_fence_obj **p_fence)
+{
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	struct vmw_fence_obj *fence;
+	int ret;
+
+	ret = ttm_mem_global_alloc(mem_glob, fman->fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (unlikely(fence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
+	}
+
+	ret = vmw_fence_obj_init(fman, fence, seqno, mask,
+				 vmw_fence_destroy);
+	if (unlikely(ret != 0))
+		goto out_err_init;
+
+	*p_fence = fence;
+	return 0;
+
+out_err_init:
+	kfree(fence);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->fence_size);
+	return ret;
+}
+
+
+static void vmw_user_fence_destroy(struct vmw_fence_obj *fence)
+{
+	struct vmw_user_fence *ufence =
+		container_of(fence, struct vmw_user_fence, fence);
+	struct vmw_fence_manager *fman = fence->fman;
+
+	kfree(ufence);
+	/*
+	 * Free kernel space accounting.
+	 */
+	ttm_mem_global_free(vmw_mem_glob(fman->dev_priv),
+			    fman->user_fence_size);
+}
+
+static void vmw_user_fence_base_release(struct ttm_base_object **p_base)
+{
+	struct ttm_base_object *base = *p_base;
+	struct vmw_user_fence *ufence =
+		container_of(base, struct vmw_user_fence, base);
+	struct vmw_fence_obj *fence = &ufence->fence;
+
+	*p_base = NULL;
+	vmw_fence_obj_unreference(&fence);
+}
+
+int vmw_user_fence_create(struct drm_file *file_priv,
+			  struct vmw_fence_manager *fman,
+			  uint32_t seqno,
+			  uint32_t mask,
+			  struct vmw_fence_obj **p_fence,
+			  uint32_t *p_handle)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_user_fence *ufence;
+	struct vmw_fence_obj *tmp;
+	struct ttm_mem_global *mem_glob = vmw_mem_glob(fman->dev_priv);
+	int ret;
+
+	/*
+	 * Kernel memory space accounting, since this object may
+	 * be created by a user-space request.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, fman->user_fence_size,
+				   false, false);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ufence = kzalloc(sizeof(*ufence), GFP_KERNEL);
+	if (unlikely(ufence == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_object;
+	}
+
+	ret = vmw_fence_obj_init(fman, &ufence->fence, seqno,
+				 mask, vmw_user_fence_destroy);
+	if (unlikely(ret != 0)) {
+		kfree(ufence);
+		goto out_no_object;
+	}
+
+	/*
+	 * The base object holds a reference which is freed in
+	 * vmw_user_fence_base_release.
+	 */
+	tmp = vmw_fence_obj_reference(&ufence->fence);
+	ret = ttm_base_object_init(tfile, &ufence->base, false,
+				   VMW_RES_FENCE,
+				   &vmw_user_fence_base_release, NULL);
 
-	spin_lock(&queue->lock);
-	getrawmonotonic(&now);
-	queue->lag = vmw_timespec_add(queue->lag,
-				      timespec_sub(now, queue->lag_time));
-	queue->lag_time = now;
-	spin_unlock(&queue->lock);
-	return queue->lag;
+
+	if (unlikely(ret != 0)) {
+		/*
+		 * Free the base object's reference
+		 */
+		vmw_fence_obj_unreference(&tmp);
+		goto out_err;
+	}
+
+	*p_fence = &ufence->fence;
+	*p_handle = ufence->base.hash.key;
+
+	return 0;
+out_err:
+	tmp = &ufence->fence;
+	vmw_fence_obj_unreference(&tmp);
+out_no_object:
+	ttm_mem_global_free(mem_glob, fman->user_fence_size);
+	return ret;
 }
 
 
-static bool vmw_lag_lt(struct vmw_fence_queue *queue,
-		       uint32_t us)
+/**
+ * vmw_fence_fifo_down - signal all unsignaled fence objects.
+ */
+
+void vmw_fence_fifo_down(struct vmw_fence_manager *fman)
+{
+	unsigned long irq_flags;
+	struct list_head action_list;
+	int ret;
+
+	/*
+	 * The list may be altered while we traverse it, so always
+	 * restart when we've released the fman->lock.
+	 */
+
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = true;
+	while (!list_empty(&fman->fence_list)) {
+		struct vmw_fence_obj *fence =
+			list_entry(fman->fence_list.prev, struct vmw_fence_obj,
+				   head);
+		kref_get(&fence->kref);
+		spin_unlock_irq(&fman->lock);
+
+		ret = vmw_fence_obj_wait(fence, fence->signal_mask,
+					 false, false,
+					 VMW_FENCE_WAIT_TIMEOUT);
+
+		if (unlikely(ret != 0)) {
+			list_del_init(&fence->head);
+			fence->signaled |= DRM_VMW_FENCE_FLAG_EXEC;
+			INIT_LIST_HEAD(&action_list);
+			list_splice_init(&fence->seq_passed_actions,
+					 &action_list);
+			vmw_fences_perform_actions(fman, &action_list);
+			wake_up_all(&fence->queue);
+		}
+
+		spin_lock_irq(&fman->lock);
+
+		BUG_ON(!list_empty(&fence->head));
+		kref_put(&fence->kref, vmw_fence_obj_destroy_locked);
+	}
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+}
+
+void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
 {
-	struct timespec lag, cond;
+	unsigned long irq_flags;
 
-	cond = ns_to_timespec((s64) us * 1000);
-	lag = vmw_fifo_lag(queue);
-	return (timespec_compare(&lag, &cond) < 1);
+	spin_lock_irqsave(&fman->lock, irq_flags);
+	fman->fifo_down = false;
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
 }
 
-int vmw_wait_lag(struct vmw_private *dev_priv,
-		 struct vmw_fence_queue *queue, uint32_t us)
+
+int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv)
 {
-	struct vmw_fence *fence;
-	uint32_t sequence;
+	struct drm_vmw_fence_wait_arg *arg =
+	    (struct drm_vmw_fence_wait_arg *)data;
+	unsigned long timeout;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	int ret;
+	uint64_t wait_timeout = ((uint64_t)arg->timeout_us * HZ);
+
+	/*
+	 * 64-bit division not present on 32-bit systems, so do an
+	 * approximation. (Divide by 1000000).
+	 */
+
+	wait_timeout = (wait_timeout >> 20) + (wait_timeout >> 24) -
+	  (wait_timeout >> 26);
+
+	if (!arg->cookie_valid) {
+		arg->cookie_valid = 1;
+		arg->kernel_cookie = jiffies + wait_timeout;
+	}
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Wait invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+
+	timeout = jiffies;
+	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie)) {
+		ret = ((vmw_fence_obj_signaled(fence, arg->flags)) ?
+		       0 : -EBUSY);
+		goto out;
+	}
+
+	timeout = (unsigned long)arg->kernel_cookie - timeout;
+
+	ret = vmw_fence_obj_wait(fence, arg->flags, arg->lazy, true, timeout);
+
+out:
+	ttm_base_object_unref(&base);
+
+	/*
+	 * Optionally unref the fence object.
+	 */
+
+	if (ret == 0 && (arg->wait_options & DRM_VMW_WAIT_OPTION_UNREF))
+		return ttm_ref_object_base_unref(tfile, arg->handle,
+						 TTM_REF_USAGE);
+	return ret;
+}
+
+int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_signaled_arg *arg =
+		(struct drm_vmw_fence_signaled_arg *) data;
+	struct ttm_base_object *base;
+	struct vmw_fence_obj *fence;
+	struct vmw_fence_manager *fman;
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	base = ttm_base_object_lookup(tfile, arg->handle);
+	if (unlikely(base == NULL)) {
+		printk(KERN_ERR "Fence signaled invalid fence object handle "
+		       "0x%08lx.\n",
+		       (unsigned long)arg->handle);
+		return -EINVAL;
+	}
+
+	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
+	fman = fence->fman;
+
+	arg->signaled = vmw_fence_obj_signaled(fence, arg->flags);
+	spin_lock_irq(&fman->lock);
+
+	arg->signaled_flags = fence->signaled;
+	arg->passed_seqno = dev_priv->last_read_seqno;
+	spin_unlock_irq(&fman->lock);
+
+	ttm_base_object_unref(&base);
+
+	return 0;
+}
+
+
+int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv)
+{
+	struct drm_vmw_fence_arg *arg =
+		(struct drm_vmw_fence_arg *) data;
+
+	return ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					 arg->handle,
+					 TTM_REF_USAGE);
+}
+
+/**
+ * vmw_event_fence_action_destroy
+ *
+ * @kref: The struct kref embedded in a struct vmw_event_fence_action.
+ *
+ * The vmw_event_fence_action destructor that may be called either after
+ * the fence action cleanup, or when the event is delivered.
+ * It frees both the vmw_event_fence_action struct and the actual
+ * event structure copied to user-space.
+ */
+static void vmw_event_fence_action_destroy(struct kref *kref)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(kref, struct vmw_event_fence_action, kref);
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(vmw_priv(eaction->dev));
+	uint32_t size = eaction->size;
+
+	kfree(eaction->e.event);
+	kfree(eaction);
+	ttm_mem_global_free(mem_glob, size);
+}
+
+
+/**
+ * vmw_event_fence_action_delivered
+ *
+ * @e: The struct drm_pending_event embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * The struct drm_pending_event destructor that is called by drm
+ * once the event is delivered. Since we don't know whether this function
+ * will be called before or after the fence action destructor, we
+ * free a refcount and destroy if it becomes zero.
+ */
+static void vmw_event_fence_action_delivered(struct drm_pending_event *e)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(e, struct vmw_event_fence_action, e);
+
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_event_fence_action_seq_passed
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is called when the seqno of the fence where @action is
+ * attached has passed. It queues the event on the submitter's event list.
+ * This function is always called from atomic context, and may be called
+ * from irq context. It ups a refcount reflecting that we now have two
+ * destructors.
+ */
+static void vmw_event_fence_action_seq_passed(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+	struct drm_device *dev = eaction->dev;
+	struct drm_file *file_priv = eaction->e.file_priv;
+	unsigned long irq_flags;
+
+	kref_get(&eaction->kref);
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	if (likely(eaction->tv_sec != NULL)) {
+		struct timeval tv;
+
+		do_gettimeofday(&tv);
+		*eaction->tv_sec = tv.tv_sec;
+		*eaction->tv_usec = tv.tv_usec;
+	}
+
+	list_add_tail(&eaction->e.link, &file_priv->event_list);
+	wake_up_all(&file_priv->event_wait);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+}
+
+/**
+ * vmw_event_fence_action_cleanup
+ *
+ * @action: The struct vmw_fence_action embedded in a struct
+ * vmw_event_fence_action.
+ *
+ * This function is the struct vmw_fence_action destructor. It's typically
+ * called from a workqueue.
+ */
+static void vmw_event_fence_action_cleanup(struct vmw_fence_action *action)
+{
+	struct vmw_event_fence_action *eaction =
+		container_of(action, struct vmw_event_fence_action, action);
+
+	vmw_fence_obj_unreference(&eaction->fence);
+	kref_put(&eaction->kref, vmw_event_fence_action_destroy);
+}
+
+
+/**
+ * vmw_fence_obj_add_action - Add an action to a fence object.
+ *
+ * @fence - The fence object.
+ * @action - The action to add.
+ *
+ * Note that the action callbacks may be executed before this function
+ * returns.
+ */
+void vmw_fence_obj_add_action(struct vmw_fence_obj *fence,
+			      struct vmw_fence_action *action)
+{
+	struct vmw_fence_manager *fman = fence->fman;
+	unsigned long irq_flags;
+	bool run_update = false;
+
+	mutex_lock(&fman->goal_irq_mutex);
+	spin_lock_irqsave(&fman->lock, irq_flags);
+
+	fman->pending_actions[action->type]++;
+	if (fence->signaled & DRM_VMW_FENCE_FLAG_EXEC) {
+		struct list_head action_list;
 
-	while (!vmw_lag_lt(queue, us)) {
-		spin_lock(&queue->lock);
-		if (list_empty(&queue->head))
-			sequence = atomic_read(&dev_priv->fence_seq);
-		else {
-			fence = list_first_entry(&queue->head,
-						 struct vmw_fence, head);
-			sequence = fence->sequence;
+		INIT_LIST_HEAD(&action_list);
+		list_add_tail(&action->head, &action_list);
+		vmw_fences_perform_actions(fman, &action_list);
+	} else {
+		list_add_tail(&action->head, &fence->seq_passed_actions);
+
+		/*
+		 * This function may set fman::seqno_valid, so it must
+		 * be run with the goal_irq_mutex held.
+		 */
+		run_update = vmw_fence_goal_check_locked(fence);
+	}
+
+	spin_unlock_irqrestore(&fman->lock, irq_flags);
+
+	if (run_update) {
+		if (!fman->goal_irq_on) {
+			fman->goal_irq_on = true;
+			vmw_goal_waiter_add(fman->dev_priv);
 		}
-		spin_unlock(&queue->lock);
+		vmw_fences_update(fman);
+	}
+	mutex_unlock(&fman->goal_irq_mutex);
 
-		ret = vmw_wait_fence(dev_priv, false, sequence, true,
-				     3*HZ);
+}
 
-		if (unlikely(ret != 0))
-			return ret;
+/**
+ * vmw_event_fence_action_create - Post an event for sending when a fence
+ * object seqno has passed.
+ *
+ * @file_priv: The file connection on which the event should be posted.
+ * @fence: The fence object on which to post the event.
+ * @event: Event to be posted. This event should've been alloced
+ * using k[mz]alloc, and should've been completely initialized.
+ * @interruptible: Interruptible waits if possible.
+ *
+ * As a side effect, the object pointed to by @event may have been
+ * freed when this function returns. If this function returns with
+ * an error code, the caller needs to free that object.
+ */
+
+int vmw_event_fence_action_create(struct drm_file *file_priv,
+				  struct vmw_fence_obj *fence,
+				  struct drm_event *event,
+				  uint32_t *tv_sec,
+				  uint32_t *tv_usec,
+				  bool interruptible)
+{
+	struct vmw_event_fence_action *eaction;
+	struct ttm_mem_global *mem_glob =
+		vmw_mem_glob(fence->fman->dev_priv);
+	struct vmw_fence_manager *fman = fence->fman;
+	uint32_t size = fman->event_fence_action_size +
+		ttm_round_pot(event->length);
+	int ret;
+
+	/*
+	 * Account for internal structure size as well as the
+	 * event size itself.
+	 */
+
+	ret = ttm_mem_global_alloc(mem_glob, size, false, interruptible);
+	if (unlikely(ret != 0))
+		return ret;
 
-		(void) vmw_fence_pull(queue, sequence);
+	eaction = kzalloc(sizeof(*eaction), GFP_KERNEL);
+	if (unlikely(eaction == NULL)) {
+		ttm_mem_global_free(mem_glob, size);
+		return -ENOMEM;
 	}
+
+	eaction->e.event = event;
+	eaction->e.file_priv = file_priv;
+	eaction->e.destroy = vmw_event_fence_action_delivered;
+
+	eaction->action.seq_passed = vmw_event_fence_action_seq_passed;
+	eaction->action.cleanup = vmw_event_fence_action_cleanup;
+	eaction->action.type = VMW_ACTION_EVENT;
+
+	eaction->fence = vmw_fence_obj_reference(fence);
+	eaction->dev = fman->dev_priv->dev;
+	eaction->size = size;
+	eaction->tv_sec = tv_sec;
+	eaction->tv_usec = tv_usec;
+
+	kref_init(&eaction->kref);
+	vmw_fence_obj_add_action(fence, &eaction->action);
+
 	return 0;
 }
 
+int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_fence_event_arg *arg =
+		(struct drm_vmw_fence_event_arg *) data;
+	struct vmw_fence_obj *fence = NULL;
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)(unsigned long)
+		arg->fence_rep;
+	uint32_t handle;
+	unsigned long irq_flags;
+	struct drm_vmw_event_fence *event;
+	int ret;
+
+	/*
+	 * Look up an existing fence object,
+	 * and if user-space wants a new reference,
+	 * add one.
+	 */
+	if (arg->handle) {
+		struct ttm_base_object *base =
+			ttm_base_object_lookup(vmw_fp->tfile, arg->handle);
 
+		if (unlikely(base == NULL)) {
+			DRM_ERROR("Fence event invalid fence object handle "
+				  "0x%08lx.\n",
+				  (unsigned long)arg->handle);
+			return -EINVAL;
+		}
+		fence = &(container_of(base, struct vmw_user_fence,
+				       base)->fence);
+		(void) vmw_fence_obj_reference(fence);
+
+		if (user_fence_rep != NULL) {
+			bool existed;
+
+			ret = ttm_ref_object_add(vmw_fp->tfile, base,
+						 TTM_REF_USAGE, &existed);
+			if (unlikely(ret != 0)) {
+				DRM_ERROR("Failed to reference a fence "
+					  "object.\n");
+				goto out_no_ref_obj;
+			}
+			handle = base->hash.key;
+		}
+		ttm_base_object_unref(&base);
+	}
+
+	/*
+	 * Create a new fence object.
+	 */
+	if (!fence) {
+		ret = vmw_execbuf_fence_commands(file_priv, dev_priv,
+						 &fence,
+						 (user_fence_rep) ?
+						 &handle : NULL);
+		if (unlikely(ret != 0)) {
+			DRM_ERROR("Fence event failed to create fence.\n");
+			return ret;
+		}
+	}
+
+	BUG_ON(fence == NULL);
+
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+
+	ret = (file_priv->event_space < sizeof(*event)) ? -EBUSY : 0;
+	if (likely(ret == 0))
+		file_priv->event_space -= sizeof(*event);
+
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Failed to allocate event space for this file.\n");
+		goto out_no_event_space;
+	}
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (unlikely(event == NULL)) {
+		DRM_ERROR("Failed to allocate an event.\n");
+		goto out_no_event;
+	}
+
+	event->base.type = DRM_VMW_EVENT_FENCE_SIGNALED;
+	event->base.length = sizeof(*event);
+	event->user_data = arg->user_data;
+
+	if (arg->flags & DRM_VMW_FE_FLAG_REQ_TIME)
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    &event->tv_sec,
+						    &event->tv_usec,
+						    true);
+	else
+		ret = vmw_event_fence_action_create(file_priv, fence,
+						    &event->base,
+						    NULL,
+						    NULL,
+						    true);
+
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Failed to attach event to fence.\n");
+		goto out_no_attach;
+	}
+
+	vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence,
+				    handle);
+	vmw_fence_obj_unreference(&fence);
+	return 0;
+out_no_attach:
+	kfree(event);
+out_no_event:
+	spin_lock_irqsave(&dev->event_lock, irq_flags);
+	file_priv->event_space += sizeof(*event);
+	spin_unlock_irqrestore(&dev->event_lock, irq_flags);
+out_no_event_space:
+	if (user_fence_rep != NULL)
+		ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
+					  handle, TTM_REF_USAGE);
+out_no_ref_obj:
+	vmw_fence_obj_unreference(&fence);
+	return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
new file mode 100644
index 000000000000..0854a2096b55
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.h
@@ -0,0 +1,113 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _VMWGFX_FENCE_H_
+
+#define VMW_FENCE_WAIT_TIMEOUT (5*HZ)
+
+struct vmw_private;
+
+struct vmw_fence_manager;
+
+/**
+ *
+ *
+ */
+enum vmw_action_type {
+	VMW_ACTION_EVENT = 0,
+	VMW_ACTION_MAX
+};
+
+struct vmw_fence_action {
+	struct list_head head;
+	enum vmw_action_type type;
+	void (*seq_passed) (struct vmw_fence_action *action);
+	void (*cleanup) (struct vmw_fence_action *action);
+};
+
+struct vmw_fence_obj {
+	struct kref kref;
+	u32 seqno;
+
+	struct vmw_fence_manager *fman;
+	struct list_head head;
+	uint32_t signaled;
+	uint32_t signal_mask;
+	struct list_head seq_passed_actions;
+	void (*destroy)(struct vmw_fence_obj *fence);
+	wait_queue_head_t queue;
+};
+
+extern struct vmw_fence_manager *
+vmw_fence_manager_init(struct vmw_private *dev_priv);
+
+extern void vmw_fence_manager_takedown(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_obj_unreference(struct vmw_fence_obj **fence_p);
+
+extern struct vmw_fence_obj *
+vmw_fence_obj_reference(struct vmw_fence_obj *fence);
+
+extern void vmw_fences_update(struct vmw_fence_manager *fman);
+
+extern bool vmw_fence_obj_signaled(struct vmw_fence_obj *fence,
+				   uint32_t flags);
+
+extern int vmw_fence_obj_wait(struct vmw_fence_obj *fence, uint32_t flags,
+			      bool lazy,
+			      bool interruptible, unsigned long timeout);
+
+extern void vmw_fence_obj_flush(struct vmw_fence_obj *fence);
+
+extern int vmw_fence_create(struct vmw_fence_manager *fman,
+			    uint32_t seqno,
+			    uint32_t mask,
+			    struct vmw_fence_obj **p_fence);
+
+extern int vmw_user_fence_create(struct drm_file *file_priv,
+				 struct vmw_fence_manager *fman,
+				 uint32_t sequence,
+				 uint32_t mask,
+				 struct vmw_fence_obj **p_fence,
+				 uint32_t *p_handle);
+
+extern void vmw_fence_fifo_up(struct vmw_fence_manager *fman);
+
+extern void vmw_fence_fifo_down(struct vmw_fence_manager *fman);
+
+extern int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
+				    struct drm_file *file_priv);
+
+extern int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
+					struct drm_file *file_priv);
+
+extern int vmw_fence_obj_unref_ioctl(struct drm_device *dev, void *data,
+				     struct drm_file *file_priv);
+extern int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
+				 struct drm_file *file_priv);
+
+#endif /* _VMWGFX_FENCE_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index 635c0ffee7fe..03bbc2a6f9a7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -45,7 +45,11 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
 	if (hwversion == 0)
 		return false;
 
-	if (hwversion < SVGA3D_HWVERSION_WS65_B1)
+	if (hwversion < SVGA3D_HWVERSION_WS8_B1)
+		return false;
+
+	/* Non-Screen Object path does not support surfaces */
+	if (!dev_priv->sou_priv)
 		return false;
 
 	return true;
@@ -72,22 +76,12 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	uint32_t max;
 	uint32_t min;
 	uint32_t dummy;
-	int ret;
 
 	fifo->static_buffer_size = VMWGFX_FIFO_STATIC_SIZE;
 	fifo->static_buffer = vmalloc(fifo->static_buffer_size);
 	if (unlikely(fifo->static_buffer == NULL))
 		return -ENOMEM;
 
-	fifo->last_buffer_size = VMWGFX_FIFO_STATIC_SIZE;
-	fifo->last_data_size = 0;
-	fifo->last_buffer_add = false;
-	fifo->last_buffer = vmalloc(fifo->last_buffer_size);
-	if (unlikely(fifo->last_buffer == NULL)) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
-
 	fifo->dynamic_buffer = NULL;
 	fifo->reserved_size = 0;
 	fifo->using_bounce_buffer = false;
@@ -137,14 +131,10 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		 (unsigned int) min,
 		 (unsigned int) fifo->capabilities);
 
-	atomic_set(&dev_priv->fence_seq, dev_priv->last_read_sequence);
-	iowrite32(dev_priv->last_read_sequence, fifo_mem + SVGA_FIFO_FENCE);
-	vmw_fence_queue_init(&fifo->fence_queue);
+	atomic_set(&dev_priv->marker_seq, dev_priv->last_read_seqno);
+	iowrite32(dev_priv->last_read_seqno, fifo_mem + SVGA_FIFO_FENCE);
+	vmw_marker_queue_init(&fifo->marker_queue);
 	return vmw_fifo_send_fence(dev_priv, &dummy);
-out_err:
-	vfree(fifo->static_buffer);
-	fifo->static_buffer = NULL;
-	return ret;
 }
 
 void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
@@ -170,7 +160,7 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 	while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0)
 		vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC);
 
-	dev_priv->last_read_sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
+	dev_priv->last_read_seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 
 	vmw_write(dev_priv, SVGA_REG_CONFIG_DONE,
 		  dev_priv->config_done_state);
@@ -180,12 +170,7 @@ void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo)
 		  dev_priv->traces_state);
 
 	mutex_unlock(&dev_priv->hw_mutex);
-	vmw_fence_queue_takedown(&fifo->fence_queue);
-
-	if (likely(fifo->last_buffer != NULL)) {
-		vfree(fifo->last_buffer);
-		fifo->last_buffer = NULL;
-	}
+	vmw_marker_queue_takedown(&fifo->marker_queue);
 
 	if (likely(fifo->static_buffer != NULL)) {
 		vfree(fifo->static_buffer);
@@ -262,9 +247,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
 		outl(SVGA_IRQFLAG_FIFO_PROGRESS,
 		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -286,9 +270,8 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	mutex_lock(&dev_priv->hw_mutex);
 	if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) {
 		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_FIFO_PROGRESS);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
 		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
 	}
 	mutex_unlock(&dev_priv->hw_mutex);
@@ -296,6 +279,16 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
 	return ret;
 }
 
+/**
+ * Reserve @bytes number of bytes in the fifo.
+ *
+ * This function will return NULL (error) on two conditions:
+ *  If it timeouts waiting for fifo space, or if @bytes is larger than the
+ *   available fifo space.
+ *
+ * Returns:
+ *   Pointer to the fifo, or null on error (possible hardware hang).
+ */
 void *vmw_fifo_reserve(struct vmw_private *dev_priv, uint32_t bytes)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
@@ -466,7 +459,7 @@ void vmw_fifo_commit(struct vmw_private *dev_priv, uint32_t bytes)
 	mutex_unlock(&fifo_state->fifo_mutex);
 }
 
-int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
+int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *seqno)
 {
 	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
 	struct svga_fifo_cmd_fence *cmd_fence;
@@ -476,16 +469,16 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 
 	fm = vmw_fifo_reserve(dev_priv, bytes);
 	if (unlikely(fm == NULL)) {
-		*sequence = atomic_read(&dev_priv->fence_seq);
+		*seqno = atomic_read(&dev_priv->marker_seq);
 		ret = -ENOMEM;
-		(void)vmw_fallback_wait(dev_priv, false, true, *sequence,
+		(void)vmw_fallback_wait(dev_priv, false, true, *seqno,
 					false, 3*HZ);
 		goto out_err;
 	}
 
 	do {
-		*sequence = atomic_add_return(1, &dev_priv->fence_seq);
-	} while (*sequence == 0);
+		*seqno = atomic_add_return(1, &dev_priv->marker_seq);
+	} while (*seqno == 0);
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE)) {
 
@@ -502,61 +495,68 @@ int vmw_fifo_send_fence(struct vmw_private *dev_priv, uint32_t *sequence)
 	cmd_fence = (struct svga_fifo_cmd_fence *)
 	    ((unsigned long)fm + sizeof(__le32));
 
-	iowrite32(*sequence, &cmd_fence->fence);
-	fifo_state->last_buffer_add = true;
+	iowrite32(*seqno, &cmd_fence->fence);
 	vmw_fifo_commit(dev_priv, bytes);
-	fifo_state->last_buffer_add = false;
-	(void) vmw_fence_push(&fifo_state->fence_queue, *sequence);
-	vmw_update_sequence(dev_priv, fifo_state);
+	(void) vmw_marker_push(&fifo_state->marker_queue, *seqno);
+	vmw_update_seqno(dev_priv, fifo_state);
 
 out_err:
 	return ret;
 }
 
 /**
- * Map the first page of the FIFO read-only to user-space.
+ * vmw_fifo_emit_dummy_query - emits a dummy query to the fifo.
+ *
+ * @dev_priv: The device private structure.
+ * @cid: The hardware context id used for the query.
+ *
+ * This function is used to emit a dummy occlusion query with
+ * no primitives rendered between query begin and query end.
+ * It's used to provide a query barrier, in order to know that when
+ * this query is finished, all preceding queries are also finished.
+ *
+ * A Query results structure should have been initialized at the start
+ * of the dev_priv->dummy_query_bo buffer object. And that buffer object
+ * must also be either reserved or pinned when this function is called.
+ *
+ * Returns -ENOMEM on failure to reserve fifo space.
  */
-
-static int vmw_fifo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
+			      uint32_t cid)
 {
-	int ret;
-	unsigned long address = (unsigned long)vmf->virtual_address;
+	/*
+	 * A query wait without a preceding query end will
+	 * actually finish all queries for this cid
+	 * without writing to the query result structure.
+	 */
 
-	if (address != vma->vm_start)
-		return VM_FAULT_SIGBUS;
+	struct ttm_buffer_object *bo = dev_priv->dummy_query_bo;
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdWaitForQuery body;
+	} *cmd;
 
-	ret = vm_insert_pfn(vma, address, vma->vm_pgoff);
-	if (likely(ret == -EBUSY || ret == 0))
-		return VM_FAULT_NOPAGE;
-	else if (ret == -ENOMEM)
-		return VM_FAULT_OOM;
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 
-	return VM_FAULT_SIGBUS;
-}
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Out of fifo space for dummy query.\n");
+		return -ENOMEM;
+	}
 
-static struct vm_operations_struct vmw_fifo_vm_ops = {
-	.fault = vmw_fifo_vm_fault,
-	.open = NULL,
-	.close = NULL
-};
+	cmd->header.id = SVGA_3D_CMD_WAIT_FOR_QUERY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.cid = cid;
+	cmd->body.type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+	if (bo->mem.mem_type == TTM_PL_VRAM) {
+		cmd->body.guestResult.gmrId = SVGA_GMR_FRAMEBUFFER;
+		cmd->body.guestResult.offset = bo->offset;
+	} else {
+		cmd->body.guestResult.gmrId = bo->mem.start;
+		cmd->body.guestResult.offset = 0;
+	}
+
+	vmw_fifo_commit(dev_priv, sizeof(*cmd));
 
-int vmw_fifo_mmap(struct file *filp, struct vm_area_struct *vma)
-{
-	struct drm_file *file_priv;
-	struct vmw_private *dev_priv;
-
-	file_priv = filp->private_data;
-	dev_priv = vmw_priv(file_priv->minor->dev);
-
-	if (vma->vm_pgoff != (dev_priv->mmio_start >> PAGE_SHIFT) ||
-	    (vma->vm_end - vma->vm_start) != PAGE_SIZE)
-		return -EINVAL;
-
-	vma->vm_flags &= ~(VM_WRITE | VM_MAYWRITE);
-	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_SHARED;
-	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
-	vma->vm_page_prot = ttm_io_prot(TTM_PL_FLAG_UNCACHED,
-					vma->vm_page_prot);
-	vma->vm_ops = &vmw_fifo_vm_ops;
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
index de0c5948521d..f4e7763a7694 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * Copyright © 2009-2011 VMware, Inc., Palo Alto, CA., USA
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -29,6 +29,77 @@
 #include "drmP.h"
 #include "ttm/ttm_bo_driver.h"
 
+#define VMW_PPN_SIZE sizeof(unsigned long)
+
+static int vmw_gmr2_bind(struct vmw_private *dev_priv,
+			 struct page *pages[],
+			 unsigned long num_pages,
+			 int gmr_id)
+{
+	SVGAFifoCmdDefineGMR2 define_cmd;
+	SVGAFifoCmdRemapGMR2 remap_cmd;
+	uint32_t define_size = sizeof(define_cmd) + 4;
+	uint32_t remap_size = VMW_PPN_SIZE * num_pages + sizeof(remap_cmd) + 4;
+	uint32_t *cmd;
+	uint32_t *cmd_orig;
+	uint32_t i;
+
+	cmd_orig = cmd = vmw_fifo_reserve(dev_priv, define_size + remap_size);
+	if (unlikely(cmd == NULL))
+		return -ENOMEM;
+
+	define_cmd.gmrId = gmr_id;
+	define_cmd.numPages = num_pages;
+
+	remap_cmd.gmrId = gmr_id;
+	remap_cmd.flags = (VMW_PPN_SIZE > sizeof(*cmd)) ?
+		SVGA_REMAP_GMR2_PPN64 : SVGA_REMAP_GMR2_PPN32;
+	remap_cmd.offsetPages = 0;
+	remap_cmd.numPages = num_pages;
+
+	*cmd++ = SVGA_CMD_DEFINE_GMR2;
+	memcpy(cmd, &define_cmd, sizeof(define_cmd));
+	cmd += sizeof(define_cmd) / sizeof(uint32);
+
+	*cmd++ = SVGA_CMD_REMAP_GMR2;
+	memcpy(cmd, &remap_cmd, sizeof(remap_cmd));
+	cmd += sizeof(remap_cmd) / sizeof(uint32);
+
+	for (i = 0; i < num_pages; ++i) {
+		if (VMW_PPN_SIZE > 4)
+			*cmd = page_to_pfn(*pages++);
+		else
+			*((uint64_t *)cmd) = page_to_pfn(*pages++);
+
+		cmd += VMW_PPN_SIZE / sizeof(*cmd);
+	}
+
+	vmw_fifo_commit(dev_priv, define_size + remap_size);
+
+	return 0;
+}
+
+static void vmw_gmr2_unbind(struct vmw_private *dev_priv,
+			    int gmr_id)
+{
+	SVGAFifoCmdDefineGMR2 define_cmd;
+	uint32_t define_size = sizeof(define_cmd) + 4;
+	uint32_t *cmd;
+
+	cmd = vmw_fifo_reserve(dev_priv, define_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("GMR2 unbind failed.\n");
+		return;
+	}
+	define_cmd.gmrId = gmr_id;
+	define_cmd.numPages = 0;
+
+	*cmd++ = SVGA_CMD_DEFINE_GMR2;
+	memcpy(cmd, &define_cmd, sizeof(define_cmd));
+
+	vmw_fifo_commit(dev_priv, define_size);
+}
+
 /**
  * FIXME: Adjust to the ttm lowmem / highmem storage to minimize
  * the number of used descriptors.
@@ -170,6 +241,9 @@ int vmw_gmr_bind(struct vmw_private *dev_priv,
 	struct list_head desc_pages;
 	int ret;
 
+	if (likely(dev_priv->capabilities & SVGA_CAP_GMR2))
+		return vmw_gmr2_bind(dev_priv, pages, num_pages, gmr_id);
+
 	if (unlikely(!(dev_priv->capabilities & SVGA_CAP_GMR)))
 		return -EINVAL;
 
@@ -192,6 +266,11 @@ int vmw_gmr_bind(struct vmw_private *dev_priv,
 
 void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id)
 {
+	if (likely(dev_priv->capabilities & SVGA_CAP_GMR2)) {
+		vmw_gmr2_unbind(dev_priv, gmr_id);
+		return;
+	}
+
 	mutex_lock(&dev_priv->hw_mutex);
 	vmw_write(dev_priv, SVGA_REG_GMR_ID, gmr_id);
 	wmb();
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
index ac6e0d1bd629..5f717152cff5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c
@@ -40,6 +40,8 @@ struct vmwgfx_gmrid_man {
 	spinlock_t lock;
 	struct ida gmr_ida;
 	uint32_t max_gmr_ids;
+	uint32_t max_gmr_pages;
+	uint32_t used_gmr_pages;
 };
 
 static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
@@ -49,33 +51,50 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man,
 {
 	struct vmwgfx_gmrid_man *gman =
 		(struct vmwgfx_gmrid_man *)man->priv;
-	int ret;
+	int ret = 0;
 	int id;
 
 	mem->mm_node = NULL;
 
-	do {
-		if (unlikely(ida_pre_get(&gman->gmr_ida, GFP_KERNEL) == 0))
-			return -ENOMEM;
+	spin_lock(&gman->lock);
+
+	if (gman->max_gmr_pages > 0) {
+		gman->used_gmr_pages += bo->num_pages;
+		if (unlikely(gman->used_gmr_pages > gman->max_gmr_pages))
+			goto out_err_locked;
+	}
 
+	do {
+		spin_unlock(&gman->lock);
+		if (unlikely(ida_pre_get(&gman->gmr_ida, GFP_KERNEL) == 0)) {
+			ret = -ENOMEM;
+			goto out_err;
+		}
 		spin_lock(&gman->lock);
-		ret = ida_get_new(&gman->gmr_ida, &id);
 
+		ret = ida_get_new(&gman->gmr_ida, &id);
 		if (unlikely(ret == 0 && id >= gman->max_gmr_ids)) {
 			ida_remove(&gman->gmr_ida, id);
-			spin_unlock(&gman->lock);
-			return 0;
+			ret = 0;
+			goto out_err_locked;
 		}
-
-		spin_unlock(&gman->lock);
-
 	} while (ret == -EAGAIN);
 
 	if (likely(ret == 0)) {
 		mem->mm_node = gman;
 		mem->start = id;
-	}
+		mem->num_pages = bo->num_pages;
+	} else
+		goto out_err_locked;
+
+	spin_unlock(&gman->lock);
+	return 0;
 
+out_err:
+	spin_lock(&gman->lock);
+out_err_locked:
+	gman->used_gmr_pages -= bo->num_pages;
+	spin_unlock(&gman->lock);
 	return ret;
 }
 
@@ -88,6 +107,7 @@ static void vmw_gmrid_man_put_node(struct ttm_mem_type_manager *man,
 	if (mem->mm_node) {
 		spin_lock(&gman->lock);
 		ida_remove(&gman->gmr_ida, mem->start);
+		gman->used_gmr_pages -= mem->num_pages;
 		spin_unlock(&gman->lock);
 		mem->mm_node = NULL;
 	}
@@ -96,6 +116,8 @@ static void vmw_gmrid_man_put_node(struct ttm_mem_type_manager *man,
 static int vmw_gmrid_man_init(struct ttm_mem_type_manager *man,
 			      unsigned long p_size)
 {
+	struct vmw_private *dev_priv =
+		container_of(man->bdev, struct vmw_private, bdev);
 	struct vmwgfx_gmrid_man *gman =
 		kzalloc(sizeof(*gman), GFP_KERNEL);
 
@@ -103,6 +125,8 @@ static int vmw_gmrid_man_init(struct ttm_mem_type_manager *man,
 		return -ENOMEM;
 
 	spin_lock_init(&gman->lock);
+	gman->max_gmr_pages = dev_priv->max_gmr_pages;
+	gman->used_gmr_pages = 0;
 	ida_init(&gman->gmr_ida);
 	gman->max_gmr_ids = p_size;
 	man->priv = (void *) gman;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 570d57775a58..3f6343502d1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -27,6 +27,7 @@
 
 #include "vmwgfx_drv.h"
 #include "vmwgfx_drm.h"
+#include "vmwgfx_kms.h"
 
 int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
@@ -45,9 +46,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_3D:
 		param->value = vmw_fifo_have_3d(dev_priv) ? 1 : 0;
 		break;
-	case DRM_VMW_PARAM_FIFO_OFFSET:
-		param->value = dev_priv->mmio_start;
-		break;
 	case DRM_VMW_PARAM_HW_CAPS:
 		param->value = dev_priv->capabilities;
 		break;
@@ -57,6 +55,13 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	case DRM_VMW_PARAM_MAX_FB_SIZE:
 		param->value = dev_priv->vram_size;
 		break;
+	case DRM_VMW_PARAM_FIFO_HW_VERSION:
+	{
+		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+
+		param->value = ioread32(fifo_mem + SVGA_FIFO_3D_HWVERSION);
+		break;
+	}
 	default:
 		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
 			  param->param);
@@ -66,25 +71,259 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
-int vmw_fifo_debug_ioctl(struct drm_device *dev, void *data,
+
+int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
 {
+	struct drm_vmw_get_3d_cap_arg *arg =
+		(struct drm_vmw_get_3d_cap_arg *) data;
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_fifo_state *fifo_state = &dev_priv->fifo;
-	struct drm_vmw_fifo_debug_arg *arg =
-	    (struct drm_vmw_fifo_debug_arg *)data;
-	__le32 __user *buffer = (__le32 __user *)
-	    (unsigned long)arg->debug_buffer;
+	uint32_t size;
+	__le32 __iomem *fifo_mem;
+	void __user *buffer = (void __user *)((unsigned long)(arg->buffer));
+	void *bounce;
+	int ret;
 
-	if (unlikely(fifo_state->last_buffer == NULL))
+	if (unlikely(arg->pad64 != 0)) {
+		DRM_ERROR("Illegal GET_3D_CAP argument.\n");
 		return -EINVAL;
+	}
+
+	size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) << 2;
+
+	if (arg->max_size < size)
+		size = arg->max_size;
+
+	bounce = vmalloc(size);
+	if (unlikely(bounce == NULL)) {
+		DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n");
+		return -ENOMEM;
+	}
+
+	fifo_mem = dev_priv->mmio_virt;
+	memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
+
+	ret = copy_to_user(buffer, bounce, size);
+	vfree(bounce);
+
+	if (unlikely(ret != 0))
+		DRM_ERROR("Failed to report 3D caps info.\n");
+
+	return ret;
+}
+
+int vmw_present_ioctl(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_arg *arg =
+		(struct drm_vmw_present_arg *)data;
+	struct vmw_surface *surface;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Variable clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_user_surface_lookup_handle(dev_priv, tfile, arg->sid,
+					     &surface);
+	if (ret)
+		goto out_no_surface;
+
+	ret = vmw_kms_present(dev_priv, file_priv,
+			      vfb, surface, arg->sid,
+			      arg->dest_x, arg->dest_y,
+			      clips, num_clips);
 
-	if (arg->debug_buffer_size < fifo_state->last_data_size) {
-		arg->used_size = arg->debug_buffer_size;
-		arg->did_not_fit = 1;
-	} else {
-		arg->used_size = fifo_state->last_data_size;
-		arg->did_not_fit = 0;
+	/* vmw_user_surface_lookup takes one ref so does new_fb */
+	vmw_surface_unreference(&surface);
+
+out_no_surface:
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+int vmw_present_readback_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_vmw_present_readback_arg *arg =
+		(struct drm_vmw_present_readback_arg *)data;
+	struct drm_vmw_fence_rep __user *user_fence_rep =
+		(struct drm_vmw_fence_rep __user *)
+		(unsigned long)arg->fence_rep;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct drm_vmw_rect __user *clips_ptr;
+	struct drm_vmw_rect *clips = NULL;
+	struct drm_mode_object *obj;
+	struct vmw_framebuffer *vfb;
+	uint32_t num_clips;
+	int ret;
+
+	num_clips = arg->num_clips;
+	clips_ptr = (struct drm_vmw_rect *)(unsigned long)arg->clips_ptr;
+
+	if (unlikely(num_clips == 0))
+		return 0;
+
+	if (clips_ptr == NULL) {
+		DRM_ERROR("Argument clips_ptr must be specified.\n");
+		ret = -EINVAL;
+		goto out_clips;
+	}
+
+	clips = kzalloc(num_clips * sizeof(*clips), GFP_KERNEL);
+	if (clips == NULL) {
+		DRM_ERROR("Failed to allocate clip rect list.\n");
+		ret = -ENOMEM;
+		goto out_clips;
+	}
+
+	ret = copy_from_user(clips, clips_ptr, num_clips * sizeof(*clips));
+	if (ret) {
+		DRM_ERROR("Failed to copy clip rects from userspace.\n");
+		ret = -EFAULT;
+		goto out_no_copy;
+	}
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (unlikely(ret != 0)) {
+		ret = -ERESTARTSYS;
+		goto out_no_mode_mutex;
+	}
+
+	obj = drm_mode_object_find(dev, arg->fb_id, DRM_MODE_OBJECT_FB);
+	if (!obj) {
+		DRM_ERROR("Invalid framebuffer id.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
+	}
+
+	vfb = vmw_framebuffer_to_vfb(obj_to_fb(obj));
+	if (!vfb->dmabuf) {
+		DRM_ERROR("Framebuffer not dmabuf backed.\n");
+		ret = -EINVAL;
+		goto out_no_fb;
 	}
-	return copy_to_user(buffer, fifo_state->last_buffer, arg->used_size);
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_no_ttm_lock;
+
+	ret = vmw_kms_readback(dev_priv, file_priv,
+			       vfb, user_fence_rep,
+			       clips, num_clips);
+
+	ttm_read_unlock(&vmaster->lock);
+out_no_ttm_lock:
+out_no_fb:
+	mutex_unlock(&dev->mode_config.mutex);
+out_no_mode_mutex:
+out_no_copy:
+	kfree(clips);
+out_clips:
+	return ret;
+}
+
+
+/**
+ * vmw_fops_poll - wrapper around the drm_poll function
+ *
+ * @filp: See the linux fops poll documentation.
+ * @wait: See the linux fops poll documentation.
+ *
+ * Wrapper around the drm_poll function that makes sure the device is
+ * processing the fifo if drm_poll decides to wait.
+ */
+unsigned int vmw_fops_poll(struct file *filp, struct poll_table_struct *wait)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_poll(filp, wait);
+}
+
+
+/**
+ * vmw_fops_read - wrapper around the drm_read function
+ *
+ * @filp: See the linux fops read documentation.
+ * @buffer: See the linux fops read documentation.
+ * @count: See the linux fops read documentation.
+ * offset: See the linux fops read documentation.
+ *
+ * Wrapper around the drm_read function that makes sure the device is
+ * processing the fifo if drm_read decides to wait.
+ */
+ssize_t vmw_fops_read(struct file *filp, char __user *buffer,
+		      size_t count, loff_t *offset)
+{
+	struct drm_file *file_priv = filp->private_data;
+	struct vmw_private *dev_priv =
+		vmw_priv(file_priv->minor->dev);
+
+	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+	return drm_read(filp, buffer, count, offset);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index e92298a6a383..cabc95f7517e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -34,26 +34,33 @@ irqreturn_t vmw_irq_handler(DRM_IRQ_ARGS)
 {
 	struct drm_device *dev = (struct drm_device *)arg;
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	uint32_t status;
+	uint32_t status, masked_status;
 
 	spin_lock(&dev_priv->irq_lock);
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+	masked_status = status & dev_priv->irq_mask;
 	spin_unlock(&dev_priv->irq_lock);
 
-	if (status & SVGA_IRQFLAG_ANY_FENCE)
+	if (likely(status))
+		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+
+	if (!masked_status)
+		return IRQ_NONE;
+
+	if (masked_status & (SVGA_IRQFLAG_ANY_FENCE |
+			     SVGA_IRQFLAG_FENCE_GOAL)) {
+		vmw_fences_update(dev_priv->fman);
 		wake_up_all(&dev_priv->fence_queue);
-	if (status & SVGA_IRQFLAG_FIFO_PROGRESS)
+	}
+
+	if (masked_status & SVGA_IRQFLAG_FIFO_PROGRESS)
 		wake_up_all(&dev_priv->fifo_queue);
 
-	if (likely(status)) {
-		outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		return IRQ_HANDLED;
-	}
 
-	return IRQ_NONE;
+	return IRQ_HANDLED;
 }
 
-static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t sequence)
+static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno)
 {
 	uint32_t busy;
 
@@ -64,43 +71,43 @@ static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t sequence)
 	return (busy == 0);
 }
 
-void vmw_update_sequence(struct vmw_private *dev_priv,
+void vmw_update_seqno(struct vmw_private *dev_priv,
 			 struct vmw_fifo_state *fifo_state)
 {
 	__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
+	uint32_t seqno = ioread32(fifo_mem + SVGA_FIFO_FENCE);
 
-	uint32_t sequence = ioread32(fifo_mem + SVGA_FIFO_FENCE);
-
-	if (dev_priv->last_read_sequence != sequence) {
-		dev_priv->last_read_sequence = sequence;
-		vmw_fence_pull(&fifo_state->fence_queue, sequence);
+	if (dev_priv->last_read_seqno != seqno) {
+		dev_priv->last_read_seqno = seqno;
+		vmw_marker_pull(&fifo_state->marker_queue, seqno);
+		vmw_fences_update(dev_priv->fman);
 	}
 }
 
-bool vmw_fence_signaled(struct vmw_private *dev_priv,
-			uint32_t sequence)
+bool vmw_seqno_passed(struct vmw_private *dev_priv,
+			 uint32_t seqno)
 {
 	struct vmw_fifo_state *fifo_state;
 	bool ret;
 
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
 		return true;
 
 	fifo_state = &dev_priv->fifo;
-	vmw_update_sequence(dev_priv, fifo_state);
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
+	vmw_update_seqno(dev_priv, fifo_state);
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
 		return true;
 
 	if (!(fifo_state->capabilities & SVGA_FIFO_CAP_FENCE) &&
-	    vmw_fifo_idle(dev_priv, sequence))
+	    vmw_fifo_idle(dev_priv, seqno))
 		return true;
 
 	/**
-	 * Then check if the sequence is higher than what we've actually
+	 * Then check if the seqno is higher than what we've actually
 	 * emitted. Then the fence is stale and signaled.
 	 */
 
-	ret = ((atomic_read(&dev_priv->fence_seq) - sequence)
+	ret = ((atomic_read(&dev_priv->marker_seq) - seqno)
 	       > VMW_FENCE_WRAP);
 
 	return ret;
@@ -109,7 +116,7 @@ bool vmw_fence_signaled(struct vmw_private *dev_priv,
 int vmw_fallback_wait(struct vmw_private *dev_priv,
 		      bool lazy,
 		      bool fifo_idle,
-		      uint32_t sequence,
+		      uint32_t seqno,
 		      bool interruptible,
 		      unsigned long timeout)
 {
@@ -123,7 +130,7 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 	DEFINE_WAIT(__wait);
 
 	wait_condition = (fifo_idle) ? &vmw_fifo_idle :
-		&vmw_fence_signaled;
+		&vmw_seqno_passed;
 
 	/**
 	 * Block command submission while waiting for idle.
@@ -131,14 +138,14 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 
 	if (fifo_idle)
 		down_read(&fifo_state->rwsem);
-	signal_seq = atomic_read(&dev_priv->fence_seq);
+	signal_seq = atomic_read(&dev_priv->marker_seq);
 	ret = 0;
 
 	for (;;) {
 		prepare_to_wait(&dev_priv->fence_queue, &__wait,
 				(interruptible) ?
 				TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
-		if (wait_condition(dev_priv, sequence))
+		if (wait_condition(dev_priv, seqno))
 			break;
 		if (time_after_eq(jiffies, end_jiffies)) {
 			DRM_ERROR("SVGA device lockup.\n");
@@ -175,68 +182,110 @@ int vmw_fallback_wait(struct vmw_private *dev_priv,
 	return ret;
 }
 
-int vmw_wait_fence(struct vmw_private *dev_priv,
-		   bool lazy, uint32_t sequence,
-		   bool interruptible, unsigned long timeout)
+void vmw_seqno_waiter_add(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (dev_priv->fence_queue_waiters++ == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		outl(SVGA_IRQFLAG_ANY_FENCE,
+		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+void vmw_seqno_waiter_remove(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (--dev_priv->fence_queue_waiters == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_ANY_FENCE;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+
+void vmw_goal_waiter_add(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (dev_priv->goal_queue_waiters++ == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		outl(SVGA_IRQFLAG_FENCE_GOAL,
+		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
+		dev_priv->irq_mask |= SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+void vmw_goal_waiter_remove(struct vmw_private *dev_priv)
+{
+	mutex_lock(&dev_priv->hw_mutex);
+	if (--dev_priv->goal_queue_waiters == 0) {
+		unsigned long irq_flags;
+
+		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
+		dev_priv->irq_mask &= ~SVGA_IRQFLAG_FENCE_GOAL;
+		vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask);
+		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+}
+
+int vmw_wait_seqno(struct vmw_private *dev_priv,
+		      bool lazy, uint32_t seqno,
+		      bool interruptible, unsigned long timeout)
 {
 	long ret;
-	unsigned long irq_flags;
 	struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
-	if (likely(dev_priv->last_read_sequence - sequence < VMW_FENCE_WRAP))
+	if (likely(dev_priv->last_read_seqno - seqno < VMW_FENCE_WRAP))
 		return 0;
 
-	if (likely(vmw_fence_signaled(dev_priv, sequence)))
+	if (likely(vmw_seqno_passed(dev_priv, seqno)))
 		return 0;
 
 	vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
 
 	if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
-		return vmw_fallback_wait(dev_priv, lazy, true, sequence,
+		return vmw_fallback_wait(dev_priv, lazy, true, seqno,
 					 interruptible, timeout);
 
 	if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
-		return vmw_fallback_wait(dev_priv, lazy, false, sequence,
+		return vmw_fallback_wait(dev_priv, lazy, false, seqno,
 					 interruptible, timeout);
 
-	mutex_lock(&dev_priv->hw_mutex);
-	if (atomic_add_return(1, &dev_priv->fence_queue_waiters) > 0) {
-		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		outl(SVGA_IRQFLAG_ANY_FENCE,
-		     dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) |
-			  SVGA_IRQFLAG_ANY_FENCE);
-		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
-	}
-	mutex_unlock(&dev_priv->hw_mutex);
+	vmw_seqno_waiter_add(dev_priv);
 
 	if (interruptible)
 		ret = wait_event_interruptible_timeout
 		    (dev_priv->fence_queue,
-		     vmw_fence_signaled(dev_priv, sequence),
+		     vmw_seqno_passed(dev_priv, seqno),
 		     timeout);
 	else
 		ret = wait_event_timeout
 		    (dev_priv->fence_queue,
-		     vmw_fence_signaled(dev_priv, sequence),
+		     vmw_seqno_passed(dev_priv, seqno),
 		     timeout);
 
+	vmw_seqno_waiter_remove(dev_priv);
+
 	if (unlikely(ret == 0))
 		ret = -EBUSY;
 	else if (likely(ret > 0))
 		ret = 0;
 
-	mutex_lock(&dev_priv->hw_mutex);
-	if (atomic_dec_and_test(&dev_priv->fence_queue_waiters)) {
-		spin_lock_irqsave(&dev_priv->irq_lock, irq_flags);
-		vmw_write(dev_priv, SVGA_REG_IRQMASK,
-			  vmw_read(dev_priv, SVGA_REG_IRQMASK) &
-			  ~SVGA_IRQFLAG_ANY_FENCE);
-		spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags);
-	}
-	mutex_unlock(&dev_priv->hw_mutex);
-
 	return ret;
 }
 
@@ -273,25 +322,3 @@ void vmw_irq_uninstall(struct drm_device *dev)
 	status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 	outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
 }
-
-#define VMW_FENCE_WAIT_TIMEOUT 3*HZ;
-
-int vmw_fence_wait_ioctl(struct drm_device *dev, void *data,
-			 struct drm_file *file_priv)
-{
-	struct drm_vmw_fence_wait_arg *arg =
-	    (struct drm_vmw_fence_wait_arg *)data;
-	unsigned long timeout;
-
-	if (!arg->cookie_valid) {
-		arg->cookie_valid = 1;
-		arg->kernel_cookie = jiffies + VMW_FENCE_WAIT_TIMEOUT;
-	}
-
-	timeout = jiffies;
-	if (time_after_eq(timeout, (unsigned long)arg->kernel_cookie))
-		return -EBUSY;
-
-	timeout = (unsigned long)arg->kernel_cookie - timeout;
-	return vmw_wait_fence(vmw_priv(dev), true, arg->sequence, true, timeout);
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index dfe32e62bd90..8b14dfd513a1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -27,12 +27,10 @@
 
 #include "vmwgfx_kms.h"
 
+
 /* Might need a hrtimer here? */
 #define VMWGFX_PRESENT_RATE ((HZ / 60 > 0) ? HZ / 60 : 1)
 
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb);
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb);
-
 void vmw_display_unit_cleanup(struct vmw_display_unit *du)
 {
 	if (du->cursor_surface)
@@ -329,41 +327,10 @@ struct vmw_framebuffer_surface {
 	struct vmw_framebuffer base;
 	struct vmw_surface *surface;
 	struct vmw_dma_buffer *buffer;
-	struct delayed_work d_work;
-	struct mutex work_lock;
-	bool present_fs;
 	struct list_head head;
 	struct drm_master *master;
 };
 
-/**
- * vmw_kms_idle_workqueues - Flush workqueues on this master
- *
- * @vmaster - Pointer identifying the master, for the surfaces of which
- * we idle the dirty work queues.
- *
- * This function should be called with the ttm lock held in exclusive mode
- * to idle all dirty work queues before the fifo is taken down.
- *
- * The work task may actually requeue itself, but after the flush returns we're
- * sure that there's nothing to present, since the ttm lock is held in
- * exclusive mode, so the fifo will never get used.
- */
-
-void vmw_kms_idle_workqueues(struct vmw_master *vmaster)
-{
-	struct vmw_framebuffer_surface *entry;
-
-	mutex_lock(&vmaster->fb_surf_mutex);
-	list_for_each_entry(entry, &vmaster->fb_surf, head) {
-		if (cancel_delayed_work_sync(&entry->d_work))
-			(void) entry->d_work.work.func(&entry->d_work.work);
-
-		(void) cancel_delayed_work_sync(&entry->d_work);
-	}
-	mutex_unlock(&vmaster->fb_surf_mutex);
-}
-
 void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 {
 	struct vmw_framebuffer_surface *vfbs =
@@ -375,64 +342,127 @@ void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer)
 	list_del(&vfbs->head);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
-	cancel_delayed_work_sync(&vfbs->d_work);
 	drm_master_put(&vfbs->master);
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_surface_unreference(&vfbs->surface);
+	ttm_base_object_unref(&vfbs->base.user_obj);
 
 	kfree(vfbs);
 }
 
-static void vmw_framebuffer_present_fs_callback(struct work_struct *work)
+static int do_surface_dirty_sou(struct vmw_private *dev_priv,
+				struct drm_file *file_priv,
+				struct vmw_framebuffer *framebuffer,
+				unsigned flags, unsigned color,
+				struct drm_clip_rect *clips,
+				unsigned num_clips, int inc)
 {
-	struct delayed_work *d_work =
-		container_of(work, struct delayed_work, work);
-	struct vmw_framebuffer_surface *vfbs =
-		container_of(d_work, struct vmw_framebuffer_surface, d_work);
-	struct vmw_surface *surf = vfbs->surface;
-	struct drm_framebuffer *framebuffer = &vfbs->base.base;
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct drm_clip_rect *clips_ptr;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, num_units;
+	int ret = 0; /* silence warning */
+	int left, right, top, bottom;
 
 	struct {
 		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
+		SVGA3dCmdBlitSurfaceToScreen body;
 	} *cmd;
+	SVGASignedRect *blits;
 
-	/**
-	 * Strictly we should take the ttm_lock in read mode before accessing
-	 * the fifo, to make sure the fifo is present and up. However,
-	 * instead we flush all workqueues under the ttm lock in exclusive mode
-	 * before taking down the fifo.
-	 */
-	mutex_lock(&vfbs->work_lock);
-	if (!vfbs->present_fs)
-		goto out_unlock;
-
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
-	if (unlikely(cmd == NULL))
-		goto out_resched;
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-	cmd->cr.x = cpu_to_le32(0);
-	cmd->cr.y = cpu_to_le32(0);
-	cmd->cr.srcx = cmd->cr.x;
-	cmd->cr.srcy = cmd->cr.y;
-	cmd->cr.w = cpu_to_le32(framebuffer->width);
-	cmd->cr.h = cpu_to_le32(framebuffer->height);
-	vfbs->present_fs = false;
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-out_resched:
-	/**
-	 * Will not re-add if already pending.
-	 */
-	schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-out_unlock:
-	mutex_unlock(&vfbs->work_lock);
-}
 
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list,
+			    head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kzalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Temporary fifo memory alloc failed.\n");
+		return -ENOMEM;
+	}
+
+	left = clips->x1;
+	right = clips->x2;
+	top = clips->y1;
+	bottom = clips->y2;
+
+	clips_ptr = clips;
+	for (i = 1; i < num_clips; i++, clips_ptr += inc) {
+		left = min_t(int, left, (int)clips_ptr->x1);
+		right = max_t(int, right, (int)clips_ptr->x2);
+		top = min_t(int, top, (int)clips_ptr->y1);
+		bottom = max_t(int, bottom, (int)clips_ptr->y2);
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = left;
+	cmd->body.srcRect.right = right;
+	cmd->body.srcRect.top = top;
+	cmd->body.srcRect.bottom = bottom;
+
+	clips_ptr = clips;
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++, clips_ptr += inc) {
+		blits[i].left   = clips_ptr->x1 - left;
+		blits[i].right  = clips_ptr->x2 - left;
+		blits[i].top    = clips_ptr->y1 - top;
+		blits[i].bottom = clips_ptr->y2 - top;
+	}
+
+	/* do per unit writing, reuse fifo for each */
+	for (i = 0; i < num_units; i++) {
+		struct vmw_display_unit *unit = units[i];
+		int clip_x1 = left - unit->crtc.x;
+		int clip_y1 = top - unit->crtc.y;
+		int clip_x2 = right - unit->crtc.x;
+		int clip_y2 = bottom - unit->crtc.y;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = cpu_to_le32(framebuffer->user_handle);
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
+
+
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
 
 int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 				  struct drm_file *file_priv,
@@ -444,44 +474,20 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	struct vmw_framebuffer_surface *vfbs =
 		vmw_framebuffer_to_vfbs(framebuffer);
-	struct vmw_surface *surf = vfbs->surface;
 	struct drm_clip_rect norect;
-	SVGA3dCopyRect *cr;
-	int i, inc = 1;
-	int ret;
-
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdPresent body;
-		SVGA3dCopyRect cr;
-	} *cmd;
+	int ret, inc = 1;
 
 	if (unlikely(vfbs->master != file_priv->master))
 		return -EINVAL;
 
+	/* Require ScreenObject support for 3D */
+	if (!dev_priv->sou_priv)
+		return -EINVAL;
+
 	ret = ttm_read_lock(&vmaster->lock, true);
 	if (unlikely(ret != 0))
 		return ret;
 
-	if (!num_clips ||
-	    !(dev_priv->fifo.capabilities &
-	      SVGA_FIFO_CAP_SCREEN_OBJECT)) {
-		int ret;
-
-		mutex_lock(&vfbs->work_lock);
-		vfbs->present_fs = true;
-		ret = schedule_delayed_work(&vfbs->d_work, VMWGFX_PRESENT_RATE);
-		mutex_unlock(&vfbs->work_lock);
-		if (ret) {
-			/**
-			 * No work pending, Force immediate present.
-			 */
-			vmw_framebuffer_present_fs_callback(&vfbs->d_work.work);
-		}
-		ttm_read_unlock(&vmaster->lock);
-		return 0;
-	}
-
 	if (!num_clips) {
 		num_clips = 1;
 		clips = &norect;
@@ -493,29 +499,10 @@ int vmw_framebuffer_surface_dirty(struct drm_framebuffer *framebuffer,
 		inc = 2; /* skip source rects */
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
-		return -ENOMEM;
-	}
-
-	memset(cmd, 0, sizeof(*cmd));
-
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_PRESENT);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body) + num_clips * sizeof(cmd->cr));
-	cmd->body.sid = cpu_to_le32(surf->res.id);
-
-	for (i = 0, cr = &cmd->cr; i < num_clips; i++, cr++, clips += inc) {
-		cr->x = cpu_to_le16(clips->x1);
-		cr->y = cpu_to_le16(clips->y1);
-		cr->srcx = cr->x;
-		cr->srcy = cr->y;
-		cr->w = cpu_to_le16(clips->x2 - clips->x1);
-		cr->h = cpu_to_le16(clips->y2 - clips->y1);
-	}
+	ret = do_surface_dirty_sou(dev_priv, file_priv, &vfbs->base,
+				   flags, color,
+				   clips, num_clips, inc);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) + (num_clips - 1) * sizeof(cmd->cr));
 	ttm_read_unlock(&vmaster->lock);
 	return 0;
 }
@@ -540,6 +527,10 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
+	/* 3D is only supported on HWv8 hosts which supports screen objects */
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
 	/*
 	 * Sanity checks.
 	 */
@@ -567,6 +558,9 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	case 15:
 		format = SVGA3D_A1R5G5B5;
 		break;
+	case 8:
+		format = SVGA3D_LUMINANCE8;
+		break;
 	default:
 		DRM_ERROR("Invalid color depth: %d\n", mode_cmd->depth);
 		return -EINVAL;
@@ -599,14 +593,11 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv,
 	vfbs->base.base.depth = mode_cmd->depth;
 	vfbs->base.base.width = mode_cmd->width;
 	vfbs->base.base.height = mode_cmd->height;
-	vfbs->base.pin = &vmw_surface_dmabuf_pin;
-	vfbs->base.unpin = &vmw_surface_dmabuf_unpin;
 	vfbs->surface = surface;
+	vfbs->base.user_handle = mode_cmd->handle;
 	vfbs->master = drm_master_get(file_priv->master);
-	mutex_init(&vfbs->work_lock);
 
 	mutex_lock(&vmaster->fb_surf_mutex);
-	INIT_DELAYED_WORK(&vfbs->d_work, &vmw_framebuffer_present_fs_callback);
 	list_add_tail(&vfbs->head, &vmaster->fb_surf);
 	mutex_unlock(&vmaster->fb_surf_mutex);
 
@@ -641,48 +632,33 @@ void vmw_framebuffer_dmabuf_destroy(struct drm_framebuffer *framebuffer)
 
 	drm_framebuffer_cleanup(framebuffer);
 	vmw_dmabuf_unreference(&vfbd->buffer);
+	ttm_base_object_unref(&vfbd->base.user_obj);
 
 	kfree(vfbd);
 }
 
-int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
-				 struct drm_file *file_priv,
-				 unsigned flags, unsigned color,
-				 struct drm_clip_rect *clips,
-				 unsigned num_clips)
+static int do_dmabuf_dirty_ldu(struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
 {
-	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	struct drm_clip_rect norect;
-	int ret;
+	size_t fifo_size;
+	int i;
+
 	struct {
 		uint32_t header;
 		SVGAFifoCmdUpdate body;
 	} *cmd;
-	int i, increment = 1;
-
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
-
-	if (!num_clips) {
-		num_clips = 1;
-		clips = &norect;
-		norect.x1 = norect.y1 = 0;
-		norect.x2 = framebuffer->width;
-		norect.y2 = framebuffer->height;
-	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
-		num_clips /= 2;
-		increment = 2;
-	}
 
-	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd) * num_clips);
+	fifo_size = sizeof(*cmd) * num_clips;
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Fifo reserve failed.\n");
-		ttm_read_unlock(&vmaster->lock);
 		return -ENOMEM;
 	}
 
+	memset(cmd, 0, fifo_size);
 	for (i = 0; i < num_clips; i++, clips += increment) {
 		cmd[i].header = cpu_to_le32(SVGA_CMD_UPDATE);
 		cmd[i].body.x = cpu_to_le32(clips->x1);
@@ -691,57 +667,186 @@ int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
 		cmd[i].body.height = cpu_to_le32(clips->y2 - clips->y1);
 	}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd) * num_clips);
-	ttm_read_unlock(&vmaster->lock);
-
+	vmw_fifo_commit(dev_priv, fifo_size);
 	return 0;
 }
 
-static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
-	.destroy = vmw_framebuffer_dmabuf_destroy,
-	.dirty = vmw_framebuffer_dmabuf_dirty,
-	.create_handle = vmw_framebuffer_create_handle,
-};
-
-static int vmw_surface_dmabuf_pin(struct vmw_framebuffer *vfb)
+static int do_dmabuf_define_gmrfb(struct drm_file *file_priv,
+				  struct vmw_private *dev_priv,
+				  struct vmw_framebuffer *framebuffer)
 {
-	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
-	unsigned long size = vfbs->base.base.pitch * vfbs->base.base.height;
+	int depth = framebuffer->base.depth;
+	size_t fifo_size;
 	int ret;
 
-	vfbs->buffer = kzalloc(sizeof(*vfbs->buffer), GFP_KERNEL);
-	if (unlikely(vfbs->buffer == NULL))
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+
+	/* Emulate RGBA support, contrary to svga_reg.h this is not
+	 * supported by hosts. This is only a problem if we are reading
+	 * this value later and expecting what we uploaded back.
+	 */
+	if (depth == 32)
+		depth = 24;
+
+	fifo_size = sizeof(*cmd);
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
 		return -ENOMEM;
+	}
 
-	vmw_overlay_pause_all(dev_priv);
-	ret = vmw_dmabuf_init(dev_priv, vfbs->buffer, size,
-			       &vmw_vram_ne_placement,
-			       false, &vmw_dmabuf_bo_free);
-	vmw_overlay_resume_all(dev_priv);
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = framebuffer->base.bits_per_pixel;
+	cmd->body.format.colorDepth = depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = framebuffer->base.pitch;
+	cmd->body.ptr.gmrId = framebuffer->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+				  fifo_size, 0, NULL);
+
+	kfree(cmd);
+
+	return ret;
+}
+
+static int do_dmabuf_dirty_sou(struct drm_file *file_priv,
+			       struct vmw_private *dev_priv,
+			       struct vmw_framebuffer *framebuffer,
+			       unsigned flags, unsigned color,
+			       struct drm_clip_rect *clips,
+			       unsigned num_clips, int increment)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_clip_rect *clips_ptr;
+	int i, k, num_units, ret;
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitGMRFBToScreen body;
+	} *blits;
+
+	ret = do_dmabuf_define_gmrfb(file_priv, dev_priv, framebuffer);
 	if (unlikely(ret != 0))
-		vfbs->buffer = NULL;
+		return ret; /* define_gmrfb prints warnings */
+
+	fifo_size = sizeof(*blits) * num_clips;
+	blits = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(blits == NULL)) {
+		DRM_ERROR("Failed to allocate temporary cmd buffer.\n");
+		return -ENOMEM;
+	}
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &framebuffer->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int hit_num = 0;
+
+		clips_ptr = clips;
+		for (i = 0; i < num_clips; i++, clips_ptr += increment) {
+			int clip_x1 = clips_ptr->x1 - unit->crtc.x;
+			int clip_y1 = clips_ptr->y1 - unit->crtc.y;
+			int clip_x2 = clips_ptr->x2 - unit->crtc.x;
+			int clip_y2 = clips_ptr->y2 - unit->crtc.y;
+
+			/* skip any crtcs that misses the clip region */
+			if (clip_x1 >= unit->crtc.mode.hdisplay ||
+			    clip_y1 >= unit->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[hit_num].header = SVGA_CMD_BLIT_GMRFB_TO_SCREEN;
+			blits[hit_num].body.destScreenId = unit->unit;
+			blits[hit_num].body.srcOrigin.x = clips_ptr->x1;
+			blits[hit_num].body.srcOrigin.y = clips_ptr->y1;
+			blits[hit_num].body.destRect.left = clip_x1;
+			blits[hit_num].body.destRect.top = clip_y1;
+			blits[hit_num].body.destRect.right = clip_x2;
+			blits[hit_num].body.destRect.bottom = clip_y2;
+			hit_num++;
+		}
+
+		/* no clips hit the crtc */
+		if (hit_num == 0)
+			continue;
+
+		fifo_size = sizeof(*blits) * hit_num;
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, blits,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(blits);
 
 	return ret;
 }
 
-static int vmw_surface_dmabuf_unpin(struct vmw_framebuffer *vfb)
+int vmw_framebuffer_dmabuf_dirty(struct drm_framebuffer *framebuffer,
+				 struct drm_file *file_priv,
+				 unsigned flags, unsigned color,
+				 struct drm_clip_rect *clips,
+				 unsigned num_clips)
 {
-	struct ttm_buffer_object *bo;
-	struct vmw_framebuffer_surface *vfbs =
-		vmw_framebuffer_to_vfbs(&vfb->base);
+	struct vmw_private *dev_priv = vmw_priv(framebuffer->dev);
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(framebuffer);
+	struct drm_clip_rect norect;
+	int ret, increment = 1;
 
-	if (unlikely(vfbs->buffer == NULL))
-		return 0;
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (!num_clips) {
+		num_clips = 1;
+		clips = &norect;
+		norect.x1 = norect.y1 = 0;
+		norect.x2 = framebuffer->width;
+		norect.y2 = framebuffer->height;
+	} else if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY) {
+		num_clips /= 2;
+		increment = 2;
+	}
 
-	bo = &vfbs->buffer->base;
-	ttm_bo_unref(&bo);
-	vfbs->buffer = NULL;
+	if (dev_priv->ldu_priv) {
+		ret = do_dmabuf_dirty_ldu(dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	} else {
+		ret = do_dmabuf_dirty_sou(file_priv, dev_priv, &vfbd->base,
+					  flags, color,
+					  clips, num_clips, increment);
+	}
 
-	return 0;
+	ttm_read_unlock(&vmaster->lock);
+	return ret;
 }
 
+static struct drm_framebuffer_funcs vmw_framebuffer_dmabuf_funcs = {
+	.destroy = vmw_framebuffer_dmabuf_destroy,
+	.dirty = vmw_framebuffer_dmabuf_dirty,
+	.create_handle = vmw_framebuffer_create_handle,
+};
+
+/**
+ * Pin the dmabuffer to the start of vram.
+ */
 static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 {
 	struct vmw_private *dev_priv = vmw_priv(vfb->base.dev);
@@ -749,10 +854,12 @@ static int vmw_framebuffer_dmabuf_pin(struct vmw_framebuffer *vfb)
 		vmw_framebuffer_to_vfbd(&vfb->base);
 	int ret;
 
+	/* This code should not be used with screen objects */
+	BUG_ON(dev_priv->sou_priv);
 
 	vmw_overlay_pause_all(dev_priv);
 
-	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer);
+	ret = vmw_dmabuf_to_start_of_vram(dev_priv, vfbd->buffer, true, false);
 
 	vmw_overlay_resume_all(dev_priv);
 
@@ -772,7 +879,7 @@ static int vmw_framebuffer_dmabuf_unpin(struct vmw_framebuffer *vfb)
 		return 0;
 	}
 
-	return vmw_dmabuf_from_vram(dev_priv, vfbd->buffer);
+	return vmw_dmabuf_unpin(dev_priv, vfbd->buffer, false);
 }
 
 static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
@@ -794,6 +901,33 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 		return -EINVAL;
 	}
 
+	/* Limited framebuffer color depth support for screen objects */
+	if (dev_priv->sou_priv) {
+		switch (mode_cmd->depth) {
+		case 32:
+		case 24:
+			/* Only support 32 bpp for 32 and 24 depth fbs */
+			if (mode_cmd->bpp == 32)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		case 16:
+		case 15:
+			/* Only support 16 bpp for 16 and 15 depth fbs */
+			if (mode_cmd->bpp == 16)
+				break;
+
+			DRM_ERROR("Invalid color depth/bbp: %d %d\n",
+				  mode_cmd->depth, mode_cmd->bpp);
+			return -EINVAL;
+		default:
+			DRM_ERROR("Invalid color depth: %d\n", mode_cmd->depth);
+			return -EINVAL;
+		}
+	}
+
 	vfbd = kzalloc(sizeof(*vfbd), GFP_KERNEL);
 	if (!vfbd) {
 		ret = -ENOMEM;
@@ -815,9 +949,13 @@ static int vmw_kms_new_framebuffer_dmabuf(struct vmw_private *dev_priv,
 	vfbd->base.base.depth = mode_cmd->depth;
 	vfbd->base.base.width = mode_cmd->width;
 	vfbd->base.base.height = mode_cmd->height;
-	vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
-	vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	if (!dev_priv->sou_priv) {
+		vfbd->base.pin = vmw_framebuffer_dmabuf_pin;
+		vfbd->base.unpin = vmw_framebuffer_dmabuf_unpin;
+	}
+	vfbd->base.dmabuf = true;
 	vfbd->buffer = dmabuf;
+	vfbd->base.user_handle = mode_cmd->handle;
 	*out = &vfbd->base;
 
 	return 0;
@@ -843,6 +981,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 	struct vmw_framebuffer *vfb = NULL;
 	struct vmw_surface *surface = NULL;
 	struct vmw_dma_buffer *bo = NULL;
+	struct ttm_base_object *user_obj;
 	u64 required_size;
 	int ret;
 
@@ -858,6 +997,21 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 		return NULL;
 	}
 
+	/*
+	 * Take a reference on the user object of the resource
+	 * backing the kms fb. This ensures that user-space handle
+	 * lookups on that resource will always work as long as
+	 * it's registered with a kms framebuffer. This is important,
+	 * since vmw_execbuf_process identifies resources in the
+	 * command stream using user-space handles.
+	 */
+
+	user_obj = ttm_base_object_lookup(tfile, mode_cmd->handle);
+	if (unlikely(user_obj == NULL)) {
+		DRM_ERROR("Could not locate requested kms frame buffer.\n");
+		return ERR_PTR(-ENOENT);
+	}
+
 	/**
 	 * End conditioned code.
 	 */
@@ -878,8 +1032,10 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 	return &vfb->base;
 
 try_dmabuf:
@@ -899,8 +1055,10 @@ try_dmabuf:
 
 	if (ret) {
 		DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret);
+		ttm_base_object_unref(&user_obj);
 		return ERR_PTR(ret);
-	}
+	} else
+		vfb->user_obj = user_obj;
 
 	return &vfb->base;
 
@@ -908,6 +1066,7 @@ err_not_scanout:
 	DRM_ERROR("surface not marked as scanout\n");
 	/* vmw_user_surface_lookup takes one ref */
 	vmw_surface_unreference(&surface);
+	ttm_base_object_unref(&user_obj);
 
 	return ERR_PTR(-EINVAL);
 }
@@ -916,6 +1075,210 @@ static struct drm_mode_config_funcs vmw_kms_funcs = {
 	.fb_create = vmw_kms_fb_create,
 };
 
+int vmw_kms_present(struct vmw_private *dev_priv,
+		    struct drm_file *file_priv,
+		    struct vmw_framebuffer *vfb,
+		    struct vmw_surface *surface,
+		    uint32_t sid,
+		    int32_t destX, int32_t destY,
+		    struct drm_vmw_rect *clips,
+		    uint32_t num_clips)
+{
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, num_units;
+	int ret = 0; /* silence warning */
+
+	struct {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdBlitSurfaceToScreen body;
+	} *cmd;
+	SVGASignedRect *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(surface == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	fifo_size = sizeof(*cmd) + sizeof(SVGASignedRect) * num_clips;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	/* only need to do this once */
+	memset(cmd, 0, fifo_size);
+	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN);
+	cmd->header.size = cpu_to_le32(fifo_size - sizeof(cmd->header));
+
+	cmd->body.srcRect.left = 0;
+	cmd->body.srcRect.right = surface->sizes[0].width;
+	cmd->body.srcRect.top = 0;
+	cmd->body.srcRect.bottom = surface->sizes[0].height;
+
+	blits = (SVGASignedRect *)&cmd[1];
+	for (i = 0; i < num_clips; i++) {
+		blits[i].left   = clips[i].x;
+		blits[i].right  = clips[i].x + clips[i].w;
+		blits[i].top    = clips[i].y;
+		blits[i].bottom = clips[i].y + clips[i].h;
+	}
+
+	for (k = 0; k < num_units; k++) {
+		struct vmw_display_unit *unit = units[k];
+		int clip_x1 = destX - unit->crtc.x;
+		int clip_y1 = destY - unit->crtc.y;
+		int clip_x2 = clip_x1 + surface->sizes[0].width;
+		int clip_y2 = clip_y1 + surface->sizes[0].height;
+
+		/* skip any crtcs that misses the clip region */
+		if (clip_x1 >= unit->crtc.mode.hdisplay ||
+		    clip_y1 >= unit->crtc.mode.vdisplay ||
+		    clip_x2 <= 0 || clip_y2 <= 0)
+			continue;
+
+		/* need to reset sid as it is changed by execbuf */
+		cmd->body.srcImage.sid = sid;
+
+		cmd->body.destScreenId = unit->unit;
+
+		/*
+		 * The blit command is a lot more resilient then the
+		 * readback command when it comes to clip rects. So its
+		 * okay to go out of bounds.
+		 */
+
+		cmd->body.destRect.left = clip_x1;
+		cmd->body.destRect.right = clip_x2;
+		cmd->body.destRect.top = clip_y1;
+		cmd->body.destRect.bottom = clip_y2;
+
+		ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd,
+					  fifo_size, 0, NULL);
+
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	kfree(cmd);
+
+	return ret;
+}
+
+int vmw_kms_readback(struct vmw_private *dev_priv,
+		     struct drm_file *file_priv,
+		     struct vmw_framebuffer *vfb,
+		     struct drm_vmw_fence_rep __user *user_fence_rep,
+		     struct drm_vmw_rect *clips,
+		     uint32_t num_clips)
+{
+	struct vmw_framebuffer_dmabuf *vfbd =
+		vmw_framebuffer_to_vfbd(&vfb->base);
+	struct vmw_dma_buffer *dmabuf = vfbd->buffer;
+	struct vmw_display_unit *units[VMWGFX_NUM_DISPLAY_UNITS];
+	struct drm_crtc *crtc;
+	size_t fifo_size;
+	int i, k, ret, num_units, blits_pos;
+
+	struct {
+		uint32_t header;
+		SVGAFifoCmdDefineGMRFB body;
+	} *cmd;
+	struct {
+		uint32_t header;
+		SVGAFifoCmdBlitScreenToGMRFB body;
+	} *blits;
+
+	num_units = 0;
+	list_for_each_entry(crtc, &dev_priv->dev->mode_config.crtc_list, head) {
+		if (crtc->fb != &vfb->base)
+			continue;
+		units[num_units++] = vmw_crtc_to_du(crtc);
+	}
+
+	BUG_ON(dmabuf == NULL);
+	BUG_ON(!clips || !num_clips);
+
+	/* take a safe guess at fifo size */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * num_clips * num_units;
+	cmd = kmalloc(fifo_size, GFP_KERNEL);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed to allocate temporary fifo memory.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header = SVGA_CMD_DEFINE_GMRFB;
+	cmd->body.format.bitsPerPixel = vfb->base.bits_per_pixel;
+	cmd->body.format.colorDepth = vfb->base.depth;
+	cmd->body.format.reserved = 0;
+	cmd->body.bytesPerLine = vfb->base.pitch;
+	cmd->body.ptr.gmrId = vfb->user_handle;
+	cmd->body.ptr.offset = 0;
+
+	blits = (void *)&cmd[1];
+	blits_pos = 0;
+	for (i = 0; i < num_units; i++) {
+		struct drm_vmw_rect *c = clips;
+		for (k = 0; k < num_clips; k++, c++) {
+			/* transform clip coords to crtc origin based coords */
+			int clip_x1 = c->x - units[i]->crtc.x;
+			int clip_x2 = c->x - units[i]->crtc.x + c->w;
+			int clip_y1 = c->y - units[i]->crtc.y;
+			int clip_y2 = c->y - units[i]->crtc.y + c->h;
+			int dest_x = c->x;
+			int dest_y = c->y;
+
+			/* compensate for clipping, we negate
+			 * a negative number and add that.
+			 */
+			if (clip_x1 < 0)
+				dest_x += -clip_x1;
+			if (clip_y1 < 0)
+				dest_y += -clip_y1;
+
+			/* clip */
+			clip_x1 = max(clip_x1, 0);
+			clip_y1 = max(clip_y1, 0);
+			clip_x2 = min(clip_x2, units[i]->crtc.mode.hdisplay);
+			clip_y2 = min(clip_y2, units[i]->crtc.mode.vdisplay);
+
+			/* and cull any rects that misses the crtc */
+			if (clip_x1 >= units[i]->crtc.mode.hdisplay ||
+			    clip_y1 >= units[i]->crtc.mode.vdisplay ||
+			    clip_x2 <= 0 || clip_y2 <= 0)
+				continue;
+
+			blits[blits_pos].header = SVGA_CMD_BLIT_SCREEN_TO_GMRFB;
+			blits[blits_pos].body.srcScreenId = units[i]->unit;
+			blits[blits_pos].body.destOrigin.x = dest_x;
+			blits[blits_pos].body.destOrigin.y = dest_y;
+
+			blits[blits_pos].body.srcRect.left = clip_x1;
+			blits[blits_pos].body.srcRect.top = clip_y1;
+			blits[blits_pos].body.srcRect.right = clip_x2;
+			blits[blits_pos].body.srcRect.bottom = clip_y2;
+			blits_pos++;
+		}
+	}
+	/* reset size here and use calculated exact size from loops */
+	fifo_size = sizeof(*cmd) + sizeof(*blits) * blits_pos;
+
+	ret = vmw_execbuf_process(file_priv, dev_priv, NULL, cmd, fifo_size,
+				  0, user_fence_rep);
+
+	kfree(cmd);
+
+	return ret;
+}
+
 int vmw_kms_init(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
@@ -929,7 +1292,9 @@ int vmw_kms_init(struct vmw_private *dev_priv)
 	dev->mode_config.max_width = 8192;
 	dev->mode_config.max_height = 8192;
 
-	ret = vmw_kms_init_legacy_display_system(dev_priv);
+	ret = vmw_kms_init_screen_object_display(dev_priv);
+	if (ret) /* Fallback */
+		(void)vmw_kms_init_legacy_display_system(dev_priv);
 
 	return 0;
 }
@@ -987,9 +1352,9 @@ out:
 	return ret;
 }
 
-void vmw_kms_write_svga(struct vmw_private *vmw_priv,
+int vmw_kms_write_svga(struct vmw_private *vmw_priv,
 			unsigned width, unsigned height, unsigned pitch,
-			unsigned bbp, unsigned depth)
+			unsigned bpp, unsigned depth)
 {
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK, pitch);
@@ -997,11 +1362,15 @@ void vmw_kms_write_svga(struct vmw_private *vmw_priv,
 		iowrite32(pitch, vmw_priv->mmio_virt + SVGA_FIFO_PITCHLOCK);
 	vmw_write(vmw_priv, SVGA_REG_WIDTH, width);
 	vmw_write(vmw_priv, SVGA_REG_HEIGHT, height);
-	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bbp);
-	vmw_write(vmw_priv, SVGA_REG_DEPTH, depth);
-	vmw_write(vmw_priv, SVGA_REG_RED_MASK, 0x00ff0000);
-	vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, 0x0000ff00);
-	vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, 0x000000ff);
+	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, bpp);
+
+	if (vmw_read(vmw_priv, SVGA_REG_DEPTH) != depth) {
+		DRM_ERROR("Invalid depth %u for %u bpp, host expects %u\n",
+			  depth, bpp, vmw_read(vmw_priv, SVGA_REG_DEPTH));
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 int vmw_kms_save_vga(struct vmw_private *vmw_priv)
@@ -1011,12 +1380,7 @@ int vmw_kms_save_vga(struct vmw_private *vmw_priv)
 
 	vmw_priv->vga_width = vmw_read(vmw_priv, SVGA_REG_WIDTH);
 	vmw_priv->vga_height = vmw_read(vmw_priv, SVGA_REG_HEIGHT);
-	vmw_priv->vga_depth = vmw_read(vmw_priv, SVGA_REG_DEPTH);
 	vmw_priv->vga_bpp = vmw_read(vmw_priv, SVGA_REG_BITS_PER_PIXEL);
-	vmw_priv->vga_pseudo = vmw_read(vmw_priv, SVGA_REG_PSEUDOCOLOR);
-	vmw_priv->vga_red_mask = vmw_read(vmw_priv, SVGA_REG_RED_MASK);
-	vmw_priv->vga_blue_mask = vmw_read(vmw_priv, SVGA_REG_BLUE_MASK);
-	vmw_priv->vga_green_mask = vmw_read(vmw_priv, SVGA_REG_GREEN_MASK);
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_priv->vga_pitchlock =
 		  vmw_read(vmw_priv, SVGA_REG_PITCHLOCK);
@@ -1065,12 +1429,7 @@ int vmw_kms_restore_vga(struct vmw_private *vmw_priv)
 
 	vmw_write(vmw_priv, SVGA_REG_WIDTH, vmw_priv->vga_width);
 	vmw_write(vmw_priv, SVGA_REG_HEIGHT, vmw_priv->vga_height);
-	vmw_write(vmw_priv, SVGA_REG_DEPTH, vmw_priv->vga_depth);
 	vmw_write(vmw_priv, SVGA_REG_BITS_PER_PIXEL, vmw_priv->vga_bpp);
-	vmw_write(vmw_priv, SVGA_REG_PSEUDOCOLOR, vmw_priv->vga_pseudo);
-	vmw_write(vmw_priv, SVGA_REG_RED_MASK, vmw_priv->vga_red_mask);
-	vmw_write(vmw_priv, SVGA_REG_GREEN_MASK, vmw_priv->vga_green_mask);
-	vmw_write(vmw_priv, SVGA_REG_BLUE_MASK, vmw_priv->vga_blue_mask);
 	if (vmw_priv->capabilities & SVGA_CAP_PITCHLOCK)
 		vmw_write(vmw_priv, SVGA_REG_PITCHLOCK,
 			  vmw_priv->vga_pitchlock);
@@ -1095,60 +1454,272 @@ int vmw_kms_restore_vga(struct vmw_private *vmw_priv)
 	return 0;
 }
 
-int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data,
-				struct drm_file *file_priv)
+bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
+				uint32_t pitch,
+				uint32_t height)
 {
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_vmw_update_layout_arg *arg =
-		(struct drm_vmw_update_layout_arg *)data;
-	struct vmw_master *vmaster = vmw_master(file_priv->master);
-	void __user *user_rects;
-	struct drm_vmw_rect *rects;
-	unsigned rects_size;
-	int ret;
+	return ((u64) pitch * (u64) height) < (u64) dev_priv->vram_size;
+}
 
-	ret = ttm_read_lock(&vmaster->lock, true);
-	if (unlikely(ret != 0))
-		return ret;
 
-	if (!arg->num_outputs) {
-		struct drm_vmw_rect def_rect = {0, 0, 800, 600};
-		vmw_kms_ldu_update_layout(dev_priv, 1, &def_rect);
-		goto out_unlock;
-	}
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc)
+{
+	return 0;
+}
 
-	rects_size = arg->num_outputs * sizeof(struct drm_vmw_rect);
-	rects = kzalloc(rects_size, GFP_KERNEL);
-	if (unlikely(!rects)) {
-		ret = -ENOMEM;
-		goto out_unlock;
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+int vmw_enable_vblank(struct drm_device *dev, int crtc)
+{
+	return -ENOSYS;
+}
+
+/**
+ * Function called by DRM code called with vbl_lock held.
+ */
+void vmw_disable_vblank(struct drm_device *dev, int crtc)
+{
+}
+
+
+/*
+ * Small shared kms functions.
+ */
+
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects)
+{
+	struct drm_device *dev = dev_priv->dev;
+	struct vmw_display_unit *du;
+	struct drm_connector *con;
+
+	mutex_lock(&dev->mode_config.mutex);
+
+#if 0
+	{
+		unsigned int i;
+
+		DRM_INFO("%s: new layout ", __func__);
+		for (i = 0; i < num; i++)
+			DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
+				 rects[i].w, rects[i].h);
+		DRM_INFO("\n");
+	}
+#endif
+
+	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
+		du = vmw_connector_to_du(con);
+		if (num > du->unit) {
+			du->pref_width = rects[du->unit].w;
+			du->pref_height = rects[du->unit].h;
+			du->pref_active = true;
+		} else {
+			du->pref_width = 800;
+			du->pref_height = 600;
+			du->pref_active = false;
+		}
+		con->status = vmw_du_connector_detect(con, true);
 	}
 
-	user_rects = (void __user *)(unsigned long)arg->rects;
-	ret = copy_from_user(rects, user_rects, rects_size);
-	if (unlikely(ret != 0)) {
-		DRM_ERROR("Failed to get rects.\n");
-		ret = -EFAULT;
-		goto out_free;
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
+void vmw_du_crtc_save(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_restore(struct drm_crtc *crtc)
+{
+}
+
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size)
+{
+	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
+	int i;
+
+	for (i = 0; i < size; i++) {
+		DRM_DEBUG("%d r/g/b = 0x%04x / 0x%04x / 0x%04x\n", i,
+			  r[i], g[i], b[i]);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 0, r[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 1, g[i] >> 8);
+		vmw_write(dev_priv, SVGA_PALETTE_BASE + i * 3 + 2, b[i] >> 8);
 	}
+}
 
-	vmw_kms_ldu_update_layout(dev_priv, arg->num_outputs, rects);
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode)
+{
+}
 
-out_free:
-	kfree(rects);
-out_unlock:
-	ttm_read_unlock(&vmaster->lock);
-	return ret;
+void vmw_du_connector_save(struct drm_connector *connector)
+{
 }
 
-bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv,
-				uint32_t pitch,
-				uint32_t height)
+void vmw_du_connector_restore(struct drm_connector *connector)
 {
-	return ((u64) pitch * (u64) height) < (u64) dev_priv->vram_size;
 }
 
-u32 vmw_get_vblank_counter(struct drm_device *dev, int crtc)
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force)
+{
+	uint32_t num_displays;
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+
+	mutex_lock(&dev_priv->hw_mutex);
+	num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS);
+	mutex_unlock(&dev_priv->hw_mutex);
+
+	return ((vmw_connector_to_du(connector)->unit < num_displays) ?
+		connector_status_connected : connector_status_disconnected);
+}
+
+static struct drm_display_mode vmw_kms_connector_builtin[] = {
+	/* 640x480@60Hz */
+	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
+		   752, 800, 0, 480, 489, 492, 525, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 800x600@60Hz */
+	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
+		   968, 1056, 0, 600, 601, 605, 628, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1024x768@60Hz */
+	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
+		   1184, 1344, 0, 768, 771, 777, 806, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1152x864@75Hz */
+	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
+		   1344, 1600, 0, 864, 865, 868, 900, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x768@60Hz */
+	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
+		   1472, 1664, 0, 768, 771, 778, 798, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x800@60Hz */
+	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
+		   1480, 1680, 0, 800, 803, 809, 831, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
+	/* 1280x960@60Hz */
+	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
+		   1488, 1800, 0, 960, 961, 964, 1000, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1280x1024@60Hz */
+	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
+		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1360x768@60Hz */
+	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
+		   1536, 1792, 0, 768, 771, 777, 795, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x1050@60Hz */
+	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
+		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1440x900@60Hz */
+	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
+		   1672, 1904, 0, 900, 903, 909, 934, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1600x1200@60Hz */
+	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
+		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
+		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1680x1050@60Hz */
+	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
+		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1792x1344@60Hz */
+	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
+		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1853x1392@60Hz */
+	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
+		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1200@60Hz */
+	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
+		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 1920x1440@60Hz */
+	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
+		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* 2560x1600@60Hz */
+	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
+		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
+		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
+	/* Terminate */
+	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
+};
+
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height)
+{
+	struct vmw_display_unit *du = vmw_connector_to_du(connector);
+	struct drm_device *dev = connector->dev;
+	struct vmw_private *dev_priv = vmw_priv(dev);
+	struct drm_display_mode *mode = NULL;
+	struct drm_display_mode *bmode;
+	struct drm_display_mode prefmode = { DRM_MODE("preferred",
+		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
+	};
+	int i;
+
+	/* Add preferred mode */
+	{
+		mode = drm_mode_duplicate(dev, &prefmode);
+		if (!mode)
+			return 0;
+		mode->hdisplay = du->pref_width;
+		mode->vdisplay = du->pref_height;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
+					       mode->vdisplay)) {
+			drm_mode_probed_add(connector, mode);
+
+			if (du->pref_mode) {
+				list_del_init(&du->pref_mode->head);
+				drm_mode_destroy(dev, du->pref_mode);
+			}
+
+			du->pref_mode = mode;
+		}
+	}
+
+	for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) {
+		bmode = &vmw_kms_connector_builtin[i];
+		if (bmode->hdisplay > max_width ||
+		    bmode->vdisplay > max_height)
+			continue;
+
+		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
+						bmode->vdisplay))
+			continue;
+
+		mode = drm_mode_duplicate(dev, bmode);
+		if (!mode)
+			return 0;
+		mode->vrefresh = drm_mode_vrefresh(mode);
+
+		drm_mode_probed_add(connector, mode);
+	}
+
+	drm_mode_connector_list_update(connector);
+
+	return 1;
+}
+
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val)
 {
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 8a398a0339b6..db0b901f8c3f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -31,6 +31,8 @@
 #include "drmP.h"
 #include "vmwgfx_drv.h"
 
+#define VMWGFX_NUM_DISPLAY_UNITS 8
+
 
 #define vmw_framebuffer_to_vfb(x) \
 	container_of(x, struct vmw_framebuffer, base)
@@ -45,6 +47,9 @@ struct vmw_framebuffer {
 	struct drm_framebuffer base;
 	int (*pin)(struct vmw_framebuffer *fb);
 	int (*unpin)(struct vmw_framebuffer *fb);
+	bool dmabuf;
+	struct ttm_base_object *user_obj;
+	uint32_t user_handle;
 };
 
 
@@ -83,22 +88,59 @@ struct vmw_display_unit {
 	int hotspot_y;
 
 	unsigned unit;
+
+	/*
+	 * Prefered mode tracking.
+	 */
+	unsigned pref_width;
+	unsigned pref_height;
+	bool pref_active;
+	struct drm_display_mode *pref_mode;
 };
 
+#define vmw_crtc_to_du(x) \
+	container_of(x, struct vmw_display_unit, crtc)
+#define vmw_connector_to_du(x) \
+	container_of(x, struct vmw_display_unit, connector)
+
+
 /*
  * Shared display unit functions - vmwgfx_kms.c
  */
 void vmw_display_unit_cleanup(struct vmw_display_unit *du);
+void vmw_du_crtc_save(struct drm_crtc *crtc);
+void vmw_du_crtc_restore(struct drm_crtc *crtc);
+void vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
+			   u16 *r, u16 *g, u16 *b,
+			   uint32_t start, uint32_t size);
 int vmw_du_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 			   uint32_t handle, uint32_t width, uint32_t height);
 int vmw_du_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
+void vmw_du_connector_dpms(struct drm_connector *connector, int mode);
+void vmw_du_connector_save(struct drm_connector *connector);
+void vmw_du_connector_restore(struct drm_connector *connector);
+enum drm_connector_status
+vmw_du_connector_detect(struct drm_connector *connector, bool force);
+int vmw_du_connector_fill_modes(struct drm_connector *connector,
+				uint32_t max_width, uint32_t max_height);
+int vmw_du_connector_set_property(struct drm_connector *connector,
+				  struct drm_property *property,
+				  uint64_t val);
+int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
+			 struct drm_vmw_rect *rects);
 
 /*
  * Legacy display unit functions - vmwgfx_ldu.c
  */
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv);
 int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv);
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
+
+/*
+ * Screen Objects display functions - vmwgfx_scrn.c
+ */
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv);
+int vmw_kms_sou_update_layout(struct vmw_private *dev_priv, unsigned num,
 			      struct drm_vmw_rect *rects);
 
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
index b3a2cd5118d7..92f56bc594eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c
@@ -27,7 +27,6 @@
 
 #include "vmwgfx_kms.h"
 
-#define VMWGFX_LDU_NUM_DU 8
 
 #define vmw_crtc_to_ldu(x) \
 	container_of(x, struct vmw_legacy_display_unit, base.crtc)
@@ -51,11 +50,6 @@ struct vmw_legacy_display {
 struct vmw_legacy_display_unit {
 	struct vmw_display_unit base;
 
-	unsigned pref_width;
-	unsigned pref_height;
-	bool pref_active;
-	struct drm_display_mode *pref_mode;
-
 	struct list_head active;
 };
 
@@ -71,20 +65,6 @@ static void vmw_ldu_destroy(struct vmw_legacy_display_unit *ldu)
  * Legacy Display Unit CRTC functions
  */
 
-static void vmw_ldu_crtc_save(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_restore(struct drm_crtc *crtc)
-{
-}
-
-static void vmw_ldu_crtc_gamma_set(struct drm_crtc *crtc,
-				   u16 *r, u16 *g, u16 *b,
-				   uint32_t start, uint32_t size)
-{
-}
-
 static void vmw_ldu_crtc_destroy(struct drm_crtc *crtc)
 {
 	vmw_ldu_destroy(vmw_crtc_to_ldu(crtc));
@@ -114,10 +94,8 @@ static int vmw_ldu_commit_list(struct vmw_private *dev_priv)
 			return 0;
 		fb = entry->base.crtc.fb;
 
-		vmw_kms_write_svga(dev_priv, w, h, fb->pitch,
-				   fb->bits_per_pixel, fb->depth);
-
-		return 0;
+		return vmw_kms_write_svga(dev_priv, w, h, fb->pitch,
+					  fb->bits_per_pixel, fb->depth);
 	}
 
 	if (!list_empty(&lds->active)) {
@@ -265,9 +243,7 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 
 		vmw_ldu_del_active(dev_priv, ldu);
 
-		vmw_ldu_commit_list(dev_priv);
-
-		return 0;
+		return vmw_ldu_commit_list(dev_priv);
 	}
 
 
@@ -292,21 +268,20 @@ static int vmw_ldu_crtc_set_config(struct drm_mode_set *set)
 
 	vmw_ldu_add_active(dev_priv, ldu, vfb);
 
-	vmw_ldu_commit_list(dev_priv);
-
-	return 0;
+	return vmw_ldu_commit_list(dev_priv);
 }
 
 static struct drm_crtc_funcs vmw_legacy_crtc_funcs = {
-	.save = vmw_ldu_crtc_save,
-	.restore = vmw_ldu_crtc_restore,
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
 	.cursor_set = vmw_du_crtc_cursor_set,
 	.cursor_move = vmw_du_crtc_cursor_move,
-	.gamma_set = vmw_ldu_crtc_gamma_set,
+	.gamma_set = vmw_du_crtc_gamma_set,
 	.destroy = vmw_ldu_crtc_destroy,
 	.set_config = vmw_ldu_crtc_set_config,
 };
 
+
 /*
  * Legacy Display Unit encoder functions
  */
@@ -324,183 +299,18 @@ static struct drm_encoder_funcs vmw_legacy_encoder_funcs = {
  * Legacy Display Unit connector functions
  */
 
-static void vmw_ldu_connector_dpms(struct drm_connector *connector, int mode)
-{
-}
-
-static void vmw_ldu_connector_save(struct drm_connector *connector)
-{
-}
-
-static void vmw_ldu_connector_restore(struct drm_connector *connector)
-{
-}
-
-static enum drm_connector_status
-	vmw_ldu_connector_detect(struct drm_connector *connector,
-				 bool force)
-{
-	if (vmw_connector_to_ldu(connector)->pref_active)
-		return connector_status_connected;
-	return connector_status_disconnected;
-}
-
-static const struct drm_display_mode vmw_ldu_connector_builtin[] = {
-	/* 640x480@60Hz */
-	{ DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656,
-		   752, 800, 0, 480, 489, 492, 525, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 800x600@60Hz */
-	{ DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840,
-		   968, 1056, 0, 600, 601, 605, 628, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1024x768@60Hz */
-	{ DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048,
-		   1184, 1344, 0, 768, 771, 777, 806, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1152x864@75Hz */
-	{ DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216,
-		   1344, 1600, 0, 864, 865, 868, 900, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x768@60Hz */
-	{ DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344,
-		   1472, 1664, 0, 768, 771, 778, 798, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x800@60Hz */
-	{ DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352,
-		   1480, 1680, 0, 800, 803, 809, 831, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) },
-	/* 1280x960@60Hz */
-	{ DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376,
-		   1488, 1800, 0, 960, 961, 964, 1000, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1280x1024@60Hz */
-	{ DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328,
-		   1440, 1688, 0, 1024, 1025, 1028, 1066, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1360x768@60Hz */
-	{ DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424,
-		   1536, 1792, 0, 768, 771, 777, 795, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x1050@60Hz */
-	{ DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488,
-		   1632, 1864, 0, 1050, 1053, 1057, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1440x900@60Hz */
-	{ DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520,
-		   1672, 1904, 0, 900, 903, 909, 934, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1600x1200@60Hz */
-	{ DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664,
-		   1856, 2160, 0, 1200, 1201, 1204, 1250, 0,
-		   DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1680x1050@60Hz */
-	{ DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784,
-		   1960, 2240, 0, 1050, 1053, 1059, 1089, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1792x1344@60Hz */
-	{ DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920,
-		   2120, 2448, 0, 1344, 1345, 1348, 1394, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1853x1392@60Hz */
-	{ DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952,
-		   2176, 2528, 0, 1392, 1393, 1396, 1439, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1200@60Hz */
-	{ DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056,
-		   2256, 2592, 0, 1200, 1203, 1209, 1245, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 1920x1440@60Hz */
-	{ DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048,
-		   2256, 2600, 0, 1440, 1441, 1444, 1500, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* 2560x1600@60Hz */
-	{ DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752,
-		   3032, 3504, 0, 1600, 1603, 1609, 1658, 0,
-		   DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) },
-	/* Terminate */
-	{ DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) },
-};
-
-static int vmw_ldu_connector_fill_modes(struct drm_connector *connector,
-					uint32_t max_width, uint32_t max_height)
-{
-	struct vmw_legacy_display_unit *ldu = vmw_connector_to_ldu(connector);
-	struct drm_device *dev = connector->dev;
-	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct drm_display_mode *mode = NULL;
-	struct drm_display_mode prefmode = { DRM_MODE("preferred",
-		DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
-		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
-	};
-	int i;
-
-	/* Add preferred mode */
-	{
-		mode = drm_mode_duplicate(dev, &prefmode);
-		if (!mode)
-			return 0;
-		mode->hdisplay = ldu->pref_width;
-		mode->vdisplay = ldu->pref_height;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-		if (vmw_kms_validate_mode_vram(dev_priv, mode->hdisplay * 2,
-					       mode->vdisplay)) {
-			drm_mode_probed_add(connector, mode);
-
-			if (ldu->pref_mode) {
-				list_del_init(&ldu->pref_mode->head);
-				drm_mode_destroy(dev, ldu->pref_mode);
-			}
-
-			ldu->pref_mode = mode;
-		}
-	}
-
-	for (i = 0; vmw_ldu_connector_builtin[i].type != 0; i++) {
-		const struct drm_display_mode *bmode;
-
-		bmode = &vmw_ldu_connector_builtin[i];
-		if (bmode->hdisplay > max_width ||
-		    bmode->vdisplay > max_height)
-			continue;
-
-		if (!vmw_kms_validate_mode_vram(dev_priv, bmode->hdisplay * 2,
-						bmode->vdisplay))
-			continue;
-
-		mode = drm_mode_duplicate(dev, bmode);
-		if (!mode)
-			return 0;
-		mode->vrefresh = drm_mode_vrefresh(mode);
-
-		drm_mode_probed_add(connector, mode);
-	}
-
-	drm_mode_connector_list_update(connector);
-
-	return 1;
-}
-
-static int vmw_ldu_connector_set_property(struct drm_connector *connector,
-					  struct drm_property *property,
-					  uint64_t val)
-{
-	return 0;
-}
-
 static void vmw_ldu_connector_destroy(struct drm_connector *connector)
 {
 	vmw_ldu_destroy(vmw_connector_to_ldu(connector));
 }
 
 static struct drm_connector_funcs vmw_legacy_connector_funcs = {
-	.dpms = vmw_ldu_connector_dpms,
-	.save = vmw_ldu_connector_save,
-	.restore = vmw_ldu_connector_restore,
-	.detect = vmw_ldu_connector_detect,
-	.fill_modes = vmw_ldu_connector_fill_modes,
-	.set_property = vmw_ldu_connector_set_property,
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
 	.destroy = vmw_ldu_connector_destroy,
 };
 
@@ -523,14 +333,14 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 
 	INIT_LIST_HEAD(&ldu->active);
 
-	ldu->pref_active = (unit == 0);
-	ldu->pref_width = 800;
-	ldu->pref_height = 600;
-	ldu->pref_mode = NULL;
+	ldu->base.pref_active = (unit == 0);
+	ldu->base.pref_width = 800;
+	ldu->base.pref_height = 600;
+	ldu->base.pref_mode = NULL;
 
 	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
 			   DRM_MODE_CONNECTOR_LVDS);
-	connector->status = vmw_ldu_connector_detect(connector, true);
+	connector->status = vmw_du_connector_detect(connector, true);
 
 	drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS);
@@ -540,6 +350,8 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 
 	drm_crtc_init(dev, crtc, &vmw_legacy_crtc_funcs);
 
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
 	drm_connector_attach_property(connector,
 				      dev->mode_config.dirty_info_property,
 				      1);
@@ -550,8 +362,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit)
 int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
-	int i;
-	int ret;
+	int i, ret;
 
 	if (dev_priv->ldu_priv) {
 		DRM_INFO("ldu system already on\n");
@@ -559,7 +370,6 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	}
 
 	dev_priv->ldu_priv = kmalloc(sizeof(*dev_priv->ldu_priv), GFP_KERNEL);
-
 	if (!dev_priv->ldu_priv)
 		return -ENOMEM;
 
@@ -568,18 +378,31 @@ int vmw_kms_init_legacy_display_system(struct vmw_private *dev_priv)
 	dev_priv->ldu_priv->last_num_active = 0;
 	dev_priv->ldu_priv->fb = NULL;
 
-	drm_mode_create_dirty_info_property(dev_priv->dev);
+	/* for old hardware without multimon only enable one display */
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	else
+		ret = drm_vblank_init(dev, 1);
+	if (ret != 0)
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (ret != 0)
+		goto err_vblank_cleanup;
 
-	if (dev_priv->capabilities & SVGA_CAP_MULTIMON) {
-		for (i = 0; i < VMWGFX_LDU_NUM_DU; ++i)
+	if (dev_priv->capabilities & SVGA_CAP_MULTIMON)
+		for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
 			vmw_ldu_init(dev_priv, i);
-		ret = drm_vblank_init(dev, VMWGFX_LDU_NUM_DU);
-	} else {
-		/* for old hardware without multimon only enable one display */
+	else
 		vmw_ldu_init(dev_priv, 0);
-		ret = drm_vblank_init(dev, 1);
-	}
 
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->ldu_priv);
+	dev_priv->ldu_priv = NULL;
 	return ret;
 }
 
@@ -587,52 +410,14 @@ int vmw_kms_close_legacy_display_system(struct vmw_private *dev_priv)
 {
 	struct drm_device *dev = dev_priv->dev;
 
-	drm_vblank_cleanup(dev);
 	if (!dev_priv->ldu_priv)
 		return -ENOSYS;
 
+	drm_vblank_cleanup(dev);
+
 	BUG_ON(!list_empty(&dev_priv->ldu_priv->active));
 
 	kfree(dev_priv->ldu_priv);
 
 	return 0;
 }
-
-int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num,
-			      struct drm_vmw_rect *rects)
-{
-	struct drm_device *dev = dev_priv->dev;
-	struct vmw_legacy_display_unit *ldu;
-	struct drm_connector *con;
-	int i;
-
-	mutex_lock(&dev->mode_config.mutex);
-
-#if 0
-	DRM_INFO("%s: new layout ", __func__);
-	for (i = 0; i < (int)num; i++)
-		DRM_INFO("(%i, %i %ux%u) ", rects[i].x, rects[i].y,
-			 rects[i].w, rects[i].h);
-	DRM_INFO("\n");
-#else
-	(void)i;
-#endif
-
-	list_for_each_entry(con, &dev->mode_config.connector_list, head) {
-		ldu = vmw_connector_to_ldu(con);
-		if (num > ldu->base.unit) {
-			ldu->pref_width = rects[ldu->base.unit].w;
-			ldu->pref_height = rects[ldu->base.unit].h;
-			ldu->pref_active = true;
-		} else {
-			ldu->pref_width = 800;
-			ldu->pref_height = 600;
-			ldu->pref_active = false;
-		}
-		con->status = vmw_ldu_connector_detect(con, true);
-	}
-
-	mutex_unlock(&dev->mode_config.mutex);
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c b/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c
new file mode 100644
index 000000000000..8a8725c2716c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_marker.c
@@ -0,0 +1,171 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2010 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "vmwgfx_drv.h"
+
+struct vmw_marker {
+	struct list_head head;
+	uint32_t seqno;
+	struct timespec submitted;
+};
+
+void vmw_marker_queue_init(struct vmw_marker_queue *queue)
+{
+	INIT_LIST_HEAD(&queue->head);
+	queue->lag = ns_to_timespec(0);
+	getrawmonotonic(&queue->lag_time);
+	spin_lock_init(&queue->lock);
+}
+
+void vmw_marker_queue_takedown(struct vmw_marker_queue *queue)
+{
+	struct vmw_marker *marker, *next;
+
+	spin_lock(&queue->lock);
+	list_for_each_entry_safe(marker, next, &queue->head, head) {
+		kfree(marker);
+	}
+	spin_unlock(&queue->lock);
+}
+
+int vmw_marker_push(struct vmw_marker_queue *queue,
+		   uint32_t seqno)
+{
+	struct vmw_marker *marker = kmalloc(sizeof(*marker), GFP_KERNEL);
+
+	if (unlikely(!marker))
+		return -ENOMEM;
+
+	marker->seqno = seqno;
+	getrawmonotonic(&marker->submitted);
+	spin_lock(&queue->lock);
+	list_add_tail(&marker->head, &queue->head);
+	spin_unlock(&queue->lock);
+
+	return 0;
+}
+
+int vmw_marker_pull(struct vmw_marker_queue *queue,
+		   uint32_t signaled_seqno)
+{
+	struct vmw_marker *marker, *next;
+	struct timespec now;
+	bool updated = false;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+
+	if (list_empty(&queue->head)) {
+		queue->lag = ns_to_timespec(0);
+		queue->lag_time = now;
+		updated = true;
+		goto out_unlock;
+	}
+
+	list_for_each_entry_safe(marker, next, &queue->head, head) {
+		if (signaled_seqno - marker->seqno > (1 << 30))
+			continue;
+
+		queue->lag = timespec_sub(now, marker->submitted);
+		queue->lag_time = now;
+		updated = true;
+		list_del(&marker->head);
+		kfree(marker);
+	}
+
+out_unlock:
+	spin_unlock(&queue->lock);
+
+	return (updated) ? 0 : -EBUSY;
+}
+
+static struct timespec vmw_timespec_add(struct timespec t1,
+					struct timespec t2)
+{
+	t1.tv_sec += t2.tv_sec;
+	t1.tv_nsec += t2.tv_nsec;
+	if (t1.tv_nsec >= 1000000000L) {
+		t1.tv_sec += 1;
+		t1.tv_nsec -= 1000000000L;
+	}
+
+	return t1;
+}
+
+static struct timespec vmw_fifo_lag(struct vmw_marker_queue *queue)
+{
+	struct timespec now;
+
+	spin_lock(&queue->lock);
+	getrawmonotonic(&now);
+	queue->lag = vmw_timespec_add(queue->lag,
+				      timespec_sub(now, queue->lag_time));
+	queue->lag_time = now;
+	spin_unlock(&queue->lock);
+	return queue->lag;
+}
+
+
+static bool vmw_lag_lt(struct vmw_marker_queue *queue,
+		       uint32_t us)
+{
+	struct timespec lag, cond;
+
+	cond = ns_to_timespec((s64) us * 1000);
+	lag = vmw_fifo_lag(queue);
+	return (timespec_compare(&lag, &cond) < 1);
+}
+
+int vmw_wait_lag(struct vmw_private *dev_priv,
+		 struct vmw_marker_queue *queue, uint32_t us)
+{
+	struct vmw_marker *marker;
+	uint32_t seqno;
+	int ret;
+
+	while (!vmw_lag_lt(queue, us)) {
+		spin_lock(&queue->lock);
+		if (list_empty(&queue->head))
+			seqno = atomic_read(&dev_priv->marker_seq);
+		else {
+			marker = list_first_entry(&queue->head,
+						 struct vmw_marker, head);
+			seqno = marker->seqno;
+		}
+		spin_unlock(&queue->lock);
+
+		ret = vmw_wait_seqno(dev_priv, false, seqno, true,
+					3*HZ);
+
+		if (unlikely(ret != 0))
+			return ret;
+
+		(void) vmw_marker_pull(queue, seqno);
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 07ce02da78a4..14399eec9c3c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -87,48 +87,6 @@ static inline void fill_flush(struct vmw_escape_video_flush *cmd,
 }
 
 /**
- * Pin or unpin a buffer in vram.
- *
- * @dev_priv:  Driver private.
- * @buf:  DMA buffer to pin or unpin.
- * @pin:  Pin buffer in vram if true.
- * @interruptible:  Use interruptible wait.
- *
- * Takes the current masters ttm lock in read.
- *
- * Returns
- * -ERESTARTSYS if interrupted by a signal.
- */
-static int vmw_dmabuf_pin_in_vram(struct vmw_private *dev_priv,
-				  struct vmw_dma_buffer *buf,
-				  bool pin, bool interruptible)
-{
-	struct ttm_buffer_object *bo = &buf->base;
-	struct ttm_placement *overlay_placement = &vmw_vram_placement;
-	int ret;
-
-	ret = ttm_read_lock(&dev_priv->active_master->lock, interruptible);
-	if (unlikely(ret != 0))
-		return ret;
-
-	ret = ttm_bo_reserve(bo, interruptible, false, false, 0);
-	if (unlikely(ret != 0))
-		goto err;
-
-	if (pin)
-		overlay_placement = &vmw_vram_ne_placement;
-
-	ret = ttm_bo_validate(bo, overlay_placement, interruptible, false, false);
-
-	ttm_bo_unreserve(bo);
-
-err:
-	ttm_read_unlock(&dev_priv->active_master->lock);
-
-	return ret;
-}
-
-/**
  * Send put command to hw.
  *
  * Returns
@@ -139,68 +97,80 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv,
 				struct drm_vmw_control_stream_arg *arg,
 				bool interruptible)
 {
+	struct vmw_escape_video_flush *flush;
+	size_t fifo_size;
+	bool have_so = dev_priv->sou_priv ? true : false;
+	int i, num_items;
+	SVGAGuestPtr ptr;
+
 	struct {
 		struct vmw_escape_header escape;
 		struct {
-			struct {
-				uint32_t cmdType;
-				uint32_t streamId;
-			} header;
-			struct {
-				uint32_t registerId;
-				uint32_t value;
-			} items[SVGA_VIDEO_PITCH_3 + 1];
-		} body;
-		struct vmw_escape_video_flush flush;
+			uint32_t cmdType;
+			uint32_t streamId;
+		} header;
 	} *cmds;
-	uint32_t offset;
-	int i, ret;
+	struct {
+		uint32_t registerId;
+		uint32_t value;
+	} *items;
 
-	for (;;) {
-		cmds = vmw_fifo_reserve(dev_priv, sizeof(*cmds));
-		if (cmds)
-			break;
+	/* defines are a index needs + 1 */
+	if (have_so)
+		num_items = SVGA_VIDEO_DST_SCREEN_ID + 1;
+	else
+		num_items = SVGA_VIDEO_PITCH_3 + 1;
 
-		ret = vmw_fallback_wait(dev_priv, false, true, 0,
-					interruptible, 3*HZ);
-		if (interruptible && ret == -ERESTARTSYS)
-			return ret;
-		else
-			BUG_ON(ret != 0);
+	fifo_size = sizeof(*cmds) + sizeof(*flush) + sizeof(*items) * num_items;
+
+	cmds = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* hardware has hung, can't do anything here */
+	if (!cmds)
+		return -ENOMEM;
+
+	items = (typeof(items))&cmds[1];
+	flush = (struct vmw_escape_video_flush *)&items[num_items];
+
+	/* the size is header + number of items */
+	fill_escape(&cmds->escape, sizeof(*items) * (num_items + 1));
+
+	cmds->header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
+	cmds->header.streamId = arg->stream_id;
+
+	/* the IDs are neatly numbered */
+	for (i = 0; i < num_items; i++)
+		items[i].registerId = i;
+
+	vmw_bo_get_guest_ptr(&buf->base, &ptr);
+	ptr.offset += arg->offset;
+
+	items[SVGA_VIDEO_ENABLED].value     = true;
+	items[SVGA_VIDEO_FLAGS].value       = arg->flags;
+	items[SVGA_VIDEO_DATA_OFFSET].value = ptr.offset;
+	items[SVGA_VIDEO_FORMAT].value      = arg->format;
+	items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
+	items[SVGA_VIDEO_SIZE].value        = arg->size;
+	items[SVGA_VIDEO_WIDTH].value       = arg->width;
+	items[SVGA_VIDEO_HEIGHT].value      = arg->height;
+	items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
+	items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
+	items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
+	items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
+	items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
+	items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
+	items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
+	items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
+	items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
+	items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
+	items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
+	if (have_so) {
+		items[SVGA_VIDEO_DATA_GMRID].value    = ptr.gmrId;
+		items[SVGA_VIDEO_DST_SCREEN_ID].value = SVGA_ID_INVALID;
 	}
 
-	fill_escape(&cmds->escape, sizeof(cmds->body));
-	cmds->body.header.cmdType = SVGA_ESCAPE_VMWARE_VIDEO_SET_REGS;
-	cmds->body.header.streamId = arg->stream_id;
-
-	for (i = 0; i <= SVGA_VIDEO_PITCH_3; i++)
-		cmds->body.items[i].registerId = i;
-
-	offset = buf->base.offset + arg->offset;
-
-	cmds->body.items[SVGA_VIDEO_ENABLED].value     = true;
-	cmds->body.items[SVGA_VIDEO_FLAGS].value       = arg->flags;
-	cmds->body.items[SVGA_VIDEO_DATA_OFFSET].value = offset;
-	cmds->body.items[SVGA_VIDEO_FORMAT].value      = arg->format;
-	cmds->body.items[SVGA_VIDEO_COLORKEY].value    = arg->color_key;
-	cmds->body.items[SVGA_VIDEO_SIZE].value        = arg->size;
-	cmds->body.items[SVGA_VIDEO_WIDTH].value       = arg->width;
-	cmds->body.items[SVGA_VIDEO_HEIGHT].value      = arg->height;
-	cmds->body.items[SVGA_VIDEO_SRC_X].value       = arg->src.x;
-	cmds->body.items[SVGA_VIDEO_SRC_Y].value       = arg->src.y;
-	cmds->body.items[SVGA_VIDEO_SRC_WIDTH].value   = arg->src.w;
-	cmds->body.items[SVGA_VIDEO_SRC_HEIGHT].value  = arg->src.h;
-	cmds->body.items[SVGA_VIDEO_DST_X].value       = arg->dst.x;
-	cmds->body.items[SVGA_VIDEO_DST_Y].value       = arg->dst.y;
-	cmds->body.items[SVGA_VIDEO_DST_WIDTH].value   = arg->dst.w;
-	cmds->body.items[SVGA_VIDEO_DST_HEIGHT].value  = arg->dst.h;
-	cmds->body.items[SVGA_VIDEO_PITCH_1].value     = arg->pitch[0];
-	cmds->body.items[SVGA_VIDEO_PITCH_2].value     = arg->pitch[1];
-	cmds->body.items[SVGA_VIDEO_PITCH_3].value     = arg->pitch[2];
-
-	fill_flush(&cmds->flush, arg->stream_id);
+	fill_flush(flush, arg->stream_id);
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmds));
+	vmw_fifo_commit(dev_priv, fifo_size);
 
 	return 0;
 }
@@ -248,6 +218,25 @@ static int vmw_overlay_send_stop(struct vmw_private *dev_priv,
 }
 
 /**
+ * Move a buffer to vram or gmr if @pin is set, else unpin the buffer.
+ *
+ * With the introduction of screen objects buffers could now be
+ * used with GMRs instead of being locked to vram.
+ */
+static int vmw_overlay_move_buffer(struct vmw_private *dev_priv,
+				   struct vmw_dma_buffer *buf,
+				   bool pin, bool inter)
+{
+	if (!pin)
+		return vmw_dmabuf_unpin(dev_priv, buf, inter);
+
+	if (!dev_priv->sou_priv)
+		return vmw_dmabuf_to_vram(dev_priv, buf, true, inter);
+
+	return vmw_dmabuf_to_vram_or_gmr(dev_priv, buf, true, inter);
+}
+
+/**
  * Stop or pause a stream.
  *
  * If the stream is paused the no evict flag is removed from the buffer
@@ -279,8 +268,8 @@ static int vmw_overlay_stop(struct vmw_private *dev_priv,
 			return ret;
 
 		/* We just remove the NO_EVICT flag so no -ENOMEM */
-		ret = vmw_dmabuf_pin_in_vram(dev_priv, stream->buf, false,
-					     interruptible);
+		ret = vmw_overlay_move_buffer(dev_priv, stream->buf, false,
+					      interruptible);
 		if (interruptible && ret == -ERESTARTSYS)
 			return ret;
 		else
@@ -342,7 +331,7 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 	/* We don't start the old stream if we are interrupted.
 	 * Might return -ENOMEM if it can't fit the buffer in vram.
 	 */
-	ret = vmw_dmabuf_pin_in_vram(dev_priv, buf, true, interruptible);
+	ret = vmw_overlay_move_buffer(dev_priv, buf, true, interruptible);
 	if (ret)
 		return ret;
 
@@ -351,7 +340,8 @@ static int vmw_overlay_update_stream(struct vmw_private *dev_priv,
 		/* This one needs to happen no matter what. We only remove
 		 * the NO_EVICT flag so this is safe from -ENOMEM.
 		 */
-		BUG_ON(vmw_dmabuf_pin_in_vram(dev_priv, buf, false, false) != 0);
+		BUG_ON(vmw_overlay_move_buffer(dev_priv, buf, false, false)
+		       != 0);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index bfe1bcce7f8a..86c5e4cceb31 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -31,10 +31,6 @@
 #include "ttm/ttm_placement.h"
 #include "drmP.h"
 
-#define VMW_RES_CONTEXT ttm_driver_type0
-#define VMW_RES_SURFACE ttm_driver_type1
-#define VMW_RES_STREAM ttm_driver_type2
-
 struct vmw_user_context {
 	struct ttm_base_object base;
 	struct vmw_resource res;
@@ -43,6 +39,7 @@ struct vmw_user_context {
 struct vmw_user_surface {
 	struct ttm_base_object base;
 	struct vmw_surface srf;
+	uint32_t size;
 };
 
 struct vmw_user_dma_buffer {
@@ -65,6 +62,17 @@ struct vmw_user_stream {
 	struct vmw_stream stream;
 };
 
+struct vmw_surface_offset {
+	uint32_t face;
+	uint32_t mip;
+	uint32_t bo_offset;
+};
+
+
+static uint64_t vmw_user_context_size;
+static uint64_t vmw_user_surface_size;
+static uint64_t vmw_user_stream_size;
+
 static inline struct vmw_dma_buffer *
 vmw_dma_buffer(struct ttm_buffer_object *bo)
 {
@@ -84,13 +92,36 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 	return res;
 }
 
+
+/**
+ * vmw_resource_release_id - release a resource id to the id manager.
+ *
+ * @res: Pointer to the resource.
+ *
+ * Release the resource id to the resource id manager and set it to -1
+ */
+static void vmw_resource_release_id(struct vmw_resource *res)
+{
+	struct vmw_private *dev_priv = res->dev_priv;
+
+	write_lock(&dev_priv->resource_lock);
+	if (res->id != -1)
+		idr_remove(res->idr, res->id);
+	res->id = -1;
+	write_unlock(&dev_priv->resource_lock);
+}
+
 static void vmw_resource_release(struct kref *kref)
 {
 	struct vmw_resource *res =
 	    container_of(kref, struct vmw_resource, kref);
 	struct vmw_private *dev_priv = res->dev_priv;
+	int id = res->id;
+	struct idr *idr = res->idr;
 
-	idr_remove(res->idr, res->id);
+	res->avail = false;
+	if (res->remove_from_lists != NULL)
+		res->remove_from_lists(res);
 	write_unlock(&dev_priv->resource_lock);
 
 	if (likely(res->hw_destroy != NULL))
@@ -102,6 +133,9 @@ static void vmw_resource_release(struct kref *kref)
 		kfree(res);
 
 	write_lock(&dev_priv->resource_lock);
+
+	if (id != -1)
+		idr_remove(idr, id);
 }
 
 void vmw_resource_unreference(struct vmw_resource **p_res)
@@ -115,28 +149,29 @@ void vmw_resource_unreference(struct vmw_resource **p_res)
 	write_unlock(&dev_priv->resource_lock);
 }
 
-static int vmw_resource_init(struct vmw_private *dev_priv,
-			     struct vmw_resource *res,
-			     struct idr *idr,
-			     enum ttm_object_type obj_type,
-			     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_resource_alloc_id - release a resource id to the id manager.
+ *
+ * @dev_priv: Pointer to the device private structure.
+ * @res: Pointer to the resource.
+ *
+ * Allocate the lowest free resource from the resource manager, and set
+ * @res->id to that id. Returns 0 on success and -ENOMEM on failure.
+ */
+static int vmw_resource_alloc_id(struct vmw_private *dev_priv,
+				 struct vmw_resource *res)
 {
 	int ret;
 
-	kref_init(&res->kref);
-	res->hw_destroy = NULL;
-	res->res_free = res_free;
-	res->res_type = obj_type;
-	res->idr = idr;
-	res->avail = false;
-	res->dev_priv = dev_priv;
+	BUG_ON(res->id != -1);
 
 	do {
-		if (unlikely(idr_pre_get(idr, GFP_KERNEL) == 0))
+		if (unlikely(idr_pre_get(res->idr, GFP_KERNEL) == 0))
 			return -ENOMEM;
 
 		write_lock(&dev_priv->resource_lock);
-		ret = idr_get_new_above(idr, res, 1, &res->id);
+		ret = idr_get_new_above(res->idr, res, 1, &res->id);
 		write_unlock(&dev_priv->resource_lock);
 
 	} while (ret == -EAGAIN);
@@ -144,6 +179,33 @@ static int vmw_resource_init(struct vmw_private *dev_priv,
 	return ret;
 }
 
+
+static int vmw_resource_init(struct vmw_private *dev_priv,
+			     struct vmw_resource *res,
+			     struct idr *idr,
+			     enum ttm_object_type obj_type,
+			     bool delay_id,
+			     void (*res_free) (struct vmw_resource *res),
+			     void (*remove_from_lists)
+			     (struct vmw_resource *res))
+{
+	kref_init(&res->kref);
+	res->hw_destroy = NULL;
+	res->res_free = res_free;
+	res->remove_from_lists = remove_from_lists;
+	res->res_type = obj_type;
+	res->idr = idr;
+	res->avail = false;
+	res->dev_priv = dev_priv;
+	INIT_LIST_HEAD(&res->query_head);
+	INIT_LIST_HEAD(&res->validate_head);
+	res->id = -1;
+	if (delay_id)
+		return 0;
+	else
+		return vmw_resource_alloc_id(dev_priv, res);
+}
+
 /**
  * vmw_resource_activate
  *
@@ -198,8 +260,12 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 	struct {
 		SVGA3dCmdHeader header;
 		SVGA3dCmdDestroyContext body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	} *cmd;
 
+
+	vmw_execbuf_release_pinned_bo(dev_priv, true, res->id);
+
+	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving FIFO space for surface "
 			  "destruction.\n");
@@ -211,7 +277,7 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 	cmd->body.cid = cpu_to_le32(res->id);
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	vmw_3d_resource_dec(dev_priv);
+	vmw_3d_resource_dec(dev_priv, false);
 }
 
 static int vmw_context_init(struct vmw_private *dev_priv,
@@ -226,14 +292,17 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	} *cmd;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->context_idr,
-				VMW_RES_CONTEXT, res_free);
+				VMW_RES_CONTEXT, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
-		if (res_free == NULL)
-			kfree(res);
-		else
-			res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a resource id.\n");
+		goto out_early;
+	}
+
+	if (unlikely(res->id >= SVGA3D_MAX_CONTEXT_IDS)) {
+		DRM_ERROR("Out of hw context ids.\n");
+		vmw_resource_unreference(&res);
+		return -ENOMEM;
 	}
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
@@ -248,9 +317,16 @@ static int vmw_context_init(struct vmw_private *dev_priv,
 	cmd->body.cid = cpu_to_le32(res->id);
 
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	(void) vmw_3d_resource_inc(dev_priv);
+	(void) vmw_3d_resource_inc(dev_priv, false);
 	vmw_resource_activate(res, vmw_hw_context_destroy);
 	return 0;
+
+out_early:
+	if (res_free == NULL)
+		kfree(res);
+	else
+		res_free(res);
+	return ret;
 }
 
 struct vmw_resource *vmw_context_alloc(struct vmw_private *dev_priv)
@@ -273,8 +349,11 @@ static void vmw_user_context_free(struct vmw_resource *res)
 {
 	struct vmw_user_context *ctx =
 	    container_of(res, struct vmw_user_context, res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(ctx);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_context_size);
 }
 
 /**
@@ -328,23 +407,56 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_context *ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	struct vmw_user_context *ctx;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_context_arg *arg = (struct drm_vmw_context_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(ctx == NULL))
-		return -ENOMEM;
+
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of contexts anyway.
+	 */
+
+	if (unlikely(vmw_user_context_size == 0))
+		vmw_user_context_size = ttm_round_pot(sizeof(*ctx)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_context_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for context"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (unlikely(ctx == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_context_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &ctx->res;
 	ctx->base.shareable = false;
 	ctx->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_context_init(dev_priv, res, vmw_user_context_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&ctx->res);
 	ret = ttm_base_object_init(tfile, &ctx->base, false, VMW_RES_CONTEXT,
@@ -358,13 +470,16 @@ int vmw_context_define_ioctl(struct drm_device *dev, void *data,
 	arg->cid = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 
 }
 
 int vmw_context_check(struct vmw_private *dev_priv,
 		      struct ttm_object_file *tfile,
-		      int id)
+		      int id,
+		      struct vmw_resource **p_res)
 {
 	struct vmw_resource *res;
 	int ret = 0;
@@ -376,6 +491,8 @@ int vmw_context_check(struct vmw_private *dev_priv,
 			container_of(res, struct vmw_user_context, res);
 		if (ctx->base.tfile != tfile && !ctx->base.shareable)
 			ret = -EPERM;
+		if (p_res)
+			*p_res = vmw_resource_reference(res);
 	} else
 		ret = -EINVAL;
 	read_unlock(&dev_priv->resource_lock);
@@ -383,102 +500,638 @@ int vmw_context_check(struct vmw_private *dev_priv,
 	return ret;
 }
 
+struct vmw_bpp {
+	uint8_t bpp;
+	uint8_t s_bpp;
+};
+
+/*
+ * Size table for the supported SVGA3D surface formats. It consists of
+ * two values. The bpp value and the s_bpp value which is short for
+ * "stride bits per pixel" The values are given in such a way that the
+ * minimum stride for the image is calculated using
+ *
+ * min_stride = w*s_bpp
+ *
+ * and the total memory requirement for the image is
+ *
+ * h*min_stride*bpp/s_bpp
+ *
+ */
+static const struct vmw_bpp vmw_sf_bpp[] = {
+	[SVGA3D_FORMAT_INVALID] = {0, 0},
+	[SVGA3D_X8R8G8B8] = {32, 32},
+	[SVGA3D_A8R8G8B8] = {32, 32},
+	[SVGA3D_R5G6B5] = {16, 16},
+	[SVGA3D_X1R5G5B5] = {16, 16},
+	[SVGA3D_A1R5G5B5] = {16, 16},
+	[SVGA3D_A4R4G4B4] = {16, 16},
+	[SVGA3D_Z_D32] = {32, 32},
+	[SVGA3D_Z_D16] = {16, 16},
+	[SVGA3D_Z_D24S8] = {32, 32},
+	[SVGA3D_Z_D15S1] = {16, 16},
+	[SVGA3D_LUMINANCE8] = {8, 8},
+	[SVGA3D_LUMINANCE4_ALPHA4] = {8, 8},
+	[SVGA3D_LUMINANCE16] = {16, 16},
+	[SVGA3D_LUMINANCE8_ALPHA8] = {16, 16},
+	[SVGA3D_DXT1] = {4, 16},
+	[SVGA3D_DXT2] = {8, 32},
+	[SVGA3D_DXT3] = {8, 32},
+	[SVGA3D_DXT4] = {8, 32},
+	[SVGA3D_DXT5] = {8, 32},
+	[SVGA3D_BUMPU8V8] = {16, 16},
+	[SVGA3D_BUMPL6V5U5] = {16, 16},
+	[SVGA3D_BUMPX8L8V8U8] = {32, 32},
+	[SVGA3D_ARGB_S10E5] = {16, 16},
+	[SVGA3D_ARGB_S23E8] = {32, 32},
+	[SVGA3D_A2R10G10B10] = {32, 32},
+	[SVGA3D_V8U8] = {16, 16},
+	[SVGA3D_Q8W8V8U8] = {32, 32},
+	[SVGA3D_CxV8U8] = {16, 16},
+	[SVGA3D_X8L8V8U8] = {32, 32},
+	[SVGA3D_A2W10V10U10] = {32, 32},
+	[SVGA3D_ALPHA8] = {8, 8},
+	[SVGA3D_R_S10E5] = {16, 16},
+	[SVGA3D_R_S23E8] = {32, 32},
+	[SVGA3D_RG_S10E5] = {16, 16},
+	[SVGA3D_RG_S23E8] = {32, 32},
+	[SVGA3D_BUFFER] = {8, 8},
+	[SVGA3D_Z_D24X8] = {32, 32},
+	[SVGA3D_V16U16] = {32, 32},
+	[SVGA3D_G16R16] = {32, 32},
+	[SVGA3D_A16B16G16R16] = {64,  64},
+	[SVGA3D_UYVY] = {12, 12},
+	[SVGA3D_YUY2] = {12, 12},
+	[SVGA3D_NV12] = {12, 8},
+	[SVGA3D_AYUV] = {32, 32},
+	[SVGA3D_BC4_UNORM] = {4,  16},
+	[SVGA3D_BC5_UNORM] = {8,  32},
+	[SVGA3D_Z_DF16] = {16,  16},
+	[SVGA3D_Z_DF24] = {24,  24},
+	[SVGA3D_Z_D24S8_INT] = {32,  32}
+};
+
 
 /**
  * Surface management.
  */
 
+struct vmw_surface_dma {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdSurfaceDMA body;
+	SVGA3dCopyBox cb;
+	SVGA3dCmdSurfaceDMASuffix suffix;
+};
+
+struct vmw_surface_define {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDefineSurface body;
+};
+
+struct vmw_surface_destroy {
+	SVGA3dCmdHeader header;
+	SVGA3dCmdDestroySurface body;
+};
+
+
+/**
+ * vmw_surface_dma_size - Compute fifo size for a dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface dma command for backup or
+ * restoration of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_dma_size(const struct vmw_surface *srf)
+{
+	return srf->num_sizes * sizeof(struct vmw_surface_dma);
+}
+
+
+/**
+ * vmw_surface_define_size - Compute fifo size for a surface define command.
+ *
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * Computes the required size for a surface define command for the definition
+ * of the surface represented by @srf.
+ */
+static inline uint32_t vmw_surface_define_size(const struct vmw_surface *srf)
+{
+	return sizeof(struct vmw_surface_define) + srf->num_sizes *
+		sizeof(SVGA3dSize);
+}
+
+
+/**
+ * vmw_surface_destroy_size - Compute fifo size for a surface destroy command.
+ *
+ * Computes the required size for a surface destroy command for the destruction
+ * of a hw surface.
+ */
+static inline uint32_t vmw_surface_destroy_size(void)
+{
+	return sizeof(struct vmw_surface_destroy);
+}
+
+/**
+ * vmw_surface_destroy_encode - Encode a surface_destroy command.
+ *
+ * @id: The surface id
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_destroy_encode(uint32_t id,
+				       void *cmd_space)
+{
+	struct vmw_surface_destroy *cmd = (struct vmw_surface_destroy *)
+		cmd_space;
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DESTROY;
+	cmd->header.size = sizeof(cmd->body);
+	cmd->body.sid = id;
+}
+
+/**
+ * vmw_surface_define_encode - Encode a surface_define command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ */
+static void vmw_surface_define_encode(const struct vmw_surface *srf,
+				      void *cmd_space)
+{
+	struct vmw_surface_define *cmd = (struct vmw_surface_define *)
+		cmd_space;
+	struct drm_vmw_size *src_size;
+	SVGA3dSize *cmd_size;
+	uint32_t cmd_len;
+	int i;
+
+	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	cmd->header.id = SVGA_3D_CMD_SURFACE_DEFINE;
+	cmd->header.size = cmd_len;
+	cmd->body.sid = srf->res.id;
+	cmd->body.surfaceFlags = srf->flags;
+	cmd->body.format = cpu_to_le32(srf->format);
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		cmd->body.face[i].numMipLevels = srf->mip_levels[i];
+
+	cmd += 1;
+	cmd_size = (SVGA3dSize *) cmd;
+	src_size = srf->sizes;
+
+	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
+		cmd_size->width = src_size->width;
+		cmd_size->height = src_size->height;
+		cmd_size->depth = src_size->depth;
+	}
+}
+
+
+/**
+ * vmw_surface_dma_encode - Encode a surface_dma command.
+ *
+ * @srf: Pointer to a struct vmw_surface object.
+ * @cmd_space: Pointer to memory area in which the commands should be encoded.
+ * @ptr: Pointer to an SVGAGuestPtr indicating where the surface contents
+ * should be placed or read from.
+ * @to_surface: Boolean whether to DMA to the surface or from the surface.
+ */
+static void vmw_surface_dma_encode(struct vmw_surface *srf,
+				   void *cmd_space,
+				   const SVGAGuestPtr *ptr,
+				   bool to_surface)
+{
+	uint32_t i;
+	uint32_t bpp = vmw_sf_bpp[srf->format].bpp;
+	uint32_t stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+	struct vmw_surface_dma *cmd = (struct vmw_surface_dma *)cmd_space;
+
+	for (i = 0; i < srf->num_sizes; ++i) {
+		SVGA3dCmdHeader *header = &cmd->header;
+		SVGA3dCmdSurfaceDMA *body = &cmd->body;
+		SVGA3dCopyBox *cb = &cmd->cb;
+		SVGA3dCmdSurfaceDMASuffix *suffix = &cmd->suffix;
+		const struct vmw_surface_offset *cur_offset = &srf->offsets[i];
+		const struct drm_vmw_size *cur_size = &srf->sizes[i];
+
+		header->id = SVGA_3D_CMD_SURFACE_DMA;
+		header->size = sizeof(*body) + sizeof(*cb) + sizeof(*suffix);
+
+		body->guest.ptr = *ptr;
+		body->guest.ptr.offset += cur_offset->bo_offset;
+		body->guest.pitch = (cur_size->width * stride_bpp + 7) >> 3;
+		body->host.sid = srf->res.id;
+		body->host.face = cur_offset->face;
+		body->host.mipmap = cur_offset->mip;
+		body->transfer = ((to_surface) ?  SVGA3D_WRITE_HOST_VRAM :
+				  SVGA3D_READ_HOST_VRAM);
+		cb->x = 0;
+		cb->y = 0;
+		cb->z = 0;
+		cb->srcx = 0;
+		cb->srcy = 0;
+		cb->srcz = 0;
+		cb->w = cur_size->width;
+		cb->h = cur_size->height;
+		cb->d = cur_size->depth;
+
+		suffix->suffixSize = sizeof(*suffix);
+		suffix->maximumOffset = body->guest.pitch*cur_size->height*
+			cur_size->depth*bpp / stride_bpp;
+		suffix->flags.discard = 0;
+		suffix->flags.unsynchronized = 0;
+		suffix->flags.reserved = 0;
+		++cmd;
+	}
+};
+
+
 static void vmw_hw_surface_destroy(struct vmw_resource *res)
 {
 
 	struct vmw_private *dev_priv = res->dev_priv;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDestroySurface body;
-	} *cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
+	struct vmw_surface *srf;
+	void *cmd;
 
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving FIFO space for surface "
-			  "destruction.\n");
-		return;
-	}
+	if (res->id != -1) {
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DESTROY);
-	cmd->header.size = cpu_to_le32(sizeof(cmd->body));
-	cmd->body.sid = cpu_to_le32(res->id);
+		cmd = vmw_fifo_reserve(dev_priv, vmw_surface_destroy_size());
+		if (unlikely(cmd == NULL)) {
+			DRM_ERROR("Failed reserving FIFO space for surface "
+				  "destruction.\n");
+			return;
+		}
 
-	vmw_fifo_commit(dev_priv, sizeof(*cmd));
-	vmw_3d_resource_dec(dev_priv);
+		vmw_surface_destroy_encode(res->id, cmd);
+		vmw_fifo_commit(dev_priv, vmw_surface_destroy_size());
+
+		/*
+		 * used_memory_size_atomic, or separate lock
+		 * to avoid taking dev_priv::cmdbuf_mutex in
+		 * the destroy path.
+		 */
+
+		mutex_lock(&dev_priv->cmdbuf_mutex);
+		srf = container_of(res, struct vmw_surface, res);
+		dev_priv->used_memory_size -= srf->backup_size;
+		mutex_unlock(&dev_priv->cmdbuf_mutex);
+
+	}
+	vmw_3d_resource_dec(dev_priv, false);
 }
 
 void vmw_surface_res_free(struct vmw_resource *res)
 {
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(srf);
 }
 
-int vmw_surface_init(struct vmw_private *dev_priv,
-		     struct vmw_surface *srf,
-		     void (*res_free) (struct vmw_resource *res))
+
+/**
+ * vmw_surface_do_validate - make a surface available to the device.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * If the surface doesn't have a hw id, allocate one, and optionally
+ * DMA the backed up surface contents to the device.
+ *
+ * Returns -EBUSY if there wasn't sufficient device resources to
+ * complete the validation. Retry after freeing up resources.
+ *
+ * May return other errors if the kernel is out of guest resources.
+ */
+int vmw_surface_do_validate(struct vmw_private *dev_priv,
+			    struct vmw_surface *srf)
 {
-	int ret;
-	struct {
-		SVGA3dCmdHeader header;
-		SVGA3dCmdDefineSurface body;
-	} *cmd;
-	SVGA3dSize *cmd_size;
 	struct vmw_resource *res = &srf->res;
-	struct drm_vmw_size *src_size;
-	size_t submit_size;
-	uint32_t cmd_len;
-	int i;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
 
-	BUG_ON(res_free == NULL);
-	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
-				VMW_RES_SURFACE, res_free);
+	if (likely(res->id != -1))
+		return 0;
+
+	if (unlikely(dev_priv->used_memory_size + srf->backup_size >=
+		     dev_priv->memory_size))
+		return -EBUSY;
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	if (srf->backup) {
+		INIT_LIST_HEAD(&val_list);
+		val_buf.bo = ttm_bo_reference(srf->backup);
+		val_buf.new_sync_obj_arg = (void *)((unsigned long)
+						    DRM_VMW_FENCE_FLAG_EXEC);
+		list_add_tail(&val_buf.head, &val_list);
+		ret = ttm_eu_reserve_buffers(&val_list);
+		if (unlikely(ret != 0))
+			goto out_no_reserve;
+
+		ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+				      true, false, false);
+		if (unlikely(ret != 0))
+			goto out_no_validate;
+	}
+
+	/*
+	 * Alloc id for the resource.
+	 */
 
+	ret = vmw_resource_alloc_id(dev_priv, res);
 	if (unlikely(ret != 0)) {
-		res_free(res);
-		return ret;
+		DRM_ERROR("Failed to allocate a surface id.\n");
+		goto out_no_id;
+	}
+	if (unlikely(res->id >= SVGA3D_MAX_SURFACE_IDS)) {
+		ret = -EBUSY;
+		goto out_no_fifo;
 	}
 
-	submit_size = sizeof(*cmd) + srf->num_sizes * sizeof(SVGA3dSize);
-	cmd_len = sizeof(cmd->body) + srf->num_sizes * sizeof(SVGA3dSize);
+
+	/*
+	 * Encode surface define- and dma commands.
+	 */
+
+	submit_size = vmw_surface_define_size(srf);
+	if (srf->backup)
+		submit_size += vmw_surface_dma_size(srf);
 
 	cmd = vmw_fifo_reserve(dev_priv, submit_size);
 	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Fifo reserve failed for create surface.\n");
-		vmw_resource_unreference(&res);
-		return -ENOMEM;
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "validation.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
-	cmd->header.id = cpu_to_le32(SVGA_3D_CMD_SURFACE_DEFINE);
-	cmd->header.size = cpu_to_le32(cmd_len);
-	cmd->body.sid = cpu_to_le32(res->id);
-	cmd->body.surfaceFlags = cpu_to_le32(srf->flags);
-	cmd->body.format = cpu_to_le32(srf->format);
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
-		cmd->body.face[i].numMipLevels =
-		    cpu_to_le32(srf->mip_levels[i]);
+	vmw_surface_define_encode(srf, cmd);
+	if (srf->backup) {
+		SVGAGuestPtr ptr;
+
+		cmd += vmw_surface_define_size(srf);
+		vmw_bo_get_guest_ptr(srf->backup, &ptr);
+		vmw_surface_dma_encode(srf, cmd, &ptr, true);
 	}
 
-	cmd += 1;
-	cmd_size = (SVGA3dSize *) cmd;
-	src_size = srf->sizes;
+	vmw_fifo_commit(dev_priv, submit_size);
 
-	for (i = 0; i < srf->num_sizes; ++i, cmd_size++, src_size++) {
-		cmd_size->width = cpu_to_le32(src_size->width);
-		cmd_size->height = cpu_to_le32(src_size->height);
-		cmd_size->depth = cpu_to_le32(src_size->depth);
+	/*
+	 * Create a fence object and fence the backup buffer.
+	 */
+
+	if (srf->backup) {
+		struct vmw_fence_obj *fence;
+
+		(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+						  &fence, NULL);
+		ttm_eu_fence_buffer_objects(&val_list, fence);
+		if (likely(fence != NULL))
+			vmw_fence_obj_unreference(&fence);
+		ttm_bo_unref(&val_buf.bo);
+		ttm_bo_unref(&srf->backup);
+	}
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size += srf->backup_size;
+
+	return 0;
+
+out_no_fifo:
+	vmw_resource_release_id(res);
+out_no_id:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	if (srf->backup)
+		ttm_bo_unref(&val_buf.bo);
+	return ret;
+}
+
+/**
+ * vmw_surface_evict - Evict a hw surface.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface
+ *
+ * DMA the contents of a hw surface to a backup guest buffer object,
+ * and destroy the hw surface, releasing its id.
+ */
+int vmw_surface_evict(struct vmw_private *dev_priv,
+		      struct vmw_surface *srf)
+{
+	struct vmw_resource *res = &srf->res;
+	struct list_head val_list;
+	struct ttm_validate_buffer val_buf;
+	uint32_t submit_size;
+	uint8_t *cmd;
+	int ret;
+	struct vmw_fence_obj *fence;
+	SVGAGuestPtr ptr;
+
+	BUG_ON(res->id == -1);
+
+	/*
+	 * Create a surface backup buffer object.
+	 */
+
+	if (!srf->backup) {
+		ret = ttm_bo_create(&dev_priv->bdev, srf->backup_size,
+				    ttm_bo_type_device,
+				    &vmw_srf_placement, 0, 0, true,
+				    NULL, &srf->backup);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	/*
+	 * Reserve- and validate the backup DMA bo.
+	 */
+
+	INIT_LIST_HEAD(&val_list);
+	val_buf.bo = ttm_bo_reference(srf->backup);
+	val_buf.new_sync_obj_arg = (void *)(unsigned long)
+		DRM_VMW_FENCE_FLAG_EXEC;
+	list_add_tail(&val_buf.head, &val_list);
+	ret = ttm_eu_reserve_buffers(&val_list);
+	if (unlikely(ret != 0))
+		goto out_no_reserve;
+
+	ret = ttm_bo_validate(srf->backup, &vmw_srf_placement,
+			      true, false, false);
+	if (unlikely(ret != 0))
+		goto out_no_validate;
+
+
+	/*
+	 * Encode the dma- and surface destroy commands.
+	 */
+
+	submit_size = vmw_surface_dma_size(srf) + vmw_surface_destroy_size();
+	cmd = vmw_fifo_reserve(dev_priv, submit_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving FIFO space for surface "
+			  "eviction.\n");
+		ret = -ENOMEM;
+		goto out_no_fifo;
 	}
 
+	vmw_bo_get_guest_ptr(srf->backup, &ptr);
+	vmw_surface_dma_encode(srf, cmd, &ptr, false);
+	cmd += vmw_surface_dma_size(srf);
+	vmw_surface_destroy_encode(res->id, cmd);
 	vmw_fifo_commit(dev_priv, submit_size);
-	(void) vmw_3d_resource_inc(dev_priv);
-	vmw_resource_activate(res, vmw_hw_surface_destroy);
+
+	/*
+	 * Surface memory usage accounting.
+	 */
+
+	dev_priv->used_memory_size -= srf->backup_size;
+
+	/*
+	 * Create a fence object and fence the DMA buffer.
+	 */
+
+	(void) vmw_execbuf_fence_commands(NULL, dev_priv,
+					  &fence, NULL);
+	ttm_eu_fence_buffer_objects(&val_list, fence);
+	if (likely(fence != NULL))
+		vmw_fence_obj_unreference(&fence);
+	ttm_bo_unref(&val_buf.bo);
+
+	/*
+	 * Release the surface ID.
+	 */
+
+	vmw_resource_release_id(res);
+
 	return 0;
+
+out_no_fifo:
+out_no_validate:
+	if (srf->backup)
+		ttm_eu_backoff_reservation(&val_list);
+out_no_reserve:
+	ttm_bo_unref(&val_buf.bo);
+	ttm_bo_unref(&srf->backup);
+	return ret;
+}
+
+
+/**
+ * vmw_surface_validate - make a surface available to the device, evicting
+ * other surfaces if needed.
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @srf: Pointer to a struct vmw_surface.
+ *
+ * Try to validate a surface and if it fails due to limited device resources,
+ * repeatedly try to evict other surfaces until the request can be
+ * acommodated.
+ *
+ * May return errors if out of resources.
+ */
+int vmw_surface_validate(struct vmw_private *dev_priv,
+			 struct vmw_surface *srf)
+{
+	int ret;
+	struct vmw_surface *evict_srf;
+
+	do {
+		write_lock(&dev_priv->resource_lock);
+		list_del_init(&srf->lru_head);
+		write_unlock(&dev_priv->resource_lock);
+
+		ret = vmw_surface_do_validate(dev_priv, srf);
+		if (likely(ret != -EBUSY))
+			break;
+
+		write_lock(&dev_priv->resource_lock);
+		if (list_empty(&dev_priv->surface_lru)) {
+			DRM_ERROR("Out of device memory for surfaces.\n");
+			ret = -EBUSY;
+			write_unlock(&dev_priv->resource_lock);
+			break;
+		}
+
+		evict_srf = vmw_surface_reference
+			(list_first_entry(&dev_priv->surface_lru,
+					  struct vmw_surface,
+					  lru_head));
+		list_del_init(&evict_srf->lru_head);
+
+		write_unlock(&dev_priv->resource_lock);
+		(void) vmw_surface_evict(dev_priv, evict_srf);
+
+		vmw_surface_unreference(&evict_srf);
+
+	} while (1);
+
+	if (unlikely(ret != 0 && srf->res.id != -1)) {
+		write_lock(&dev_priv->resource_lock);
+		list_add_tail(&srf->lru_head, &dev_priv->surface_lru);
+		write_unlock(&dev_priv->resource_lock);
+	}
+
+	return ret;
+}
+
+
+/**
+ * vmw_surface_remove_from_lists - Remove surface resources from lookup lists
+ *
+ * @res: Pointer to a struct vmw_resource embedded in a struct vmw_surface
+ *
+ * As part of the resource destruction, remove the surface from any
+ * lookup lists.
+ */
+static void vmw_surface_remove_from_lists(struct vmw_resource *res)
+{
+	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
+
+	list_del_init(&srf->lru_head);
+}
+
+int vmw_surface_init(struct vmw_private *dev_priv,
+		     struct vmw_surface *srf,
+		     void (*res_free) (struct vmw_resource *res))
+{
+	int ret;
+	struct vmw_resource *res = &srf->res;
+
+	BUG_ON(res_free == NULL);
+	INIT_LIST_HEAD(&srf->lru_head);
+	ret = vmw_resource_init(dev_priv, res, &dev_priv->surface_idr,
+				VMW_RES_SURFACE, true, res_free,
+				vmw_surface_remove_from_lists);
+
+	if (unlikely(ret != 0))
+		res_free(res);
+
+	/*
+	 * The surface won't be visible to hardware until a
+	 * surface validate.
+	 */
+
+	(void) vmw_3d_resource_inc(dev_priv, false);
+	vmw_resource_activate(res, vmw_hw_surface_destroy);
+	return ret;
 }
 
 static void vmw_user_surface_free(struct vmw_resource *res)
@@ -486,12 +1139,58 @@ static void vmw_user_surface_free(struct vmw_resource *res)
 	struct vmw_surface *srf = container_of(res, struct vmw_surface, res);
 	struct vmw_user_surface *user_srf =
 	    container_of(srf, struct vmw_user_surface, srf);
+	struct vmw_private *dev_priv = srf->res.dev_priv;
+	uint32_t size = user_srf->size;
 
+	if (srf->backup)
+		ttm_bo_unref(&srf->backup);
+	kfree(srf->offsets);
 	kfree(srf->sizes);
 	kfree(srf->snooper.image);
 	kfree(user_srf);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_resource_unreserve - unreserve resources previously reserved for
+ * command submission.
+ *
+ * @list_head: list of resources to unreserve.
+ *
+ * Currently only surfaces are considered, and unreserving a surface
+ * means putting it back on the device's surface lru list,
+ * so that it can be evicted if necessary.
+ * This function traverses the resource list and
+ * checks whether resources are surfaces, and in that case puts them back
+ * on the device's surface LRU list.
+ */
+void vmw_resource_unreserve(struct list_head *list)
+{
+	struct vmw_resource *res;
+	struct vmw_surface *srf;
+	rwlock_t *lock = NULL;
+
+	list_for_each_entry(res, list, validate_head) {
+
+		if (res->res_free != &vmw_surface_res_free &&
+		    res->res_free != &vmw_user_surface_free)
+			continue;
+
+		if (unlikely(lock == NULL)) {
+			lock = &res->dev_priv->resource_lock;
+			write_lock(lock);
+		}
+
+		srf = container_of(res, struct vmw_surface, res);
+		list_del_init(&srf->lru_head);
+		list_add_tail(&srf->lru_head, &res->dev_priv->surface_lru);
+	}
+
+	if (lock != NULL)
+		write_unlock(lock);
 }
 
+
 int vmw_user_surface_lookup_handle(struct vmw_private *dev_priv,
 				   struct ttm_object_file *tfile,
 				   uint32_t handle, struct vmw_surface **out)
@@ -556,8 +1255,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_surface *user_srf =
-	    kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	struct vmw_user_surface *user_srf;
 	struct vmw_surface *srf;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
@@ -568,10 +1266,51 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct drm_vmw_size __user *user_sizes;
 	int ret;
-	int i;
+	int i, j;
+	uint32_t cur_bo_offset;
+	struct drm_vmw_size *cur_size;
+	struct vmw_surface_offset *cur_offset;
+	uint32_t stride_bpp;
+	uint32_t bpp;
+	uint32_t num_sizes;
+	uint32_t size;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 
-	if (unlikely(user_srf == NULL))
-		return -ENOMEM;
+	if (unlikely(vmw_user_surface_size == 0))
+		vmw_user_surface_size = ttm_round_pot(sizeof(*user_srf)) +
+			128;
+
+	num_sizes = 0;
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+		num_sizes += req->mip_levels[i];
+
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
+	    DRM_VMW_MAX_MIP_LEVELS)
+		return -EINVAL;
+
+	size = vmw_user_surface_size + 128 +
+		ttm_round_pot(num_sizes * sizeof(struct drm_vmw_size)) +
+		ttm_round_pot(num_sizes * sizeof(struct vmw_surface_offset));
+
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   size, false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for surface"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+	user_srf = kmalloc(sizeof(*user_srf), GFP_KERNEL);
+	if (unlikely(user_srf == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_user_srf;
+	}
 
 	srf = &user_srf->srf;
 	res = &srf->res;
@@ -579,21 +1318,22 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	srf->flags = req->flags;
 	srf->format = req->format;
 	srf->scanout = req->scanout;
-	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
-	srf->num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
-		srf->num_sizes += srf->mip_levels[i];
+	srf->backup = NULL;
 
-	if (srf->num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS) {
-		ret = -EINVAL;
-		goto out_err0;
-	}
+	memcpy(srf->mip_levels, req->mip_levels, sizeof(srf->mip_levels));
+	srf->num_sizes = num_sizes;
+	user_srf->size = size;
 
 	srf->sizes = kmalloc(srf->num_sizes * sizeof(*srf->sizes), GFP_KERNEL);
 	if (unlikely(srf->sizes == NULL)) {
 		ret = -ENOMEM;
-		goto out_err0;
+		goto out_no_sizes;
+	}
+	srf->offsets = kmalloc(srf->num_sizes * sizeof(*srf->offsets),
+			       GFP_KERNEL);
+	if (unlikely(srf->sizes == NULL)) {
+		ret = -ENOMEM;
+		goto out_no_offsets;
 	}
 
 	user_sizes = (struct drm_vmw_size __user *)(unsigned long)
@@ -603,9 +1343,32 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			     srf->num_sizes * sizeof(*srf->sizes));
 	if (unlikely(ret != 0)) {
 		ret = -EFAULT;
-		goto out_err1;
+		goto out_no_copy;
 	}
 
+	cur_bo_offset = 0;
+	cur_offset = srf->offsets;
+	cur_size = srf->sizes;
+
+	bpp = vmw_sf_bpp[srf->format].bpp;
+	stride_bpp = vmw_sf_bpp[srf->format].s_bpp;
+
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		for (j = 0; j < srf->mip_levels[i]; ++j) {
+			uint32_t stride =
+				(cur_size->width * stride_bpp + 7) >> 3;
+
+			cur_offset->face = i;
+			cur_offset->mip = j;
+			cur_offset->bo_offset = cur_bo_offset;
+			cur_bo_offset += stride * cur_size->height *
+				cur_size->depth * bpp / stride_bpp;
+			++cur_offset;
+			++cur_size;
+		}
+	}
+	srf->backup_size = cur_bo_offset;
+
 	if (srf->scanout &&
 	    srf->num_sizes == 1 &&
 	    srf->sizes[0].width == 64 &&
@@ -617,7 +1380,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		if (!srf->snooper.image) {
 			DRM_ERROR("Failed to allocate cursor_image\n");
 			ret = -ENOMEM;
-			goto out_err1;
+			goto out_no_copy;
 		}
 	} else {
 		srf->snooper.image = NULL;
@@ -634,7 +1397,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 
 	ret = vmw_surface_init(dev_priv, srf, vmw_user_surface_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(&srf->res);
 	ret = ttm_base_object_init(tfile, &user_srf->base,
@@ -644,7 +1407,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 	if (unlikely(ret != 0)) {
 		vmw_resource_unreference(&tmp);
 		vmw_resource_unreference(&res);
-		return ret;
+		goto out_unlock;
 	}
 
 	rep->sid = user_srf->base.hash.key;
@@ -652,11 +1415,19 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 		DRM_ERROR("Created bad Surface ID.\n");
 
 	vmw_resource_unreference(&res);
+
+	ttm_read_unlock(&vmaster->lock);
 	return 0;
-out_err1:
+out_no_copy:
+	kfree(srf->offsets);
+out_no_offsets:
 	kfree(srf->sizes);
-out_err0:
+out_no_sizes:
 	kfree(user_srf);
+out_no_user_srf:
+	ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
@@ -970,7 +1741,7 @@ static int vmw_stream_init(struct vmw_private *dev_priv,
 	int ret;
 
 	ret = vmw_resource_init(dev_priv, res, &dev_priv->stream_idr,
-				VMW_RES_STREAM, res_free);
+				VMW_RES_STREAM, false, res_free, NULL);
 
 	if (unlikely(ret != 0)) {
 		if (res_free == NULL)
@@ -1000,8 +1771,11 @@ static void vmw_user_stream_free(struct vmw_resource *res)
 {
 	struct vmw_user_stream *stream =
 	    container_of(res, struct vmw_user_stream, stream.res);
+	struct vmw_private *dev_priv = res->dev_priv;
 
 	kfree(stream);
+	ttm_mem_global_free(vmw_mem_glob(dev_priv),
+			    vmw_user_stream_size);
 }
 
 /**
@@ -1055,23 +1829,56 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 			   struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_stream *stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	struct vmw_user_stream *stream;
 	struct vmw_resource *res;
 	struct vmw_resource *tmp;
 	struct drm_vmw_stream_arg *arg = (struct drm_vmw_stream_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
+	struct vmw_master *vmaster = vmw_master(file_priv->master);
 	int ret;
 
-	if (unlikely(stream == NULL))
-		return -ENOMEM;
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of streams anyway?
+	 */
+
+	if (unlikely(vmw_user_stream_size == 0))
+		vmw_user_stream_size = ttm_round_pot(sizeof(*stream)) + 128;
+
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		return ret;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_stream_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for stream"
+				  " creation.\n");
+		goto out_unlock;
+	}
+
+
+	stream = kmalloc(sizeof(*stream), GFP_KERNEL);
+	if (unlikely(stream == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_stream_size);
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	res = &stream->stream.res;
 	stream->base.shareable = false;
 	stream->base.tfile = NULL;
 
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
 	ret = vmw_stream_init(dev_priv, &stream->stream, vmw_user_stream_free);
 	if (unlikely(ret != 0))
-		return ret;
+		goto out_unlock;
 
 	tmp = vmw_resource_reference(res);
 	ret = ttm_base_object_init(tfile, &stream->base, false, VMW_RES_STREAM,
@@ -1085,6 +1892,8 @@ int vmw_stream_claim_ioctl(struct drm_device *dev, void *data,
 	arg->stream_id = res->id;
 out_err:
 	vmw_resource_unreference(&res);
+out_unlock:
+	ttm_read_unlock(&vmaster->lock);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
new file mode 100644
index 000000000000..477b2a9eb3c2
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -0,0 +1,567 @@
+/**************************************************************************
+ *
+ * Copyright © 2011 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "vmwgfx_kms.h"
+
+
+#define vmw_crtc_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.crtc)
+#define vmw_encoder_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.encoder)
+#define vmw_connector_to_sou(x) \
+	container_of(x, struct vmw_screen_object_unit, base.connector)
+
+struct vmw_screen_object_display {
+	struct list_head active;
+
+	unsigned num_active;
+	unsigned last_num_active;
+
+	struct vmw_framebuffer *fb;
+};
+
+/**
+ * Display unit using screen objects.
+ */
+struct vmw_screen_object_unit {
+	struct vmw_display_unit base;
+
+	unsigned long buffer_size; /**< Size of allocated buffer */
+	struct vmw_dma_buffer *buffer; /**< Backing store buffer */
+
+	bool defined;
+
+	struct list_head active;
+};
+
+static void vmw_sou_destroy(struct vmw_screen_object_unit *sou)
+{
+	list_del_init(&sou->active);
+	vmw_display_unit_cleanup(&sou->base);
+	kfree(sou);
+}
+
+
+/*
+ * Screen Object Display Unit CRTC functions
+ */
+
+static void vmw_sou_crtc_destroy(struct drm_crtc *crtc)
+{
+	vmw_sou_destroy(vmw_crtc_to_sou(crtc));
+}
+
+static int vmw_sou_del_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	if (list_empty(&sou->active))
+		return 0;
+
+	/* Must init otherwise list_empty(&sou->active) will not work. */
+	list_del_init(&sou->active);
+	if (--(ld->num_active) == 0) {
+		BUG_ON(!ld->fb);
+		if (ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		ld->fb = NULL;
+	}
+
+	return 0;
+}
+
+static int vmw_sou_add_active(struct vmw_private *vmw_priv,
+			      struct vmw_screen_object_unit *sou,
+			      struct vmw_framebuffer *vfb)
+{
+	struct vmw_screen_object_display *ld = vmw_priv->sou_priv;
+	struct vmw_screen_object_unit *entry;
+	struct list_head *at;
+
+	BUG_ON(!ld->num_active && ld->fb);
+	if (vfb != ld->fb) {
+		if (ld->fb && ld->fb->unpin)
+			ld->fb->unpin(ld->fb);
+		if (vfb->pin)
+			vfb->pin(vfb);
+		ld->fb = vfb;
+	}
+
+	if (!list_empty(&sou->active))
+		return 0;
+
+	at = &ld->active;
+	list_for_each_entry(entry, &ld->active, active) {
+		if (entry->base.unit > sou->base.unit)
+			break;
+
+		at = &entry->active;
+	}
+
+	list_add(&sou->active, at);
+
+	ld->num_active++;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to create a screen.
+ */
+static int vmw_sou_fifo_create(struct vmw_private *dev_priv,
+			       struct vmw_screen_object_unit *sou,
+			       uint32_t x, uint32_t y,
+			       struct drm_display_mode *mode)
+{
+	size_t fifo_size;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAScreenObject obj;
+	} *cmd;
+
+	BUG_ON(!sou->buffer);
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* The hardware has hung, nothing we can do about it here. */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DEFINE_SCREEN;
+	cmd->obj.structSize = sizeof(SVGAScreenObject);
+	cmd->obj.id = sou->base.unit;
+	cmd->obj.flags = SVGA_SCREEN_HAS_ROOT |
+		(sou->base.unit == 0 ? SVGA_SCREEN_IS_PRIMARY : 0);
+	cmd->obj.size.width = mode->hdisplay;
+	cmd->obj.size.height = mode->vdisplay;
+	cmd->obj.root.x = x;
+	cmd->obj.root.y = y;
+
+	/* Ok to assume that buffer is pinned in vram */
+	vmw_bo_get_guest_ptr(&sou->buffer->base, &cmd->obj.backingStore.ptr);
+	cmd->obj.backingStore.pitch = mode->hdisplay * 4;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	sou->defined = true;
+
+	return 0;
+}
+
+/**
+ * Send the fifo command to destroy a screen.
+ */
+static int vmw_sou_fifo_destroy(struct vmw_private *dev_priv,
+				struct vmw_screen_object_unit *sou)
+{
+	size_t fifo_size;
+	int ret;
+
+	struct {
+		struct {
+			uint32_t cmdType;
+		} header;
+		SVGAFifoCmdDestroyScreen body;
+	} *cmd;
+
+	/* no need to do anything */
+	if (unlikely(!sou->defined))
+		return 0;
+
+	fifo_size = sizeof(*cmd);
+	cmd = vmw_fifo_reserve(dev_priv, fifo_size);
+	/* the hardware has hung, nothing we can do about it here */
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Fifo reserve failed.\n");
+		return -ENOMEM;
+	}
+
+	memset(cmd, 0, fifo_size);
+	cmd->header.cmdType = SVGA_CMD_DESTROY_SCREEN;
+	cmd->body.screenId = sou->base.unit;
+
+	vmw_fifo_commit(dev_priv, fifo_size);
+
+	/* Force sync */
+	ret = vmw_fallback_wait(dev_priv, false, true, 0, false, 3*HZ);
+	if (unlikely(ret != 0))
+		DRM_ERROR("Failed to sync with HW");
+	else
+		sou->defined = false;
+
+	return ret;
+}
+
+/**
+ * Free the backing store.
+ */
+static void vmw_sou_backing_free(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou)
+{
+	struct ttm_buffer_object *bo;
+
+	if (unlikely(sou->buffer == NULL))
+		return;
+
+	bo = &sou->buffer->base;
+	ttm_bo_unref(&bo);
+	sou->buffer = NULL;
+	sou->buffer_size = 0;
+}
+
+/**
+ * Allocate the backing store for the buffer.
+ */
+static int vmw_sou_backing_alloc(struct vmw_private *dev_priv,
+				 struct vmw_screen_object_unit *sou,
+				 unsigned long size)
+{
+	int ret;
+
+	if (sou->buffer_size == size)
+		return 0;
+
+	if (sou->buffer)
+		vmw_sou_backing_free(dev_priv, sou);
+
+	sou->buffer = kzalloc(sizeof(*sou->buffer), GFP_KERNEL);
+	if (unlikely(sou->buffer == NULL))
+		return -ENOMEM;
+
+	/* After we have alloced the backing store might not be able to
+	 * resume the overlays, this is preferred to failing to alloc.
+	 */
+	vmw_overlay_pause_all(dev_priv);
+	ret = vmw_dmabuf_init(dev_priv, sou->buffer, size,
+			      &vmw_vram_ne_placement,
+			      false, &vmw_dmabuf_bo_free);
+	vmw_overlay_resume_all(dev_priv);
+
+	if (unlikely(ret != 0))
+		sou->buffer = NULL; /* vmw_dmabuf_init frees on error */
+	else
+		sou->buffer_size = size;
+
+	return ret;
+}
+
+static int vmw_sou_crtc_set_config(struct drm_mode_set *set)
+{
+	struct vmw_private *dev_priv;
+	struct vmw_screen_object_unit *sou;
+	struct drm_connector *connector;
+	struct drm_display_mode *mode;
+	struct drm_encoder *encoder;
+	struct vmw_framebuffer *vfb;
+	struct drm_framebuffer *fb;
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	if (!set)
+		return -EINVAL;
+
+	if (!set->crtc)
+		return -EINVAL;
+
+	/* get the sou */
+	crtc = set->crtc;
+	sou = vmw_crtc_to_sou(crtc);
+	vfb = set->fb ? vmw_framebuffer_to_vfb(set->fb) : NULL;
+	dev_priv = vmw_priv(crtc->dev);
+
+	if (set->num_connectors > 1) {
+		DRM_ERROR("to many connectors\n");
+		return -EINVAL;
+	}
+
+	if (set->num_connectors == 1 &&
+	    set->connectors[0] != &sou->base.connector) {
+		DRM_ERROR("connector doesn't match %p %p\n",
+			set->connectors[0], &sou->base.connector);
+		return -EINVAL;
+	}
+
+	/* sou only supports one fb active at the time */
+	if (dev_priv->sou_priv->fb && vfb &&
+	    !(dev_priv->sou_priv->num_active == 1 &&
+	      !list_empty(&sou->active)) &&
+	    dev_priv->sou_priv->fb != vfb) {
+		DRM_ERROR("Multiple framebuffers not supported\n");
+		return -EINVAL;
+	}
+
+	/* since they always map one to one these are safe */
+	connector = &sou->base.connector;
+	encoder = &sou->base.encoder;
+
+	/* should we turn the crtc off */
+	if (set->num_connectors == 0 || !set->mode || !set->fb) {
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		vmw_sou_del_active(dev_priv, sou);
+
+		vmw_sou_backing_free(dev_priv, sou);
+
+		return 0;
+	}
+
+
+	/* we now know we want to set a mode */
+	mode = set->mode;
+	fb = set->fb;
+
+	if (set->x + mode->hdisplay > fb->width ||
+	    set->y + mode->vdisplay > fb->height) {
+		DRM_ERROR("set outside of framebuffer\n");
+		return -EINVAL;
+	}
+
+	vmw_fb_off(dev_priv);
+
+	if (mode->hdisplay != crtc->mode.hdisplay ||
+	    mode->vdisplay != crtc->mode.vdisplay) {
+		/* no need to check if depth is different, because backing
+		 * store depth is forced to 4 by the device.
+		 */
+
+		ret = vmw_sou_fifo_destroy(dev_priv, sou);
+		/* the hardware has hung don't do anything more */
+		if (unlikely(ret != 0))
+			return ret;
+
+		vmw_sou_backing_free(dev_priv, sou);
+	}
+
+	if (!sou->buffer) {
+		/* forced to depth 4 by the device */
+		size_t size = mode->hdisplay * mode->vdisplay * 4;
+		ret = vmw_sou_backing_alloc(dev_priv, sou, size);
+		if (unlikely(ret != 0))
+			return ret;
+	}
+
+	ret = vmw_sou_fifo_create(dev_priv, sou, set->x, set->y, mode);
+	if (unlikely(ret != 0)) {
+		/*
+		 * We are in a bit of a situation here, the hardware has
+		 * hung and we may or may not have a buffer hanging of
+		 * the screen object, best thing to do is not do anything
+		 * if we where defined, if not just turn the crtc of.
+		 * Not what userspace wants but it needs to htfu.
+		 */
+		if (sou->defined)
+			return ret;
+
+		connector->encoder = NULL;
+		encoder->crtc = NULL;
+		crtc->fb = NULL;
+		crtc->x = 0;
+		crtc->y = 0;
+
+		return ret;
+	}
+
+	vmw_sou_add_active(dev_priv, sou, vfb);
+
+	connector->encoder = encoder;
+	encoder->crtc = crtc;
+	crtc->mode = *mode;
+	crtc->fb = fb;
+	crtc->x = set->x;
+	crtc->y = set->y;
+
+	return 0;
+}
+
+static struct drm_crtc_funcs vmw_screen_object_crtc_funcs = {
+	.save = vmw_du_crtc_save,
+	.restore = vmw_du_crtc_restore,
+	.cursor_set = vmw_du_crtc_cursor_set,
+	.cursor_move = vmw_du_crtc_cursor_move,
+	.gamma_set = vmw_du_crtc_gamma_set,
+	.destroy = vmw_sou_crtc_destroy,
+	.set_config = vmw_sou_crtc_set_config,
+};
+
+/*
+ * Screen Object Display Unit encoder functions
+ */
+
+static void vmw_sou_encoder_destroy(struct drm_encoder *encoder)
+{
+	vmw_sou_destroy(vmw_encoder_to_sou(encoder));
+}
+
+static struct drm_encoder_funcs vmw_screen_object_encoder_funcs = {
+	.destroy = vmw_sou_encoder_destroy,
+};
+
+/*
+ * Screen Object Display Unit connector functions
+ */
+
+static void vmw_sou_connector_destroy(struct drm_connector *connector)
+{
+	vmw_sou_destroy(vmw_connector_to_sou(connector));
+}
+
+static struct drm_connector_funcs vmw_legacy_connector_funcs = {
+	.dpms = vmw_du_connector_dpms,
+	.save = vmw_du_connector_save,
+	.restore = vmw_du_connector_restore,
+	.detect = vmw_du_connector_detect,
+	.fill_modes = vmw_du_connector_fill_modes,
+	.set_property = vmw_du_connector_set_property,
+	.destroy = vmw_sou_connector_destroy,
+};
+
+static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit)
+{
+	struct vmw_screen_object_unit *sou;
+	struct drm_device *dev = dev_priv->dev;
+	struct drm_connector *connector;
+	struct drm_encoder *encoder;
+	struct drm_crtc *crtc;
+
+	sou = kzalloc(sizeof(*sou), GFP_KERNEL);
+	if (!sou)
+		return -ENOMEM;
+
+	sou->base.unit = unit;
+	crtc = &sou->base.crtc;
+	encoder = &sou->base.encoder;
+	connector = &sou->base.connector;
+
+	INIT_LIST_HEAD(&sou->active);
+
+	sou->base.pref_active = (unit == 0);
+	sou->base.pref_width = 800;
+	sou->base.pref_height = 600;
+	sou->base.pref_mode = NULL;
+
+	drm_connector_init(dev, connector, &vmw_legacy_connector_funcs,
+			   DRM_MODE_CONNECTOR_LVDS);
+	connector->status = vmw_du_connector_detect(connector, true);
+
+	drm_encoder_init(dev, encoder, &vmw_screen_object_encoder_funcs,
+			 DRM_MODE_ENCODER_LVDS);
+	drm_mode_connector_attach_encoder(connector, encoder);
+	encoder->possible_crtcs = (1 << unit);
+	encoder->possible_clones = 0;
+
+	drm_crtc_init(dev, crtc, &vmw_screen_object_crtc_funcs);
+
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+
+	drm_connector_attach_property(connector,
+				      dev->mode_config.dirty_info_property,
+				      1);
+
+	return 0;
+}
+
+int vmw_kms_init_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+	int i, ret;
+
+	if (dev_priv->sou_priv) {
+		DRM_INFO("sou system already on\n");
+		return -EINVAL;
+	}
+
+	if (!(dev_priv->fifo.capabilities & SVGA_FIFO_CAP_SCREEN_OBJECT_2)) {
+		DRM_INFO("Not using screen objects,"
+			 " missing cap SCREEN_OBJECT_2\n");
+		return -ENOSYS;
+	}
+
+	ret = -ENOMEM;
+	dev_priv->sou_priv = kmalloc(sizeof(*dev_priv->sou_priv), GFP_KERNEL);
+	if (unlikely(!dev_priv->sou_priv))
+		goto err_no_mem;
+
+	INIT_LIST_HEAD(&dev_priv->sou_priv->active);
+	dev_priv->sou_priv->num_active = 0;
+	dev_priv->sou_priv->last_num_active = 0;
+	dev_priv->sou_priv->fb = NULL;
+
+	ret = drm_vblank_init(dev, VMWGFX_NUM_DISPLAY_UNITS);
+	if (unlikely(ret != 0))
+		goto err_free;
+
+	ret = drm_mode_create_dirty_info_property(dev);
+	if (unlikely(ret != 0))
+		goto err_vblank_cleanup;
+
+	for (i = 0; i < VMWGFX_NUM_DISPLAY_UNITS; ++i)
+		vmw_sou_init(dev_priv, i);
+
+	DRM_INFO("Screen objects system initialized\n");
+
+	return 0;
+
+err_vblank_cleanup:
+	drm_vblank_cleanup(dev);
+err_free:
+	kfree(dev_priv->sou_priv);
+	dev_priv->sou_priv = NULL;
+err_no_mem:
+	return ret;
+}
+
+int vmw_kms_close_screen_object_display(struct vmw_private *dev_priv)
+{
+	struct drm_device *dev = dev_priv->dev;
+
+	if (!dev_priv->sou_priv)
+		return -ENOSYS;
+
+	drm_vblank_cleanup(dev);
+
+	if (!list_empty(&dev_priv->sou_priv->active))
+		DRM_ERROR("Still have active outputs when unloading driver");
+
+	kfree(dev_priv->sou_priv);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 1e8eedd901e0..d3c11f5184f3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -34,9 +34,8 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct vmw_private *dev_priv;
 
 	if (unlikely(vma->vm_pgoff < VMWGFX_FILE_PAGE_OFFSET)) {
-		if (vmw_fifo_mmap(filp, vma) == 0)
-			return 0;
-		return drm_mmap(filp, vma);
+		DRM_ERROR("Illegal attempt to mmap old fifo space.\n");
+		return -EINVAL;
 	}
 
 	file_priv = filp->private_data;
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index 89e50398dba5..a32961305723 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -74,8 +74,6 @@ source "drivers/staging/phison/Kconfig"
 
 source "drivers/staging/line6/Kconfig"
 
-source "drivers/gpu/drm/vmwgfx/Kconfig"
-
 source "drivers/gpu/drm/nouveau/Kconfig"
 
 source "drivers/staging/octeon/Kconfig"