Diffstat (limited to 'drivers/gpu/drm/radeon')
56 files changed, 4057 insertions, 1943 deletions
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index 3904d7964a4b..f4d4505fe831 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -259,7 +259,7 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) /* adjust pm to dpms changes BEFORE enabling crtcs */ radeon_pm_compute_clocks(rdev); /* disable crtc pair power gating before programming */ - if (ASIC_IS_DCE6(rdev)) + if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) atombios_powergate_crtc(crtc, ATOM_DISABLE); atombios_enable_crtc(crtc, ATOM_ENABLE); if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev)) @@ -279,7 +279,7 @@ void atombios_crtc_dpms(struct drm_crtc *crtc, int mode) atombios_enable_crtc(crtc, ATOM_DISABLE); radeon_crtc->enabled = false; /* power gating is per-pair */ - if (ASIC_IS_DCE6(rdev)) { + if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) { struct drm_crtc *other_crtc; struct radeon_crtc *other_radeon_crtc; list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) { @@ -444,11 +444,28 @@ union atom_enable_ss { static void atombios_crtc_program_ss(struct radeon_device *rdev, int enable, int pll_id, + int crtc_id, struct radeon_atom_ss *ss) { + unsigned i; int index = GetIndexIntoMasterTable(COMMAND, EnableSpreadSpectrumOnPPLL); union atom_enable_ss args; + if (!enable) { + for (i = 0; i < rdev->num_crtc; i++) { + if (rdev->mode_info.crtcs[i] && + rdev->mode_info.crtcs[i]->enabled && + i != crtc_id && + pll_id == rdev->mode_info.crtcs[i]->pll_id) { + /* one other crtc is using this pll don't turn + * off spread spectrum as it might turn off + * display on active crtc + */ + return; + } + } + } + memset(&args, 0, sizeof(args)); if (ASIC_IS_DCE5(rdev)) { @@ -457,22 +474,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev, switch (pll_id) { case ATOM_PPLL1: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_PPLL2: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P2PLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_DCPLL: args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_DCPLL; - args.v3.usSpreadSpectrumAmount = cpu_to_le16(0); - args.v3.usSpreadSpectrumStep = cpu_to_le16(0); break; case ATOM_PPLL_INVALID: return; } + args.v3.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); + args.v3.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v3.ucEnable = enable; if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE61(rdev)) args.v3.ucEnable = ATOM_DISABLE; @@ -482,22 +495,18 @@ static void atombios_crtc_program_ss(struct radeon_device *rdev, switch (pll_id) { case ATOM_PPLL1: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P1PLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_PPLL2: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P2PLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); - args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); break; case ATOM_DCPLL: args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_DCPLL; - args.v2.usSpreadSpectrumAmount = cpu_to_le16(0); - args.v2.usSpreadSpectrumStep = cpu_to_le16(0); break; case ATOM_PPLL_INVALID: return; } + args.v2.usSpreadSpectrumAmount = cpu_to_le16(ss->amount); + 
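
The new crtc_id parameter above exists so atombios_crtc_program_ss() can refuse to disable spread spectrum on a PLL that another active CRTC still uses. A minimal sketch of that guard, with stand-in types (ss_crtc/ss_dev carry only the fields the check needs, not the kernel's struct radeon_crtc / struct radeon_device):

#include <stdbool.h>

/* stand-ins, not the kernel's real structures */
struct ss_crtc { bool enabled; int pll_id; };
struct ss_dev  { int num_crtc; struct ss_crtc *crtcs[6]; };

/* Safe to turn spread spectrum off on pll_id only if no *other*
 * enabled crtc is still clocked by the same PLL. */
static bool ss_disable_is_safe(const struct ss_dev *rdev, int pll_id, int crtc_id)
{
        int i;

        for (i = 0; i < rdev->num_crtc; i++) {
                if (rdev->crtcs[i] && rdev->crtcs[i]->enabled &&
                    i != crtc_id && rdev->crtcs[i]->pll_id == pll_id)
                        return false;   /* shared PLL still in use */
        }
        return true;
}

Callers programming the display PLL (DCPLL) pass crtc_id = -1 in the hunks below, so the check never matches their own CRTC.
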
args.v2.usSpreadSpectrumStep = cpu_to_le16(ss->step); args.v2.ucEnable = enable; if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK) || ASIC_IS_DCE41(rdev)) args.v2.ucEnable = ATOM_DISABLE; @@ -1036,7 +1045,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode radeon_compute_pll_legacy(pll, adjusted_clock, &pll_clock, &fb_div, &frac_fb_div, &ref_div, &post_div); - atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, &ss); + atombios_crtc_program_ss(rdev, ATOM_DISABLE, radeon_crtc->pll_id, radeon_crtc->crtc_id, &ss); atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id, encoder_mode, radeon_encoder->encoder_id, mode->clock, @@ -1059,7 +1068,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode ss.step = step_size; } - atombios_crtc_program_ss(rdev, ATOM_ENABLE, radeon_crtc->pll_id, &ss); + atombios_crtc_program_ss(rdev, ATOM_ENABLE, radeon_crtc->pll_id, radeon_crtc->crtc_id, &ss); } } @@ -1539,8 +1548,12 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc) * crtc virtual pixel clock. */ if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(test_encoder))) { - if (ASIC_IS_DCE5(rdev) || rdev->clock.dp_extclk) + if (rdev->clock.dp_extclk) return ATOM_PPLL_INVALID; + else if (ASIC_IS_DCE6(rdev)) + return ATOM_PPLL0; + else if (ASIC_IS_DCE5(rdev)) + return ATOM_DCPLL; } } } @@ -1576,11 +1589,11 @@ void radeon_atom_disp_eng_pll_init(struct radeon_device *rdev) ASIC_INTERNAL_SS_ON_DCPLL, rdev->clock.default_dispclk); if (ss_enabled) - atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, &ss); + atombios_crtc_program_ss(rdev, ATOM_DISABLE, ATOM_DCPLL, -1, &ss); /* XXX: DCE5, make sure voltage, dispclk is high enough */ atombios_crtc_set_disp_eng_pll(rdev, rdev->clock.default_dispclk); if (ss_enabled) - atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, &ss); + atombios_crtc_program_ss(rdev, ATOM_ENABLE, ATOM_DCPLL, -1, &ss); } } @@ -1628,7 +1641,7 @@ int atombios_crtc_mode_set(struct drm_crtc *crtc, } static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) @@ -1639,18 +1652,28 @@ static bool atombios_crtc_mode_fixup(struct drm_crtc *crtc, static void atombios_crtc_prepare(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct drm_device *dev = crtc->dev; + struct radeon_device *rdev = dev->dev_private; + radeon_crtc->in_mode_set = true; /* pick pll */ radeon_crtc->pll_id = radeon_atom_pick_pll(crtc); + /* disable crtc pair power gating before programming */ + if (ASIC_IS_DCE6(rdev)) + atombios_powergate_crtc(crtc, ATOM_DISABLE); + atombios_lock_crtc(crtc, ATOM_ENABLE); atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF); } static void atombios_crtc_commit(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + atombios_crtc_dpms(crtc, DRM_MODE_DPMS_ON); atombios_lock_crtc(crtc, ATOM_DISABLE); + radeon_crtc->in_mode_set = false; } static void atombios_crtc_disable(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 5131b3b0f7d2..7712cf5ab33b 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -22,6 +22,7 @@ * * Authors: Dave Airlie * Alex Deucher + * Jerome Glisse */ #include "drmP.h" #include "radeon_drm.h" @@ -608,7 +609,7 @@ int 
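
The in_mode_set flag introduced in atombios_crtc_prepare()/commit() above brackets a full mode set: power gating is disabled once up front, and the intermediate DPMS off/on transitions are kept from re-toggling the per-pair gating. A compressed sketch of the ordering, assuming simplified stand-in helpers rather than the driver's real signatures:

#include <stdbool.h>

struct mode_crtc { bool in_mode_set; };

static void powergate(struct mode_crtc *c, bool gate)
{
        (void)c; (void)gate;            /* ATOM table call elided */
}

static void crtc_dpms(struct mode_crtc *c, bool on)
{
        /* during a mode set, prepare() already handled gating */
        if (!c->in_mode_set)
                powergate(c, !on);
        /* enable/disable the crtc itself ... */
}

static void crtc_mode_set(struct mode_crtc *c)
{
        c->in_mode_set = true;
        powergate(c, false);            /* ungate the pair once, up front */
        crtc_dpms(c, false);            /* prepare */
        /* ... pick PLL, program timing and base ... */
        crtc_dpms(c, true);             /* commit */
        c->in_mode_set = false;
}
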
radeon_dp_get_panel_mode(struct drm_encoder *encoder, } void radeon_dp_set_link_config(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct radeon_connector_atom_dig *dig_connector; @@ -654,7 +655,6 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS, link_status, DP_LINK_STATUS_SIZE, 100); if (ret <= 0) { - DRM_ERROR("displayport link status failed\n"); return false; } @@ -833,8 +833,10 @@ static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info) else mdelay(dp_info->rd_interval * 4); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) + if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + DRM_ERROR("displayport link status failed\n"); break; + } if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) { clock_recovery = true; @@ -896,8 +898,10 @@ static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info) else mdelay(dp_info->rd_interval * 4); - if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) + if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) { + DRM_ERROR("displayport link status failed\n"); break; + } if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) { channel_eq = true; diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index 486ccdf4aacd..f9bc27fe269a 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -58,7 +58,7 @@ static inline bool radeon_encoder_is_digital(struct drm_encoder *encoder) } static bool radeon_atom_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); @@ -1392,10 +1392,18 @@ radeon_atom_encoder_dpms_dig(struct drm_encoder *encoder, int mode) case DRM_MODE_DPMS_ON: /* some early dce3.2 boards have a bug in their transmitter control table */ if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730) || - ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) + ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) { + if (ASIC_IS_DCE6(rdev)) { + /* It seems we need to call ATOM_ENCODER_CMD_SETUP again + * before reenabling encoder on DPMS ON, otherwise we never + * get picture + */ + atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0); + } atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0); - else + } else { atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0); + } if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) { if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) { atombios_set_edp_panel_power(connector, @@ -2234,7 +2242,7 @@ radeon_atom_ext_dpms(struct drm_encoder *encoder, int mode) } static bool radeon_atom_ext_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { return true; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 7fb3d2e0434c..e93b80a6d4e9 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -99,6 
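
In the atombios_dp.c hunks above, radeon_dp_get_link_status() no longer logs an error itself; the clock-recovery and channel-equalization training loops print it instead, so a failed AUX read during ordinary polling stays quiet while a failure mid-training is still reported. The pattern, sketched with a placeholder dp_get_link_status() standing in for the AUX read:

#include <stdbool.h>
#include <stdio.h>

extern bool dp_get_link_status(unsigned char status[6]);   /* silent on failure now */

static int train_clock_recovery(void)
{
        unsigned char status[6];
        int tries;

        for (tries = 0; tries < 5; tries++) {
                if (!dp_get_link_status(status)) {
                        /* only the trainer reports the failure */
                        fprintf(stderr, "displayport link status failed\n");
                        return -1;
                }
                /* ... test clock-recovery bits, bump drive/pre-emphasis ... */
        }
        return 0;
}
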
+99,14 @@ void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev) } } +/** + * dce4_wait_for_vblank - vblank wait asic callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to wait for vblank on + * + * Wait for vblank on the requested crtc (evergreen+). + */ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; @@ -118,18 +126,49 @@ void dce4_wait_for_vblank(struct radeon_device *rdev, int crtc) } } +/** + * radeon_irq_kms_pflip_irq_get - pre-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to prepare for pageflip on + * + * Pre-pageflip callback (evergreen+). + * Enables the pageflip irq (vblank irq). + */ void evergreen_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ radeon_irq_kms_pflip_irq_get(rdev, crtc); } +/** + * evergreen_post_page_flip - pos-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to cleanup pageflip on + * + * Post-pageflip callback (evergreen+). + * Disables the pageflip irq (vblank irq). + */ void evergreen_post_page_flip(struct radeon_device *rdev, int crtc) { /* disable the pflip int */ radeon_irq_kms_pflip_irq_put(rdev, crtc); } +/** + * evergreen_page_flip - pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc_id: crtc to cleanup pageflip on + * @crtc_base: new address of the crtc (GPU MC address) + * + * Does the actual pageflip (evergreen+). + * During vblank we take the crtc lock and wait for the update_pending + * bit to go high, when it does, we release the lock, and allow the + * double buffered update to take place. + * Returns the current update pending status. + */ u32 evergreen_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -214,6 +253,15 @@ int sumo_get_temp(struct radeon_device *rdev) return actual_temp * 1000; } +/** + * sumo_pm_init_profile - Initialize power profiles callback. + * + * @rdev: radeon_device pointer + * + * Initialize the power states used in profile mode + * (sumo, trinity, SI). + * Used for profile mode only. + */ void sumo_pm_init_profile(struct radeon_device *rdev) { int idx; @@ -265,6 +313,14 @@ void sumo_pm_init_profile(struct radeon_device *rdev) rdev->pm.power_state[idx].num_clock_modes - 1; } +/** + * evergreen_pm_misc - set additional pm hw parameters callback. + * + * @rdev: radeon_device pointer + * + * Set non-clock parameters associated with a power state + * (voltage, etc.) (evergreen+). + */ void evergreen_pm_misc(struct radeon_device *rdev) { int req_ps_idx = rdev->pm.requested_power_state_index; @@ -292,6 +348,13 @@ void evergreen_pm_misc(struct radeon_device *rdev) } } +/** + * evergreen_pm_prepare - pre-power state change callback. + * + * @rdev: radeon_device pointer + * + * Prepare for a power state change (evergreen+). + */ void evergreen_pm_prepare(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -310,6 +373,13 @@ void evergreen_pm_prepare(struct radeon_device *rdev) } } +/** + * evergreen_pm_finish - post-power state change callback. + * + * @rdev: radeon_device pointer + * + * Clean up after a power state change (evergreen+). + */ void evergreen_pm_finish(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -328,6 +398,15 @@ void evergreen_pm_finish(struct radeon_device *rdev) } } +/** + * evergreen_hpd_sense - hpd sense callback. 
+ * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Checks if a digital monitor is connected (evergreen+). + * Returns true if connected, false if not connected. + */ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) { bool connected = false; @@ -364,6 +443,14 @@ bool evergreen_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) return connected; } +/** + * evergreen_hpd_set_polarity - hpd set polarity callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Set the polarity of the hpd pin (evergreen+). + */ void evergreen_hpd_set_polarity(struct radeon_device *rdev, enum radeon_hpd_id hpd) { @@ -424,10 +511,19 @@ void evergreen_hpd_set_polarity(struct radeon_device *rdev, } } +/** + * evergreen_hpd_init - hpd setup callback. + * + * @rdev: radeon_device pointer + * + * Setup the hpd pins used by the card (evergreen+). + * Enable the pin, set the polarity, and enable the hpd interrupts. + */ void evergreen_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enabled = 0; u32 tmp = DC_HPDx_CONNECTION_TIMER(0x9c4) | DC_HPDx_RX_INT_TIMER(0xfa) | DC_HPDx_EN; @@ -436,73 +532,72 @@ void evergreen_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; } radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); + enabled |= 1 << radeon_connector->hpd.hpd; } - if (rdev->irq.installed) - evergreen_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enabled); } +/** + * evergreen_hpd_fini - hpd tear down callback. + * + * @rdev: radeon_device pointer + * + * Tear down the hpd pins used by the card (evergreen+). + * Disable the hpd interrupts. + */ void evergreen_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disabled = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } + disabled |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disabled); } /* watermark setup */ @@ -933,6 +1028,14 @@ static void evergreen_program_watermarks(struct radeon_device *rdev, } +/** + * evergreen_bandwidth_update - update display watermarks callback. + * + * @rdev: radeon_device pointer + * + * Update the display watermarks based on the requested mode(s) + * (evergreen+). 
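
The evergreen_hpd_init()/fini() rewrite above stops poking rdev->irq.hpd[] directly; each connector's pin is collected into a bitmask that is handed to radeon_irq_kms_enable_hpd()/disable_hpd() once, after the loop. The shape of that refactor, with a stand-in pin list and helper:

extern void irq_kms_enable_hpd(unsigned mask);   /* stand-in: takes the irq lock once */

static void hpd_init(const int *pins, int n)
{
        unsigned enabled = 0;
        int i;

        for (i = 0; i < n; i++) {
                /* per-pin HPDx_CONTROL register setup elided */
                enabled |= 1u << pins[i];
        }
        irq_kms_enable_hpd(enabled);     /* one locked irq update for all pins */
}
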
+ */ void evergreen_bandwidth_update(struct radeon_device *rdev) { struct drm_display_mode *mode0 = NULL; @@ -956,6 +1059,15 @@ void evergreen_bandwidth_update(struct radeon_device *rdev) } } +/** + * evergreen_mc_wait_for_idle - wait for MC idle callback. + * + * @rdev: radeon_device pointer + * + * Wait for the MC (memory controller) to be idle. + * (evergreen+). + * Returns 0 if the MC is idle, -1 if not. + */ int evergreen_mc_wait_for_idle(struct radeon_device *rdev) { unsigned i; @@ -1117,24 +1229,8 @@ void evergreen_agp_enable(struct radeon_device *rdev) void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save) { - save->vga_control[0] = RREG32(D1VGA_CONTROL); - save->vga_control[1] = RREG32(D2VGA_CONTROL); save->vga_render_control = RREG32(VGA_RENDER_CONTROL); save->vga_hdp_control = RREG32(VGA_HDP_CONTROL); - save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET); - save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET); - if (rdev->num_crtc >= 4) { - save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL); - save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL); - save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET); - save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET); - } - if (rdev->num_crtc >= 6) { - save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL); - save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL); - save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET); - save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET); - } /* Stop all video */ WREG32(VGA_RENDER_CONTROL, 0); @@ -1245,47 +1341,6 @@ void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *s /* Unlock host access */ WREG32(VGA_HDP_CONTROL, save->vga_hdp_control); mdelay(1); - /* Restore video state */ - WREG32(D1VGA_CONTROL, save->vga_control[0]); - WREG32(D2VGA_CONTROL, save->vga_control[1]); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]); - WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]); - WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]); - } - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1); - } - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]); - WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]); - } - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + 
EVERGREEN_CRTC0_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0); - if (rdev->num_crtc >= 4) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0); - } - if (rdev->num_crtc >= 6) { - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0); - WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0); - } WREG32(VGA_RENDER_CONTROL, save->vga_render_control); } @@ -1371,12 +1426,28 @@ void evergreen_mc_program(struct radeon_device *rdev) */ void evergreen_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 next_rptr; /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); - /* FIXME: implement */ + + if (ring->rptr_save_reg) { + next_rptr = ring->wptr + 3 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_START) >> 2)); + radeon_ring_write(ring, next_rptr); + } else if (rdev->wb.enabled) { + next_rptr = ring->wptr + 5 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18)); + radeon_ring_write(ring, next_rptr); + radeon_ring_write(ring, 0); + } + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -1858,10 +1929,18 @@ static void evergreen_gpu_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) rdev->config.evergreen.tile_config |= 1 << 4; else { - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.evergreen.tile_config |= 1 << 4; - else + switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { + case 0: /* four banks */ rdev->config.evergreen.tile_config |= 0 << 4; + break; + case 1: /* eight banks */ + rdev->config.evergreen.tile_config |= 1 << 4; + break; + case 2: /* sixteen banks */ + default: + rdev->config.evergreen.tile_config |= 2 << 4; + break; + } } rdev->config.evergreen.tile_config |= 0 << 8; rdev->config.evergreen.tile_config |= @@ -2188,6 +2267,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); evergreen_mc_stop(rdev, &save); if (evergreen_mc_wait_for_idle(rdev)) { dev_warn(rdev->dev, "Wait for MC idle timedout !\n"); @@ -2225,6 +2312,14 @@ static int evergreen_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); 
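
In the evergreen_ring_ib_execute() hunk above, the ring now publishes where the read pointer will be after the IB packet, either into a scratch register (SET_CONFIG_REG) or into write-back memory (MEM_WRITE). My reading of the "+ 3 + 4" and "+ 5 + 4" offsets, sketched as plain arithmetic (packet sizes inferred from the hunk, in dwords):

#include <stdbool.h>

/* Predicted read pointer = current write pointer plus the dwords still
 * to be emitted: a 3-dword SET_CONFIG_REG packet (or a 5-dword
 * MEM_WRITE) followed by the 4-dword INDIRECT_BUFFER packet. */
static unsigned predict_next_rptr(unsigned wptr, bool have_scratch_reg)
{
        unsigned update_pkt = have_scratch_reg ? 3 : 5;
        unsigned ib_pkt = 4;

        return wptr + update_pkt + ib_pkt;
}

The cayman version further down uses "+ 3 + 4 + 8" because its IB emit trails eight more dwords after the pointer-update packet.
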
evergreen_mc_resume(rdev, &save); return 0; } @@ -2348,20 +2443,20 @@ int evergreen_irq_set(struct radeon_device *rdev) if (rdev->family >= CHIP_CAYMAN) { /* enable CP interrupts on all rings */ - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp1\n"); cp_int_cntl1 |= TIME_STAMP_INT_ENABLE; } - if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int cp2\n"); cp_int_cntl2 |= TIME_STAMP_INT_ENABLE; } } else { - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("evergreen_irq_set: sw int gfx\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; @@ -2369,32 +2464,32 @@ int evergreen_irq_set(struct radeon_device *rdev) } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("evergreen_irq_set: vblank 0\n"); crtc1 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("evergreen_irq_set: vblank 1\n"); crtc2 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[2] || - rdev->irq.pflip[2]) { + atomic_read(&rdev->irq.pflip[2])) { DRM_DEBUG("evergreen_irq_set: vblank 2\n"); crtc3 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[3] || - rdev->irq.pflip[3]) { + atomic_read(&rdev->irq.pflip[3])) { DRM_DEBUG("evergreen_irq_set: vblank 3\n"); crtc4 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[4] || - rdev->irq.pflip[4]) { + atomic_read(&rdev->irq.pflip[4])) { DRM_DEBUG("evergreen_irq_set: vblank 4\n"); crtc5 |= VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[5] || - rdev->irq.pflip[5]) { + atomic_read(&rdev->irq.pflip[5])) { DRM_DEBUG("evergreen_irq_set: vblank 5\n"); crtc6 |= VBLANK_INT_MASK; } @@ -2676,7 +2771,6 @@ int evergreen_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -2684,22 +2778,21 @@ int evergreen_irq_process(struct radeon_device *rdev) return IRQ_NONE; wptr = evergreen_get_ih_wptr(rdev); + +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) + return IRQ_NONE; + rptr = rdev->ih.rptr; DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - spin_lock_irqsave(&rdev->ih.lock, flags); - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); - return IRQ_NONE; - } -restart_ih: /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ evergreen_irq_ack(rdev); - rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! 
*/ ring_index = rptr / 4; @@ -2716,7 +2809,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -2742,7 +2835,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -2768,7 +2861,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[2]) + if (atomic_read(&rdev->irq.pflip[2])) radeon_crtc_handle_flip(rdev, 2); rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; DRM_DEBUG("IH: D3 vblank\n"); @@ -2794,7 +2887,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[3]) + if (atomic_read(&rdev->irq.pflip[3])) radeon_crtc_handle_flip(rdev, 3); rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; DRM_DEBUG("IH: D4 vblank\n"); @@ -2820,7 +2913,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[4]) + if (atomic_read(&rdev->irq.pflip[4])) radeon_crtc_handle_flip(rdev, 4); rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; DRM_DEBUG("IH: D5 vblank\n"); @@ -2846,7 +2939,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[5]) + if (atomic_read(&rdev->irq.pflip[5])) radeon_crtc_handle_flip(rdev, 5); rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; DRM_DEBUG("IH: D6 vblank\n"); @@ -2986,7 +3079,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -2998,17 +3090,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = evergreen_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = evergreen_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; } @@ -3096,13 +3190,11 @@ static int evergreen_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) { @@ -3146,9 +3238,6 @@ int evergreen_suspend(struct radeon_device *rdev) struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r600_audio_fini(rdev); - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - r600_blit_suspend(rdev); r700_cp_stop(rdev); ring->ready = false; evergreen_irq_suspend(rdev); @@ -3234,20 +3323,14 @@ int evergreen_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - 
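
The evergreen_irq_process() rework above swaps the IH spinlock for an atomic re-entry guard: atomic_xchg() takes the lock, and after releasing it the write pointer is sampled again, restarting processing if new entries arrived in the window. A self-contained skeleton, using C11 atomics to stand in for the kernel's atomic_t and extern placeholders for the MMIO accessors:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int ih_lock;
static unsigned rptr;                           /* persistent ring read pointer */

extern unsigned read_hw_wptr(void);             /* placeholder MMIO accessors */
extern void write_hw_rptr(unsigned v);
extern void decode_one_entry(unsigned rptr);

static bool irq_process(void)
{
        unsigned wptr = read_hw_wptr();

restart:
        /* re-entry guard: whoever wins the exchange drains the ring */
        if (atomic_exchange(&ih_lock, 1))
                return false;                   /* IRQ_NONE */
        while (rptr != wptr) {
                decode_one_entry(rptr);
                rptr = (rptr + 16) & 0x3fff;    /* 16-byte entries; mask stands in for ih.ptr_mask */
        }
        write_hw_rptr(rptr);
        atomic_store(&ih_lock, 0);

        /* entries may have landed while we were draining: sample again */
        wptr = read_hw_wptr();
        if (wptr != rptr)
                goto restart;
        return true;                            /* IRQ_HANDLED */
}
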
rdev->accel_working = false; - } - r = evergreen_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -3274,7 +3357,7 @@ void evergreen_fini(struct radeon_device *rdev) r700_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); evergreen_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -3289,7 +3372,8 @@ void evergreen_fini(struct radeon_device *rdev) void evergreen_pcie_gen2_enable(struct radeon_device *rdev) { - u32 link_width_cntl, speed_cntl; + u32 link_width_cntl, speed_cntl, mask; + int ret; if (radeon_pcie_gen2 == 0) return; @@ -3304,6 +3388,15 @@ void evergreen_pcie_gen2_enable(struct radeon_device *rdev) if (ASIC_IS_X2(rdev)) return; + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; + + if (!(mask & DRM_PCIE_SPEED_50)) + return; + + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + speed_cntl = RREG32_PCIE_P(PCIE_LC_SPEED_CNTL); if ((speed_cntl & LC_OTHER_SIDE_EVER_SENT_GEN2) || (speed_cntl & LC_OTHER_SIDE_SUPPORTS_GEN2)) { diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c index 1e96bd458cfd..89cb9feb5653 100644 --- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c +++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c @@ -622,7 +622,8 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 55; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 55; /* shaders + def state */ rdev->r600_blit.ring_size_common += 16; /* fence emit for VB IB */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ @@ -633,10 +634,6 @@ int evergreen_blit_init(struct radeon_device *rdev) rdev->r600_blit.max_dim = 16384; - /* pin copy shader into vram if already initialized */ - if (rdev->r600_blit.shader_obj) - goto done; - rdev->r600_blit.state_offset = 0; if (rdev->family < CHIP_CAYMAN) @@ -667,11 +664,26 @@ int evergreen_blit_init(struct radeon_device *rdev) obj_size += cayman_ps_size * 4; obj_size = ALIGN(obj_size, 256); - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("evergreen failed to allocate shader\n"); - return r; + /* pin copy shader into vram if not already initialized */ + if (!rdev->r600_blit.shader_obj) { + r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, + NULL, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("evergreen failed to allocate shader\n"); + return r; + } + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } } DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n", @@ -713,17 +725,6 @@ int evergreen_blit_init(struct radeon_device *rdev) 
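
evergreen_pcie_gen2_enable() above now bails out unless drm_pcie_get_speed_cap_mask() reports 5.0 GT/s support, so the link is only retrained when both ends can actually do gen2. The shape of the check, with a stand-in helper and flag (the real names are drm_pcie_get_speed_cap_mask() and DRM_PCIE_SPEED_50):

#define SPEED_50 (1u << 1)                     /* stand-in for DRM_PCIE_SPEED_50 */
extern int get_speed_cap_mask(unsigned *mask); /* stand-in for the DRM helper */

static void pcie_gen2_enable(void)
{
        unsigned mask;

        if (get_speed_cap_mask(&mask) != 0)
                return;                 /* can't query the root port: leave link alone */
        if (!(mask & SPEED_50))
                return;                 /* one side is gen1-only */
        /* ... program LC_SPEED_CNTL to retrain the link at gen2 ... */
}
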
radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); -done: - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index c16554122ccd..e44a62a07fe3 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -788,6 +788,13 @@ static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p, case V_030000_SQ_TEX_DIM_1D_ARRAY: case V_030000_SQ_TEX_DIM_2D_ARRAY: depth = 1; + break; + case V_030000_SQ_TEX_DIM_2D_MSAA: + case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA: + surf.nsamples = 1 << llevel; + llevel = 0; + depth = 1; + break; case V_030000_SQ_TEX_DIM_3D: break; default: @@ -961,13 +968,15 @@ static int evergreen_cs_track_check(struct radeon_cs_parser *p) if (track->db_dirty) { /* Check stencil buffer */ - if (G_028800_STENCIL_ENABLE(track->db_depth_control)) { + if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID && + G_028800_STENCIL_ENABLE(track->db_depth_control)) { r = evergreen_cs_track_validate_stencil(p); if (r) return r; } /* Check depth buffer */ - if (G_028800_Z_ENABLE(track->db_depth_control)) { + if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID && + G_028800_Z_ENABLE(track->db_depth_control)) { r = evergreen_cs_track_validate_depth(p); if (r) return r; diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index b50b15c70498..79347855d9bf 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -88,6 +88,10 @@ #define CONFIG_MEMSIZE 0x5428 #define CP_COHER_BASE 0x85F8 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C +#define CP_STAT 0x8680 #define CP_ME_CNTL 0x86D8 #define CP_ME_HALT (1 << 28) #define CP_PFP_HALT (1 << 26) @@ -1273,6 +1277,8 @@ #define S_028044_FORMAT(x) (((x) & 0x1) << 0) #define G_028044_FORMAT(x) (((x) >> 0) & 0x1) #define C_028044_FORMAT 0xFFFFFFFE +#define V_028044_STENCIL_INVALID 0 +#define V_028044_STENCIL_8 1 #define G_028044_TILE_SPLIT(x) (((x) >> 8) & 0x7) #define DB_Z_READ_BASE 0x28048 #define DB_STENCIL_READ_BASE 0x2804c diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b7bf18e40215..853800e8582f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -574,10 +574,18 @@ static void cayman_gpu_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) rdev->config.cayman.tile_config |= 1 << 4; else { - if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) - rdev->config.cayman.tile_config |= 1 << 4; - else + switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) { + case 0: /* four banks */ rdev->config.cayman.tile_config |= 0 << 4; + break; + case 1: /* eight banks */ + rdev->config.cayman.tile_config |= 1 << 4; + break; + case 2: /* sixteen banks */ + default: + rdev->config.cayman.tile_config |= 2 << 4; + break; + } } rdev->config.cayman.tile_config |= ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8; @@ -850,11 +858,20 @@ void cayman_fence_ring_emit(struct radeon_device *rdev, void 
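
Both evergreen_gpu_init() (earlier in this series) and cayman_gpu_init() above replace a boolean bank test with a full switch over the NOOFBANK field, so sixteen-bank boards encode 2 in bits 5:4 of tile_config instead of being misreported as eight banks. The encoding in isolation:

/* Map MC_ARB_RAMCFG's NOOFBANK field (0/1/2 = 4/8/16 banks)
 * into bits 5:4 of tile_config. */
static unsigned tile_config_banks(unsigned noofbank)
{
        switch (noofbank) {
        case 0:  return 0u << 4;        /* four banks */
        case 1:  return 1u << 4;        /* eight banks */
        case 2:                         /* sixteen banks */
        default: return 2u << 4;
        }
}
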
cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; /* set to DX10/11 mode */ radeon_ring_write(ring, PACKET3(PACKET3_MODE_CONTROL, 0)); radeon_ring_write(ring, 1); + + if (ring->rptr_save_reg) { + uint32_t next_rptr = ring->wptr + 3 + 4 + 8; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_START) >> 2)); + radeon_ring_write(ring, next_rptr); + } + radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -981,16 +998,41 @@ static int cayman_cp_start(struct radeon_device *rdev) static void cayman_cp_fini(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; cayman_cp_enable(rdev, false); - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + radeon_ring_fini(rdev, ring); + radeon_scratch_free(rdev, ring->rptr_save_reg); } int cayman_cp_resume(struct radeon_device *rdev) { + static const int ridx[] = { + RADEON_RING_TYPE_GFX_INDEX, + CAYMAN_RING_TYPE_CP1_INDEX, + CAYMAN_RING_TYPE_CP2_INDEX + }; + static const unsigned cp_rb_cntl[] = { + CP_RB0_CNTL, + CP_RB1_CNTL, + CP_RB2_CNTL, + }; + static const unsigned cp_rb_rptr_addr[] = { + CP_RB0_RPTR_ADDR, + CP_RB1_RPTR_ADDR, + CP_RB2_RPTR_ADDR + }; + static const unsigned cp_rb_rptr_addr_hi[] = { + CP_RB0_RPTR_ADDR_HI, + CP_RB1_RPTR_ADDR_HI, + CP_RB2_RPTR_ADDR_HI + }; + static const unsigned cp_rb_base[] = { + CP_RB0_BASE, + CP_RB1_BASE, + CP_RB2_BASE + }; struct radeon_ring *ring; - u32 tmp; - u32 rb_bufsz; - int r; + int i, r; /* Reset cp; if cp is reset, then PA, SH, VGT also need to be reset */ WREG32(GRBM_SOFT_RESET, (SOFT_RESET_CP | @@ -1012,91 +1054,47 @@ int cayman_cp_resume(struct radeon_device *rdev) WREG32(CP_DEBUG, (1 << 27)); - /* ring 0 - compute and gfx */ - /* Set ring buffer size */ - ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; -#ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; -#endif - WREG32(CP_RB0_CNTL, tmp); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB0_WPTR, ring->wptr); - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF); WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF); + WREG32(SCRATCH_UMSK, 0xff); - if (rdev->wb.enabled) - WREG32(SCRATCH_UMSK, 0xff); - else { - tmp |= RB_NO_UPDATE; - WREG32(SCRATCH_UMSK, 0); - } - - mdelay(1); - WREG32(CP_RB0_CNTL, tmp); - - WREG32(CP_RB0_BASE, ring->gpu_addr >> 8); - - ring->rptr = RREG32(CP_RB0_RPTR); - - /* ring1 - compute only */ - /* Set ring buffer size */ - ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; -#ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; -#endif - WREG32(CP_RB1_CNTL, tmp); - - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB1_WPTR, ring->wptr); - - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB1_RPTR_ADDR, 
(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF); - - mdelay(1); - WREG32(CP_RB1_CNTL, tmp); - - WREG32(CP_RB1_BASE, ring->gpu_addr >> 8); + for (i = 0; i < 3; ++i) { + uint32_t rb_cntl; + uint64_t addr; - ring->rptr = RREG32(CP_RB1_RPTR); - - /* ring2 - compute only */ - /* Set ring buffer size */ - ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]; - rb_bufsz = drm_order(ring->ring_size / 8); - tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz; + /* Set ring buffer size */ + ring = &rdev->ring[ridx[i]]; + rb_cntl = drm_order(ring->ring_size / 8); + rb_cntl |= drm_order(RADEON_GPU_PAGE_SIZE/8) << 8; #ifdef __BIG_ENDIAN - tmp |= BUF_SWAP_32BIT; + rb_cntl |= BUF_SWAP_32BIT; #endif - WREG32(CP_RB2_CNTL, tmp); + WREG32(cp_rb_cntl[i], rb_cntl); - /* Initialize the ring buffer's read and write pointers */ - WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA); - ring->wptr = 0; - WREG32(CP_RB2_WPTR, ring->wptr); + /* set the wb address wether it's enabled or not */ + addr = rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET; + WREG32(cp_rb_rptr_addr[i], addr & 0xFFFFFFFC); + WREG32(cp_rb_rptr_addr_hi[i], upper_32_bits(addr) & 0xFF); + } - /* set the wb address wether it's enabled or not */ - WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC); - WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF); + /* set the rb base addr, this causes an internal reset of ALL rings */ + for (i = 0; i < 3; ++i) { + ring = &rdev->ring[ridx[i]]; + WREG32(cp_rb_base[i], ring->gpu_addr >> 8); + } - mdelay(1); - WREG32(CP_RB2_CNTL, tmp); + for (i = 0; i < 3; ++i) { + /* Initialize the ring buffer's read and write pointers */ + ring = &rdev->ring[ridx[i]]; + WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA); - WREG32(CP_RB2_BASE, ring->gpu_addr >> 8); + ring->rptr = ring->wptr = 0; + WREG32(ring->rptr_reg, ring->rptr); + WREG32(ring->wptr_reg, ring->wptr); - ring->rptr = RREG32(CP_RB2_RPTR); + mdelay(1); + WREG32_P(cp_rb_cntl[i], 0, ~RB_RPTR_WR_ENA); + } /* start the rings */ cayman_cp_start(rdev); @@ -1132,6 +1130,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_ADDR 0x%08X\n", RREG32(0x14F8)); dev_info(rdev->dev, " VM_CONTEXT0_PROTECTION_FAULT_STATUS 0x%08X\n", @@ -1180,6 +1186,14 @@ static int cayman_gpu_soft_reset(struct radeon_device *rdev) RREG32(GRBM_STATUS_SE1)); dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); evergreen_mc_resume(rdev, &save); return 0; } @@ -1291,17 +1305,17 @@ static int cayman_startup(struct radeon_device *rdev) if (r) return r; - r = 
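
cayman_cp_resume() above collapses three near-identical ring-buffer bring-ups into loops over static const register tables indexed by ring; the base-address writes get their own pass because, per the hunk's comment, writing the RB base causes an internal reset of all rings. A reduced sketch of the table layout (the register offsets here are hypothetical, not the real CP_RBn_* values):

enum { GFX, CP1, CP2, NUM_RINGS };

static const unsigned cp_rb_cntl[NUM_RINGS] = { 0xC104, 0xC204, 0xC304 };
static const unsigned cp_rb_base[NUM_RINGS] = { 0xC100, 0xC200, 0xC300 };

extern void wreg32(unsigned reg, unsigned val);

static void cp_resume(const unsigned rb_cntl_val[NUM_RINGS],
                      const unsigned long long gpu_addr[NUM_RINGS])
{
        int i;

        for (i = 0; i < NUM_RINGS; i++)         /* pass 1: size/control */
                wreg32(cp_rb_cntl[i], rb_cntl_val[i]);
        for (i = 0; i < NUM_RINGS; i++)         /* pass 2: bases (resets all rings) */
                wreg32(cp_rb_base[i], (unsigned)(gpu_addr[i] >> 8));
        /* pass 3 would zero rptr/wptr under RB_RPTR_WR_ENA, then clear it */
}
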
radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } - r = radeon_vm_manager_start(rdev); - if (r) + r = radeon_vm_manager_init(rdev); + if (r) { + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) @@ -1334,10 +1348,6 @@ int cayman_resume(struct radeon_device *rdev) int cayman_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - radeon_vm_manager_suspend(rdev); - r600_blit_suspend(rdev); cayman_cp_enable(rdev, false); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; evergreen_irq_suspend(rdev); @@ -1413,17 +1423,7 @@ int cayman_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = radeon_vm_manager_init(rdev); - if (r) { - dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); - } - r = cayman_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); @@ -1432,7 +1432,7 @@ int cayman_init(struct radeon_device *rdev) if (rdev->flags & RADEON_IS_IGP) si_rlc_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); @@ -1463,7 +1463,7 @@ void cayman_fini(struct radeon_device *rdev) si_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h index a0b98066e207..870db340d377 100644 --- a/drivers/gpu/drm/radeon/nid.h +++ b/drivers/gpu/drm/radeon/nid.h @@ -236,6 +236,10 @@ #define CP_SEM_WAIT_TIMER 0x85BC #define CP_SEM_INCOMPLETE_TIMER_CNTL 0x85C8 #define CP_COHER_CNTL2 0x85E8 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C +#define CP_STAT 0x8680 #define CP_ME_CNTL 0x86D8 #define CP_ME_HALT (1 << 28) #define CP_PFP_HALT (1 << 26) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index fb44e7e49083..8acb34fd3fd5 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -65,6 +65,19 @@ MODULE_FIRMWARE(FIRMWARE_R520); #include "r100_track.h" +/* This files gather functions specifics to: + * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 + * and others in some cases. + */ + +/** + * r100_wait_for_vblank - vblank wait asic callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to wait for vblank on + * + * Wait for vblank on the requested crtc (r1xx-r4xx). + */ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc]; @@ -99,128 +112,49 @@ void r100_wait_for_vblank(struct radeon_device *rdev, int crtc) } } -/* This files gather functions specifics to: - * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280 +/** + * r100_pre_page_flip - pre-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to prepare for pageflip on + * + * Pre-pageflip callback (r1xx-r4xx). + * Enables the pageflip irq (vblank irq). 
*/ - -int r100_reloc_pitch_offset(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - unsigned idx, - unsigned reg) -{ - int r; - u32 tile_flags = 0; - u32 tmp; - struct radeon_cs_reloc *reloc; - u32 value; - - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for ib[%d]=0x%04X\n", - idx, reg); - r100_cs_dump_packet(p, pkt); - return r; - } - - value = radeon_get_ib_value(p, idx); - tmp = value & 0x003fffff; - tmp += (((u32)reloc->lobj.gpu_offset) >> 10); - - if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { - if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) - tile_flags |= RADEON_DST_TILE_MACRO; - if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { - if (reg == RADEON_SRC_PITCH_OFFSET) { - DRM_ERROR("Cannot src blit from microtiled surface\n"); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - tile_flags |= RADEON_DST_TILE_MICRO; - } - - tmp |= tile_flags; - p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; - } else - p->ib.ptr[idx] = (value & 0xffc00000) | tmp; - return 0; -} - -int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, - struct radeon_cs_packet *pkt, - int idx) -{ - unsigned c, i; - struct radeon_cs_reloc *reloc; - struct r100_cs_track *track; - int r = 0; - volatile uint32_t *ib; - u32 idx_value; - - ib = p->ib.ptr; - track = (struct r100_cs_track *)p->track; - c = radeon_get_ib_value(p, idx++) & 0x1F; - if (c > 16) { - DRM_ERROR("Only 16 vertex buffers are allowed %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return -EINVAL; - } - track->num_arrays = c; - for (i = 0; i < (c - 1); i+=2, idx+=3) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize &= 0x7F; - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 1].robj = reloc->robj; - track->arrays[i + 1].esize = idx_value >> 24; - track->arrays[i + 1].esize &= 0x7F; - } - if (c & 1) { - r = r100_cs_packet_next_reloc(p, &reloc); - if (r) { - DRM_ERROR("No reloc for packet3 %d\n", - pkt->opcode); - r100_cs_dump_packet(p, pkt); - return r; - } - idx_value = radeon_get_ib_value(p, idx); - ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); - track->arrays[i + 0].robj = reloc->robj; - track->arrays[i + 0].esize = idx_value >> 8; - track->arrays[i + 0].esize &= 0x7F; - } - return r; -} - void r100_pre_page_flip(struct radeon_device *rdev, int crtc) { /* enable the pflip int */ radeon_irq_kms_pflip_irq_get(rdev, crtc); } +/** + * r100_post_page_flip - pos-pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc: crtc to cleanup pageflip on + * + * Post-pageflip callback (r1xx-r4xx). + * Disables the pageflip irq (vblank irq). + */ void r100_post_page_flip(struct radeon_device *rdev, int crtc) { /* disable the pflip int */ radeon_irq_kms_pflip_irq_put(rdev, crtc); } +/** + * r100_page_flip - pageflip callback. + * + * @rdev: radeon_device pointer + * @crtc_id: crtc to cleanup pageflip on + * @crtc_base: new address of the crtc (GPU MC address) + * + * Does the actual pageflip (r1xx-r4xx). 
+ * During vblank we take the crtc lock and wait for the update_pending + * bit to go high, when it does, we release the lock, and allow the + * double buffered update to take place. + * Returns the current update pending status. + */ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) { struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id]; @@ -247,6 +181,15 @@ u32 r100_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base) return RREG32(RADEON_CRTC_OFFSET + radeon_crtc->crtc_offset) & RADEON_CRTC_OFFSET__GUI_TRIG_OFFSET; } +/** + * r100_pm_get_dynpm_state - look up dynpm power state callback. + * + * @rdev: radeon_device pointer + * + * Look up the optimal power state based on the + * current state of the GPU (r1xx-r5xx). + * Used for dynpm only. + */ void r100_pm_get_dynpm_state(struct radeon_device *rdev) { int i; @@ -329,6 +272,15 @@ void r100_pm_get_dynpm_state(struct radeon_device *rdev) pcie_lanes); } +/** + * r100_pm_init_profile - Initialize power profiles callback. + * + * @rdev: radeon_device pointer + * + * Initialize the power states used in profile mode + * (r1xx-r3xx). + * Used for profile mode only. + */ void r100_pm_init_profile(struct radeon_device *rdev) { /* default */ @@ -368,6 +320,14 @@ void r100_pm_init_profile(struct radeon_device *rdev) rdev->pm.profiles[PM_PROFILE_HIGH_MH_IDX].dpms_on_cm_idx = 0; } +/** + * r100_pm_misc - set additional pm hw parameters callback. + * + * @rdev: radeon_device pointer + * + * Set non-clock parameters associated with a power state + * (voltage, pcie lanes, etc.) (r1xx-r4xx). + */ void r100_pm_misc(struct radeon_device *rdev) { int requested_index = rdev->pm.requested_power_state_index; @@ -459,6 +419,13 @@ void r100_pm_misc(struct radeon_device *rdev) } } +/** + * r100_pm_prepare - pre-power state change callback. + * + * @rdev: radeon_device pointer + * + * Prepare for a power state change (r1xx-r4xx). + */ void r100_pm_prepare(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -483,6 +450,13 @@ void r100_pm_prepare(struct radeon_device *rdev) } } +/** + * r100_pm_finish - post-power state change callback. + * + * @rdev: radeon_device pointer + * + * Clean up after a power state change (r1xx-r4xx). + */ void r100_pm_finish(struct radeon_device *rdev) { struct drm_device *ddev = rdev->ddev; @@ -507,6 +481,14 @@ void r100_pm_finish(struct radeon_device *rdev) } } +/** + * r100_gui_idle - gui idle callback. + * + * @rdev: radeon_device pointer + * + * Check of the GUI (2D/3D engines) are idle (r1xx-r5xx). + * Returns true if idle, false if not. + */ bool r100_gui_idle(struct radeon_device *rdev) { if (RREG32(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE) @@ -516,6 +498,15 @@ bool r100_gui_idle(struct radeon_device *rdev) } /* hpd for digital panel detect/disconnect */ +/** + * r100_hpd_sense - hpd sense callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Checks if a digital monitor is connected (r1xx-r4xx). + * Returns true if connected, false if not connected. + */ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) { bool connected = false; @@ -535,6 +526,14 @@ bool r100_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd) return connected; } +/** + * r100_hpd_set_polarity - hpd set polarity callback. + * + * @rdev: radeon_device pointer + * @hpd: hpd (hotplug detect) pin + * + * Set the polarity of the hpd pin (r1xx-r4xx). 
+ */ void r100_hpd_set_polarity(struct radeon_device *rdev, enum radeon_hpd_id hpd) { @@ -563,47 +562,47 @@ void r100_hpd_set_polarity(struct radeon_device *rdev, } } +/** + * r100_hpd_init - hpd setup callback. + * + * @rdev: radeon_device pointer + * + * Setup the hpd pins used by the card (r1xx-r4xx). + * Set the polarity, and enable the hpd interrupts. + */ void r100_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = true; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = true; - break; - default: - break; - } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r100_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); } +/** + * r100_hpd_fini - hpd tear down callback. + * + * @rdev: radeon_device pointer + * + * Tear down the hpd pins used by the card (r1xx-r4xx). + * Disable the hpd interrupts. + */ void r100_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); - switch (radeon_connector->hpd.hpd) { - case RADEON_HPD_1: - rdev->irq.hpd[0] = false; - break; - case RADEON_HPD_2: - rdev->irq.hpd[1] = false; - break; - default: - break; - } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ -635,15 +634,6 @@ int r100_pci_gart_init(struct radeon_device *rdev) return radeon_gart_table_ram_alloc(rdev); } -/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ -void r100_enable_bm(struct radeon_device *rdev) -{ - uint32_t tmp; - /* Enable bus mastering */ - tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; - WREG32(RADEON_BUS_CNTL, tmp); -} - int r100_pci_gart_enable(struct radeon_device *rdev) { uint32_t tmp; @@ -705,18 +695,18 @@ int r100_irq_set(struct radeon_device *rdev) WREG32(R_000040_GEN_INT_CNTL, 0); return -EINVAL; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { tmp |= RADEON_SW_INT_ENABLE; } if (rdev->irq.gui_idle) { tmp |= RADEON_GUI_IDLE_MASK; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { tmp |= RADEON_CRTC_VBLANK_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { tmp |= RADEON_CRTC2_VBLANK_MASK; } if (rdev->irq.hpd[0]) { @@ -782,7 +772,6 @@ int r100_irq_process(struct radeon_device *rdev) /* gui idle interrupt */ if (status & RADEON_GUI_IDLE_STAT) { rdev->irq.gui_idle_acked = true; - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); } /* Vertical blank interrupts */ @@ -792,7 +781,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); } if (status & RADEON_CRTC2_VBLANK_STAT) { @@ -801,7 +790,7 @@ int r100_irq_process(struct radeon_device *rdev) rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if 
(atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); } if (status & RADEON_FP_DETECT_STAT) { @@ -883,7 +872,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t cur_pages; @@ -947,7 +936,7 @@ int r100_copy_blit(struct radeon_device *rdev, RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; @@ -1192,6 +1181,14 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) } ring->ready = true; radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); + + if (radeon_ring_supports_scratch_reg(rdev, ring)) { + r = radeon_scratch_get(rdev, &ring->rptr_save_reg); + if (r) { + DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); + ring->rptr_save_reg = 0; + } + } return 0; } @@ -1202,6 +1199,7 @@ void r100_cp_fini(struct radeon_device *rdev) } /* Disable ring */ r100_cp_disable(rdev); + radeon_scratch_free(rdev, rdev->ring[RADEON_RING_TYPE_GFX_INDEX].rptr_save_reg); radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); DRM_INFO("radeon: cp finalized\n"); } @@ -1223,6 +1221,112 @@ void r100_cp_disable(struct radeon_device *rdev) /* * CS functions */ +int r100_reloc_pitch_offset(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + unsigned idx, + unsigned reg) +{ + int r; + u32 tile_flags = 0; + u32 tmp; + struct radeon_cs_reloc *reloc; + u32 value; + + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for ib[%d]=0x%04X\n", + idx, reg); + r100_cs_dump_packet(p, pkt); + return r; + } + + value = radeon_get_ib_value(p, idx); + tmp = value & 0x003fffff; + tmp += (((u32)reloc->lobj.gpu_offset) >> 10); + + if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) { + if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) + tile_flags |= RADEON_DST_TILE_MACRO; + if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) { + if (reg == RADEON_SRC_PITCH_OFFSET) { + DRM_ERROR("Cannot src blit from microtiled surface\n"); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + tile_flags |= RADEON_DST_TILE_MICRO; + } + + tmp |= tile_flags; + p->ib.ptr[idx] = (value & 0x3fc00000) | tmp; + } else + p->ib.ptr[idx] = (value & 0xffc00000) | tmp; + return 0; +} + +int r100_packet3_load_vbpntr(struct radeon_cs_parser *p, + struct radeon_cs_packet *pkt, + int idx) +{ + unsigned c, i; + struct radeon_cs_reloc *reloc; + struct r100_cs_track *track; + int r = 0; + volatile uint32_t *ib; + u32 idx_value; + + ib = p->ib.ptr; + track = (struct r100_cs_track *)p->track; + c = radeon_get_ib_value(p, idx++) & 0x1F; + if (c > 16) { + DRM_ERROR("Only 16 vertex buffers are allowed %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return -EINVAL; + } + track->num_arrays = c; + for (i = 0; i < (c - 1); i+=2, idx+=3) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize &= 0x7F; + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No 
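/* [Editor's note] The copy callbacks above change from taking a fence to
 * taking a pointer-to-pointer: radeon_fence_emit() now allocates the fence
 * for the ring it is given, so the caller receives a new object through the
 * out-parameter. Mock restatement (types invented for illustration): */
#include <stdlib.h>

struct mock_fence { int ring; };

static int mock_fence_emit(struct mock_fence **fence, int ring)
{
        *fence = malloc(sizeof(**fence));   /* callee creates the fence */
        if (!*fence)
                return -1;
        (*fence)->ring = ring;
        return 0;
}

static int mock_copy_blit(struct mock_fence **fence)
{
        /* ...emit blit packets here... */
        return fence ? mock_fence_emit(fence, 0) : 0;
}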
reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 1].robj = reloc->robj; + track->arrays[i + 1].esize = idx_value >> 24; + track->arrays[i + 1].esize &= 0x7F; + } + if (c & 1) { + r = r100_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("No reloc for packet3 %d\n", + pkt->opcode); + r100_cs_dump_packet(p, pkt); + return r; + } + idx_value = radeon_get_ib_value(p, idx); + ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset); + track->arrays[i + 0].robj = reloc->robj; + track->arrays[i + 0].esize = idx_value >> 8; + track->arrays[i + 0].esize &= 0x7F; + } + return r; +} + int r100_cs_parse_packet0(struct radeon_cs_parser *p, struct radeon_cs_packet *pkt, const unsigned *auth, unsigned n, @@ -2048,6 +2152,379 @@ int r100_cs_parse(struct radeon_cs_parser *p) return 0; } +static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) +{ + DRM_ERROR("pitch %d\n", t->pitch); + DRM_ERROR("use_pitch %d\n", t->use_pitch); + DRM_ERROR("width %d\n", t->width); + DRM_ERROR("width_11 %d\n", t->width_11); + DRM_ERROR("height %d\n", t->height); + DRM_ERROR("height_11 %d\n", t->height_11); + DRM_ERROR("num levels %d\n", t->num_levels); + DRM_ERROR("depth %d\n", t->txdepth); + DRM_ERROR("bpp %d\n", t->cpp); + DRM_ERROR("coordinate type %d\n", t->tex_coord_type); + DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); + DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); + DRM_ERROR("compress format %d\n", t->compress_format); +} + +static int r100_track_compress_size(int compress_format, int w, int h) +{ + int block_width, block_height, block_bytes; + int wblocks, hblocks; + int min_wblocks; + int sz; + + block_width = 4; + block_height = 4; + + switch (compress_format) { + case R100_TRACK_COMP_DXT1: + block_bytes = 8; + min_wblocks = 4; + break; + default: + case R100_TRACK_COMP_DXT35: + block_bytes = 16; + min_wblocks = 2; + break; + } + + hblocks = (h + block_height - 1) / block_height; + wblocks = (w + block_width - 1) / block_width; + if (wblocks < min_wblocks) + wblocks = min_wblocks; + sz = wblocks * hblocks * block_bytes; + return sz; +} + +static int r100_cs_track_cube(struct radeon_device *rdev, + struct r100_cs_track *track, unsigned idx) +{ + unsigned face, w, h; + struct radeon_bo *cube_robj; + unsigned long size; + unsigned compress_format = track->textures[idx].compress_format; + + for (face = 0; face < 5; face++) { + cube_robj = track->textures[idx].cube_info[face].robj; + w = track->textures[idx].cube_info[face].width; + h = track->textures[idx].cube_info[face].height; + + if (compress_format) { + size = r100_track_compress_size(compress_format, w, h); + } else + size = w * h; + size *= track->textures[idx].cpp; + + size += track->textures[idx].cube_info[face].offset; + + if (size > radeon_bo_size(cube_robj)) { + DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", + size, radeon_bo_size(cube_robj)); + r100_cs_track_texture_print(&track->textures[idx]); + return -1; + } + } + return 0; +} + +static int r100_cs_track_texture_check(struct radeon_device *rdev, + struct r100_cs_track *track) +{ + struct radeon_bo *robj; + unsigned long size; + unsigned u, i, w, h, d; + int ret; + + for (u = 0; u < track->num_texture; u++) { + if (!track->textures[u].enabled) + continue; + if (track->textures[u].lookup_disable) + continue; + robj = track->textures[u].robj; + if (robj == NULL) { + DRM_ERROR("No 
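/* [Editor's worked example] The block math used by
 * r100_track_compress_size() above: DXT stores 4x4 texel blocks, 8 bytes
 * per DXT1 block and 16 bytes per DXT3/5 block, with a minimum row width
 * measured in blocks. */
static int dxt_size(int is_dxt1, int w, int h)
{
        int block_bytes = is_dxt1 ? 8 : 16;
        int min_wblocks = is_dxt1 ? 4 : 2;
        int wblocks = (w + 3) / 4;          /* round up to whole blocks */
        int hblocks = (h + 3) / 4;

        if (wblocks < min_wblocks)
                wblocks = min_wblocks;
        return wblocks * hblocks * block_bytes;
}
/* e.g. a 256x128 DXT1 level: (256/4) * (128/4) * 8 = 64 * 32 * 8 = 16384 bytes */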
texture bound to unit %u\n", u); + return -EINVAL; + } + size = 0; + for (i = 0; i <= track->textures[u].num_levels; i++) { + if (track->textures[u].use_pitch) { + if (rdev->family < CHIP_R300) + w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); + else + w = track->textures[u].pitch / (1 << i); + } else { + w = track->textures[u].width; + if (rdev->family >= CHIP_RV515) + w |= track->textures[u].width_11; + w = w / (1 << i); + if (track->textures[u].roundup_w) + w = roundup_pow_of_two(w); + } + h = track->textures[u].height; + if (rdev->family >= CHIP_RV515) + h |= track->textures[u].height_11; + h = h / (1 << i); + if (track->textures[u].roundup_h) + h = roundup_pow_of_two(h); + if (track->textures[u].tex_coord_type == 1) { + d = (1 << track->textures[u].txdepth) / (1 << i); + if (!d) + d = 1; + } else { + d = 1; + } + if (track->textures[u].compress_format) { + + size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; + /* compressed textures are block based */ + } else + size += w * h * d; + } + size *= track->textures[u].cpp; + + switch (track->textures[u].tex_coord_type) { + case 0: + case 1: + break; + case 2: + if (track->separate_cube) { + ret = r100_cs_track_cube(rdev, track, u); + if (ret) + return ret; + } else + size *= 6; + break; + default: + DRM_ERROR("Invalid texture coordinate type %u for unit " + "%u\n", track->textures[u].tex_coord_type, u); + return -EINVAL; + } + if (size > radeon_bo_size(robj)) { + DRM_ERROR("Texture of unit %u needs %lu bytes but is " + "%lu\n", u, size, radeon_bo_size(robj)); + r100_cs_track_texture_print(&track->textures[u]); + return -EINVAL; + } + } + return 0; +} + +int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i; + unsigned long size; + unsigned prim_walk; + unsigned nverts; + unsigned num_cb = track->cb_dirty ? track->num_cb : 0; + + if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && + !track->blend_read_enable) + num_cb = 0; + + for (i = 0; i < num_cb; i++) { + if (track->cb[i].robj == NULL) { + DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); + return -EINVAL; + } + size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; + size += track->cb[i].offset; + if (size > radeon_bo_size(track->cb[i].robj)) { + DRM_ERROR("[drm] Buffer too small for color buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->cb[i].robj)); + DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", + i, track->cb[i].pitch, track->cb[i].cpp, + track->cb[i].offset, track->maxy); + return -EINVAL; + } + } + track->cb_dirty = false; + + if (track->zb_dirty && track->z_enabled) { + if (track->zb.robj == NULL) { + DRM_ERROR("[drm] No buffer for z buffer !\n"); + return -EINVAL; + } + size = track->zb.pitch * track->zb.cpp * track->maxy; + size += track->zb.offset; + if (size > radeon_bo_size(track->zb.robj)) { + DRM_ERROR("[drm] Buffer too small for z buffer " + "(need %lu have %lu) !\n", size, + radeon_bo_size(track->zb.robj)); + DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", + track->zb.pitch, track->zb.cpp, + track->zb.offset, track->maxy); + return -EINVAL; + } + } + track->zb_dirty = false; + + if (track->aa_dirty && track->aaresolve) { + if (track->aa.robj == NULL) { + DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); + return -EINVAL; + } + /* I believe the format comes from colorbuffer0. 
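/* [Editor's restatement] Every buffer check in r100_cs_track_check() above
 * reduces to the same bound: the worst-case bytes the GPU can touch (pitch
 * times bytes-per-pixel times height, plus the start offset) must fit
 * inside the backing object. */
#include <stdbool.h>

static bool surface_fits(unsigned long pitch, unsigned long cpp,
                         unsigned long maxy, unsigned long offset,
                         unsigned long bo_size)
{
        unsigned long need = pitch * cpp * maxy + offset;

        return need <= bo_size;  /* e.g. 1024 * 4 * 768 + 0 = 3145728 bytes */
}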
*/ + size = track->aa.pitch * track->cb[0].cpp * track->maxy; + size += track->aa.offset; + if (size > radeon_bo_size(track->aa.robj)) { + DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " + "(need %lu have %lu) !\n", i, size, + radeon_bo_size(track->aa.robj)); + DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", + i, track->aa.pitch, track->cb[0].cpp, + track->aa.offset, track->maxy); + return -EINVAL; + } + } + track->aa_dirty = false; + + prim_walk = (track->vap_vf_cntl >> 4) & 0x3; + if (track->vap_vf_cntl & (1 << 14)) { + nverts = track->vap_alt_nverts; + } else { + nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; + } + switch (prim_walk) { + case 1: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * track->max_indx * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + DRM_ERROR("Max indices %u\n", track->max_indx); + return -EINVAL; + } + } + break; + case 2: + for (i = 0; i < track->num_arrays; i++) { + size = track->arrays[i].esize * (nverts - 1) * 4; + if (track->arrays[i].robj == NULL) { + DRM_ERROR("(PW %u) Vertex array %u no buffer " + "bound\n", prim_walk, i); + return -EINVAL; + } + if (size > radeon_bo_size(track->arrays[i].robj)) { + dev_err(rdev->dev, "(PW %u) Vertex array %u " + "need %lu dwords have %lu dwords\n", + prim_walk, i, size >> 2, + radeon_bo_size(track->arrays[i].robj) + >> 2); + return -EINVAL; + } + } + break; + case 3: + size = track->vtx_size * nverts; + if (size != track->immd_dwords) { + DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", + track->immd_dwords, size); + DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", + nverts, track->vtx_size); + return -EINVAL; + } + break; + default: + DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", + prim_walk); + return -EINVAL; + } + + if (track->tex_dirty) { + track->tex_dirty = false; + return r100_cs_track_texture_check(rdev, track); + } + return 0; +} + +void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) +{ + unsigned i, face; + + track->cb_dirty = true; + track->zb_dirty = true; + track->tex_dirty = true; + track->aa_dirty = true; + + if (rdev->family < CHIP_R300) { + track->num_cb = 1; + if (rdev->family <= CHIP_RS200) + track->num_texture = 3; + else + track->num_texture = 6; + track->maxy = 2048; + track->separate_cube = 1; + } else { + track->num_cb = 4; + track->num_texture = 16; + track->maxy = 4096; + track->separate_cube = 0; + track->aaresolve = false; + track->aa.robj = NULL; + } + + for (i = 0; i < track->num_cb; i++) { + track->cb[i].robj = NULL; + track->cb[i].pitch = 8192; + track->cb[i].cpp = 16; + track->cb[i].offset = 0; + } + track->z_enabled = true; + track->zb.robj = NULL; + track->zb.pitch = 8192; + track->zb.cpp = 4; + track->zb.offset = 0; + track->vtx_size = 0x7F; + track->immd_dwords = 0xFFFFFFFFUL; + track->num_arrays = 11; + track->max_indx = 0x00FFFFFFUL; + for (i = 0; i < track->num_arrays; i++) { + track->arrays[i].robj = NULL; + track->arrays[i].esize = 0x7F; + } + for (i = 0; i < track->num_texture; i++) { + track->textures[i].compress_format = R100_TRACK_COMP_NONE; + track->textures[i].pitch = 16536; + track->textures[i].width = 16536; + track->textures[i].height = 16536; + 
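/* [Editor's sketch; bit positions taken from the prim_walk checks above]
 * How the tracker decodes VAP_VF_CNTL before validating vertex data: */
#include <stdint.h>

struct vf_decode { unsigned prim_walk; unsigned nverts; };

static struct vf_decode decode_vap_vf_cntl(uint32_t cntl, uint32_t alt_nverts)
{
        struct vf_decode d;

        d.prim_walk = (cntl >> 4) & 0x3;          /* 1/2/3 as in the switch above */
        if (cntl & (1u << 14))                    /* alternate vertex-count source */
                d.nverts = alt_nverts;
        else
                d.nverts = (cntl >> 16) & 0xFFFF; /* NUM_VERTICES field */
        return d;
}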
track->textures[i].width_11 = 1 << 11; + track->textures[i].height_11 = 1 << 11; + track->textures[i].num_levels = 12; + if (rdev->family <= CHIP_RS200) { + track->textures[i].tex_coord_type = 0; + track->textures[i].txdepth = 0; + } else { + track->textures[i].txdepth = 16; + track->textures[i].tex_coord_type = 1; + } + track->textures[i].cpp = 64; + track->textures[i].robj = NULL; + /* CS IB emission code makes sure texture unit are disabled */ + track->textures[i].enabled = false; + track->textures[i].lookup_disable = false; + track->textures[i].roundup_w = true; + track->textures[i].roundup_h = true; + if (track->separate_cube) + for (face = 0; face < 5; face++) { + track->textures[i].cube_info[face].robj = NULL; + track->textures[i].cube_info[face].width = 16536; + track->textures[i].cube_info[face].height = 16536; + track->textures[i].cube_info[face].offset = 0; + } + } +} /* * Global GPU functions @@ -2175,6 +2652,15 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) return radeon_ring_test_lockup(rdev, ring); } +/* required on r1xx, r2xx, r300, r(v)350, r420/r481, rs400/rs480 */ +void r100_enable_bm(struct radeon_device *rdev) +{ + uint32_t tmp; + /* Enable bus mastering */ + tmp = RREG32(RADEON_BUS_CNTL) & ~RADEON_BUS_MASTER_DIS; + WREG32(RADEON_BUS_CNTL, tmp); +} + void r100_bm_disable(struct radeon_device *rdev) { u32 tmp; @@ -3261,380 +3747,6 @@ void r100_bandwidth_update(struct radeon_device *rdev) } } -static void r100_cs_track_texture_print(struct r100_cs_track_texture *t) -{ - DRM_ERROR("pitch %d\n", t->pitch); - DRM_ERROR("use_pitch %d\n", t->use_pitch); - DRM_ERROR("width %d\n", t->width); - DRM_ERROR("width_11 %d\n", t->width_11); - DRM_ERROR("height %d\n", t->height); - DRM_ERROR("height_11 %d\n", t->height_11); - DRM_ERROR("num levels %d\n", t->num_levels); - DRM_ERROR("depth %d\n", t->txdepth); - DRM_ERROR("bpp %d\n", t->cpp); - DRM_ERROR("coordinate type %d\n", t->tex_coord_type); - DRM_ERROR("width round to power of 2 %d\n", t->roundup_w); - DRM_ERROR("height round to power of 2 %d\n", t->roundup_h); - DRM_ERROR("compress format %d\n", t->compress_format); -} - -static int r100_track_compress_size(int compress_format, int w, int h) -{ - int block_width, block_height, block_bytes; - int wblocks, hblocks; - int min_wblocks; - int sz; - - block_width = 4; - block_height = 4; - - switch (compress_format) { - case R100_TRACK_COMP_DXT1: - block_bytes = 8; - min_wblocks = 4; - break; - default: - case R100_TRACK_COMP_DXT35: - block_bytes = 16; - min_wblocks = 2; - break; - } - - hblocks = (h + block_height - 1) / block_height; - wblocks = (w + block_width - 1) / block_width; - if (wblocks < min_wblocks) - wblocks = min_wblocks; - sz = wblocks * hblocks * block_bytes; - return sz; -} - -static int r100_cs_track_cube(struct radeon_device *rdev, - struct r100_cs_track *track, unsigned idx) -{ - unsigned face, w, h; - struct radeon_bo *cube_robj; - unsigned long size; - unsigned compress_format = track->textures[idx].compress_format; - - for (face = 0; face < 5; face++) { - cube_robj = track->textures[idx].cube_info[face].robj; - w = track->textures[idx].cube_info[face].width; - h = track->textures[idx].cube_info[face].height; - - if (compress_format) { - size = r100_track_compress_size(compress_format, w, h); - } else - size = w * h; - size *= track->textures[idx].cpp; - - size += track->textures[idx].cube_info[face].offset; - - if (size > radeon_bo_size(cube_robj)) { - DRM_ERROR("Cube texture offset greater than object size %lu %lu\n", - size, 
radeon_bo_size(cube_robj)); - r100_cs_track_texture_print(&track->textures[idx]); - return -1; - } - } - return 0; -} - -static int r100_cs_track_texture_check(struct radeon_device *rdev, - struct r100_cs_track *track) -{ - struct radeon_bo *robj; - unsigned long size; - unsigned u, i, w, h, d; - int ret; - - for (u = 0; u < track->num_texture; u++) { - if (!track->textures[u].enabled) - continue; - if (track->textures[u].lookup_disable) - continue; - robj = track->textures[u].robj; - if (robj == NULL) { - DRM_ERROR("No texture bound to unit %u\n", u); - return -EINVAL; - } - size = 0; - for (i = 0; i <= track->textures[u].num_levels; i++) { - if (track->textures[u].use_pitch) { - if (rdev->family < CHIP_R300) - w = (track->textures[u].pitch / track->textures[u].cpp) / (1 << i); - else - w = track->textures[u].pitch / (1 << i); - } else { - w = track->textures[u].width; - if (rdev->family >= CHIP_RV515) - w |= track->textures[u].width_11; - w = w / (1 << i); - if (track->textures[u].roundup_w) - w = roundup_pow_of_two(w); - } - h = track->textures[u].height; - if (rdev->family >= CHIP_RV515) - h |= track->textures[u].height_11; - h = h / (1 << i); - if (track->textures[u].roundup_h) - h = roundup_pow_of_two(h); - if (track->textures[u].tex_coord_type == 1) { - d = (1 << track->textures[u].txdepth) / (1 << i); - if (!d) - d = 1; - } else { - d = 1; - } - if (track->textures[u].compress_format) { - - size += r100_track_compress_size(track->textures[u].compress_format, w, h) * d; - /* compressed textures are block based */ - } else - size += w * h * d; - } - size *= track->textures[u].cpp; - - switch (track->textures[u].tex_coord_type) { - case 0: - case 1: - break; - case 2: - if (track->separate_cube) { - ret = r100_cs_track_cube(rdev, track, u); - if (ret) - return ret; - } else - size *= 6; - break; - default: - DRM_ERROR("Invalid texture coordinate type %u for unit " - "%u\n", track->textures[u].tex_coord_type, u); - return -EINVAL; - } - if (size > radeon_bo_size(robj)) { - DRM_ERROR("Texture of unit %u needs %lu bytes but is " - "%lu\n", u, size, radeon_bo_size(robj)); - r100_cs_track_texture_print(&track->textures[u]); - return -EINVAL; - } - } - return 0; -} - -int r100_cs_track_check(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i; - unsigned long size; - unsigned prim_walk; - unsigned nverts; - unsigned num_cb = track->cb_dirty ? 
track->num_cb : 0; - - if (num_cb && !track->zb_cb_clear && !track->color_channel_mask && - !track->blend_read_enable) - num_cb = 0; - - for (i = 0; i < num_cb; i++) { - if (track->cb[i].robj == NULL) { - DRM_ERROR("[drm] No buffer for color buffer %d !\n", i); - return -EINVAL; - } - size = track->cb[i].pitch * track->cb[i].cpp * track->maxy; - size += track->cb[i].offset; - if (size > radeon_bo_size(track->cb[i].robj)) { - DRM_ERROR("[drm] Buffer too small for color buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->cb[i].robj)); - DRM_ERROR("[drm] color buffer %d (%u %u %u %u)\n", - i, track->cb[i].pitch, track->cb[i].cpp, - track->cb[i].offset, track->maxy); - return -EINVAL; - } - } - track->cb_dirty = false; - - if (track->zb_dirty && track->z_enabled) { - if (track->zb.robj == NULL) { - DRM_ERROR("[drm] No buffer for z buffer !\n"); - return -EINVAL; - } - size = track->zb.pitch * track->zb.cpp * track->maxy; - size += track->zb.offset; - if (size > radeon_bo_size(track->zb.robj)) { - DRM_ERROR("[drm] Buffer too small for z buffer " - "(need %lu have %lu) !\n", size, - radeon_bo_size(track->zb.robj)); - DRM_ERROR("[drm] zbuffer (%u %u %u %u)\n", - track->zb.pitch, track->zb.cpp, - track->zb.offset, track->maxy); - return -EINVAL; - } - } - track->zb_dirty = false; - - if (track->aa_dirty && track->aaresolve) { - if (track->aa.robj == NULL) { - DRM_ERROR("[drm] No buffer for AA resolve buffer %d !\n", i); - return -EINVAL; - } - /* I believe the format comes from colorbuffer0. */ - size = track->aa.pitch * track->cb[0].cpp * track->maxy; - size += track->aa.offset; - if (size > radeon_bo_size(track->aa.robj)) { - DRM_ERROR("[drm] Buffer too small for AA resolve buffer %d " - "(need %lu have %lu) !\n", i, size, - radeon_bo_size(track->aa.robj)); - DRM_ERROR("[drm] AA resolve buffer %d (%u %u %u %u)\n", - i, track->aa.pitch, track->cb[0].cpp, - track->aa.offset, track->maxy); - return -EINVAL; - } - } - track->aa_dirty = false; - - prim_walk = (track->vap_vf_cntl >> 4) & 0x3; - if (track->vap_vf_cntl & (1 << 14)) { - nverts = track->vap_alt_nverts; - } else { - nverts = (track->vap_vf_cntl >> 16) & 0xFFFF; - } - switch (prim_walk) { - case 1: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * track->max_indx * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - DRM_ERROR("Max indices %u\n", track->max_indx); - return -EINVAL; - } - } - break; - case 2: - for (i = 0; i < track->num_arrays; i++) { - size = track->arrays[i].esize * (nverts - 1) * 4; - if (track->arrays[i].robj == NULL) { - DRM_ERROR("(PW %u) Vertex array %u no buffer " - "bound\n", prim_walk, i); - return -EINVAL; - } - if (size > radeon_bo_size(track->arrays[i].robj)) { - dev_err(rdev->dev, "(PW %u) Vertex array %u " - "need %lu dwords have %lu dwords\n", - prim_walk, i, size >> 2, - radeon_bo_size(track->arrays[i].robj) - >> 2); - return -EINVAL; - } - } - break; - case 3: - size = track->vtx_size * nverts; - if (size != track->immd_dwords) { - DRM_ERROR("IMMD draw %u dwors but needs %lu dwords\n", - track->immd_dwords, size); - DRM_ERROR("VAP_VF_CNTL.NUM_VERTICES %u, VTX_SIZE %u\n", - nverts, track->vtx_size); - return -EINVAL; - } - break; - default: - 
DRM_ERROR("[drm] Invalid primitive walk %d for VAP_VF_CNTL\n", - prim_walk); - return -EINVAL; - } - - if (track->tex_dirty) { - track->tex_dirty = false; - return r100_cs_track_texture_check(rdev, track); - } - return 0; -} - -void r100_cs_track_clear(struct radeon_device *rdev, struct r100_cs_track *track) -{ - unsigned i, face; - - track->cb_dirty = true; - track->zb_dirty = true; - track->tex_dirty = true; - track->aa_dirty = true; - - if (rdev->family < CHIP_R300) { - track->num_cb = 1; - if (rdev->family <= CHIP_RS200) - track->num_texture = 3; - else - track->num_texture = 6; - track->maxy = 2048; - track->separate_cube = 1; - } else { - track->num_cb = 4; - track->num_texture = 16; - track->maxy = 4096; - track->separate_cube = 0; - track->aaresolve = false; - track->aa.robj = NULL; - } - - for (i = 0; i < track->num_cb; i++) { - track->cb[i].robj = NULL; - track->cb[i].pitch = 8192; - track->cb[i].cpp = 16; - track->cb[i].offset = 0; - } - track->z_enabled = true; - track->zb.robj = NULL; - track->zb.pitch = 8192; - track->zb.cpp = 4; - track->zb.offset = 0; - track->vtx_size = 0x7F; - track->immd_dwords = 0xFFFFFFFFUL; - track->num_arrays = 11; - track->max_indx = 0x00FFFFFFUL; - for (i = 0; i < track->num_arrays; i++) { - track->arrays[i].robj = NULL; - track->arrays[i].esize = 0x7F; - } - for (i = 0; i < track->num_texture; i++) { - track->textures[i].compress_format = R100_TRACK_COMP_NONE; - track->textures[i].pitch = 16536; - track->textures[i].width = 16536; - track->textures[i].height = 16536; - track->textures[i].width_11 = 1 << 11; - track->textures[i].height_11 = 1 << 11; - track->textures[i].num_levels = 12; - if (rdev->family <= CHIP_RS200) { - track->textures[i].tex_coord_type = 0; - track->textures[i].txdepth = 0; - } else { - track->textures[i].txdepth = 16; - track->textures[i].tex_coord_type = 1; - } - track->textures[i].cpp = 64; - track->textures[i].robj = NULL; - /* CS IB emission code makes sure texture unit are disabled */ - track->textures[i].enabled = false; - track->textures[i].lookup_disable = false; - track->textures[i].roundup_w = true; - track->textures[i].roundup_h = true; - if (track->separate_cube) - for (face = 0; face < 5; face++) { - track->textures[i].cube_info[face].robj = NULL; - track->textures[i].cube_info[face].width = 16536; - track->textures[i].cube_info[face].height = 16536; - track->textures[i].cube_info[face].offset = 0; - } - } -} - int r100_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; @@ -3679,6 +3791,12 @@ void r100_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; + if (ring->rptr_save_reg) { + u32 next_rptr = ring->wptr + 2 + 3; + radeon_ring_write(ring, PACKET0(ring->rptr_save_reg, 0)); + radeon_ring_write(ring, next_rptr); + } + radeon_ring_write(ring, PACKET0(RADEON_CP_IB_BASE, 1)); radeon_ring_write(ring, ib->gpu_addr); radeon_ring_write(ring, ib->length_dw); @@ -3711,7 +3829,7 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[6] = PACKET2(0); ib.ptr[7] = PACKET2(0); ib.length_dw = 8; - r = radeon_ib_schedule(rdev, &ib); + r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -3740,12 +3858,6 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } -void r100_ib_fini(struct radeon_device *rdev) -{ - radeon_ib_pool_suspend(rdev); - radeon_ib_pool_fini(rdev); -} - void 
r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save) { /* Shutdown CP we shouldn't need to do that but better be safe than @@ -3905,13 +4017,11 @@ static int r100_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -3948,7 +4058,6 @@ int r100_resume(struct radeon_device *rdev) int r100_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -3961,7 +4070,7 @@ void r100_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); @@ -4068,20 +4177,14 @@ int r100_init(struct radeon_device *rdev) } r100_set_safe_registers(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r100_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCI) r100_pci_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r200.c b/drivers/gpu/drm/radeon/r200.c index a26144d01207..f0889259eb08 100644 --- a/drivers/gpu/drm/radeon/r200.c +++ b/drivers/gpu/drm/radeon/r200.c @@ -85,7 +85,7 @@ int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; uint32_t size; @@ -120,7 +120,7 @@ int r200_copy_dma(struct radeon_device *rdev, radeon_ring_write(ring, PACKET0(RADEON_WAIT_UNTIL, 0)); radeon_ring_write(ring, RADEON_WAIT_DMA_GUI_IDLE); if (fence) { - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); } radeon_ring_unlock_commit(rdev, ring); return r; diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 97722a33e513..646a1927dda7 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1391,13 +1391,11 @@ static int r300_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -1436,7 +1434,6 @@ int r300_resume(struct radeon_device *rdev) int r300_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); r100_irq_disable(rdev); @@ -1451,7 +1448,7 @@ void r300_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); @@ -1538,20 +1535,14 @@ int r300_init(struct radeon_device *rdev) } r300_set_reg_safe(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = 
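/* [Editor's sketch of the pattern repeated for r100/r200/r300/r420/r520 in
 * the hunks around here; names are generic stand-ins] IB pool setup moves
 * out of the one-time <asic>_init() path and into <asic>_startup(), which
 * also runs on resume, and startup now reports the failure itself: */
static int mock_ib_pool_init(void) { return 0; }

static int mock_asic_startup(void)
{
        /* ...gart, rings, wb, irq already brought up... */
        if (mock_ib_pool_init()) {
                /* previously handled once in <asic>_init() */
                return -1;  /* dev_err(...) then bail in the real code */
        }
        return 0;
}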
r300_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index 99137be7a300..f2f5bf6d339f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -275,13 +275,11 @@ static int r420_startup(struct radeon_device *rdev) } r420_cp_errata_init(rdev); - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -324,7 +322,6 @@ int r420_resume(struct radeon_device *rdev) int r420_suspend(struct radeon_device *rdev) { - radeon_ib_pool_suspend(rdev); r420_cp_errata_fini(rdev); r100_cp_disable(rdev); radeon_wb_disable(rdev); @@ -340,7 +337,7 @@ void r420_fini(struct radeon_device *rdev) { r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_gem_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); @@ -438,20 +435,14 @@ int r420_init(struct radeon_device *rdev) } r420_set_reg_safe(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r420_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); if (rdev->flags & RADEON_IS_PCIE) rv370_pcie_gart_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index b5cf8375cd25..079d3c52c08a 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -203,13 +203,11 @@ static int r520_startup(struct radeon_device *rdev) return r; } - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } return 0; } @@ -311,20 +309,14 @@ int r520_init(struct radeon_device *rdev) return r; rv515_set_safe_registers(rdev); - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r520_startup(rdev); if (r) { /* Somethings want wront with the accel init stop accel */ dev_err(rdev->dev, "Disabling GPU acceleration\n"); r100_cp_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); rv370_pcie_gart_fini(rdev); radeon_agp_fini(rdev); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index bff627293812..d79c639ae739 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -709,6 +709,7 @@ void r600_hpd_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned enable = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); @@ -729,28 +730,22 @@ void r600_hpd_init(struct radeon_device *rdev) 
switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, tmp); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, tmp); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, tmp); - rdev->irq.hpd[2] = true; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, tmp); - rdev->irq.hpd[3] = true; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, tmp); - rdev->irq.hpd[4] = true; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, tmp); - rdev->irq.hpd[5] = true; break; default: break; @@ -759,85 +754,73 @@ void r600_hpd_init(struct radeon_device *rdev) switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[0] = true; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[1] = true; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, DC_HOT_PLUG_DETECTx_EN); - rdev->irq.hpd[2] = true; break; default: break; } } + enable |= 1 << radeon_connector->hpd.hpd; radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); } - if (rdev->irq.installed) - r600_irq_set(rdev); + radeon_irq_kms_enable_hpd(rdev, enable); } void r600_hpd_fini(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; struct drm_connector *connector; + unsigned disable = 0; - if (ASIC_IS_DCE3(rdev)) { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + list_for_each_entry(connector, &dev->mode_config.connector_list, head) { + struct radeon_connector *radeon_connector = to_radeon_connector(connector); + if (ASIC_IS_DCE3(rdev)) { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HPD1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HPD2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HPD3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; case RADEON_HPD_4: WREG32(DC_HPD4_CONTROL, 0); - rdev->irq.hpd[3] = false; break; /* DCE 3.2 */ case RADEON_HPD_5: WREG32(DC_HPD5_CONTROL, 0); - rdev->irq.hpd[4] = false; break; case RADEON_HPD_6: WREG32(DC_HPD6_CONTROL, 0); - rdev->irq.hpd[5] = false; break; default: break; } - } - } else { - list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - struct radeon_connector *radeon_connector = to_radeon_connector(connector); + } else { switch (radeon_connector->hpd.hpd) { case RADEON_HPD_1: WREG32(DC_HOT_PLUG_DETECT1_CONTROL, 0); - rdev->irq.hpd[0] = false; break; case RADEON_HPD_2: WREG32(DC_HOT_PLUG_DETECT2_CONTROL, 0); - rdev->irq.hpd[1] = false; break; case RADEON_HPD_3: WREG32(DC_HOT_PLUG_DETECT3_CONTROL, 0); - rdev->irq.hpd[2] = false; break; default: break; } } + disable |= 1 << radeon_connector->hpd.hpd; } + radeon_irq_kms_disable_hpd(rdev, disable); } /* @@ -1306,6 +1289,14 @@ int r600_gpu_soft_reset(struct radeon_device *rdev) RREG32(R_008014_GRBM_STATUS2)); dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n", RREG32(R_000E50_SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); rv515_mc_stop(rdev, &save); if (r600_mc_wait_for_idle(rdev)) { dev_warn(rdev->dev, "Wait for MC idle 
timedout !\n"); @@ -1349,6 +1340,14 @@ int r600_gpu_soft_reset(struct radeon_device *rdev) RREG32(R_008014_GRBM_STATUS2)); dev_info(rdev->dev, " R_000E50_SRBM_STATUS=0x%08X\n", RREG32(R_000E50_SRBM_STATUS)); + dev_info(rdev->dev, " R_008674_CP_STALLED_STAT1 = 0x%08X\n", + RREG32(CP_STALLED_STAT1)); + dev_info(rdev->dev, " R_008678_CP_STALLED_STAT2 = 0x%08X\n", + RREG32(CP_STALLED_STAT2)); + dev_info(rdev->dev, " R_00867C_CP_BUSY_STAT = 0x%08X\n", + RREG32(CP_BUSY_STAT)); + dev_info(rdev->dev, " R_008680_CP_STAT = 0x%08X\n", + RREG32(CP_STAT)); rv515_mc_resume(rdev, &save); return 0; } @@ -2172,18 +2171,29 @@ int r600_cp_resume(struct radeon_device *rdev) void r600_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size) { u32 rb_bufsz; + int r; /* Align ring size */ rb_bufsz = drm_order(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; ring->ring_size = ring_size; ring->align_mask = 16 - 1; + + if (radeon_ring_supports_scratch_reg(rdev, ring)) { + r = radeon_scratch_get(rdev, &ring->rptr_save_reg); + if (r) { + DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r); + ring->rptr_save_reg = 0; + } + } } void r600_cp_fini(struct radeon_device *rdev) { + struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; r600_cp_stop(rdev); - radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]); + radeon_ring_fini(rdev, ring); + radeon_scratch_free(rdev, ring->rptr_save_reg); } @@ -2206,7 +2216,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) { uint32_t scratch; uint32_t tmp = 0; - unsigned i, ridx = radeon_ring_index(rdev, ring); + unsigned i; int r; r = radeon_scratch_get(rdev, &scratch); @@ -2217,7 +2227,7 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) WREG32(scratch, 0xCAFEDEAD); r = radeon_ring_lock(rdev, ring, 3); if (r) { - DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ridx, r); + DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r); radeon_scratch_free(rdev, scratch); return r; } @@ -2232,10 +2242,10 @@ int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_UDELAY(1); } if (i < rdev->usec_timeout) { - DRM_INFO("ring test on %d succeeded in %d usecs\n", ridx, i); + DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i); } else { DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ridx, scratch, tmp); + ring->idx, scratch, tmp); r = -EINVAL; } radeon_scratch_free(rdev, scratch); @@ -2309,34 +2319,21 @@ int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence) + struct radeon_fence **fence) { + struct radeon_semaphore *sem = NULL; struct radeon_sa_bo *vb = NULL; int r; - r = r600_blit_prepare_copy(rdev, num_gpu_pages, &vb); + r = r600_blit_prepare_copy(rdev, num_gpu_pages, fence, &vb, &sem); if (r) { return r; } r600_kms_blit_copy(rdev, src_offset, dst_offset, num_gpu_pages, vb); - r600_blit_done_copy(rdev, fence, vb); + r600_blit_done_copy(rdev, fence, vb, sem); return 0; } -void r600_blit_suspend(struct radeon_device *rdev) -{ - int r; - - /* unpin shaders bo */ - if (rdev->r600_blit.shader_obj) { - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (!r) { - radeon_bo_unpin(rdev->r600_blit.shader_obj); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - } - } -} - int r600_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size) @@ 
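/* [Editor's illustration: a host-side mock of the test pattern visible in
 * r600_ring_test() above] The ring and IB tests seed a scratch register
 * with 0xCAFEDEAD, ask the CP to write 0xDEADBEEF, then poll until the
 * value flips or the budget runs out: */
#include <stdint.h>
#include <stdio.h>

static uint32_t mock_scratch;

static void mock_cp_runs_packet(void) { mock_scratch = 0xDEADBEEF; }

int main(void)
{
        int i, timeout = 100000;   /* stands in for rdev->usec_timeout */

        mock_scratch = 0xCAFEDEAD; /* poison */
        mock_cp_runs_packet();     /* the GPU would do this asynchronously */
        for (i = 0; i < timeout; i++)
                if (mock_scratch == 0xDEADBEEF)
                        break;
        printf(i < timeout ? "ring test ok (%d polls)\n"
                           : "ring test failed (%d)\n", i);
        return 0;
}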
-2419,13 +2416,11 @@ int r600_startup(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_start(rdev); - if (r) - return r; - - r = radeon_ib_ring_tests(rdev); - if (r) + r = radeon_ib_pool_init(rdev); + if (r) { + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; + } r = r600_audio_init(rdev); if (r) { @@ -2475,9 +2470,6 @@ int r600_resume(struct radeon_device *rdev) int r600_suspend(struct radeon_device *rdev) { r600_audio_fini(rdev); - radeon_ib_pool_suspend(rdev); - r600_blit_suspend(rdev); - /* FIXME: we should wait for ring to be empty */ r600_cp_stop(rdev); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; r600_irq_suspend(rdev); @@ -2559,20 +2551,14 @@ int r600_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = r600_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); rdev->accel_working = false; @@ -2588,7 +2574,7 @@ void r600_fini(struct radeon_device *rdev) r600_cp_fini(rdev); r600_irq_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); r600_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -2607,9 +2593,24 @@ void r600_fini(struct radeon_device *rdev) */ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + u32 next_rptr; + + if (ring->rptr_save_reg) { + next_rptr = ring->wptr + 3 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); + radeon_ring_write(ring, ((ring->rptr_save_reg - + PACKET3_SET_CONFIG_REG_OFFSET) >> 2)); + radeon_ring_write(ring, next_rptr); + } else if (rdev->wb.enabled) { + next_rptr = ring->wptr + 5 + 4; + radeon_ring_write(ring, PACKET3(PACKET3_MEM_WRITE, 3)); + radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); + radeon_ring_write(ring, (upper_32_bits(ring->next_rptr_gpu_addr) & 0xff) | (1 << 18)); + radeon_ring_write(ring, next_rptr); + radeon_ring_write(ring, 0); + } - /* FIXME: implement */ radeon_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); radeon_ring_write(ring, #ifdef __BIG_ENDIAN @@ -2627,7 +2628,6 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) uint32_t tmp = 0; unsigned i; int r; - int ring_index = radeon_ring_index(rdev, ring); r = radeon_scratch_get(rdev, &scratch); if (r) { @@ -2635,7 +2635,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = radeon_ib_get(rdev, ring_index, &ib, 256); + r = radeon_ib_get(rdev, ring->idx, &ib, 256); if (r) { DRM_ERROR("radeon: failed to get ib (%d).\n", r); return r; @@ -2644,7 +2644,7 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_OFFSET) >> 2); ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = radeon_ib_schedule(rdev, &ib); + r = radeon_ib_schedule(rdev, &ib, NULL); if (r) { radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); @@ -2857,7 +2857,6 @@ void r600_disable_interrupts(struct radeon_device *rdev) WREG32(IH_RB_RPTR, 0); WREG32(IH_RB_WPTR, 0); rdev->ih.enabled = false; - rdev->ih.wptr = 0; 
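/* [Editor's note: the same accounting as the r100 case, with r6xx packet
 * sizes] In r600_ring_ib_execute() below, the rptr snapshot is placed after
 * both the save packet and the 4-dword INDIRECT_BUFFER packet: */
#include <stdbool.h>
#include <stdint.h>

static uint32_t r6xx_next_rptr(uint32_t wptr, bool have_scratch_reg)
{
        /* SET_CONFIG_REG: header + reg offset + value            = 3 dwords
         * MEM_WRITE:      header + addr lo + addr hi + value + 0 = 5 dwords */
        const uint32_t save_dw = have_scratch_reg ? 3 : 5;
        const uint32_t ib_dw = 4;  /* PACKET3(INDIRECT_BUFFER, 2) + 3 payload */

        return wptr + save_dw + ib_dw;
}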
rdev->ih.rptr = 0; } @@ -3042,18 +3041,18 @@ int r600_irq_set(struct radeon_device *rdev) hdmi1 = RREG32(HDMI1_AUDIO_PACKET_CONTROL) & ~HDMI0_AZ_FORMAT_WTRIG_MASK; } - if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) { + if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) { DRM_DEBUG("r600_irq_set: sw int\n"); cp_int_cntl |= RB_INT_ENABLE; cp_int_cntl |= TIME_STAMP_INT_ENABLE; } if (rdev->irq.crtc_vblank_int[0] || - rdev->irq.pflip[0]) { + atomic_read(&rdev->irq.pflip[0])) { DRM_DEBUG("r600_irq_set: vblank 0\n"); mode_int |= D1MODE_VBLANK_INT_MASK; } if (rdev->irq.crtc_vblank_int[1] || - rdev->irq.pflip[1]) { + atomic_read(&rdev->irq.pflip[1])) { DRM_DEBUG("r600_irq_set: vblank 1\n"); mode_int |= D2MODE_VBLANK_INT_MASK; } @@ -3309,7 +3308,6 @@ int r600_irq_process(struct radeon_device *rdev) u32 rptr; u32 src_id, src_data; u32 ring_index; - unsigned long flags; bool queue_hotplug = false; bool queue_hdmi = false; @@ -3321,24 +3319,21 @@ int r600_irq_process(struct radeon_device *rdev) RREG32(IH_RB_WPTR); wptr = r600_get_ih_wptr(rdev); - rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); - spin_lock_irqsave(&rdev->ih.lock, flags); - - if (rptr == wptr) { - spin_unlock_irqrestore(&rdev->ih.lock, flags); +restart_ih: + /* is somebody else already processing irqs? */ + if (atomic_xchg(&rdev->ih.lock, 1)) return IRQ_NONE; - } -restart_ih: + rptr = rdev->ih.rptr; + DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + /* Order reading of wptr vs. reading of IH ring data */ rmb(); /* display interrupts */ r600_irq_ack(rdev); - rdev->ih.wptr = wptr; while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; @@ -3355,7 +3350,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[0]) + if (atomic_read(&rdev->irq.pflip[0])) radeon_crtc_handle_flip(rdev, 0); rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; DRM_DEBUG("IH: D1 vblank\n"); @@ -3381,7 +3376,7 @@ restart_ih: rdev->pm.vblank_sync = true; wake_up(&rdev->irq.vblank_queue); } - if (rdev->irq.pflip[1]) + if (atomic_read(&rdev->irq.pflip[1])) radeon_crtc_handle_flip(rdev, 1); rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; DRM_DEBUG("IH: D2 vblank\n"); @@ -3480,7 +3475,6 @@ restart_ih: break; case 233: /* GUI IDLE */ DRM_DEBUG("IH: GUI idle\n"); - rdev->pm.gui_idle = true; wake_up(&rdev->irq.idle_queue); break; default: @@ -3492,17 +3486,19 @@ restart_ih: rptr += 16; rptr &= rdev->ih.ptr_mask; } - /* make sure wptr hasn't changed while processing */ - wptr = r600_get_ih_wptr(rdev); - if (wptr != rdev->ih.wptr) - goto restart_ih; if (queue_hotplug) schedule_work(&rdev->hotplug_work); if (queue_hdmi) schedule_work(&rdev->audio_work); rdev->ih.rptr = rptr; WREG32(IH_RB_RPTR, rdev->ih.rptr); - spin_unlock_irqrestore(&rdev->ih.lock, flags); + atomic_set(&rdev->ih.lock, 0); + + /* make sure wptr hasn't changed while processing */ + wptr = r600_get_ih_wptr(rdev); + if (wptr != rptr) + goto restart_ih; + return IRQ_HANDLED; } @@ -3685,6 +3681,8 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) { u32 link_width_cntl, lanes, speed_cntl, training_cntl, tmp; u16 link_cntl2; + u32 mask; + int ret; if (radeon_pcie_gen2 == 0) return; @@ -3703,6 +3701,15 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) if (rdev->family <= CHIP_R600) return; + ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask); + if (ret != 0) + return; + + if (!(mask & DRM_PCIE_SPEED_50)) + return; 
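/* [Editor's sketch using C11 atomics in place of the kernel's atomic_t]
 * The reworked r600_irq_process() below replaces the spinlock with an
 * ownership flag: atomic_xchg() admits one processor at a time, and after
 * releasing the flag the wptr is re-read so entries that arrived meanwhile
 * restart the loop instead of being lost. */
#include <stdatomic.h>

static atomic_int mock_ih_lock;
static unsigned mock_ih_rptr, mock_ih_wptr;

static void mock_process_entry(unsigned rptr) { (void)rptr; }

static int mock_irq_process(void)
{
        unsigned rptr, wptr = mock_ih_wptr;

restart:
        if (atomic_exchange(&mock_ih_lock, 1))
                return 0;                   /* someone else owns the ring */
        rptr = mock_ih_rptr;
        while (rptr != wptr) {
                mock_process_entry(rptr);
                rptr += 16;                 /* IH ring entries are 16 bytes */
        }
        mock_ih_rptr = rptr;
        atomic_store(&mock_ih_lock, 0);
        wptr = mock_ih_wptr;                /* re-read the hardware wptr */
        if (wptr != rptr)
                goto restart;               /* new entries arrived: go again */
        return 1;
}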
+ + DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n"); + /* 55 nm r6xx asics */ if ((rdev->family == CHIP_RV670) || (rdev->family == CHIP_RV620) || @@ -3782,3 +3789,23 @@ static void r600_pcie_gen2_enable(struct radeon_device *rdev) WREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); } } + +/** + * r600_get_gpu_clock - return GPU clock counter snapshot + * + * @rdev: radeon_device pointer + * + * Fetches a GPU clock counter snapshot (R6xx-cayman). + * Returns the 64 bit clock counter snapshot. + */ +uint64_t r600_get_gpu_clock(struct radeon_device *rdev) +{ + uint64_t clock; + + mutex_lock(&rdev->gpu_clock_mutex); + WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&rdev->gpu_clock_mutex); + return clock; +} diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c index 03b6e0d3d503..2bef8549ddfe 100644 --- a/drivers/gpu/drm/radeon/r600_blit_kms.c +++ b/drivers/gpu/drm/radeon/r600_blit_kms.c @@ -512,7 +512,8 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.primitives.draw_auto = draw_auto; rdev->r600_blit.primitives.set_default_state = set_default_state; - rdev->r600_blit.ring_size_common = 40; /* shaders + def state */ + rdev->r600_blit.ring_size_common = 8; /* sync semaphore */ + rdev->r600_blit.ring_size_common += 40; /* shaders + def state */ rdev->r600_blit.ring_size_common += 5; /* done copy */ rdev->r600_blit.ring_size_common += 16; /* fence emit for done copy */ @@ -523,10 +524,6 @@ int r600_blit_init(struct radeon_device *rdev) rdev->r600_blit.max_dim = 8192; - /* pin copy shader into vram if already initialized */ - if (rdev->r600_blit.shader_obj) - goto done; - rdev->r600_blit.state_offset = 0; if (rdev->family >= CHIP_RV770) @@ -551,11 +548,26 @@ int r600_blit_init(struct radeon_device *rdev) obj_size += r6xx_ps_size * 4; obj_size = ALIGN(obj_size, 256); - r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM, - NULL, &rdev->r600_blit.shader_obj); - if (r) { - DRM_ERROR("r600 failed to allocate shader\n"); - return r; + /* pin copy shader into vram if not already initialized */ + if (rdev->r600_blit.shader_obj == NULL) { + r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, + RADEON_GEM_DOMAIN_VRAM, + NULL, &rdev->r600_blit.shader_obj); + if (r) { + DRM_ERROR("r600 failed to allocate shader\n"); + return r; + } + + r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); + if (unlikely(r != 0)) + return r; + r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, + &rdev->r600_blit.shader_gpu_addr); + radeon_bo_unreserve(rdev->r600_blit.shader_obj); + if (r) { + dev_err(rdev->dev, "(%d) pin blit object failed\n", r); + return r; + } } DRM_DEBUG("r6xx blit allocated bo %08x vs %08x ps %08x\n", @@ -586,17 +598,6 @@ int r600_blit_init(struct radeon_device *rdev) radeon_bo_kunmap(rdev->r600_blit.shader_obj); radeon_bo_unreserve(rdev->r600_blit.shader_obj); -done: - r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false); - if (unlikely(r != 0)) - return r; - r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM, - &rdev->r600_blit.shader_gpu_addr); - radeon_bo_unreserve(rdev->r600_blit.shader_obj); - if (r) { - dev_err(rdev->dev, "(%d) pin blit object failed\n", r); - return r; - } radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); return 0; } @@ -666,7 +667,8 @@ static unsigned r600_blit_create_rect(unsigned 
num_gpu_pages, int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb) + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; @@ -689,34 +691,50 @@ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, return r; } + r = radeon_semaphore_create(rdev, sem); + if (r) { + radeon_sa_bo_free(rdev, vb, NULL); + return r; + } + /* calculate number of loops correctly */ ring_size = num_loops * dwords_per_loop; ring_size += rdev->r600_blit.ring_size_common; r = radeon_ring_lock(rdev, ring, ring_size); if (r) { radeon_sa_bo_free(rdev, vb, NULL); + radeon_semaphore_free(rdev, sem, NULL); return r; } + if (radeon_fence_need_sync(*fence, RADEON_RING_TYPE_GFX_INDEX)) { + radeon_semaphore_sync_rings(rdev, *sem, (*fence)->ring, + RADEON_RING_TYPE_GFX_INDEX); + radeon_fence_note_sync(*fence, RADEON_RING_TYPE_GFX_INDEX); + } else { + radeon_semaphore_free(rdev, sem, NULL); + } + rdev->r600_blit.primitives.set_default_state(rdev); rdev->r600_blit.primitives.set_shaders(rdev); return 0; } -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, - struct radeon_sa_bo *vb) +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, + struct radeon_sa_bo *vb, struct radeon_semaphore *sem) { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; int r; - r = radeon_fence_emit(rdev, fence); + r = radeon_fence_emit(rdev, fence, RADEON_RING_TYPE_GFX_INDEX); if (r) { radeon_ring_unlock_undo(rdev, ring); return; } radeon_ring_unlock_commit(rdev, ring); - radeon_sa_bo_free(rdev, &vb, fence); + radeon_sa_bo_free(rdev, &vb, *fence); + radeon_semaphore_free(rdev, &sem, *fence); } void r600_kms_blit_copy(struct radeon_device *rdev, diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index ca87f7afaf23..ab74e6b149e7 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -47,13 +47,17 @@ struct r600_cs_track { u32 npipes; /* value we track */ u32 sq_config; + u32 log_nsamples; u32 nsamples; u32 cb_color_base_last[8]; struct radeon_bo *cb_color_bo[8]; u64 cb_color_bo_mc[8]; - u32 cb_color_bo_offset[8]; - struct radeon_bo *cb_color_frag_bo[8]; /* unused */ - struct radeon_bo *cb_color_tile_bo[8]; /* unused */ + u64 cb_color_bo_offset[8]; + struct radeon_bo *cb_color_frag_bo[8]; + u64 cb_color_frag_offset[8]; + struct radeon_bo *cb_color_tile_bo[8]; + u64 cb_color_tile_offset[8]; + u32 cb_color_mask[8]; u32 cb_color_info[8]; u32 cb_color_view[8]; u32 cb_color_size_idx[8]; /* unused */ @@ -349,10 +353,6 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) unsigned array_mode; u32 format; - if (G_0280A0_TILE_MODE(track->cb_color_info[i])) { - dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n"); - return -EINVAL; - } size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i]; format = G_0280A0_FORMAT(track->cb_color_info[i]); if (!r600_fmt_is_valid_color(format)) { @@ -420,7 +420,8 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) } /* check offset */ - tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) * r600_fmt_get_blocksize(format); + tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) * + r600_fmt_get_blocksize(format) * track->nsamples; switch (array_mode) { default: case 
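/* [Editor's restatement of the decision added to r600_blit_prepare_copy()
 * below; the field names are stand-ins, not the kernel's] A semaphore wait
 * is only kept when the previous fence lives on a different ring that the
 * GFX ring has not already synchronized with: */
#include <stdbool.h>
#include <stddef.h>

struct mock_fence_state {
        int ring;            /* ring the fence signals on */
        bool synced_to_gfx;  /* has radeon_fence_note_sync() already run? */
};

static bool mock_need_semaphore(const struct mock_fence_state *f, int gfx_ring)
{
        if (f == NULL)
                return false;             /* nothing to wait for */
        if (f->ring == gfx_ring)
                return false;             /* same ring: ordering is implicit */
        return !f->synced_to_gfx;         /* cross-ring and not yet synced */
}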
V_0280A0_ARRAY_LINEAR_GENERAL: @@ -441,7 +442,7 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) * broken userspace. */ } else { - dev_warn(p->dev, "%s offset[%d] %d %d %d %lu too big (%d %d) (%d %d %d)\n", + dev_warn(p->dev, "%s offset[%d] %d %llu %d %lu too big (%d %d) (%d %d %d)\n", __func__, i, array_mode, track->cb_color_bo_offset[i], tmp, radeon_bo_size(track->cb_color_bo[i]), @@ -458,6 +459,51 @@ static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i) tmp = S_028060_PITCH_TILE_MAX((pitch / 8) - 1) | S_028060_SLICE_TILE_MAX(slice_tile_max - 1); ib[track->cb_color_size_idx[i]] = tmp; + + /* FMASK/CMASK */ + switch (G_0280A0_TILE_MODE(track->cb_color_info[i])) { + case V_0280A0_TILE_DISABLE: + break; + case V_0280A0_FRAG_ENABLE: + if (track->nsamples > 1) { + uint32_t tile_max = G_028100_FMASK_TILE_MAX(track->cb_color_mask[i]); + /* the tile size is 8x8, but the size is in units of bits. + * for bytes, do just * 8. */ + uint32_t bytes = track->nsamples * track->log_nsamples * 8 * (tile_max + 1); + + if (bytes + track->cb_color_frag_offset[i] > + radeon_bo_size(track->cb_color_frag_bo[i])) { + dev_warn(p->dev, "%s FMASK_TILE_MAX too large " + "(tile_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, tile_max, bytes, + track->cb_color_frag_offset[i], + radeon_bo_size(track->cb_color_frag_bo[i])); + return -EINVAL; + } + } + /* fall through */ + case V_0280A0_CLEAR_ENABLE: + { + uint32_t block_max = G_028100_CMASK_BLOCK_MAX(track->cb_color_mask[i]); + /* One block = 128x128 pixels, one 8x8 tile has 4 bits.. + * (128*128) / (8*8) / 2 = 128 bytes per block. */ + uint32_t bytes = (block_max + 1) * 128; + + if (bytes + track->cb_color_tile_offset[i] > + radeon_bo_size(track->cb_color_tile_bo[i])) { + dev_warn(p->dev, "%s CMASK_BLOCK_MAX too large " + "(block_max=%u, bytes=%u, offset=%llu, bo_size=%lu)\n", + __func__, block_max, bytes, + track->cb_color_tile_offset[i], + radeon_bo_size(track->cb_color_tile_bo[i])); + return -EINVAL; + } + break; + } + default: + dev_warn(p->dev, "%s invalid tile mode\n", __func__); + return -EINVAL; + } return 0; } @@ -566,7 +612,7 @@ static int r600_cs_track_validate_db(struct radeon_cs_parser *p) ntiles = G_028000_SLICE_TILE_MAX(track->db_depth_size) + 1; nviews = G_028004_SLICE_MAX(track->db_depth_view) + 1; - tmp = ntiles * bpe * 64 * nviews; + tmp = ntiles * bpe * 64 * nviews * track->nsamples; if ((tmp + track->db_offset) > radeon_bo_size(track->db_bo)) { dev_warn(p->dev, "z/stencil buffer (%d) too small (0x%08X %d %d %d -> %u have %lu)\n", array_mode, @@ -764,8 +810,10 @@ static int r600_cs_track_check(struct radeon_cs_parser *p) } /* Check depth buffer */ - if (track->db_dirty && (G_028800_STENCIL_ENABLE(track->db_depth_control) || - G_028800_Z_ENABLE(track->db_depth_control))) { + if (track->db_dirty && + G_028010_FORMAT(track->db_depth_info) != V_028010_DEPTH_INVALID && + (G_028800_STENCIL_ENABLE(track->db_depth_control) || + G_028800_Z_ENABLE(track->db_depth_control))) { r = r600_cs_track_validate_db(p); if (r) return r; @@ -1229,6 +1277,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) break; case R_028C04_PA_SC_AA_CONFIG: tmp = G_028C04_MSAA_NUM_SAMPLES(radeon_get_ib_value(p, idx)); + track->log_nsamples = tmp; track->nsamples = 1 << tmp; track->cb_dirty = true; break; @@ -1310,16 +1359,21 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "Broken old userspace ? 
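/* [Editor's check of the CMASK sizing comment above] A clear-mask block
 * covers 128x128 pixels and each 8x8 tile holds 4 bits, so: */
#include <assert.h>

int main(void)
{
        unsigned tiles_per_block = (128 * 128) / (8 * 8);  /* = 256 tiles */
        unsigned bytes_per_block = tiles_per_block / 2;    /* 4 bits = 1/2 byte */

        assert(bytes_per_block == 128);  /* hence (CMASK_BLOCK_MAX + 1) * 128 */
        return 0;
}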
no cb_color0_base supplied before trying to write 0x%08X\n", reg); return -EINVAL; } - ib[idx] = track->cb_color_base_last[tmp]; track->cb_color_frag_bo[tmp] = track->cb_color_bo[tmp]; + track->cb_color_frag_offset[tmp] = track->cb_color_bo_offset[tmp]; + ib[idx] = track->cb_color_base_last[tmp]; } else { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->cb_color_frag_bo[tmp] = reloc->robj; + track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; } break; case R_0280C0_CB_COLOR0_TILE: @@ -1336,16 +1390,35 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx) dev_err(p->dev, "Broken old userspace ? no cb_color0_base supplied before trying to write 0x%08X\n", reg); return -EINVAL; } - ib[idx] = track->cb_color_base_last[tmp]; track->cb_color_tile_bo[tmp] = track->cb_color_bo[tmp]; + track->cb_color_tile_offset[tmp] = track->cb_color_bo_offset[tmp]; + ib[idx] = track->cb_color_base_last[tmp]; } else { r = r600_cs_packet_next_reloc(p, &reloc); if (r) { dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg); return -EINVAL; } - ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); track->cb_color_tile_bo[tmp] = reloc->robj; + track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8; + ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff); + } + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; + } + break; + case R_028100_CB_COLOR0_MASK: + case R_028104_CB_COLOR1_MASK: + case R_028108_CB_COLOR2_MASK: + case R_02810C_CB_COLOR3_MASK: + case R_028110_CB_COLOR4_MASK: + case R_028114_CB_COLOR5_MASK: + case R_028118_CB_COLOR6_MASK: + case R_02811C_CB_COLOR7_MASK: + tmp = (reg - R_028100_CB_COLOR0_MASK) / 4; + track->cb_color_mask[tmp] = ib[idx]; + if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) { + track->cb_dirty = true; } break; case CB_COLOR0_BASE: @@ -1490,7 +1563,7 @@ unsigned r600_mip_minify(unsigned size, unsigned level) } static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, - unsigned w0, unsigned h0, unsigned d0, unsigned format, + unsigned w0, unsigned h0, unsigned d0, unsigned nsamples, unsigned format, unsigned block_align, unsigned height_align, unsigned base_align, unsigned *l0_size, unsigned *mipmap_size) { @@ -1518,7 +1591,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel, depth = r600_mip_minify(d0, i); - size = nbx * nby * blocksize; + size = nbx * nby * blocksize * nsamples; if (nfaces) size *= nfaces; else @@ -1557,13 +1630,14 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, u32 tiling_flags) { struct r600_cs_track *track = p->track; - u32 nfaces, llevel, blevel, w0, h0, d0; - u32 word0, word1, l0_size, mipmap_size, word2, word3; + u32 dim, nfaces, llevel, blevel, w0, h0, d0; + u32 word0, word1, l0_size, mipmap_size, word2, word3, word4, word5; u32 height_align, pitch, pitch_align, depth_align; - u32 array, barray, larray; + u32 barray, larray; u64 base_align; struct array_mode_checker array_check; u32 format; + bool is_array; /* on legacy kernel we don't perform advanced check */ if (p->rdev == NULL) @@ -1581,12 +1655,28 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, word0 |= 
S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1); } word1 = radeon_get_ib_value(p, idx + 1); + word2 = radeon_get_ib_value(p, idx + 2) << 8; + word3 = radeon_get_ib_value(p, idx + 3) << 8; + word4 = radeon_get_ib_value(p, idx + 4); + word5 = radeon_get_ib_value(p, idx + 5); + dim = G_038000_DIM(word0); w0 = G_038000_TEX_WIDTH(word0) + 1; + pitch = (G_038000_PITCH(word0) + 1) * 8; h0 = G_038004_TEX_HEIGHT(word1) + 1; d0 = G_038004_TEX_DEPTH(word1); + format = G_038004_DATA_FORMAT(word1); + blevel = G_038010_BASE_LEVEL(word4); + llevel = G_038014_LAST_LEVEL(word5); + /* pitch in texels */ + array_check.array_mode = G_038000_TILE_MODE(word0); + array_check.group_size = track->group_size; + array_check.nbanks = track->nbanks; + array_check.npipes = track->npipes; + array_check.nsamples = 1; + array_check.blocksize = r600_fmt_get_blocksize(format); nfaces = 1; - array = 0; - switch (G_038000_DIM(word0)) { + is_array = false; + switch (dim) { case V_038000_SQ_TEX_DIM_1D: case V_038000_SQ_TEX_DIM_2D: case V_038000_SQ_TEX_DIM_3D: @@ -1599,29 +1689,25 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, break; case V_038000_SQ_TEX_DIM_1D_ARRAY: case V_038000_SQ_TEX_DIM_2D_ARRAY: - array = 1; + is_array = true; break; - case V_038000_SQ_TEX_DIM_2D_MSAA: case V_038000_SQ_TEX_DIM_2D_ARRAY_MSAA: + is_array = true; + /* fall through */ + case V_038000_SQ_TEX_DIM_2D_MSAA: + array_check.nsamples = 1 << llevel; + llevel = 0; + break; default: dev_warn(p->dev, "this kernel doesn't support %d texture dim\n", G_038000_DIM(word0)); return -EINVAL; } - format = G_038004_DATA_FORMAT(word1); if (!r600_fmt_is_valid_texture(format, p->family)) { dev_warn(p->dev, "%s:%d texture invalid format %d\n", __func__, __LINE__, format); return -EINVAL; } - /* pitch in texels */ - pitch = (G_038000_PITCH(word0) + 1) * 8; - array_check.array_mode = G_038000_TILE_MODE(word0); - array_check.group_size = track->group_size; - array_check.nbanks = track->nbanks; - array_check.npipes = track->npipes; - array_check.nsamples = 1; - array_check.blocksize = r600_fmt_get_blocksize(format); if (r600_get_array_mode_alignment(&array_check, &pitch_align, &height_align, &depth_align, &base_align)) { dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n", @@ -1647,24 +1733,17 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, return -EINVAL; } - word2 = radeon_get_ib_value(p, idx + 2) << 8; - word3 = radeon_get_ib_value(p, idx + 3) << 8; - - word0 = radeon_get_ib_value(p, idx + 4); - word1 = radeon_get_ib_value(p, idx + 5); - blevel = G_038010_BASE_LEVEL(word0); - llevel = G_038014_LAST_LEVEL(word1); if (blevel > llevel) { dev_warn(p->dev, "texture blevel %d > llevel %d\n", blevel, llevel); } - if (array == 1) { - barray = G_038014_BASE_ARRAY(word1); - larray = G_038014_LAST_ARRAY(word1); + if (is_array) { + barray = G_038014_BASE_ARRAY(word5); + larray = G_038014_LAST_ARRAY(word5); nfaces = larray - barray + 1; } - r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, format, + r600_texture_size(nfaces, blevel, llevel, w0, h0, d0, array_check.nsamples, format, pitch_align, height_align, base_align, &l0_size, &mipmap_size); /* using get ib will give us the offset into the texture bo */ @@ -1677,7 +1756,6 @@ static int r600_check_texture_resource(struct radeon_cs_parser *p, u32 idx, return -EINVAL; } /* using get ib will give us the offset into the mipmap bo */ - word3 = radeon_get_ib_value(p, idx + 3) << 8; if ((mipmap_size + word3) > radeon_bo_size(mipmap)) { /*dev_warn(p->dev, "mipmap bo 
too small (%d %d %d %d %d %d -> %d have %ld)\n", w0, h0, format, blevel, nlevels, word3, mipmap_size, radeon_bo_size(texture));*/ diff --git a/drivers/gpu/drm/radeon/r600_hdmi.c b/drivers/gpu/drm/radeon/r600_hdmi.c index 82a0a4c919c0..e3558c3ef24a 100644 --- a/drivers/gpu/drm/radeon/r600_hdmi.c +++ b/drivers/gpu/drm/radeon/r600_hdmi.c @@ -519,8 +519,7 @@ void r600_hdmi_enable(struct drm_encoder *encoder) if (rdev->irq.installed) { /* if irq is available use it */ - rdev->irq.afmt[dig->afmt->id] = true; - radeon_irq_set(rdev); + radeon_irq_kms_enable_afmt(rdev, dig->afmt->id); } dig->afmt->enabled = true; @@ -556,8 +555,7 @@ void r600_hdmi_disable(struct drm_encoder *encoder) offset, radeon_encoder->encoder_id); /* disable irq */ - rdev->irq.afmt[dig->afmt->id] = false; - radeon_irq_set(rdev); + radeon_irq_kms_disable_afmt(rdev, dig->afmt->id); /* Older chipsets not handled by AtomBIOS */ if (rdev->family >= CHIP_R600 && !ASIC_IS_DCE3(rdev)) { diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h index 025fd5b6c08c..bdb69a63062f 100644 --- a/drivers/gpu/drm/radeon/r600d.h +++ b/drivers/gpu/drm/radeon/r600d.h @@ -92,6 +92,20 @@ #define R_028094_CB_COLOR5_VIEW 0x028094 #define R_028098_CB_COLOR6_VIEW 0x028098 #define R_02809C_CB_COLOR7_VIEW 0x02809C +#define R_028100_CB_COLOR0_MASK 0x028100 +#define S_028100_CMASK_BLOCK_MAX(x) (((x) & 0xFFF) << 0) +#define G_028100_CMASK_BLOCK_MAX(x) (((x) >> 0) & 0xFFF) +#define C_028100_CMASK_BLOCK_MAX 0xFFFFF000 +#define S_028100_FMASK_TILE_MAX(x) (((x) & 0xFFFFF) << 12) +#define G_028100_FMASK_TILE_MAX(x) (((x) >> 12) & 0xFFFFF) +#define C_028100_FMASK_TILE_MAX 0x00000FFF +#define R_028104_CB_COLOR1_MASK 0x028104 +#define R_028108_CB_COLOR2_MASK 0x028108 +#define R_02810C_CB_COLOR3_MASK 0x02810C +#define R_028110_CB_COLOR4_MASK 0x028110 +#define R_028114_CB_COLOR5_MASK 0x028114 +#define R_028118_CB_COLOR6_MASK 0x028118 +#define R_02811C_CB_COLOR7_MASK 0x02811C #define CB_COLOR0_INFO 0x280a0 # define CB_FORMAT(x) ((x) << 2) # define CB_ARRAY_MODE(x) ((x) << 8) @@ -153,6 +167,9 @@ #define CONFIG_MEMSIZE 0x5428 #define CONFIG_CNTL 0x5424 +#define CP_STALLED_STAT1 0x8674 +#define CP_STALLED_STAT2 0x8678 +#define CP_BUSY_STAT 0x867C #define CP_STAT 0x8680 #define CP_COHER_BASE 0x85F8 #define CP_DEBUG 0xC1FC @@ -599,6 +616,9 @@ #define RLC_HB_WPTR 0x3f1c #define RLC_HB_WPTR_LSB_ADDR 0x3f14 #define RLC_HB_WPTR_MSB_ADDR 0x3f18 +#define RLC_GPU_CLOCK_COUNT_LSB 0x3f38 +#define RLC_GPU_CLOCK_COUNT_MSB 0x3f3c +#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x3f40 #define RLC_MC_CNTL 0x3f44 #define RLC_UCODE_CNTL 0x3f48 #define RLC_UCODE_ADDR 0x3f2c @@ -1394,6 +1414,9 @@ #define S_0280A0_TILE_MODE(x) (((x) & 0x3) << 18) #define G_0280A0_TILE_MODE(x) (((x) >> 18) & 0x3) #define C_0280A0_TILE_MODE 0xFFF3FFFF +#define V_0280A0_TILE_DISABLE 0 +#define V_0280A0_CLEAR_ENABLE 1 +#define V_0280A0_FRAG_ENABLE 2 #define S_0280A0_BLEND_CLAMP(x) (((x) & 0x1) << 20) #define G_0280A0_BLEND_CLAMP(x) (((x) >> 20) & 0x1) #define C_0280A0_BLEND_CLAMP 0xFFEFFFFF diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index fefcca55c1eb..59a15315ae9f 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -113,7 +113,6 @@ extern int radeon_lockup_timeout; /* fence seq are set to this number when signaled */ #define RADEON_FENCE_SIGNALED_SEQ 0LL -#define RADEON_FENCE_NOTEMITED_SEQ (~0LL) /* internal ring indices */ /* r1xx+ has gfx CP ring */ @@ -143,65 +142,8 @@ struct radeon_device; /* * BIOS. 
*/ -#define ATRM_BIOS_PAGE 4096 - -#if defined(CONFIG_VGA_SWITCHEROO) -bool radeon_atrm_supported(struct pci_dev *pdev); -int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len); -#else -static inline bool radeon_atrm_supported(struct pci_dev *pdev) -{ - return false; -} - -static inline int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len){ - return -EINVAL; -} -#endif bool radeon_get_bios(struct radeon_device *rdev); - -/* - * Mutex which allows recursive locking from the same process. - */ -struct radeon_mutex { - struct mutex mutex; - struct task_struct *owner; - int level; -}; - -static inline void radeon_mutex_init(struct radeon_mutex *mutex) -{ - mutex_init(&mutex->mutex); - mutex->owner = NULL; - mutex->level = 0; -} - -static inline void radeon_mutex_lock(struct radeon_mutex *mutex) -{ - if (mutex_trylock(&mutex->mutex)) { - /* The mutex was unlocked before, so it's ours now */ - mutex->owner = current; - } else if (mutex->owner != current) { - /* Another process locked the mutex, take it */ - mutex_lock(&mutex->mutex); - mutex->owner = current; - } - /* Otherwise the mutex was already locked by this process */ - - mutex->level++; -} - -static inline void radeon_mutex_unlock(struct radeon_mutex *mutex) -{ - if (--mutex->level > 0) - return; - - mutex->owner = NULL; - mutex_unlock(&mutex->mutex); -} - - /* * Dummy page */ @@ -258,8 +200,8 @@ struct radeon_fence_driver { uint32_t scratch_reg; uint64_t gpu_addr; volatile uint32_t *cpu_addr; - /* seq is protected by ring emission lock */ - uint64_t seq; + /* sync_seq is protected by ring emission lock */ + uint64_t sync_seq[RADEON_NUM_RINGS]; atomic64_t last_seq; unsigned long last_activity; bool initialized; @@ -277,19 +219,39 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); -int radeon_fence_create(struct radeon_device *rdev, struct radeon_fence **fence, int ring); -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence); +int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr); struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence); void radeon_fence_unref(struct radeon_fence **fence); unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring); +bool radeon_fence_need_sync(struct radeon_fence *fence, int ring); +void radeon_fence_note_sync(struct radeon_fence *fence, int ring); +static inline struct radeon_fence *radeon_fence_later(struct radeon_fence *a, + struct radeon_fence *b) +{ + if (!a) { + return b; + } + + if (!b) { + return a; + } + + BUG_ON(a->ring != b->ring); + + if (a->seq > b->seq) { + return a; + } else { + return b; + } +} /* * Tiling registers @@ -323,6 +285,7 @@ struct radeon_bo_va { uint64_t soffset; uint64_t eoffset; uint32_t flags; + struct radeon_fence *fence; bool valid; }; @@ -385,7 +348,7 @@ struct radeon_bo_list { * alignment). 
*/ struct radeon_sa_manager { - spinlock_t lock; + wait_queue_head_t wq; struct radeon_bo *bo; struct list_head *hole; struct list_head flist[RADEON_NUM_RINGS]; @@ -451,10 +414,9 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring); + int signaler, int waiter); void radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence); /* @@ -597,21 +559,18 @@ union radeon_irq_stat_regs { #define RADEON_MAX_AFMT_BLOCKS 6 struct radeon_irq { - bool installed; - bool sw_int[RADEON_NUM_RINGS]; - bool crtc_vblank_int[RADEON_MAX_CRTCS]; - bool pflip[RADEON_MAX_CRTCS]; - wait_queue_head_t vblank_queue; - bool hpd[RADEON_MAX_HPD_PINS]; - bool gui_idle; - bool gui_idle_acked; - wait_queue_head_t idle_queue; - bool afmt[RADEON_MAX_AFMT_BLOCKS]; - spinlock_t sw_lock; - int sw_refcount[RADEON_NUM_RINGS]; - union radeon_irq_stat_regs stat_regs; - spinlock_t pflip_lock[RADEON_MAX_CRTCS]; - int pflip_refcount[RADEON_MAX_CRTCS]; + bool installed; + spinlock_t lock; + atomic_t ring_int[RADEON_NUM_RINGS]; + bool crtc_vblank_int[RADEON_MAX_CRTCS]; + atomic_t pflip[RADEON_MAX_CRTCS]; + wait_queue_head_t vblank_queue; + bool hpd[RADEON_MAX_HPD_PINS]; + bool gui_idle; + bool gui_idle_acked; + wait_queue_head_t idle_queue; + bool afmt[RADEON_MAX_AFMT_BLOCKS]; + union radeon_irq_stat_regs stat_regs; }; int radeon_irq_kms_init(struct radeon_device *rdev); @@ -620,6 +579,11 @@ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring); void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring); void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc); void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc); +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block); +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask); +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask); +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev); /* * CP & rings. 
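Worth noting in the radeon_irq rework above: the per-consumer bool arrays and dedicated spinlocks give way to a single irq.lock plus atomic_t reference counts, so ring and pageflip interrupt users can stack get/put requests without racing radeon_irq_set(). A rough sketch of the pattern the new helpers follow (the body here is an assumption drawn from the declarations, not quoted from the patch):

/* sketch: the first reference enables the ring interrupt; the hw
 * enable bits are only reprogrammed under irq.lock */
void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
{
	unsigned long irqflags;

	if (!rdev->ddev->irq_enabled)
		return;

	if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) {
		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		radeon_irq_set(rdev);
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}
}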
@@ -630,9 +594,11 @@ struct radeon_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; + int ring; struct radeon_fence *fence; unsigned vm_id; bool is_const_ib; + struct radeon_fence *sync_to[RADEON_NUM_RINGS]; struct radeon_semaphore *semaphore; }; @@ -642,6 +608,9 @@ struct radeon_ring { unsigned rptr; unsigned rptr_offs; unsigned rptr_reg; + unsigned rptr_save_reg; + u64 next_rptr_gpu_addr; + volatile u32 *next_rptr_cpu_addr; unsigned wptr; unsigned wptr_old; unsigned wptr_reg; @@ -657,6 +626,7 @@ struct radeon_ring { u32 ptr_reg_shift; u32 ptr_reg_mask; u32 nop; + u32 idx; }; /* @@ -690,6 +660,7 @@ struct radeon_vm_funcs { }; struct radeon_vm_manager { + struct mutex lock; struct list_head lru_vm; uint32_t use_bitmap; struct radeon_sa_manager sa_manager; @@ -718,13 +689,10 @@ struct r600_ih { struct radeon_bo *ring_obj; volatile uint32_t *ring; unsigned rptr; - unsigned rptr_offs; - unsigned wptr; - unsigned wptr_old; unsigned ring_size; uint64_t gpu_addr; uint32_t ptr_mask; - spinlock_t lock; + atomic_t lock; bool enabled; }; @@ -757,8 +725,6 @@ struct r600_blit { u32 state_len; }; -void r600_blit_suspend(struct radeon_device *rdev); - /* * SI RLC stuff */ @@ -774,14 +740,14 @@ struct si_rlc { int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size); void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib); -int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib); +int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib); int radeon_ib_pool_init(struct radeon_device *rdev); void radeon_ib_pool_fini(struct radeon_device *rdev); -int radeon_ib_pool_start(struct radeon_device *rdev); -int radeon_ib_pool_suspend(struct radeon_device *rdev); int radeon_ib_ring_tests(struct radeon_device *rdev); /* Ring access between begin & end cannot sleep */ -int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp); +bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, + struct radeon_ring *ring); void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp); int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ndw); @@ -793,6 +759,10 @@ int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring); void radeon_ring_lockup_update(struct radeon_ring *ring); bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring); +unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, + uint32_t **data); +int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned size, uint32_t *data); int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *cp, unsigned ring_size, unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop); @@ -891,6 +861,7 @@ struct radeon_wb { }; #define RADEON_WB_SCRATCH_OFFSET 0 +#define RADEON_WB_RING0_NEXT_RPTR 256 #define RADEON_WB_CP_RPTR_OFFSET 1024 #define RADEON_WB_CP1_RPTR_OFFSET 1280 #define RADEON_WB_CP2_RPTR_OFFSET 1536 @@ -1039,11 +1010,12 @@ struct radeon_power_state { struct radeon_pm { struct mutex mutex; + /* write locked while reprogramming mclk */ + struct rw_semaphore mclk_lock; u32 active_crtcs; int active_crtc_count; int req_vblank; bool vblank_sync; - bool gui_idle; 
fixed20_12 max_bandwidth; fixed20_12 igp_sideport_mclk; fixed20_12 igp_system_mclk; @@ -1192,20 +1164,20 @@ struct radeon_asic { uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 blit_ring_index; int (*dma)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); u32 dma_ring_index; /* method used for bo copy */ int (*copy)(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); /* ring used for bo copies */ u32 copy_ring_index; } copy; @@ -1467,6 +1439,7 @@ struct radeon_device { struct device *dev; struct drm_device *ddev; struct pci_dev *pdev; + struct rw_semaphore exclusive_lock; /* ASIC */ union radeon_asic_config config; enum radeon_family family; @@ -1512,7 +1485,6 @@ struct radeon_device { struct radeon_gem gem; struct radeon_pm pm; uint32_t bios_scratch[RADEON_BIOS_NUM_SCRATCH]; - struct radeon_mutex cs_mutex; struct radeon_wb wb; struct radeon_dummy_page dummy_page; bool shutdown; @@ -1534,7 +1506,6 @@ struct radeon_device { struct work_struct audio_work; int num_crtc; /* number of crtcs */ struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */ - struct mutex vram_mutex; bool audio_enabled; struct r600_audio audio_status; /* audio stuff */ struct notifier_block acpi_nb; @@ -1548,6 +1519,7 @@ struct radeon_device { unsigned debugfs_count; /* virtual memory */ struct radeon_vm_manager vm_manager; + struct mutex gpu_clock_mutex; }; int radeon_device_init(struct radeon_device *rdev, @@ -1748,11 +1720,11 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_pm_finish(rdev) (rdev)->asic->pm.finish((rdev)) #define radeon_pm_init_profile(rdev) (rdev)->asic->pm.init_profile((rdev)) #define radeon_pm_get_dynpm_state(rdev) (rdev)->asic->pm.get_dynpm_state((rdev)) -#define radeon_pre_page_flip(rdev, crtc) rdev->asic->pflip.pre_page_flip((rdev), (crtc)) -#define radeon_page_flip(rdev, crtc, base) rdev->asic->pflip.page_flip((rdev), (crtc), (base)) -#define radeon_post_page_flip(rdev, crtc) rdev->asic->pflip.post_page_flip((rdev), (crtc)) -#define radeon_wait_for_vblank(rdev, crtc) rdev->asic->display.wait_for_vblank((rdev), (crtc)) -#define radeon_mc_wait_for_idle(rdev) rdev->asic->mc_wait_for_idle((rdev)) +#define radeon_pre_page_flip(rdev, crtc) (rdev)->asic->pflip.pre_page_flip((rdev), (crtc)) +#define radeon_page_flip(rdev, crtc, base) (rdev)->asic->pflip.page_flip((rdev), (crtc), (base)) +#define radeon_post_page_flip(rdev, crtc) (rdev)->asic->pflip.post_page_flip((rdev), (crtc)) +#define radeon_wait_for_vblank(rdev, crtc) (rdev)->asic->display.wait_for_vblank((rdev), (crtc)) +#define radeon_mc_wait_for_idle(rdev) (rdev)->asic->mc_wait_for_idle((rdev)) /* Common functions */ /* AGP */ @@ -1785,8 +1757,6 @@ extern void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size */ int radeon_vm_manager_init(struct radeon_device *rdev); void radeon_vm_manager_fini(struct radeon_device *rdev); -int radeon_vm_manager_start(struct radeon_device *rdev); -int radeon_vm_manager_suspend(struct radeon_device *rdev); int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm); void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm); int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm); diff --git a/drivers/gpu/drm/radeon/radeon_asic.c 
b/drivers/gpu/drm/radeon/radeon_asic.c index f533df5f7d50..973417c4b014 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.c +++ b/drivers/gpu/drm/radeon/radeon_asic.c @@ -40,6 +40,16 @@ /* * Registers accessors functions. */ +/** + * radeon_invalid_rreg - dummy reg read function + * + * @rdev: radeon device pointer + * @reg: offset of register + * + * Dummy register read function. Used for register blocks + * that certain asics don't have (all asics). + * Returns the value in the register. + */ static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg) { DRM_ERROR("Invalid callback to read register 0x%04X\n", reg); @@ -47,6 +57,16 @@ static uint32_t radeon_invalid_rreg(struct radeon_device *rdev, uint32_t reg) return 0; } +/** + * radeon_invalid_wreg - dummy reg write function + * + * @rdev: radeon device pointer + * @reg: offset of register + * @v: value to write to the register + * + * Dummy register write function. Used for register blocks + * that certain asics don't have (all asics). + */ static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v) { DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n", @@ -54,6 +74,14 @@ static void radeon_invalid_wreg(struct radeon_device *rdev, uint32_t reg, uint32 BUG_ON(1); } +/** + * radeon_register_accessor_init - sets up the register accessor callbacks + * + * @rdev: radeon device pointer + * + * Sets up the register accessor callbacks for various register + * apertures. Not all asics have all apertures (all asics). + */ static void radeon_register_accessor_init(struct radeon_device *rdev) { rdev->mc_rreg = &radeon_invalid_rreg; @@ -102,6 +130,14 @@ static void radeon_register_accessor_init(struct radeon_device *rdev) /* helper to disable agp */ +/** + * radeon_agp_disable - AGP disable helper function + * + * @rdev: radeon device pointer + * + * Removes AGP flags and changes the gart callbacks on AGP + * cards when using the internal gart rather than AGP (all asics). + */ void radeon_agp_disable(struct radeon_device *rdev) { rdev->flags &= ~RADEON_IS_AGP; @@ -1608,6 +1644,16 @@ static struct radeon_asic si_asic = { }, }; +/** + * radeon_asic_init - register asic specific callbacks + * + * @rdev: radeon device pointer + * + * Registers the appropriate asic specific callbacks for each + * chip family. Also sets other asic specific info like the number + * of crtcs and the register aperture accessors (all asics). + * Returns 0 for success. + */ int radeon_asic_init(struct radeon_device *rdev) { radeon_register_accessor_init(rdev);
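With the table registered by radeon_asic_init(), core code never calls a chip-specific function directly; it dispatches through rdev->asic, and with this series the copy callbacks take a struct radeon_fence ** that the implementation fills in. A minimal sketch of such a dispatch, matching the ->copy member declared in radeon.h above (the wrapper name is illustrative, not part of the patch):

/* sketch: route a buffer copy through the per-family asic table */
static int sketch_asic_copy(struct radeon_device *rdev,
			    uint64_t src_offset, uint64_t dst_offset,
			    unsigned num_gpu_pages,
			    struct radeon_fence **fence)
{
	/* ->copy.copy points at e.g. r600_copy_blit on r6xx parts */
	return rdev->asic->copy.copy(rdev, src_offset, dst_offset,
				     num_gpu_pages, fence);
}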
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h index e76a941ef14e..18c38d14c8cd 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -79,7 +79,7 @@ int r100_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); int r100_set_surface_reg(struct radeon_device *rdev, int reg, uint32_t tiling_flags, uint32_t pitch, uint32_t offset, uint32_t obj_size); @@ -103,7 +103,6 @@ int r100_pci_gart_enable(struct radeon_device *rdev); void r100_pci_gart_disable(struct radeon_device *rdev); int r100_debugfs_mc_info_init(struct radeon_device *rdev); int r100_gui_wait_for_idle(struct radeon_device *rdev); -void r100_ib_fini(struct radeon_device *rdev); int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring); void r100_irq_disable(struct radeon_device *rdev); void r100_mc_stop(struct radeon_device *rdev, struct r100_mc_save *save); @@ -144,7 +143,7 @@ extern int r200_copy_dma(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, unsigned num_gpu_pages, - struct radeon_fence *fence); + struct radeon_fence **fence); void r200_set_safe_registers(struct radeon_device *rdev); /* @@ -256,13 +255,10 @@ extern int rs690_mc_wait_for_idle(struct radeon_device *rdev); * rv515 */ struct rv515_mc_save { - u32 d1vga_control; - u32 d2vga_control; u32 vga_render_control; u32 vga_hdp_control; - u32 d1crtc_control; - u32 d2crtc_control; }; + int rv515_init(struct radeon_device *rdev); void rv515_fini(struct radeon_device *rdev); uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg); @@ -318,7 +314,7 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib); int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp); int r600_copy_blit(struct radeon_device *rdev, uint64_t src_offset, uint64_t dst_offset, - unsigned num_gpu_pages, struct radeon_fence *fence); + unsigned num_gpu_pages, struct radeon_fence **fence); void r600_hpd_init(struct radeon_device *rdev); void r600_hpd_fini(struct radeon_device *rdev); bool r600_hpd_sense(struct radeon_device *rdev, enum radeon_hpd_id hpd); @@ -363,14 +359,16 @@ int r600_hdmi_buffer_status_changed(struct drm_encoder *encoder); void r600_hdmi_update_audio_settings(struct drm_encoder *encoder); /* r600 blit */ int r600_blit_prepare_copy(struct radeon_device *rdev, unsigned num_gpu_pages, - struct radeon_sa_bo **vb); -void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence, - struct radeon_sa_bo *vb); + struct radeon_fence **fence, struct radeon_sa_bo **vb, + struct radeon_semaphore **sem); +void r600_blit_done_copy(struct radeon_device *rdev, struct radeon_fence **fence, + struct radeon_sa_bo *vb, struct radeon_semaphore *sem); void r600_kms_blit_copy(struct radeon_device *rdev, u64 src_gpu_addr, u64 dst_gpu_addr, unsigned num_gpu_pages, struct radeon_sa_bo *vb); int r600_mc_wait_for_idle(struct radeon_device *rdev); +uint64_t r600_get_gpu_clock(struct radeon_device *rdev); /* * rv770,rv730,rv710,rv740 @@ -389,11 +387,10 @@ void r700_cp_fini(struct radeon_device *rdev); * evergreen */ struct evergreen_mc_save { - u32 vga_control[6]; u32 vga_render_control; u32 vga_hdp_control; - u32 crtc_control[6]; }; + void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev); int evergreen_init(struct radeon_device 
*rdev); void evergreen_fini(struct radeon_device *rdev); @@ -472,5 +469,6 @@ int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id); void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm); void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm); int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib); +uint64_t si_get_gpu_clock(struct radeon_device *rdev); #endif diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c index b1e3820df363..d67d4f3eb6f4 100644 --- a/drivers/gpu/drm/radeon/radeon_atombios.c +++ b/drivers/gpu/drm/radeon/radeon_atombios.c @@ -452,7 +452,7 @@ static bool radeon_atom_apply_quirks(struct drm_device *dev, } /* Fujitsu D3003-S2 board lists DVI-I as DVI-D and VGA */ - if ((dev->pdev->device == 0x9802) && + if (((dev->pdev->device == 0x9802) || (dev->pdev->device == 0x9806)) && (dev->pdev->subsystem_vendor == 0x1734) && (dev->pdev->subsystem_device == 0x11bd)) { if (*connector_type == DRM_MODE_CONNECTOR_VGA) { @@ -1263,6 +1263,8 @@ bool radeon_atom_get_clock_info(struct drm_device *dev) union igp_info { struct _ATOM_INTEGRATED_SYSTEM_INFO info; struct _ATOM_INTEGRATED_SYSTEM_INFO_V2 info_2; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 info_6; + struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_7 info_7; }; bool radeon_atombios_sideport_present(struct radeon_device *rdev) @@ -1390,27 +1392,50 @@ static void radeon_atombios_get_igp_ss_overrides(struct radeon_device *rdev, struct radeon_mode_info *mode_info = &rdev->mode_info; int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo); u16 data_offset, size; - struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 *igp_info; + union igp_info *igp_info; u8 frev, crev; u16 percentage = 0, rate = 0; /* get any igp specific overrides */ if (atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { - igp_info = (struct _ATOM_INTEGRATED_SYSTEM_INFO_V6 *) + igp_info = (union igp_info *) (mode_info->atom_context->bios + data_offset); - switch (id) { - case ASIC_INTERNAL_SS_ON_TMDS: - percentage = le16_to_cpu(igp_info->usDVISSPercentage); - rate = le16_to_cpu(igp_info->usDVISSpreadRateIn10Hz); + switch (crev) { + case 6: + switch (id) { + case ASIC_INTERNAL_SS_ON_TMDS: + percentage = le16_to_cpu(igp_info->info_6.usDVISSPercentage); + rate = le16_to_cpu(igp_info->info_6.usDVISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_HDMI: + percentage = le16_to_cpu(igp_info->info_6.usHDMISSPercentage); + rate = le16_to_cpu(igp_info->info_6.usHDMISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_LVDS: + percentage = le16_to_cpu(igp_info->info_6.usLvdsSSPercentage); + rate = le16_to_cpu(igp_info->info_6.usLvdsSSpreadRateIn10Hz); + break; + } break; - case ASIC_INTERNAL_SS_ON_HDMI: - percentage = le16_to_cpu(igp_info->usHDMISSPercentage); - rate = le16_to_cpu(igp_info->usHDMISSpreadRateIn10Hz); + case 7: + switch (id) { + case ASIC_INTERNAL_SS_ON_TMDS: + percentage = le16_to_cpu(igp_info->info_7.usDVISSPercentage); + rate = le16_to_cpu(igp_info->info_7.usDVISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_HDMI: + percentage = le16_to_cpu(igp_info->info_7.usHDMISSPercentage); + rate = le16_to_cpu(igp_info->info_7.usHDMISSpreadRateIn10Hz); + break; + case ASIC_INTERNAL_SS_ON_LVDS: + percentage = le16_to_cpu(igp_info->info_7.usLvdsSSPercentage); + rate = le16_to_cpu(igp_info->info_7.usLvdsSSpreadRateIn10Hz); + break; + } break; - case ASIC_INTERNAL_SS_ON_LVDS: - percentage = le16_to_cpu(igp_info->usLvdsSSPercentage); - rate 
= le16_to_cpu(igp_info->usLvdsSSpreadRateIn10Hz); + default: + DRM_ERROR("Unsupported IGP table: %d %d\n", frev, crev); break; } if (percentage) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 98724fcb0088..2a2cf0b88a28 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -30,57 +30,8 @@ static struct radeon_atpx_priv { /* handle for device - and atpx */ acpi_handle dhandle; acpi_handle atpx_handle; - acpi_handle atrm_handle; } radeon_atpx_priv; -/* retrieve the ROM in 4k blocks */ -static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, - int offset, int len) -{ - acpi_status status; - union acpi_object atrm_arg_elements[2], *obj; - struct acpi_object_list atrm_arg; - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; - - atrm_arg.count = 2; - atrm_arg.pointer = &atrm_arg_elements[0]; - - atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; - atrm_arg_elements[0].integer.value = offset; - - atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; - atrm_arg_elements[1].integer.value = len; - - status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); - if (ACPI_FAILURE(status)) { - printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); - return -ENODEV; - } - - obj = (union acpi_object *)buffer.pointer; - memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); - len = obj->buffer.length; - kfree(buffer.pointer); - return len; -} - -bool radeon_atrm_supported(struct pci_dev *pdev) -{ - /* get the discrete ROM only via ATRM */ - if (!radeon_atpx_priv.atpx_detected) - return false; - - if (radeon_atpx_priv.dhandle == DEVICE_ACPI_HANDLE(&pdev->dev)) - return false; - return true; -} - - -int radeon_atrm_get_bios_chunk(uint8_t *bios, int offset, int len) -{ - return radeon_atrm_call(radeon_atpx_priv.atrm_handle, bios, offset, len); -} - static int radeon_atpx_get_version(acpi_handle handle) { acpi_status status; @@ -198,7 +149,7 @@ static int radeon_atpx_power_state(enum vga_switcheroo_client_id id, static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) { - acpi_handle dhandle, atpx_handle, atrm_handle; + acpi_handle dhandle, atpx_handle; acpi_status status; dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); @@ -209,13 +160,8 @@ static bool radeon_atpx_pci_probe_handle(struct pci_dev *pdev) if (ACPI_FAILURE(status)) return false; - status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); - if (ACPI_FAILURE(status)) - return false; - radeon_atpx_priv.dhandle = dhandle; radeon_atpx_priv.atpx_handle = atpx_handle; - radeon_atpx_priv.atrm_handle = atrm_handle; return true; } diff --git a/drivers/gpu/drm/radeon/radeon_benchmark.c b/drivers/gpu/drm/radeon/radeon_benchmark.c index 364f5b1a04b9..bedda9caadd9 100644 --- a/drivers/gpu/drm/radeon/radeon_benchmark.c +++ b/drivers/gpu/drm/radeon/radeon_benchmark.c @@ -45,20 +45,14 @@ static int radeon_benchmark_do_move(struct radeon_device *rdev, unsigned size, for (i = 0; i < n; i++) { switch (flag) { case RADEON_BENCHMARK_COPY_DMA: - r = radeon_fence_create(rdev, &fence, radeon_copy_dma_ring_index(rdev)); - if (r) - return r; r = radeon_copy_dma(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; case RADEON_BENCHMARK_COPY_BLIT: - r = radeon_fence_create(rdev, &fence, radeon_copy_blit_ring_index(rdev)); - if (r) - return r; r = radeon_copy_blit(rdev, saddr, daddr, size / RADEON_GPU_PAGE_SIZE, - fence); + &fence); break; default: DRM_ERROR("Unknown copy method\n"); diff 
--git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c index 501f4881e5aa..d306cc8fdeaa 100644 --- a/drivers/gpu/drm/radeon/radeon_bios.c +++ b/drivers/gpu/drm/radeon/radeon_bios.c @@ -32,6 +32,7 @@ #include <linux/vga_switcheroo.h> #include <linux/slab.h> +#include <linux/acpi.h> /* * BIOS. */ @@ -98,16 +99,81 @@ static bool radeon_read_bios(struct radeon_device *rdev) return true; } +#ifdef CONFIG_ACPI /* ATRM is used to get the BIOS on the discrete cards in * dual-gpu systems. */ +/* retrieve the ROM in 4k blocks */ +#define ATRM_BIOS_PAGE 4096 +/** + * radeon_atrm_call - fetch a chunk of the vbios + * + * @atrm_handle: acpi ATRM handle + * @bios: vbios image pointer + * @offset: offset of vbios image data to fetch + * @len: length of vbios image data to fetch + * + * Executes ATRM to fetch a chunk of the discrete + * vbios image on PX systems (all asics). + * Returns the length of the buffer fetched. + */ +static int radeon_atrm_call(acpi_handle atrm_handle, uint8_t *bios, + int offset, int len) +{ + acpi_status status; + union acpi_object atrm_arg_elements[2], *obj; + struct acpi_object_list atrm_arg; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL}; + + atrm_arg.count = 2; + atrm_arg.pointer = &atrm_arg_elements[0]; + + atrm_arg_elements[0].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[0].integer.value = offset; + + atrm_arg_elements[1].type = ACPI_TYPE_INTEGER; + atrm_arg_elements[1].integer.value = len; + + status = acpi_evaluate_object(atrm_handle, NULL, &atrm_arg, &buffer); + if (ACPI_FAILURE(status)) { + printk("failed to evaluate ATRM got %s\n", acpi_format_exception(status)); + return -ENODEV; + } + + obj = (union acpi_object *)buffer.pointer; + memcpy(bios+offset, obj->buffer.pointer, obj->buffer.length); + len = obj->buffer.length; + kfree(buffer.pointer); + return len; +} + static bool radeon_atrm_get_bios(struct radeon_device *rdev) { int ret; int size = 256 * 1024; int i; + struct pci_dev *pdev = NULL; + acpi_handle dhandle, atrm_handle; + acpi_status status; + bool found = false; + + /* ATRM is for the discrete card only */ + if (rdev->flags & RADEON_IS_IGP) + return false; + + while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { + dhandle = DEVICE_ACPI_HANDLE(&pdev->dev); + if (!dhandle) + continue; + + status = acpi_get_handle(dhandle, "ATRM", &atrm_handle); + if (!ACPI_FAILURE(status)) { + found = true; + break; + } + } - if (!radeon_atrm_supported(rdev->pdev)) + if (!found) return false; rdev->bios = kmalloc(size, GFP_KERNEL); @@ -117,9 +183,10 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } for (i = 0; i < size / ATRM_BIOS_PAGE; i++) { - ret = radeon_atrm_get_bios_chunk(rdev->bios, - (i * ATRM_BIOS_PAGE), - ATRM_BIOS_PAGE); + ret = radeon_atrm_call(atrm_handle, + rdev->bios, + (i * ATRM_BIOS_PAGE), + ATRM_BIOS_PAGE); if (ret < ATRM_BIOS_PAGE) break; } @@ -130,6 +197,12 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev) } return true; } +#else +static inline bool radeon_atrm_get_bios(struct radeon_device *rdev) +{ + return false; +} +#endif static bool ni_read_disabled_bios(struct radeon_device *rdev) { @@ -476,6 +549,61 @@ static bool radeon_read_disabled_bios(struct radeon_device *rdev) return legacy_read_disabled_bios(rdev); } +#ifdef CONFIG_ACPI +static bool radeon_acpi_vfct_bios(struct radeon_device *rdev) +{ + bool ret = false; + struct acpi_table_header *hdr; + acpi_size tbl_size; + UEFI_ACPI_VFCT *vfct; + GOP_VBIOS_CONTENT *vbios; + VFCT_IMAGE_HEADER *vhdr; + + if 
(!ACPI_SUCCESS(acpi_get_table_with_size("VFCT", 1, &hdr, &tbl_size))) + return false; + if (tbl_size < sizeof(UEFI_ACPI_VFCT)) { + DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n"); + goto out_unmap; + } + + vfct = (UEFI_ACPI_VFCT *)hdr; + if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) > tbl_size) { + DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n"); + goto out_unmap; + } + + vbios = (GOP_VBIOS_CONTENT *)((char *)hdr + vfct->VBIOSImageOffset); + vhdr = &vbios->VbiosHeader; + DRM_INFO("ACPI VFCT contains a BIOS for %02x:%02x.%d %04x:%04x, size %d\n", + vhdr->PCIBus, vhdr->PCIDevice, vhdr->PCIFunction, + vhdr->VendorID, vhdr->DeviceID, vhdr->ImageLength); + + if (vhdr->PCIBus != rdev->pdev->bus->number || + vhdr->PCIDevice != PCI_SLOT(rdev->pdev->devfn) || + vhdr->PCIFunction != PCI_FUNC(rdev->pdev->devfn) || + vhdr->VendorID != rdev->pdev->vendor || + vhdr->DeviceID != rdev->pdev->device) { + DRM_INFO("ACPI VFCT table is not for this card\n"); + goto out_unmap; + }; + + if (vfct->VBIOSImageOffset + sizeof(VFCT_IMAGE_HEADER) + vhdr->ImageLength > tbl_size) { + DRM_ERROR("ACPI VFCT image truncated\n"); + goto out_unmap; + } + + rdev->bios = kmemdup(&vbios->VbiosContent, vhdr->ImageLength, GFP_KERNEL); + ret = !!rdev->bios; + +out_unmap: + return ret; +} +#else +static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev) +{ + return false; +} +#endif bool radeon_get_bios(struct radeon_device *rdev) { @@ -484,6 +612,8 @@ bool radeon_get_bios(struct radeon_device *rdev) r = radeon_atrm_get_bios(rdev); if (r == false) + r = radeon_acpi_vfct_bios(rdev); + if (r == false) r = igp_read_bios_from_vram(rdev); if (r == false) r = radeon_read_bios(rdev); diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c index 576f4f6919f2..f75247d42ffd 100644 --- a/drivers/gpu/drm/radeon/radeon_combios.c +++ b/drivers/gpu/drm/radeon/radeon_combios.c @@ -719,6 +719,34 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde return i2c; } +static struct radeon_i2c_bus_rec radeon_combios_get_i2c_info_from_table(struct radeon_device *rdev) +{ + struct drm_device *dev = rdev->ddev; + struct radeon_i2c_bus_rec i2c; + u16 offset; + u8 id, blocks, clk, data; + int i; + + i2c.valid = false; + + offset = combios_get_table_offset(dev, COMBIOS_I2C_INFO_TABLE); + if (offset) { + blocks = RBIOS8(offset + 2); + for (i = 0; i < blocks; i++) { + id = RBIOS8(offset + 3 + (i * 5) + 0); + if (id == 136) { + clk = RBIOS8(offset + 3 + (i * 5) + 3); + data = RBIOS8(offset + 3 + (i * 5) + 4); + /* gpiopad */ + i2c = combios_setup_i2c_bus(rdev, DDC_MONID, + (1 << clk), (1 << data)); + break; + } + } + } + return i2c; +} + void radeon_combios_i2c_init(struct radeon_device *rdev) { struct drm_device *dev = rdev->ddev; @@ -755,30 +783,14 @@ void radeon_combios_i2c_init(struct radeon_device *rdev) } else if (rdev->family == CHIP_RS300 || rdev->family == CHIP_RS400 || rdev->family == CHIP_RS480) { - u16 offset; - u8 id, blocks, clk, data; - int i; - /* 0x68 */ i2c = combios_setup_i2c_bus(rdev, DDC_CRT2, 0, 0); rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID"); - offset = combios_get_table_offset(dev, COMBIOS_I2C_INFO_TABLE); - if (offset) { - blocks = RBIOS8(offset + 2); - for (i = 0; i < blocks; i++) { - id = RBIOS8(offset + 3 + (i * 5) + 0); - if (id == 136) { - clk = RBIOS8(offset + 3 + (i * 5) + 3); - data = RBIOS8(offset + 3 + (i * 5) + 4); - /* gpiopad */ - i2c = combios_setup_i2c_bus(rdev, DDC_MONID, - (1 << clk), (1 
<< data)); - rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "GPIOPAD_MASK"); - break; - } - } - } + /* gpiopad */ + i2c = radeon_combios_get_i2c_info_from_table(rdev); + if (i2c.valid) + rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "GPIOPAD_MASK"); } else if ((rdev->family == CHIP_R200) || (rdev->family >= CHIP_R300)) { /* 0x68 */ @@ -2321,7 +2333,10 @@ bool radeon_get_legacy_connector_info_from_bios(struct drm_device *dev) connector = (tmp >> 12) & 0xf; ddc_type = (tmp >> 8) & 0xf; - ddc_i2c = combios_setup_i2c_bus(rdev, ddc_type, 0, 0); + if (ddc_type == 5) + ddc_i2c = radeon_combios_get_i2c_info_from_table(rdev); + else + ddc_i2c = combios_setup_i2c_bus(rdev, ddc_type, 0, 0); switch (connector) { case CONNECTOR_PROPRIETARY_LEGACY: diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 2914c5761cfc..895e628b60f8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -64,14 +64,33 @@ void radeon_connector_hotplug(struct drm_connector *connector) /* just deal with DP (not eDP) here. */ if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { - int saved_dpms = connector->dpms; - - /* Only turn off the display it it's physically disconnected */ - if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); - else if (radeon_dp_needs_link_train(radeon_connector)) - drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); - connector->dpms = saved_dpms; + struct radeon_connector_atom_dig *dig_connector = + radeon_connector->con_priv; + + /* if existing sink type was not DP no need to retrain */ + if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) + return; + + /* first get sink type as it may be reset after (un)plug */ + dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector); + /* don't do anything if sink is not display port, i.e., + * passive dp->(dvi|hdmi) adaptor + */ + if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) { + int saved_dpms = connector->dpms; + /* Only turn off the display if it's physically disconnected */ + if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) { + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + } else if (radeon_dp_needs_link_train(radeon_connector)) { + /* set it to OFF so that drm_helper_connector_dpms() + * won't return immediately since the current state + * is ON at this point. 
+ */ + connector->dpms = DRM_MODE_DPMS_OFF; + drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); + } + connector->dpms = saved_dpms; + } } } diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index 142f89462aa4..b4a0db24f4dd 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -115,36 +115,20 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority return 0; } -static int radeon_cs_sync_rings(struct radeon_cs_parser *p) +static void radeon_cs_sync_rings(struct radeon_cs_parser *p) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - bool need_sync = false; - int i, r; + int i; for (i = 0; i < p->nrelocs; i++) { - struct radeon_fence *fence; + struct radeon_fence *a, *b; if (!p->relocs[i].robj || !p->relocs[i].robj->tbo.sync_obj) continue; - fence = p->relocs[i].robj->tbo.sync_obj; - if (fence->ring != p->ring && !radeon_fence_signaled(fence)) { - sync_to_ring[fence->ring] = true; - need_sync = true; - } - } - - if (!need_sync) { - return 0; - } - - r = radeon_semaphore_create(p->rdev, &p->ib.semaphore); - if (r) { - return r; + a = p->relocs[i].robj->tbo.sync_obj; + b = p->ib.sync_to[a->ring]; + p->ib.sync_to[a->ring] = radeon_fence_later(a, b); } - - return radeon_semaphore_sync_rings(p->rdev, p->ib.semaphore, - sync_to_ring, p->ring); } /* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -294,6 +278,30 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data) return 0; } +static void radeon_bo_vm_fence_va(struct radeon_cs_parser *parser, + struct radeon_fence *fence) +{ + struct radeon_fpriv *fpriv = parser->filp->driver_priv; + struct radeon_vm *vm = &fpriv->vm; + struct radeon_bo_list *lobj; + + if (parser->chunk_ib_idx == -1) { + return; + } + if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) { + return; + } + + list_for_each_entry(lobj, &parser->validated, tv.head) { + struct radeon_bo_va *bo_va; + struct radeon_bo *rbo = lobj->bo; + + bo_va = radeon_bo_va(rbo, vm); + radeon_fence_unref(&bo_va->fence); + bo_va->fence = radeon_fence_ref(fence); + } +} + /** * cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. 
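Note how radeon_cs_sync_rings() above no longer emits anything on its own: it just folds each reloc's most recent fence into ib.sync_to[] with radeon_fence_later(), leaving the semaphore work to IB scheduling. A sketch of what consuming that array amounts to (hypothetical helper name; radeon_ib_schedule() performs the equivalent internally):

/* sketch: wait once per foreign ring, via semaphores */
static int sketch_sync_ib_rings(struct radeon_device *rdev,
				struct radeon_ib *ib)
{
	int i, r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		struct radeon_fence *fence = ib->sync_to[i];

		/* false for NULL, same-ring and already-synced fences */
		if (!radeon_fence_need_sync(fence, ib->ring))
			continue;

		r = radeon_semaphore_sync_rings(rdev, ib->semaphore,
						fence->ring, ib->ring);
		if (r)
			return r;
		radeon_fence_note_sync(fence, ib->ring);
	}
	return 0;
}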
@@ -306,11 +314,14 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error) { unsigned i; - if (!error) + if (!error) { + /* fence all bo va before ttm_eu_fence_buffer_objects so bos are still reserved */ + radeon_bo_vm_fence_va(parser, parser->ib.fence); ttm_eu_fence_buffer_objects(&parser->validated, parser->ib.fence); - else + } else { ttm_eu_backoff_reservation(&parser->validated); + } if (parser->relocs != NULL) { for (i = 0; i < parser->nrelocs; i++) { @@ -368,16 +379,13 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev, DRM_ERROR("Invalid command stream !\n"); return r; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); parser->ib.vm_id = 0; - r = radeon_ib_schedule(rdev, &parser->ib); + r = radeon_ib_schedule(rdev, &parser->ib, NULL); if (r) { DRM_ERROR("Failed to schedule IB !\n"); } - return 0; + return r; } static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser, @@ -407,7 +415,6 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (parser->chunk_ib_idx == -1) return 0; - if ((parser->cs_flags & RADEON_CS_USE_VM) == 0) return 0; @@ -459,6 +466,7 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, return r; } + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); r = radeon_vm_bind(rdev, vm); if (r) { @@ -468,30 +476,26 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev, if (r) { goto out; } - r = radeon_cs_sync_rings(parser); - if (r) { - DRM_ERROR("Failed to synchronize rings !\n"); - } + radeon_cs_sync_rings(parser); + + parser->ib.vm_id = vm->id; + /* ib pool is bound at 0 in virtual address space, + * so gpu_addr is the offset inside the pool bo + */ + parser->ib.gpu_addr = parser->ib.sa_bo->soffset; if ((rdev->family >= CHIP_TAHITI) && (parser->chunk_const_ib_idx != -1)) { parser->const_ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space to gpu_addr is the - * offset inside the pool bo + /* ib pool is bound at 0 in virtual address space, + * so gpu_addr is the offset inside the pool bo */ parser->const_ib.gpu_addr = parser->const_ib.sa_bo->soffset; - r = radeon_ib_schedule(rdev, &parser->const_ib); - if (r) - goto out; + r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib); + } else { + r = radeon_ib_schedule(rdev, &parser->ib, NULL); } - parser->ib.vm_id = vm->id; - /* ib pool is bind at 0 in virtual address space to gpu_addr is the - * offset inside the pool bo - */ - parser->ib.gpu_addr = parser->ib.sa_bo->soffset; - parser->ib.is_const_ib = false; - r = radeon_ib_schedule(rdev, &parser->ib); out: if (!r) { if (vm->fence) { @@ -499,7 +503,8 @@ out: } vm->fence = radeon_fence_ref(parser->ib.fence); } - mutex_unlock(&fpriv->vm.mutex); + mutex_unlock(&vm->mutex); + mutex_unlock(&rdev->vm_manager.lock); return r; } @@ -519,9 +524,9 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) struct radeon_cs_parser parser; int r; - radeon_mutex_lock(&rdev->cs_mutex); + down_read(&rdev->exclusive_lock); if (!rdev->accel_working) { - radeon_mutex_unlock(&rdev->cs_mutex); + up_read(&rdev->exclusive_lock); return -EBUSY; } /* initialize parser */ @@ -534,8 +539,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) { DRM_ERROR("Failed to initialize parser !\n"); radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = 
radeon_cs_parser_relocs(&parser); @@ -543,8 +548,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r != -ERESTARTSYS) DRM_ERROR("Failed to parse relocation %d!\n", r); radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } r = radeon_cs_ib_chunk(rdev, &parser); @@ -557,8 +562,8 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } out: radeon_cs_parser_fini(&parser, r); + up_read(&rdev->exclusive_lock); r = radeon_cs_handle_lockup(rdev, r); - radeon_mutex_unlock(&rdev->cs_mutex); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 42acc6449dd6..8794744cdf1a 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -67,7 +67,8 @@ static void radeon_hide_cursor(struct drm_crtc *crtc) if (ASIC_IS_DCE4(rdev)) { WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); - WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | + EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); @@ -94,7 +95,8 @@ static void radeon_show_cursor(struct drm_crtc *crtc) if (ASIC_IS_DCE4(rdev)) { WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | - EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT)); + EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | + EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | @@ -262,8 +264,14 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, if (!(cursor_end & 0x7f)) w--; } - if (w <= 0) + if (w <= 0) { w = 1; + cursor_end = x - xorigin + w; + if (!(cursor_end & 0x7f)) { + x--; + WARN_ON_ONCE(x < 0); + } + } } } diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 066c98b888a5..d2e243867ac6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -96,8 +96,12 @@ static const char radeon_family_name[][16] = { "LAST", }; -/* - * Clear GPU surface registers. +/** + * radeon_surface_init - Clear GPU surface registers. + * + * @rdev: radeon_device pointer + * + * Clear GPU surface registers (r1xx-r5xx). */ void radeon_surface_init(struct radeon_device *rdev) { @@ -119,6 +123,13 @@ void radeon_surface_init(struct radeon_device *rdev) /* * GPU scratch registers helpers function. */ +/** + * radeon_scratch_init - Init scratch register driver information. + * + * @rdev: radeon_device pointer + * + * Init CP scratch register driver information (r1xx-r5xx) + */ void radeon_scratch_init(struct radeon_device *rdev) { int i; @@ -136,6 +147,15 @@ void radeon_scratch_init(struct radeon_device *rdev) } } +/** + * radeon_scratch_get - Allocate a scratch register + * + * @rdev: radeon_device pointer + * @reg: scratch register mmio offset + * + * Allocate a CP scratch register for use by the driver (all asics). + * Returns 0 on success or -EINVAL on failure. 
+ */ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg) { int i; @@ -150,6 +170,14 @@ int radeon_scratch_get(struct radeon_device *rdev, uint32_t *reg) return -EINVAL; } +/** + * radeon_scratch_free - Free a scratch register + * + * @rdev: radeon_device pointer + * @reg: scratch register mmio offset + * + * Free a CP scratch register allocated for use by the driver (all asics) + */ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) { int i; @@ -162,6 +190,20 @@ void radeon_scratch_free(struct radeon_device *rdev, uint32_t reg) } } +/* + * radeon_wb_*() + * Writeback is the method by which the GPU updates special pages + * in memory with the status of certain GPU events (fences, ring pointers, + * etc.). + */ + +/** + * radeon_wb_disable - Disable Writeback + * + * @rdev: radeon_device pointer + * + * Disables Writeback (all asics). Used for suspend. + */ void radeon_wb_disable(struct radeon_device *rdev) { int r; @@ -177,6 +219,14 @@ void radeon_wb_disable(struct radeon_device *rdev) rdev->wb.enabled = false; } +/** + * radeon_wb_fini - Disable Writeback and free memory + * + * @rdev: radeon_device pointer + * + * Disables Writeback and frees the Writeback memory (all asics). + * Used at driver shutdown. + */ void radeon_wb_fini(struct radeon_device *rdev) { radeon_wb_disable(rdev); @@ -187,6 +237,15 @@ void radeon_wb_fini(struct radeon_device *rdev) } } +/** + * radeon_wb_init - Init Writeback driver info and allocate memory + * + * @rdev: radeon_device pointer + * + * Initializes Writeback and allocates the Writeback memory (all asics). + * Used at driver startup. + * Returns 0 on success or an -error on failure. + */ int radeon_wb_init(struct radeon_device *rdev) { int r; @@ -355,6 +414,15 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc) /* * GPU helpers function. */ +/** + * radeon_card_posted - check if the hw has already been initialized + * + * @rdev: radeon_device pointer + * + * Check if the asic has been initialized (all asics). + * Used at driver startup. + * Returns true if initialized or false if not. + */ bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; @@ -404,6 +472,14 @@ bool radeon_card_posted(struct radeon_device *rdev) } +/** + * radeon_update_bandwidth_info - update display bandwidth params + * + * @rdev: radeon_device pointer + * + * Used when sclk/mclk are switched or display modes are set. + * Params are used to calculate display watermarks (all asics). + */ void radeon_update_bandwidth_info(struct radeon_device *rdev) { fixed20_12 a; @@ -424,6 +500,15 @@ void radeon_update_bandwidth_info(struct radeon_device *rdev) } } +/** + * radeon_boot_test_post_card - check and possibly initialize the hw + * + * @rdev: radeon_device pointer + * + * Check if the asic is initialized and if not, attempt to initialize + * it (all asics). + * Returns true if initialized or false if not. + */ bool radeon_boot_test_post_card(struct radeon_device *rdev) { if (radeon_card_posted(rdev)) @@ -442,6 +527,16 @@ bool radeon_boot_test_post_card(struct radeon_device *rdev) } } +/** + * radeon_dummy_page_init - init dummy page used by the driver + * + * @rdev: radeon_device pointer + * + * Allocate the dummy page used by the driver (all asics). + * This dummy page is used by the driver as a filler for gart entries + * when pages are taken out of the GART. + * Returns 0 on success, -ENOMEM on failure. + */ int radeon_dummy_page_init(struct radeon_device *rdev) { if (rdev->dummy_page.page)
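The dummy page doc comment above is worth unpacking: every GART slot must point somewhere, so entries whose backing pages are unbound get aimed at this single zeroed page and stray GPU accesses land harmlessly. Roughly what such an init boils down to (a sketch using the usual page-allocator and PCI DMA mapping calls of this era; error paths trimmed, not quoted from the patch):

/* sketch: one zeroed DMA32 page, mapped once, shared by every
 * unbound GART entry */
rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
if (rdev->dummy_page.page == NULL)
	return -ENOMEM;
rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
				     0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(rdev->pdev, rdev->dummy_page.addr)) {
	__free_page(rdev->dummy_page.page);
	rdev->dummy_page.page = NULL;
	return -ENOMEM;
}
return 0;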
+ */ int radeon_dummy_page_init(struct radeon_device *rdev) { if (rdev->dummy_page.page) @@ -460,6 +555,13 @@ int radeon_dummy_page_init(struct radeon_device *rdev) return 0; } +/** + * radeon_dummy_page_fini - free dummy page used by the driver + * + * @rdev: radeon_device pointer + * + * Frees the dummy page used by the driver (all asics). + */ void radeon_dummy_page_fini(struct radeon_device *rdev) { if (rdev->dummy_page.page == NULL) @@ -472,6 +574,23 @@ void radeon_dummy_page_fini(struct radeon_device *rdev) /* ATOM accessor methods */ +/* + * ATOM is an interpreted byte code stored in tables in the vbios. The + * driver registers callbacks to access registers and the interpreter + * in the driver parses the tables and executes them to program specific + * actions (set display modes, asic init, etc.). See radeon_atombios.c, + * atombios.h, and atom.c. + */ + +/** + * cail_pll_read - read PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + * Returns the value of the PLL register. + */ static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -481,6 +600,15 @@ static uint32_t cail_pll_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_pll_write - write PLL register + * + * @info: atom card_info pointer + * @reg: PLL register offset + * @val: value to write to the pll register + * + * Provides a PLL register accessor for the atom interpreter (r4xx+). + */ static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -488,6 +616,15 @@ static void cail_pll_write(struct card_info *info, uint32_t reg, uint32_t val) rdev->pll_wreg(rdev, reg, val); } +/** + * cail_mc_read - read MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * + * Provides an MC register accessor for the atom interpreter (r4xx+). + * Returns the value of the MC register. + */ static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -497,6 +634,15 @@ static uint32_t cail_mc_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_mc_write - write MC (Memory Controller) register + * + * @info: atom card_info pointer + * @reg: MC register offset + * @val: value to write to the MC register + * + * Provides an MC register accessor for the atom interpreter (r4xx+). + */ static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -504,6 +650,15 @@ static void cail_mc_write(struct card_info *info, uint32_t reg, uint32_t val) rdev->mc_wreg(rdev, reg, val); } +/** + * cail_reg_write - write MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * @val: value to write to the MMIO register + * + * Provides an MMIO register accessor for the atom interpreter (r4xx+). + */ static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -511,6 +666,15 @@ static void cail_reg_write(struct card_info *info, uint32_t reg, uint32_t val) WREG32(reg*4, val); } +/** + * cail_reg_read - read MMIO register + * + * @info: atom card_info pointer + * @reg: MMIO register offset + * + * Provides an MMIO register accessor for the atom interpreter (r4xx+).
+ * Returns the value of the MMIO register. + */ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -520,6 +684,15 @@ static uint32_t cail_reg_read(struct card_info *info, uint32_t reg) return r; } +/** + * cail_ioreg_write - write IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * @val: value to write to the IO register + * + * Provides an IO register accessor for the atom interpreter (r4xx+). + */ static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) { struct radeon_device *rdev = info->dev->dev_private; @@ -527,6 +700,15 @@ static void cail_ioreg_write(struct card_info *info, uint32_t reg, uint32_t val) WREG32_IO(reg*4, val); } +/** + * cail_ioreg_read - read IO register + * + * @info: atom card_info pointer + * @reg: IO register offset + * + * Provides an IO register accessor for the atom interpreter (r4xx+). + * Returns the value of the IO register. + */ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) { struct radeon_device *rdev = info->dev->dev_private; @@ -536,6 +718,16 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg) return r; } +/** + * radeon_atombios_init - init the driver info and callbacks for atombios + * + * @rdev: radeon_device pointer + * + * Initializes the driver info and register access callbacks for the + * ATOM interpreter (r4xx+). + * Returns 0 on success, -ENOMEM on failure. + * Called at driver startup. + */ int radeon_atombios_init(struct radeon_device *rdev) { struct card_info *atom_card_info = @@ -569,6 +761,15 @@ int radeon_atombios_init(struct radeon_device *rdev) return 0; } +/** + * radeon_atombios_fini - free the driver info and callbacks for atombios + * + * @rdev: radeon_device pointer + * + * Frees the driver info and register access callbacks for the ATOM + * interpreter (r4xx+). + * Called at driver shutdown. + */ void radeon_atombios_fini(struct radeon_device *rdev) { if (rdev->mode_info.atom_context) { @@ -578,17 +779,50 @@ void radeon_atombios_fini(struct radeon_device *rdev) kfree(rdev->mode_info.atom_card_info); } +/* COMBIOS */ +/* + * COMBIOS is the bios format prior to ATOM. It provides + * command tables similar to ATOM, but doesn't have a unified + * parser. See radeon_combios.c. + */ + +/** + * radeon_combios_init - init the driver info for combios + * + * @rdev: radeon_device pointer + * + * Initializes the driver info for combios (r1xx-r3xx). + * Returns 0 on success. + * Called at driver startup. + */ int radeon_combios_init(struct radeon_device *rdev) { radeon_combios_initialize_bios_scratch_regs(rdev->ddev); return 0; } +/** + * radeon_combios_fini - free the driver info for combios + * + * @rdev: radeon_device pointer + * + * Frees the driver info for combios (r1xx-r3xx). + * Called at driver shutdown. + */ void radeon_combios_fini(struct radeon_device *rdev) { } -/* if we get transitioned to only one device, tak VGA back */ +/* if we get transitioned to only one device, take VGA back */ +/** + * radeon_vga_set_decode - enable/disable vga decode + * + * @cookie: radeon_device pointer + * @state: enable/disable vga decode + * + * Enable/disable vga decode (all asics). + * Returns VGA resource flags.
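+ *
+ * For reference (illustrative, based on how the driver is believed to
+ * wire this up): the callback is registered with the VGA arbiter during
+ * device init, roughly:
+ *
+ *   vga_client_register(rdev->pdev, rdev, NULL, radeon_vga_set_decode);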
+ */ static unsigned int radeon_vga_set_decode(void *cookie, bool state) { struct radeon_device *rdev = cookie; @@ -600,6 +834,14 @@ static unsigned int radeon_vga_set_decode(void *cookie, bool state) return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; } +/** + * radeon_check_arguments - validate module params + * + * @rdev: radeon_device pointer + * + * Validates certain module parameters and updates + * the associated values used by the driver (all asics). + */ void radeon_check_arguments(struct radeon_device *rdev) { /* vramlimit must be a power of two */ @@ -666,6 +908,15 @@ void radeon_check_arguments(struct radeon_device *rdev) } } +/** + * radeon_switcheroo_set_state - set switcheroo state + * + * @pdev: pci dev pointer + * @state: vga switcheroo state + * + * Callback for the switcheroo driver. Suspends or resumes the + * asic before or after it is powered up using ACPI methods. + */ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -686,6 +937,15 @@ static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switchero } } +/** + * radeon_switcheroo_can_switch - see if switcheroo state can change + * + * @pdev: pci dev pointer + * + * Callback for the switcheroo driver. Check if the switcheroo + * state can be changed. + * Returns true if the state can be changed, false if not. + */ static bool radeon_switcheroo_can_switch(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); @@ -703,6 +963,18 @@ static const struct vga_switcheroo_client_ops radeon_switcheroo_ops = { .can_switch = radeon_switcheroo_can_switch, }; +/** + * radeon_device_init - initialize the driver + * + * @rdev: radeon_device pointer + * @ddev: drm dev pointer + * @pdev: pci dev pointer + * @flags: driver flags + * + * Initializes the driver info and hw (all asics). + * Returns 0 for success or an error on failure. + * Called at driver startup.
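+ *
+ * Illustrative call site (sketch, not part of this patch): the KMS load
+ * path allocates the device and hands it here together with the drm and
+ * pci devices, roughly:
+ *
+ *   rdev = kzalloc(sizeof(struct radeon_device), GFP_KERNEL);
+ *   r = radeon_device_init(rdev, dev, dev->pdev, flags);
+ *   if (r)
+ *       ... unwind and free rdev ...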
+ */ int radeon_device_init(struct radeon_device *rdev, struct drm_device *ddev, struct pci_dev *pdev, @@ -721,6 +993,10 @@ int radeon_device_init(struct radeon_device *rdev, rdev->usec_timeout = RADEON_MAX_USEC_TIMEOUT; rdev->mc.gtt_size = radeon_gart_size * 1024 * 1024; rdev->accel_working = false; + /* set up ring ids */ + for (i = 0; i < RADEON_NUM_RINGS; i++) { + rdev->ring[i].idx = i; + } DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X).\n", radeon_family_name[rdev->family], pdev->vendor, pdev->device, @@ -728,20 +1004,21 @@ int radeon_device_init(struct radeon_device *rdev, /* mutex initialization are all done here so we * can recall function without having locking issues */ - radeon_mutex_init(&rdev->cs_mutex); mutex_init(&rdev->ring_lock); mutex_init(&rdev->dc_hw_i2c_mutex); - if (rdev->family >= CHIP_R600) - spin_lock_init(&rdev->ih.lock); + atomic_set(&rdev->ih.lock, 0); mutex_init(&rdev->gem.mutex); mutex_init(&rdev->pm.mutex); - mutex_init(&rdev->vram_mutex); + mutex_init(&rdev->gpu_clock_mutex); + init_rwsem(&rdev->pm.mclk_lock); + init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); init_waitqueue_head(&rdev->irq.idle_queue); r = radeon_gem_init(rdev); if (r) return r; /* initialize vm here */ + mutex_init(&rdev->vm_manager.lock); rdev->vm_manager.use_bitmap = 1; rdev->vm_manager.max_pfn = 1 << 20; INIT_LIST_HEAD(&rdev->vm_manager.lru_vm); @@ -822,6 +1099,10 @@ int radeon_device_init(struct radeon_device *rdev, if (r) return r; + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + if (rdev->flags & RADEON_IS_AGP && !rdev->accel_working) { /* Acceleration not working on AGP card try again * with fallback to PCI or PCIE GART @@ -847,6 +1128,14 @@ int radeon_device_init(struct radeon_device *rdev, static void radeon_debugfs_remove_files(struct radeon_device *rdev); +/** + * radeon_device_fini - tear down the driver + * + * @rdev: radeon_device pointer + * + * Tear down the driver info (all asics). + * Called at driver shutdown. + */ void radeon_device_fini(struct radeon_device *rdev) { DRM_INFO("radeon: finishing device.\n"); @@ -868,6 +1157,16 @@ void radeon_device_fini(struct radeon_device *rdev) /* * Suspend & resume. */ +/** + * radeon_suspend_kms - initiate device suspend + * + * @dev: drm dev pointer + * @state: suspend state + * + * Puts the hw in the suspend state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver suspend. + */ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) { struct radeon_device *rdev; @@ -942,10 +1241,20 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) return 0; } +/** + * radeon_resume_kms - initiate device resume + * + * @dev: drm dev pointer + * + * Bring the hw back to operating state (all asics). + * Returns 0 for success or an error on failure. + * Called at driver resume.
+ */ int radeon_resume_kms(struct drm_device *dev) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -960,6 +1269,11 @@ int radeon_resume_kms(struct drm_device *dev) /* resume AGP if in use */ radeon_agp_resume(rdev); radeon_resume(rdev); + + r = radeon_ib_ring_tests(rdev); + if (r) + DRM_ERROR("ib ring test failed (%d).\n", r); + radeon_pm_resume(rdev); radeon_restore_bios_scratch_regs(rdev); @@ -984,30 +1298,77 @@ int radeon_resume_kms(struct drm_device *dev) return 0; } +/** + * radeon_gpu_reset - reset the asic + * + * @rdev: radeon device pointer + * + * Attempt to reset the GPU if it has hung (all asics). + * Returns 0 for success or an error on failure. + */ int radeon_gpu_reset(struct radeon_device *rdev) { - int r; + unsigned ring_sizes[RADEON_NUM_RINGS]; + uint32_t *ring_data[RADEON_NUM_RINGS]; + + bool saved = false; + + int i, r; int resched; + down_write(&rdev->exclusive_lock); radeon_save_bios_scratch_regs(rdev); /* block TTM */ resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev); radeon_suspend(rdev); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + ring_sizes[i] = radeon_ring_backup(rdev, &rdev->ring[i], + &ring_data[i]); + if (ring_sizes[i]) { + saved = true; + dev_info(rdev->dev, "Saved %d dwords of commands " + "on ring %d.\n", ring_sizes[i], i); + } + } + +retry: r = radeon_asic_reset(rdev); if (!r) { - dev_info(rdev->dev, "GPU reset succeed\n"); + dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n"); radeon_resume(rdev); - radeon_restore_bios_scratch_regs(rdev); - drm_helper_resume_force_mode(rdev->ddev); - ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); } + radeon_restore_bios_scratch_regs(rdev); + drm_helper_resume_force_mode(rdev->ddev); + + if (!r) { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + radeon_ring_restore(rdev, &rdev->ring[i], + ring_sizes[i], ring_data[i]); + } + + r = radeon_ib_ring_tests(rdev); + if (r) { + dev_err(rdev->dev, "ib ring test failed (%d).\n", r); + if (saved) { + radeon_suspend(rdev); + goto retry; + } + } + } else { + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + kfree(ring_data[i]); + } + } + + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ?
*/ dev_info(rdev->dev, "GPU reset failed\n"); } + up_write(&rdev->exclusive_lock); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 64a008d14493..7ddef8f30d0e 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1401,7 +1401,7 @@ void radeon_modeset_fini(struct radeon_device *rdev) radeon_i2c_fini(rdev); } -static bool is_hdtv_mode(struct drm_display_mode *mode) +static bool is_hdtv_mode(const struct drm_display_mode *mode) { /* try and guess if this is a tv or a monitor */ if ((mode->vdisplay == 480 && mode->hdisplay == 720) || /* 480p */ @@ -1414,7 +1414,7 @@ static bool is_hdtv_mode(struct drm_display_mode *mode) } bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct drm_device *dev = crtc->dev; diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 2c4d53fd20c5..27d22d709c90 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -59,9 +59,13 @@ * 2.15.0 - add max_pipes query * 2.16.0 - fix evergreen 2D tiled surface calculation * 2.17.0 - add STRMOUT_BASE_UPDATE for r7xx + * 2.18.0 - r600-eg: allow "invalid" DB formats + * 2.19.0 - r600-eg: MSAA textures + * 2.20.0 - r600-si: RADEON_INFO_TIMESTAMP query + * 2.21.0 - r600-r700: FMASK and CMASK */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 17 +#define KMS_DRIVER_MINOR 21 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); @@ -133,7 +137,7 @@ int radeon_tv = 1; int radeon_audio = 0; int radeon_disp_priority = 0; int radeon_hw_i2c = 0; -int radeon_pcie_gen2 = 0; +int radeon_pcie_gen2 = -1; int radeon_msi = -1; int radeon_lockup_timeout = 10000; @@ -179,7 +183,7 @@ module_param_named(disp_priority, radeon_disp_priority, int, 0444); MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)"); module_param_named(hw_i2c, radeon_hw_i2c, int, 0444); -MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (1 = enable)"); +MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)"); module_param_named(pcie_gen2, radeon_pcie_gen2, int, 0444); MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)"); @@ -262,7 +266,6 @@ static struct drm_driver driver_old = { .irq_postinstall = radeon_driver_irq_postinstall, .irq_uninstall = radeon_driver_irq_uninstall, .irq_handler = radeon_driver_irq_handler, - .reclaim_buffers = drm_core_reclaim_buffers, .ioctls = radeon_ioctls, .dma_ioctl = radeon_cp_buffers, .fops = &radeon_driver_old_fops, @@ -365,7 +368,6 @@ static struct drm_driver kms_driver = { .irq_postinstall = radeon_driver_irq_postinstall_kms, .irq_uninstall = radeon_driver_irq_uninstall_kms, .irq_handler = radeon_driver_irq_handler_kms, - .reclaim_buffers = drm_core_reclaim_buffers, .ioctls = radeon_ioctls_kms, .gem_init_object = radeon_gem_object_init, .gem_free_object = radeon_gem_object_free, diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 11f5f402d22c..7b737b9339ad 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -40,39 +40,95 @@ #include "radeon.h" #include "radeon_trace.h" +/* + * Fences + * Fences mark an event in the GPU's pipeline and are used + * for GPU/CPU synchronization.
When the fence is written, + * it is expected that all buffers associated with that fence + * are no longer in use by the associated ring on the GPU and + * that the relevant GPU caches have been flushed. Whether + * we use a scratch register or memory location depends on the asic + * and whether writeback is enabled. + */ + +/** + * radeon_fence_write - write a fence value + * + * @rdev: radeon_device pointer + * @seq: sequence number to write + * @ring: ring index the fence is associated with + * + * Writes a fence value to memory or a scratch register (all asics). + */ static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring) { - if (rdev->wb.enabled) { - *rdev->fence_drv[ring].cpu_addr = cpu_to_le32(seq); + struct radeon_fence_driver *drv = &rdev->fence_drv[ring]; + if (likely(rdev->wb.enabled || !drv->scratch_reg)) { + *drv->cpu_addr = cpu_to_le32(seq); } else { - WREG32(rdev->fence_drv[ring].scratch_reg, seq); + WREG32(drv->scratch_reg, seq); } } +/** + * radeon_fence_read - read a fence value + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Reads a fence value from memory or a scratch register (all asics). + * Returns the value of the fence read from memory or register. + */ static u32 radeon_fence_read(struct radeon_device *rdev, int ring) { + struct radeon_fence_driver *drv = &rdev->fence_drv[ring]; u32 seq = 0; - if (rdev->wb.enabled) { - seq = le32_to_cpu(*rdev->fence_drv[ring].cpu_addr); + if (likely(rdev->wb.enabled || !drv->scratch_reg)) { + seq = le32_to_cpu(*drv->cpu_addr); } else { - seq = RREG32(rdev->fence_drv[ring].scratch_reg); + seq = RREG32(drv->scratch_reg); } return seq; } -int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence) +/** + * radeon_fence_emit - emit a fence on the requested ring + * + * @rdev: radeon_device pointer + * @fence: radeon fence object + * @ring: ring index the fence is associated with + * + * Emits a fence command on the requested ring (all asics). + * Returns 0 on success, -ENOMEM on failure. + */ +int radeon_fence_emit(struct radeon_device *rdev, + struct radeon_fence **fence, + int ring) { /* we are protected by the ring emission mutex */ - if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { - return 0; + *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); + if ((*fence) == NULL) { + return -ENOMEM; } - fence->seq = ++rdev->fence_drv[fence->ring].seq; - radeon_fence_ring_emit(rdev, fence->ring, fence); - trace_radeon_fence_emit(rdev->ddev, fence->seq); + kref_init(&((*fence)->kref)); + (*fence)->rdev = rdev; + (*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring]; + (*fence)->ring = ring; + radeon_fence_ring_emit(rdev, ring, *fence); + trace_radeon_fence_emit(rdev->ddev, (*fence)->seq); return 0; } +/** + * radeon_fence_process - process a fence + * + * @rdev: radeon_device pointer + * @ring: ring index the fence is associated with + * + * Checks the current fence value and wakes the fence queue + * if the sequence number has increased (all asics). + */ void radeon_fence_process(struct radeon_device *rdev, int ring) { uint64_t seq, last_seq; @@ -133,30 +189,35 @@ void radeon_fence_process(struct radeon_device *rdev, int ring) } } +/** + * radeon_fence_destroy - destroy a fence + * + * @kref: fence kref + * + * Frees the fence object (all asics).
+ */ static void radeon_fence_destroy(struct kref *kref) { struct radeon_fence *fence; fence = container_of(kref, struct radeon_fence, kref); - fence->seq = RADEON_FENCE_NOTEMITED_SEQ; kfree(fence); } -int radeon_fence_create(struct radeon_device *rdev, - struct radeon_fence **fence, - int ring) -{ - *fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL); - if ((*fence) == NULL) { - return -ENOMEM; - } - kref_init(&((*fence)->kref)); - (*fence)->rdev = rdev; - (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; - (*fence)->ring = ring; - return 0; -} - +/** + * radeon_fence_seq_signaled - check if a fence sequence number has signaled + * + * @rdev: radeon device pointer + * @seq: sequence number + * @ring: ring index the fence is associated with + * + * Check if the last signaled fence sequence number is >= the requested + * sequence number (all asics). + * Returns true if the fence has signaled (current fence value + * is >= requested value) or false if it has not (current fence + * value is < the requested value). Helper function for + * radeon_fence_signaled(). + */ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, u64 seq, unsigned ring) { @@ -171,15 +232,19 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev, return false; } +/** + * radeon_fence_signaled - check if a fence has signaled + * + * @fence: radeon fence object + * + * Check if the requested fence has signaled (all asics). + * Returns true if the fence has signaled or false if it has not. + */ bool radeon_fence_signaled(struct radeon_fence *fence) { if (!fence) { return true; } - if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { - WARN(1, "Querying an unemitted fence : %p !\n", fence); - return true; - } if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) { return true; } @@ -190,6 +255,24 @@ bool radeon_fence_signaled(struct radeon_fence *fence) return false; } +/** + * radeon_fence_wait_seq - wait for a specific sequence number + * + * @rdev: radeon device pointer + * @target_seq: sequence number we want to wait for + * @ring: ring index the fence is associated with + * @intr: use interruptible sleep + * @lock_ring: whether the ring should be locked or not + * + * Wait for the requested sequence number to be written (all asics). + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the sequence number. Helper function + * for radeon_fence_wait(), et al. + * Returns 0 if the sequence number has passed, error for all other cases. + * -EDEADLK is returned when a GPU lockup has been detected and the ring is + * marked as not ready so no further jobs get scheduled until a successful + * reset. + */ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq, unsigned ring, bool intr, bool lock_ring) { @@ -285,6 +368,17 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 target_seq, return 0; } +/** + * radeon_fence_wait - wait for a fence to signal + * + * @fence: radeon fence object + * @intr: use interruptible sleep + * + * Wait for the requested fence to signal (all asics). + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the fence. + * Returns 0 if the fence has passed, error for all other cases.
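+ *
+ * Putting the pieces above together (illustrative sketch only; real
+ * callers hold the ring emission lock when emitting):
+ *
+ *   struct radeon_fence *fence;
+ *   r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
+ *   if (!r) {
+ *       r = radeon_fence_wait(fence, false);
+ *       radeon_fence_unref(&fence);
+ *   }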
+ */ int radeon_fence_wait(struct radeon_fence *fence, bool intr) { int r; @@ -315,6 +409,20 @@ bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq) return false; } +/** + * radeon_fence_wait_any_seq - wait for a sequence number on any ring + * + * @rdev: radeon device pointer + * @target_seq: sequence number(s) we want to wait for + * @intr: use interruptible sleep + * + * Wait for the requested sequence number(s) to be written by any ring + * (all asics). Sequence number array is indexed by ring id. + * @intr selects whether to use interruptible (true) or non-interruptible + * (false) sleep when waiting for the sequence number. Helper function + * for radeon_fence_wait_any(), et al. + * Returns 0 if the sequence number has passed, error for all other cases. + */ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, u64 *target_seq, bool intr) { @@ -343,7 +451,7 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, /* nothing to wait for ? */ if (ring == RADEON_NUM_RINGS) { - return 0; + return -ENOENT; } while (!radeon_fence_any_seq_signaled(rdev, target_seq)) { @@ -424,6 +532,19 @@ static int radeon_fence_wait_any_seq(struct radeon_device *rdev, return 0; } +/** + * radeon_fence_wait_any - wait for a fence to signal on any ring + * + * @rdev: radeon device pointer + * @fences: radeon fence object(s) + * @intr: use interruptible sleep + * + * Wait for any requested fence to signal (all asics). Fence + * array is indexed by ring id. @intr selects whether to use + * interruptible (true) or non-interruptible (false) sleep when + * waiting for the fences. Used by the suballocator. + * Returns 0 if any fence has passed, error for all other cases. + */ int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr) @@ -444,9 +565,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; } - if (fences[i]->seq < RADEON_FENCE_NOTEMITED_SEQ) { - seq[i] = fences[i]->seq; - } + seq[i] = fences[i]->seq; } r = radeon_fence_wait_any_seq(rdev, seq, intr); @@ -456,16 +575,22 @@ int radeon_fence_wait_any(struct radeon_device *rdev, return 0; } +/** + * radeon_fence_wait_next_locked - wait for the next fence to signal + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Wait for the next fence on the requested ring to signal (all asics). + * Returns 0 if the next fence has passed, error for all other cases. + * Caller must hold ring lock. + */ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) { uint64_t seq; - /* We are not protected by ring lock when reading current seq but - * it's ok as worst case is we return to early while we could have - * wait. - */ seq = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL; - if (seq >= rdev->fence_drv[ring].seq) { + if (seq >= rdev->fence_drv[ring].sync_seq[ring]) { /* nothing to wait for, last_seq is already the last emited fence */ return -ENOENT; @@ -473,23 +598,59 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) return radeon_fence_wait_seq(rdev, seq, ring, false, false); } -int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) +/** + * radeon_fence_wait_empty_locked - wait for all fences to signal + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Wait for all fences on the requested ring to signal (all asics). + * Caller must hold ring lock.
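+ *
+ * For example (sketch, not part of this patch), the suspend path drains
+ * every ring while holding the ring lock:
+ *
+ *   mutex_lock(&rdev->ring_lock);
+ *   for (i = 0; i < RADEON_NUM_RINGS; i++)
+ *       radeon_fence_wait_empty_locked(rdev, i);
+ *   mutex_unlock(&rdev->ring_lock);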
+ */ void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) { - /* We are not protected by ring lock when reading current seq - * but it's ok as wait empty is call from place where no more - * activity can be scheduled so there won't be concurrent access - * to seq value. - */ - return radeon_fence_wait_seq(rdev, rdev->fence_drv[ring].seq, - ring, false, false); + uint64_t seq = rdev->fence_drv[ring].sync_seq[ring]; + + while (1) { + int r; + r = radeon_fence_wait_seq(rdev, seq, ring, false, false); + if (r == -EDEADLK) { + mutex_unlock(&rdev->ring_lock); + r = radeon_gpu_reset(rdev); + mutex_lock(&rdev->ring_lock); + if (!r) + continue; + } + if (r) { + dev_err(rdev->dev, "error waiting for ring to become" + " idle (%d)\n", r); + } + return; + } } +/** + * radeon_fence_ref - take a ref on a fence + * + * @fence: radeon fence object + * + * Take a reference on a fence (all asics). + * Returns the fence. + */ struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence) { kref_get(&fence->kref); return fence; } +/** + * radeon_fence_unref - remove a ref on a fence + * + * @fence: radeon fence object + * + * Remove a reference on a fence (all asics). + */ void radeon_fence_unref(struct radeon_fence **fence) { struct radeon_fence *tmp = *fence; @@ -500,6 +661,16 @@ void radeon_fence_unref(struct radeon_fence **fence) } } +/** + * radeon_fence_count_emitted - get the count of emitted fences + * + * @rdev: radeon device pointer + * @ring: ring index the fence is associated with + * + * Get the number of fences emitted on the requested ring (all asics). + * Returns the number of emitted fences on the ring. Used by the + * dynpm code to track ring activity. + */ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) { uint64_t emitted; @@ -508,7 +679,8 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) * but it's ok to report slightly wrong fence count here. */ radeon_fence_process(rdev, ring); - emitted = rdev->fence_drv[ring].seq - atomic64_read(&rdev->fence_drv[ring].last_seq); + emitted = rdev->fence_drv[ring].sync_seq[ring] + - atomic64_read(&rdev->fence_drv[ring].last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) { emitted = 0x10000000; @@ -516,6 +688,83 @@ unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring) return (unsigned)emitted; } +/** + * radeon_fence_need_sync - do we need a semaphore + * + * @fence: radeon fence object + * @dst_ring: which ring to check against + * + * Check if the fence needs to be synced against another ring + * (all asics). If so, we need to emit a semaphore. + * Returns true if we need to sync with another ring, false if + * not. + */ +bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *fdrv; + + if (!fence) { + return false; + } + + if (fence->ring == dst_ring) { + return false; + } + + /* we are protected by the ring mutex */ + fdrv = &fence->rdev->fence_drv[dst_ring]; + if (fence->seq <= fdrv->sync_seq[fence->ring]) { + return false; + } + + return true; +} + +/** + * radeon_fence_note_sync - record the sync point + * + * @fence: radeon fence object + * @dst_ring: which ring to check against + * + * Note the sequence number at which point the fence will + * be synced with the requested ring (all asics).
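+ *
+ * Together with radeon_fence_need_sync() this gives the usual pattern
+ * (illustrative sketch; the semaphore plumbing is elided):
+ *
+ *   if (radeon_fence_need_sync(fence, dst_ring)) {
+ *       ... emit a semaphore signal on fence->ring and a matching
+ *       wait on dst_ring (e.g. radeon_semaphore_emit_signal/_wait) ...
+ *       radeon_fence_note_sync(fence, dst_ring);
+ *   }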
+ */ +void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring) +{ + struct radeon_fence_driver *dst, *src; + unsigned i; + + if (!fence) { + return; + } + + if (fence->ring == dst_ring) { + return; + } + + /* we are protected by the ring mutex */ + src = &fence->rdev->fence_drv[fence->ring]; + dst = &fence->rdev->fence_drv[dst_ring]; + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (i == dst_ring) { + continue; + } + dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); + } +} + +/** + * radeon_fence_driver_start_ring - make the fence driver + * ready for use on the requested ring. + * + * @rdev: radeon device pointer + * @ring: ring index to start the fence driver on + * + * Make the fence driver ready for processing (all asics). + * Not all asics have all rings, so each asic will only + * start the fence driver on the rings it has. + * Returns 0 for success, errors for failure. + */ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) { uint64_t index; @@ -537,24 +786,49 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring) } rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4]; rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; - radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); + radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring); rdev->fence_drv[ring].initialized = true; dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n", ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr); return 0; } +/** + * radeon_fence_driver_init_ring - init the fence driver + * for the requested ring. + * + * @rdev: radeon device pointer + * @ring: ring index to start the fence driver on + * + * Init the fence driver for the requested ring (all asics). + * Helper function for radeon_fence_driver_init(). + */ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring) { + int i; + rdev->fence_drv[ring].scratch_reg = -1; rdev->fence_drv[ring].cpu_addr = NULL; rdev->fence_drv[ring].gpu_addr = 0; - rdev->fence_drv[ring].seq = 0; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + rdev->fence_drv[ring].sync_seq[i] = 0; atomic64_set(&rdev->fence_drv[ring].last_seq, 0); rdev->fence_drv[ring].last_activity = jiffies; rdev->fence_drv[ring].initialized = false; } +/** + * radeon_fence_driver_init - init the fence driver + * for all possible rings. + * + * @rdev: radeon device pointer + * + * Init the fence driver for all possible rings (all asics). + * Not all asics have all rings, so each asic will only + * start the fence driver on the rings it has using + * radeon_fence_driver_start_ring(). + * Returns 0 for success. + */ int radeon_fence_driver_init(struct radeon_device *rdev) { int ring; @@ -569,6 +843,14 @@ int radeon_fence_driver_init(struct radeon_device *rdev) return 0; } +/** + * radeon_fence_driver_fini - tear down the fence driver + * for all possible rings. + * + * @rdev: radeon device pointer + * + * Tear down the fence driver for all possible rings (all asics). 
+ */ void radeon_fence_driver_fini(struct radeon_device *rdev) { int ring; @@ -595,7 +877,7 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct radeon_device *rdev = dev->dev_private; - int i; + int i, j; for (i = 0; i < RADEON_NUM_RINGS; ++i) { if (!rdev->fence_drv[i].initialized) @@ -604,8 +886,14 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "--- ring %d ---\n", i); seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq)); - seq_printf(m, "Last emitted 0x%016llx\n", - rdev->fence_drv[i].seq); + seq_printf(m, "Last emitted 0x%016llx\n", + rdev->fence_drv[i].sync_seq[i]); + + for (j = 0; j < RADEON_NUM_RINGS; ++j) { + if (i != j && rdev->fence_drv[j].initialized) + seq_printf(m, "Last sync to ring %d 0x%016llx\n", + j, rdev->fence_drv[i].sync_seq[j]); + } } return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 84b648a7ddd8..bb3b7fe05ccd 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -31,8 +31,38 @@ #include "radeon_reg.h" /* + * GART + * The GART (Graphics Aperture Remapping Table) is an aperture + * in the GPU's address space. System pages can be mapped into + * the aperture and look like contiguous pages from the GPU's + * perspective. A page table maps the pages in the aperture + * to the actual backing pages in system memory. + * + * Radeon GPUs support both an internal GART, as described above, + * and AGP. AGP works similarly, but the GART table is configured + * and maintained by the northbridge rather than the driver. + * Radeon hw has a separate AGP aperture that is programmed to + * point to the AGP aperture provided by the northbridge and the + * requests are passed through to the northbridge aperture. + * Both AGP and internal GART can be used at the same time, however + * that is not currently supported by the driver. + * + * This file handles the common internal GART management. + */ + +/* * Common GART table functions. */ +/** + * radeon_gart_table_ram_alloc - allocate system ram for gart page table + * + * @rdev: radeon_device pointer + * + * Allocate system memory for GART page table + * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the + * gart table to be in system memory. + * Returns 0 for success, -ENOMEM for failure. + */ int radeon_gart_table_ram_alloc(struct radeon_device *rdev) { void *ptr; @@ -54,6 +84,15 @@ int radeon_gart_table_ram_alloc(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_table_ram_free - free system ram for gart page table + * + * @rdev: radeon_device pointer + * + * Free system memory for GART page table + * (r1xx-r3xx, non-pcie r4xx, rs400). These asics require the + * gart table to be in system memory. + */ void radeon_gart_table_ram_free(struct radeon_device *rdev) { if (rdev->gart.ptr == NULL) { @@ -73,6 +112,16 @@ void radeon_gart_table_ram_free(struct radeon_device *rdev) rdev->gart.table_addr = 0; } +/** + * radeon_gart_table_vram_alloc - allocate vram for gart page table + * + * @rdev: radeon_device pointer + * + * Allocate video memory for GART page table + * (pcie r4xx, r5xx+). These asics require the + * gart table to be in video memory. + * Returns 0 for success, error for failure. 
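+ *
+ * Illustrative pairing (editor's sketch): the table is allocated once at
+ * init time and (re)pinned at startup/resume before GART is enabled:
+ *
+ *   r = radeon_gart_table_vram_alloc(rdev);
+ *   ...
+ *   r = radeon_gart_table_vram_pin(rdev);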
+ */ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) { int r; @@ -88,6 +137,16 @@ int radeon_gart_table_vram_alloc(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_table_vram_pin - pin gart page table in vram + * + * @rdev: radeon_device pointer + * + * Pin the GART page table in vram so it will not be moved + * by the memory manager (pcie r4xx, r5xx+). These asics require the + * gart table to be in video memory. + * Returns 0 for success, error for failure. + */ int radeon_gart_table_vram_pin(struct radeon_device *rdev) { uint64_t gpu_addr; @@ -110,6 +169,14 @@ int radeon_gart_table_vram_pin(struct radeon_device *rdev) return r; } +/** + * radeon_gart_table_vram_unpin - unpin gart page table in vram + * + * @rdev: radeon_device pointer + * + * Unpin the GART page table in vram (pcie r4xx, r5xx+). + * These asics require the gart table to be in video memory. + */ void radeon_gart_table_vram_unpin(struct radeon_device *rdev) { int r; @@ -126,6 +193,15 @@ void radeon_gart_table_vram_unpin(struct radeon_device *rdev) } } +/** + * radeon_gart_table_vram_free - free gart page table vram + * + * @rdev: radeon_device pointer + * + * Free the video memory used for the GART page table + * (pcie r4xx, r5xx+). These asics require the gart table to + * be in video memory. + */ void radeon_gart_table_vram_free(struct radeon_device *rdev) { if (rdev->gart.robj == NULL) { @@ -135,12 +211,19 @@ void radeon_gart_table_vram_free(struct radeon_device *rdev) radeon_bo_unref(&rdev->gart.robj); } - - - /* * Common gart functions. */ +/** + * radeon_gart_unbind - unbind pages from the gart page table + * + * @rdev: radeon_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to unbind + * + * Unbinds the requested pages from the gart page table and + * replaces them with the dummy page (all asics). + */ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, int pages) { @@ -172,6 +255,19 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, radeon_gart_tlb_flush(rdev); } +/** + * radeon_gart_bind - bind pages into the gart page table + * + * @rdev: radeon_device pointer + * @offset: offset into the GPU's gart aperture + * @pages: number of pages to bind + * @pagelist: pages to bind + * @dma_addr: DMA addresses of pages + * + * Binds the requested pages to the gart page table + * (all asics). + * Returns 0 for success, -EINVAL for failure. + */ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int pages, struct page **pagelist, dma_addr_t *dma_addr) { @@ -203,6 +299,14 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, return 0; } +/** + * radeon_gart_restore - bind all pages in the gart page table + * + * @rdev: radeon_device pointer + * + * Binds all pages in the gart page table (all asics). + * Used to rebuild the gart table on device startup or resume. + */ void radeon_gart_restore(struct radeon_device *rdev) { int i, j, t; @@ -222,6 +326,14 @@ void radeon_gart_restore(struct radeon_device *rdev) radeon_gart_tlb_flush(rdev); } +/** + * radeon_gart_init - init the driver info for managing the gart + * + * @rdev: radeon_device pointer + * + * Allocate the dummy page and init the gart driver info (all asics). + * Returns 0 for success, error for failure. 
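+ *
+ * Once the table is ready, callers populate it with the bind/unbind
+ * helpers above, roughly (illustrative only; the local variable names
+ * are made up):
+ *
+ *   r = radeon_gart_bind(rdev, gtt_offset, num_pages, pages, dma_addrs);
+ *   ...
+ *   radeon_gart_unbind(rdev, gtt_offset, num_pages);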
+ */ int radeon_gart_init(struct radeon_device *rdev) { int r, i; @@ -262,6 +374,13 @@ int radeon_gart_init(struct radeon_device *rdev) return 0; } +/** + * radeon_gart_fini - tear down the driver info for managing the gart + * + * @rdev: radeon_device pointer + * + * Tear down the gart driver info and free the dummy page (all asics). + */ void radeon_gart_fini(struct radeon_device *rdev) { if (rdev->gart.pages && rdev->gart.pages_addr && rdev->gart.ready) { @@ -278,35 +397,104 @@ void radeon_gart_fini(struct radeon_device *rdev) } /* + * GPUVM + * GPUVM is similar to the legacy gart on older asics, however + * rather than there being a single global gart table + * for the entire GPU, there are multiple VM page tables active + * at any given time. The VM page tables can contain a mix of + * vram pages and system memory pages, and system memory pages + * can be mapped as snooped (cached system pages) or unsnooped + * (uncached system pages). + * Each VM has an ID associated with it and there is a page table + * associated with each VMID. When executing a command buffer, + * the kernel tells the ring what VMID to use for that command + * buffer. VMIDs are allocated dynamically as commands are submitted. + * The userspace drivers maintain their own address space and the kernel + * sets up their page tables accordingly when they submit their + * command buffers and a VMID is assigned. + * Cayman/Trinity support up to 8 active VMs at any given time; + * SI supports 16. + */ + +/* * vm helpers * * TODO bind a default page at vm initialization for default address */ + +/** + * radeon_vm_manager_init - init the vm manager + * + * @rdev: radeon_device pointer + * + * Init the vm manager (cayman+). + * Returns 0 for success, error for failure. + */ int radeon_vm_manager_init(struct radeon_device *rdev) { + struct radeon_vm *vm; + struct radeon_bo_va *bo_va; int r; - rdev->vm_manager.enabled = false; + if (!rdev->vm_manager.enabled) { + /* mark first vm as always in use, it's the system one */ + /* allocate enough for 2 full VM pts */ + r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, + rdev->vm_manager.max_pfn * 8 * 2, + RADEON_GEM_DOMAIN_VRAM); + if (r) { + dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", + (rdev->vm_manager.max_pfn * 8) >> 10); + return r; + } - /* mark first vm as always in use, it's the system one */ - /* allocate enough for 2 full VM pts */ - r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager, - rdev->vm_manager.max_pfn * 8 * 2, - RADEON_GEM_DOMAIN_VRAM); - if (r) { - dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n", - (rdev->vm_manager.max_pfn * 8) >> 10); - return r; + r = rdev->vm_manager.funcs->init(rdev); + if (r) + return r; + + rdev->vm_manager.enabled = true; + + r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); + if (r) + return r; } - r = rdev->vm_manager.funcs->init(rdev); - if (r == 0) - rdev->vm_manager.enabled = true; + /* restore page table */ + list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) { + if (vm->id == -1) + continue; - return r; + list_for_each_entry(bo_va, &vm->va, vm_list) { + struct ttm_mem_reg *mem = NULL; + if (bo_va->valid) + mem = &bo_va->bo->tbo.mem; + + bo_va->valid = false; + r = radeon_vm_bo_update_pte(rdev, vm, bo_va->bo, mem); + if (r) { + DRM_ERROR("Failed to update pte for vm %d!\n", vm->id); + } + } + + r = rdev->vm_manager.funcs->bind(rdev, vm, vm->id); + if (r) { + DRM_ERROR("Failed to bind vm %d!\n", vm->id); + } + } + return 0; } -/* cs mutex must be lock */ +/*
global mutex must be locked */ +/** + * radeon_vm_unbind_locked - unbind a specific vm + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Unbind the requested vm (cayman+). + * Wait for use of the VM to finish, then unbind the page table, + * and free the page table memory. + */ static void radeon_vm_unbind_locked(struct radeon_device *rdev, struct radeon_vm *vm) { @@ -317,10 +505,21 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, } /* wait for vm use to end */ - if (vm->fence) { - radeon_fence_wait(vm->fence, false); - radeon_fence_unref(&vm->fence); + while (vm->fence) { + int r; + r = radeon_fence_wait(vm->fence, false); + if (r) + DRM_ERROR("error while waiting for fence: %d\n", r); + if (r == -EDEADLK) { + mutex_unlock(&rdev->vm_manager.lock); + r = radeon_gpu_reset(rdev); + mutex_lock(&rdev->vm_manager.lock); + if (!r) + continue; + } + break; } + radeon_fence_unref(&vm->fence); /* hw unbind */ rdev->vm_manager.funcs->unbind(rdev, vm); @@ -335,39 +534,42 @@ static void radeon_vm_unbind_locked(struct radeon_device *rdev, } } +/** + * radeon_vm_manager_fini - tear down the vm manager + * + * @rdev: radeon_device pointer + * + * Tear down the VM manager (cayman+). + */ void radeon_vm_manager_fini(struct radeon_device *rdev) { - if (rdev->vm_manager.sa_manager.bo == NULL) - return; - radeon_vm_manager_suspend(rdev); - rdev->vm_manager.funcs->fini(rdev); - radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); - rdev->vm_manager.enabled = false; -} - -int radeon_vm_manager_start(struct radeon_device *rdev) -{ - if (rdev->vm_manager.sa_manager.bo == NULL) { - return -EINVAL; - } - return radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager); -} - -int radeon_vm_manager_suspend(struct radeon_device *rdev) -{ struct radeon_vm *vm, *tmp; - radeon_mutex_lock(&rdev->cs_mutex); + if (!rdev->vm_manager.enabled) + return; + + mutex_lock(&rdev->vm_manager.lock); /* unbind all active vm */ list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) { radeon_vm_unbind_locked(rdev, vm); } rdev->vm_manager.funcs->fini(rdev); - radeon_mutex_unlock(&rdev->cs_mutex); - return radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + mutex_unlock(&rdev->vm_manager.lock); + + radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager); + radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager); + rdev->vm_manager.enabled = false; } -/* cs mutex must be lock */ +/* global mutex must be locked */ +/** + * radeon_vm_unbind - locked version of unbind + * + * @rdev: radeon_device pointer + * @vm: vm to unbind + * + * Locked version that wraps radeon_vm_unbind_locked (cayman+). + */ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) { mutex_lock(&vm->mutex); @@ -375,7 +577,19 @@ void radeon_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm) mutex_unlock(&vm->mutex); } -/* cs mutex must be lock & vm mutex must be lock */ +/* global and local mutex must be locked */ +/** + * radeon_vm_bind - bind a page table to a VMID + * + * @rdev: radeon_device pointer + * @vm: vm to bind + * + * Bind the requested vm (cayman+). + * Suballocate memory for the page table, allocate a VMID + * and bind the page table to it, and finally start to populate + * the page table. + * Returns 0 for success, error for failure.
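+ *
+ * Illustrative use (sketch; the CS path is believed to do roughly this
+ * before flushing a VM):
+ *
+ *   mutex_lock(&rdev->vm_manager.lock);
+ *   mutex_lock(&vm->mutex);
+ *   r = radeon_vm_bind(rdev, vm);
+ *   ...
+ *   mutex_unlock(&vm->mutex);
+ *   mutex_unlock(&rdev->vm_manager.lock);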
+ */ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_vm *vm_evict; @@ -438,6 +652,20 @@ retry_id: } /* object have to be reserved */ +/** + * radeon_vm_bo_add - add a bo to a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @offset: requested offset of the buffer in the VM address space + * @flags: attributes of pages (read/write/valid/etc.) + * + * Add @bo into the requested vm (cayman+). + * Add @bo to the list of bos associated with the vm and validate + * the offset requested within the vm address space. + * Returns 0 for success, error for failure. + */ int radeon_vm_bo_add(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, @@ -479,7 +707,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, if (last_pfn > vm->last_pfn) { /* release mutex and lock in right order */ mutex_unlock(&vm->mutex); - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); /* and check again */ if (last_pfn > vm->last_pfn) { @@ -488,7 +716,7 @@ int radeon_vm_bo_add(struct radeon_device *rdev, radeon_vm_unbind_locked(rdev, vm); vm->last_pfn = (last_pfn + align) & ~align; } - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); } head = &vm->va; last_offset = 0; @@ -515,6 +743,17 @@ int radeon_vm_bo_add(struct radeon_device *rdev, return 0; } +/** + * radeon_vm_get_addr - get the physical address of the page + * + * @rdev: radeon_device pointer + * @mem: ttm mem + * @pfn: pfn + * + * Look up the physical address of the page that the pte resolves + * to (cayman+). + * Returns the physical address of the page. + */ static u64 radeon_vm_get_addr(struct radeon_device *rdev, struct ttm_mem_reg *mem, unsigned pfn) @@ -543,7 +782,18 @@ static u64 radeon_vm_get_addr(struct radeon_device *rdev, return addr; } -/* object have to be reserved & cs mutex took & vm mutex took */ +/* object have to be reserved & global and local mutex must be locked */ +/** + * radeon_vm_bo_update_pte - map a bo into the vm page table + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * @mem: ttm mem + * + * Fill in the page table entries for @bo (cayman+). + * Returns 0 for success, -EINVAL for failure. + */ int radeon_vm_bo_update_pte(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo, @@ -564,7 +814,7 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, return -EINVAL; } - if (bo_va->valid) + if (bo_va->valid && mem) return 0; ngpu_pages = radeon_bo_ngpu_pages(bo); @@ -592,20 +842,48 @@ int radeon_vm_bo_update_pte(struct radeon_device *rdev, } /* object have to be reserved */ +/** + * radeon_vm_bo_rmv - remove a bo from a specific vm + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Remove @bo from the requested vm (cayman+). + * Remove @bo from the list of bos associated with the vm and + * remove the ptes for @bo in the page table. + * Returns 0 for success.
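+ *
+ * The bo must be reserved by the caller, as in the gem close path in
+ * this patch:
+ *
+ *   if (radeon_bo_reserve(bo, false) == 0) {
+ *       radeon_vm_bo_rmv(rdev, vm, bo);
+ *       radeon_bo_unreserve(bo);
+ *   }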
+ */ int radeon_vm_bo_rmv(struct radeon_device *rdev, struct radeon_vm *vm, struct radeon_bo *bo) { struct radeon_bo_va *bo_va; + int r; bo_va = radeon_bo_va(bo, vm); if (bo_va == NULL) return 0; - radeon_mutex_lock(&rdev->cs_mutex); + /* wait for va use to end */ + while (bo_va->fence) { + r = radeon_fence_wait(bo_va->fence, false); + if (r) { + DRM_ERROR("error while waiting for fence: %d\n", r); + } + if (r == -EDEADLK) { + r = radeon_gpu_reset(rdev); + if (!r) + continue; + } + break; + } + radeon_fence_unref(&bo_va->fence); + + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_bo_update_pte(rdev, vm, bo, NULL); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); list_del(&bo_va->vm_list); mutex_unlock(&vm->mutex); list_del(&bo_va->bo_list); @@ -614,6 +892,15 @@ int radeon_vm_bo_rmv(struct radeon_device *rdev, return 0; } +/** + * radeon_vm_bo_invalidate - mark the bo as invalid + * + * @rdev: radeon_device pointer + * @vm: requested vm + * @bo: radeon buffer object + * + * Mark @bo as invalid (cayman+). + */ void radeon_vm_bo_invalidate(struct radeon_device *rdev, struct radeon_bo *bo) { @@ -625,6 +912,17 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, } } +/** + * radeon_vm_init - initialize a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Init @vm (cayman+). + * Map the IB pool and any other shared objects into the VM + * by default as it's used by all VMs. + * Returns 0 for success, error for failure. + */ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) { int r; @@ -651,22 +949,34 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) return r; } +/** + * radeon_vm_fini - tear down a vm instance + * + * @rdev: radeon_device pointer + * @vm: requested vm + * + * Tear down @vm (cayman+). 
+ * Unbind the VM and remove all bos from the vm bo list. + */ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) { struct radeon_bo_va *bo_va, *tmp; int r; - radeon_mutex_lock(&rdev->cs_mutex); + mutex_lock(&rdev->vm_manager.lock); mutex_lock(&vm->mutex); radeon_vm_unbind_locked(rdev, vm); - radeon_mutex_unlock(&rdev->cs_mutex); + mutex_unlock(&rdev->vm_manager.lock); - /* remove all bo */ + /* remove all bo; at this point none are busy any more because unbind + * waited for the last vm fence to signal + */ r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); if (!r) { bo_va = radeon_bo_va(rdev->ring_tmp_bo.bo, vm); list_del_init(&bo_va->bo_list); list_del_init(&bo_va->vm_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(rdev->ring_tmp_bo.bo); kfree(bo_va); } @@ -678,6 +988,7 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) r = radeon_bo_reserve(bo_va->bo, false); if (!r) { list_del_init(&bo_va->bo_list); + radeon_fence_unref(&bo_va->fence); radeon_bo_unreserve(bo_va->bo); kfree(bo_va); } diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 21ec9f5653ce..1b57b0058ad6 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -134,36 +134,25 @@ void radeon_gem_object_close(struct drm_gem_object *obj, struct radeon_device *rdev = rbo->rdev; struct radeon_fpriv *fpriv = file_priv->driver_priv; struct radeon_vm *vm = &fpriv->vm; - struct radeon_bo_va *bo_va, *tmp; if (rdev->family < CHIP_CAYMAN) { return; } if (radeon_bo_reserve(rbo, false)) { + dev_err(rdev->dev, "leaking bo va because we fail to reserve bo\n"); return; } - list_for_each_entry_safe(bo_va, tmp, &rbo->va, bo_list) { - if (bo_va->vm == vm) { - /* remove from this vm address space */ - mutex_lock(&vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); - } - } + radeon_vm_bo_rmv(rdev, vm, rbo); radeon_bo_unreserve(rbo); } static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) { if (r == -EDEADLK) { - radeon_mutex_lock(&rdev->cs_mutex); r = radeon_gpu_reset(rdev); if (!r) r = -EAGAIN; - radeon_mutex_unlock(&rdev->cs_mutex); } return r; } @@ -217,12 +206,14 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, uint32_t handle; int r; + down_read(&rdev->exclusive_lock); /* create a gem object to contain this object in */ args->size = roundup(args->size, PAGE_SIZE); r = radeon_gem_object_create(rdev, args->size, args->alignment, args->initial_domain, false, false, &gobj); if (r) { + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } @@ -230,10 +221,12 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, /* drop reference from allocate - handle holds it now */ drm_gem_object_unreference_unlocked(gobj); if (r) { + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(rdev, r); return r; } args->handle = handle; + up_read(&rdev->exclusive_lock); return 0; } @@ -242,6 +235,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, { /* transition the BO to a domain - * just validate the BO into a certain domain */ + struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_set_domain *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -249,10 +243,12 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, /* for now if someone requests domain CPU - * just make sure the buffer is finished with */ +
down_read(&rdev->exclusive_lock); /* just do a BO wait for now */ gobj = drm_gem_object_lookup(dev, filp, args->handle); if (gobj == NULL) { + up_read(&rdev->exclusive_lock); return -ENOENT; } robj = gem_to_radeon_bo(gobj); @@ -260,6 +256,7 @@ int radeon_gem_set_domain_ioctl(struct drm_device *dev, void *data, r = radeon_gem_set_domain(gobj, args->read_domains, args->write_domain); drm_gem_object_unreference_unlocked(gobj); + up_read(&rdev->exclusive_lock); r = radeon_gem_handle_lockup(robj->rdev, r); return r; } diff --git a/drivers/gpu/drm/radeon/radeon_irq_kms.c b/drivers/gpu/drm/radeon/radeon_irq_kms.c index 5df58d1aba06..afaa1727abd2 100644 --- a/drivers/gpu/drm/radeon/radeon_irq_kms.c +++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c @@ -32,6 +32,17 @@ #include "radeon.h" #include "atom.h" +#define RADEON_WAIT_IDLE_TIMEOUT 200 + +/** + * radeon_driver_irq_handler_kms - irq handler for KMS + * + * @DRM_IRQ_ARGS: args + * + * This is the irq handler for the radeon KMS driver (all asics). + * radeon_irq_process is a macro that points to the per-asic + * irq handler callback. + */ irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) { struct drm_device *dev = (struct drm_device *) arg; @@ -43,6 +54,17 @@ irqreturn_t radeon_driver_irq_handler_kms(DRM_IRQ_ARGS) /* * Handle hotplug events outside the interrupt handler proper. */ +/** + * radeon_hotplug_work_func - display hotplug work handler + * + * @work: work struct + * + * This is the hot plug event work handler (all asics). + * The work gets scheduled from the irq handler if there + * was a hot plug interrupt. It walks the connector table + * and calls the hotplug handler for each one, then sends + * a drm hotplug event to alert userspace. + */ static void radeon_hotplug_work_func(struct work_struct *work) { struct radeon_device *rdev = container_of(work, struct radeon_device, @@ -59,61 +81,94 @@ static void radeon_hotplug_work_func(struct work_struct *work) drm_helper_hpd_irq_event(dev); } +/** + * radeon_driver_irq_preinstall_kms - drm irq preinstall callback + * + * @dev: drm dev pointer + * + * Gets the hw ready to enable irqs (all asics). + * This function disables all interrupt sources on the GPU. + */ void radeon_driver_irq_preinstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i; + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); /* Clear bits */ radeon_irq_process(rdev); } +/** + * radeon_driver_irq_postinstall_kms - drm irq postinstall callback + * + * @dev: drm dev pointer + * + * Handles stuff to be done after enabling irqs (all asics). + * Returns 0 on success.
+ */ int radeon_driver_irq_postinstall_kms(struct drm_device *dev) { - struct radeon_device *rdev = dev->dev_private; - unsigned i; - dev->max_vblank_count = 0x001fffff; - for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = true; - radeon_irq_set(rdev); return 0; } +/** + * radeon_driver_irq_uninstall_kms - drm irq uninstall callback + * + * @dev: drm dev pointer + * + * This function disables all interrupt sources on the GPU (all asics). + */ void radeon_driver_irq_uninstall_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; unsigned i; if (rdev == NULL) { return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); /* Disable *all* interrupts */ for (i = 0; i < RADEON_NUM_RINGS; i++) - rdev->irq.sw_int[i] = false; + atomic_set(&rdev->irq.ring_int[i], 0); rdev->irq.gui_idle = false; for (i = 0; i < RADEON_MAX_HPD_PINS; i++) rdev->irq.hpd[i] = false; for (i = 0; i < RADEON_MAX_CRTCS; i++) { rdev->irq.crtc_vblank_int[i] = false; - rdev->irq.pflip[i] = false; + atomic_set(&rdev->irq.pflip[i], 0); rdev->irq.afmt[i] = false; } radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } +/** + * radeon_msi_ok - asic specific msi checks + * + * @rdev: radeon device pointer + * + * Handles asic specific MSI checks to determine if + * MSIs should be enabled on a particular chip (all asics). + * Returns true if MSIs should be enabled, false if MSIs + * should not be enabled. + */ static bool radeon_msi_ok(struct radeon_device *rdev) { /* RV370/RV380 was first asic with MSI support */ @@ -166,17 +221,22 @@ static bool radeon_msi_ok(struct radeon_device *rdev) return true; } +/** + * radeon_irq_kms_init - init driver interrupt info + * + * @rdev: radeon device pointer + * + * Sets up the work irq handlers, vblank init, MSIs, etc. (all asics). + * Returns 0 for success, error for failure. + */ int radeon_irq_kms_init(struct radeon_device *rdev) { - int i; int r = 0; INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func); INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi); - spin_lock_init(&rdev->irq.sw_lock); - for (i = 0; i < rdev->num_crtc; i++) - spin_lock_init(&rdev->irq.pflip_lock[i]); + spin_lock_init(&rdev->irq.lock); r = drm_vblank_init(rdev->ddev, rdev->num_crtc); if (r) { return r; } @@ -201,6 +261,13 @@ int radeon_irq_kms_init(struct radeon_device *rdev) return 0; } +/** + * radeon_irq_kms_fini - tear down driver interrupt info + * + * @rdev: radeon device pointer + * + * Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics). + */ void radeon_irq_kms_fini(struct radeon_device *rdev) { drm_vblank_cleanup(rdev->ddev); @@ -213,31 +280,63 @@ void radeon_irq_kms_fini(struct radeon_device *rdev) flush_work_sync(&rdev->hotplug_work); } +/** + * radeon_irq_kms_sw_irq_get - enable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to enable + * + * Enables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring.
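The sw_irq_get/put pair that follows replaces the old sw_lock/sw_refcount bookkeeping with a bare atomic counter; only the 0-to-1 and 1-to-0 transitions take rdev->irq.lock and reprogram the hardware. A stripped-down sketch of the idiom, with illustrative names:

static atomic_t example_users = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(example_lock);   /* serializes HW programming only */

static void example_irq_get(void)
{
        unsigned long flags;

        if (atomic_inc_return(&example_users) == 1) {
                spin_lock_irqsave(&example_lock, flags);
                /* first user: set the interrupt enable bits */
                spin_unlock_irqrestore(&example_lock, flags);
        }
}

static void example_irq_put(void)
{
        unsigned long flags;

        if (atomic_dec_and_test(&example_users)) {
                spin_lock_irqsave(&example_lock, flags);
                /* last user: clear the interrupt enable bits */
                spin_unlock_irqrestore(&example_lock, flags);
        }
}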
+ */ void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring) { unsigned long irqflags; - spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.sw_refcount[ring] == 1)) { - rdev->irq.sw_int[ring] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } +/** + * radeon_irq_kms_sw_irq_put - disable software interrupt + * + * @rdev: radeon device pointer + * @ring: ring whose interrupt you want to disable + * + * Disables the software interrupt for a specific ring (all asics). + * The software interrupt is generally used to signal a fence on + * a particular ring. + */ void radeon_irq_kms_sw_irq_put(struct radeon_device *rdev, int ring) { unsigned long irqflags; - spin_lock_irqsave(&rdev->irq.sw_lock, irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.sw_refcount[ring] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.sw_refcount[ring] == 0)) { - rdev->irq.sw_int[ring] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.ring_int[ring])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.sw_lock, irqflags); } +/** + * radeon_irq_kms_pflip_irq_get - enable pageflip interrupt + * + * @rdev: radeon device pointer + * @crtc: crtc whose interrupt you want to enable + * + * Enables the pageflip interrupt for a specific crtc (all asics). + * For pageflips we use the vblank interrupt source. + */ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) { unsigned long irqflags; @@ -245,14 +344,25 @@ void radeon_irq_kms_pflip_irq_get(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); - if (rdev->ddev->irq_enabled && (++rdev->irq.pflip_refcount[crtc] == 1)) { - rdev->irq.pflip[crtc] = true; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_inc_return(&rdev->irq.pflip[crtc]) == 1) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } +/** + * radeon_irq_kms_pflip_irq_put - disable pageflip interrupt + * + * @rdev: radeon device pointer + * @crtc: crtc whose interrupt you want to disable + * + * Disables the pageflip interrupt for a specific crtc (all asics). + * For pageflips we use the vblank interrupt source. 
+ */ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) { unsigned long irqflags; @@ -260,12 +370,121 @@ void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc) if (crtc < 0 || crtc >= rdev->num_crtc) return; - spin_lock_irqsave(&rdev->irq.pflip_lock[crtc], irqflags); - BUG_ON(rdev->ddev->irq_enabled && rdev->irq.pflip_refcount[crtc] <= 0); - if (rdev->ddev->irq_enabled && (--rdev->irq.pflip_refcount[crtc] == 0)) { - rdev->irq.pflip[crtc] = false; + if (!rdev->ddev->irq_enabled) + return; + + if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) { + spin_lock_irqsave(&rdev->irq.lock, irqflags); radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } - spin_unlock_irqrestore(&rdev->irq.pflip_lock[crtc], irqflags); } +/** + * radeon_irq_kms_enable_afmt - enable audio format change interrupt + * + * @rdev: radeon device pointer + * @block: afmt block whose interrupt you want to enable + * + * Enables the afmt change interrupt for a specific afmt block (all asics). + */ +void radeon_irq_kms_enable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + +} + +/** + * radeon_irq_kms_disable_afmt - disable audio format change interrupt + * + * @rdev: radeon device pointer + * @block: afmt block whose interrupt you want to disable + * + * Disables the afmt change interrupt for a specific afmt block (all asics). + */ +void radeon_irq_kms_disable_afmt(struct radeon_device *rdev, int block) +{ + unsigned long irqflags; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.afmt[block] = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_enable_hpd - enable hotplug detect interrupt + * + * @rdev: radeon device pointer + * @hpd_mask: mask of hpd pins you want to enable. + * + * Enables the hotplug detect interrupt for a specific hpd pin (all asics). + */ +void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_disable_hpd - disable hotplug detect interrupt + * + * @rdev: radeon device pointer + * @hpd_mask: mask of hpd pins you want to disable. + * + * Disables the hotplug detect interrupt for a specific hpd pin (all asics). + */ +void radeon_irq_kms_disable_hpd(struct radeon_device *rdev, unsigned hpd_mask) +{ + unsigned long irqflags; + int i; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + for (i = 0; i < RADEON_MAX_HPD_PINS; ++i) + rdev->irq.hpd[i] &= !(hpd_mask & (1 << i)); + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); +} + +/** + * radeon_irq_kms_wait_gui_idle - waits for drawing engine to be idle + * + * @rdev: radeon device pointer + * + * Enables the GUI idle interrupt and waits for it to fire (r6xx+). + * This is currently used to make sure the 3D engine is idle for power + * management, but should be replaced with proper fence waits. + * GUI idle interrupts don't work very well on pre-r6xx hw and it also + * does not take into account other aspects of the chip that may be busy. + * DO NOT USE GOING FORWARD.
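For context, the one caller this series leaves standing is the reclocking path in radeon_pm.c (further down in this patch), which uses the helper roughly as below. Since it returns the wait_event_timeout() result, a zero return means the idle interrupt never fired within RADEON_WAIT_IDLE_TIMEOUT:

        /* wait for the 3D engine to drain before touching clocks */
        if (rdev->family >= CHIP_R600 && rdev->irq.installed)
                radeon_irq_kms_wait_gui_idle(rdev);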
+ */ +int radeon_irq_kms_wait_gui_idle(struct radeon_device *rdev) +{ + unsigned long irqflags; + int r; + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = true; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + + r = wait_event_timeout(rdev->irq.idle_queue, radeon_gui_idle(rdev), + msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); + + spin_lock_irqsave(&rdev->irq.lock, irqflags); + rdev->irq.gui_idle = false; + radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; +} diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 5c58d7d90cb2..414b4acf6947 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -29,10 +29,22 @@ #include "drm_sarea.h" #include "radeon.h" #include "radeon_drm.h" +#include "radeon_asic.h" #include <linux/vga_switcheroo.h> #include <linux/slab.h> +/** + * radeon_driver_unload_kms - Main unload function for KMS. + * + * @dev: drm dev pointer + * + * This is the main unload function for KMS (all asics). + * It calls radeon_modeset_fini() to tear down the + * displays, and radeon_device_fini() to tear down + * the rest of the device (CP, writeback, etc.). + * Returns 0 on success. + */ int radeon_driver_unload_kms(struct drm_device *dev) { struct radeon_device *rdev = dev->dev_private; @@ -46,6 +58,19 @@ int radeon_driver_unload_kms(struct drm_device *dev) return 0; } +/** + * radeon_driver_load_kms - Main load function for KMS. + * + * @dev: drm dev pointer + * @flags: device flags + * + * This is the main load function for KMS (all asics). + * It calls radeon_device_init() to set up the non-display + * parts of the chip (asic init, CP, writeback, etc.), and + * radeon_modeset_init() to set up the display parts + * (crtcs, encoders, hotplug detect, etc.). + * Returns 0 on success, error on failure. + */ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) { struct radeon_device *rdev; @@ -96,6 +121,16 @@ out: return r; } +/** + * radeon_set_filp_rights - Set filp right. + * + * @dev: drm dev pointer + * @owner: drm file + * @applier: drm file + * @value: value + * + * Sets the filp rights for the device (all asics). + */ static void radeon_set_filp_rights(struct drm_device *dev, struct drm_file **owner, struct drm_file *applier, @@ -118,20 +153,54 @@ static void radeon_set_filp_rights(struct drm_device *dev, /* * Userspace get information ioctl */ +/** + * radeon_info_ioctl - answer a device specific request. + * + * @rdev: radeon device pointer + * @data: request object + * @filp: drm filp + * + * This function is used to pass device specific parameters to the userspace + * drivers. Examples include: pci device id, pipeline parms, tiling params, + * etc. (all asics). + * Returns 0 on success, -EINVAL on failure. + */ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct radeon_device *rdev = dev->dev_private; - struct drm_radeon_info *info; + struct drm_radeon_info *info = data; struct radeon_mode_info *minfo = &rdev->mode_info; - uint32_t *value_ptr; - uint32_t value; + uint32_t value, *value_ptr; + uint64_t value64, *value_ptr64; struct drm_crtc *crtc; int i, found; - info = data; + /* TIMESTAMP is a 64-bit value, needs special handling. 
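On the userspace side the new request goes through the usual libdrm wrapper; a hedged sketch, assuming the RADEON_INFO_TIMESTAMP define and the struct drm_radeon_info layout from this kernel's radeon_drm.h:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <radeon_drm.h>

/* Read the free-running GPU clock; r6xx+ only, per the kernel check. */
static int radeon_query_timestamp(int fd, uint64_t *ts)
{
        struct drm_radeon_info info;

        memset(&info, 0, sizeof(info));
        info.request = RADEON_INFO_TIMESTAMP;
        info.value = (uintptr_t)ts;     /* kernel copies the u64 through this pointer */
        return drmCommandWriteRead(fd, DRM_RADEON_INFO, &info, sizeof(info));
}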
*/ + if (info->request == RADEON_INFO_TIMESTAMP) { + if (rdev->family >= CHIP_R600) { + value_ptr64 = (uint64_t*)((unsigned long)info->value); + if (rdev->family >= CHIP_TAHITI) { + value64 = si_get_gpu_clock(rdev); + } else { + value64 = r600_get_gpu_clock(rdev); + } + + if (DRM_COPY_TO_USER(value_ptr64, &value64, sizeof(value64))) { + DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__); + return -EFAULT; + } + return 0; + } else { + DRM_DEBUG_KMS("timestamp is r6xx+ only!\n"); + return -EINVAL; + } + } + value_ptr = (uint32_t *)((unsigned long)info->value); - if (DRM_COPY_FROM_USER(&value, value_ptr, sizeof(value))) + if (DRM_COPY_FROM_USER(&value, value_ptr, sizeof(value))) { + DRM_ERROR("copy_from_user %s:%u\n", __func__, __LINE__); return -EFAULT; + } switch (info->request) { case RADEON_INFO_DEVICE_ID: @@ -291,7 +360,7 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } if (DRM_COPY_TO_USER(value_ptr, &value, sizeof(uint32_t))) { - DRM_ERROR("copy_to_user\n"); + DRM_ERROR("copy_to_user %s:%u\n", __func__, __LINE__); return -EFAULT; } return 0; @@ -301,16 +370,40 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /* * Outdated mess for old drm with Xorg being in charge (void function now). */ +/** + * radeon_driver_firstopen_kms - drm callback for first open + * + * @dev: drm dev pointer + * + * Nothing to be done for KMS (all asics). + * Returns 0 on success. + */ int radeon_driver_firstopen_kms(struct drm_device *dev) { return 0; } +/** + * radeon_driver_lastclose_kms - drm callback for last close + * + * @dev: drm dev pointer + * + * Switch vga switcheroo state after last close (all asics). + */ void radeon_driver_lastclose_kms(struct drm_device *dev) { vga_switcheroo_process_delayed_switch(); } +/** + * radeon_driver_open_kms - drm callback for open + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device open, init vm on cayman+ (all asics). + * Returns 0 on success, error on failure. + */ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct radeon_device *rdev = dev->dev_private; @@ -339,6 +432,14 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) return 0; } +/** + * radeon_driver_postclose_kms - drm callback for post close + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device post close, tear down vm on cayman+ (all asics). + */ void radeon_driver_postclose_kms(struct drm_device *dev, struct drm_file *file_priv) { @@ -354,6 +455,15 @@ void radeon_driver_postclose_kms(struct drm_device *dev, } } +/** + * radeon_driver_preclose_kms - drm callback for pre close + * + * @dev: drm dev pointer + * @file_priv: drm file + * + * On device pre close, tear down hyperz and cmask filps on r1xx-r5xx + * (all asics). + */ void radeon_driver_preclose_kms(struct drm_device *dev, struct drm_file *file_priv) { @@ -367,6 +477,15 @@ void radeon_driver_preclose_kms(struct drm_device *dev, /* * VBlank related functions. */ +/** + * radeon_get_vblank_counter_kms - get frame count + * + * @dev: drm dev pointer + * @crtc: crtc to get the frame count from + * + * Gets the frame count on the requested crtc (all asics). + * Returns frame count on success, -EINVAL on failure.
+ */ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; @@ -379,34 +498,70 @@ u32 radeon_get_vblank_counter_kms(struct drm_device *dev, int crtc) return radeon_get_vblank_counter(rdev, crtc); } +/** + * radeon_enable_vblank_kms - enable vblank interrupt + * + * @dev: drm dev pointer + * @crtc: crtc to enable vblank interrupt for + * + * Enable the interrupt on the requested crtc (all asics). + * Returns 0 on success, -EINVAL on failure. + */ int radeon_enable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; + int r; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return -EINVAL; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = true; - - return radeon_irq_set(rdev); + r = radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); + return r; } +/** + * radeon_disable_vblank_kms - disable vblank interrupt + * + * @dev: drm dev pointer + * @crtc: crtc to disable vblank interrupt for + * + * Disable the interrupt on the requested crtc (all asics). + */ void radeon_disable_vblank_kms(struct drm_device *dev, int crtc) { struct radeon_device *rdev = dev->dev_private; + unsigned long irqflags; if (crtc < 0 || crtc >= rdev->num_crtc) { DRM_ERROR("Invalid crtc %d\n", crtc); return; } + spin_lock_irqsave(&rdev->irq.lock, irqflags); rdev->irq.crtc_vblank_int[crtc] = false; - radeon_irq_set(rdev); + spin_unlock_irqrestore(&rdev->irq.lock, irqflags); } +/** + * radeon_get_vblank_timestamp_kms - get vblank timestamp + * + * @dev: drm dev pointer + * @crtc: crtc to get the timestamp for + * @max_error: max error + * @vblank_time: time value + * @flags: flags passed to the driver + * + * Gets the timestamp on the requested crtc based on the + * scanout position (all asics). + * Returns positive status flags on success, negative error on failure. + */ int radeon_get_vblank_timestamp_kms(struct drm_device *dev, int crtc, int *max_error, struct timeval *vblank_time, diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c index 210317c7045e..94b4a1c12893 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c @@ -990,7 +990,7 @@ static void radeon_set_pll(struct drm_crtc *crtc, struct drm_display_mode *mode) } static bool radeon_crtc_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { if (!radeon_crtc_scaling_mode_fixup(crtc, mode, adjusted_mode)) @@ -1025,9 +1025,11 @@ static int radeon_crtc_mode_set(struct drm_crtc *crtc, static void radeon_crtc_prepare(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_crtc *crtci; + radeon_crtc->in_mode_set = true; /* * The hardware wedges sometimes if you reconfigure one CRTC * whilst another is running (see fdo bug #24611).
@@ -1038,6 +1040,7 @@ static void radeon_crtc_prepare(struct drm_crtc *crtc) static void radeon_crtc_commit(struct drm_crtc *crtc) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct drm_device *dev = crtc->dev; struct drm_crtc *crtci; @@ -1048,6 +1051,7 @@ static void radeon_crtc_commit(struct drm_crtc *crtc) if (crtci->enabled) radeon_crtc_dpms(crtci, DRM_MODE_DPMS_ON); } + radeon_crtc->in_mode_set = false; } static const struct drm_crtc_helper_funcs legacy_helper_funcs = { diff --git a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c index a0c82229e8f0..670e9910f869 100644 --- a/drivers/gpu/drm/radeon/radeon_legacy_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_legacy_encoders.c @@ -244,7 +244,7 @@ static void radeon_legacy_lvds_mode_set(struct drm_encoder *encoder, } static bool radeon_legacy_mode_fixup(struct drm_encoder *encoder, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 5b10ffd7bb2f..d56978949f34 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -275,6 +275,7 @@ struct radeon_crtc { u16 lut_r[256], lut_g[256], lut_b[256]; bool enabled; bool can_tile; + bool in_mode_set; uint32_t crtc_offset; struct drm_gem_object *cursor_bo; uint64_t cursor_addr; @@ -488,7 +489,7 @@ extern void radeon_connector_hotplug(struct drm_connector *connector); extern int radeon_dp_mode_valid_helper(struct drm_connector *connector, struct drm_display_mode *mode); extern void radeon_dp_set_link_config(struct drm_connector *connector, - struct drm_display_mode *mode); + const struct drm_display_mode *mode); extern void radeon_dp_link_train(struct drm_encoder *encoder, struct drm_connector *connector); extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector); @@ -678,7 +679,7 @@ void radeon_enc_destroy(struct drm_encoder *encoder); void radeon_copy_fb(struct drm_device *dev, struct drm_gem_object *dst_obj); void radeon_combios_asic_init(struct drm_device *dev); bool radeon_crtc_scaling_mode_fixup(struct drm_crtc *crtc, - struct drm_display_mode *mode, + const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); void radeon_panel_mode_fixup(struct drm_encoder *encoder, struct drm_display_mode *adjusted_mode); diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 830f1a7b486f..9024e7222839 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -52,11 +52,7 @@ void radeon_bo_clear_va(struct radeon_bo *bo) list_for_each_entry_safe(bo_va, tmp, &bo->va, bo_list) { /* remove from all vm address space */ - mutex_lock(&bo_va->vm->mutex); - list_del(&bo_va->vm_list); - mutex_unlock(&bo_va->vm->mutex); - list_del(&bo_va->bo_list); - kfree(bo_va); + radeon_vm_bo_rmv(bo->rdev, bo_va->vm, bo); } } @@ -115,9 +111,7 @@ int radeon_bo_create(struct radeon_device *rdev, size = ALIGN(size, PAGE_SIZE); - if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; - } + rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; if (kernel) { type = ttm_bo_type_kernel; } else if (sg) { @@ -154,11 +148,11 @@ retry: INIT_LIST_HEAD(&bo->va); radeon_ttm_placement_from_domain(bo, domain); /* Kernel allocation are 
uninterruptible */ - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, &bo->placement, page_align, 0, !kernel, NULL, acc_size, sg, &radeon_ttm_bo_destroy); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (unlikely(r != 0)) { if (r != -ERESTARTSYS) { if (domain == RADEON_GEM_DOMAIN_VRAM) { @@ -219,9 +213,9 @@ void radeon_bo_unref(struct radeon_bo **bo) return; rdev = (*bo)->rdev; tbo = &((*bo)->tbo); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); ttm_bo_unref(&tbo); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); if (tbo == NULL) *bo = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index 5b37e283ec38..7ae606600107 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -34,7 +34,6 @@ #define RADEON_IDLE_LOOP_MS 100 #define RADEON_RECLOCK_DELAY_MS 200 #define RADEON_WAIT_VBLANK_TIMEOUT 200 -#define RADEON_WAIT_IDLE_TIMEOUT 200 static const char *radeon_pm_state_type_name[5] = { "Default", @@ -251,21 +250,14 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) return; mutex_lock(&rdev->ddev->struct_mutex); - mutex_lock(&rdev->vram_mutex); + down_write(&rdev->pm.mclk_lock); mutex_lock(&rdev->ring_lock); /* gui idle int has issues on older chips it seems */ if (rdev->family >= CHIP_R600) { if (rdev->irq.installed) { - /* wait for GPU idle */ - rdev->pm.gui_idle = false; - rdev->irq.gui_idle = true; - radeon_irq_set(rdev); - wait_event_interruptible_timeout( - rdev->irq.idle_queue, rdev->pm.gui_idle, - msecs_to_jiffies(RADEON_WAIT_IDLE_TIMEOUT)); - rdev->irq.gui_idle = false; - radeon_irq_set(rdev); + /* wait for GPU to become idle */ + radeon_irq_kms_wait_gui_idle(rdev); } } else { struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]; @@ -303,7 +295,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) rdev->pm.dynpm_planned_action = DYNPM_ACTION_NONE; mutex_unlock(&rdev->ring_lock); - mutex_unlock(&rdev->vram_mutex); + up_write(&rdev->pm.mclk_lock); mutex_unlock(&rdev->ddev->struct_mutex); } diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 983658c91358..43c431a2686d 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -35,47 +35,97 @@ #include "atom.h" /* - * IB. + * IB + * IBs (Indirect Buffers) are areas of GPU accessible memory where + * commands are stored. You can put a pointer to the IB in the + * command ring and the hw will fetch the commands from the IB + * and execute them. Generally userspace acceleration drivers + * produce command buffers which are sent to the kernel and + * put in IBs for execution by the requested ring. */ int radeon_debugfs_sa_init(struct radeon_device *rdev); +/** + * radeon_ib_get - request an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ring: ring index the IB is associated with + * @ib: IB object returned + * @size: requested IB size + * + * Request an IB (all asics). IBs are allocated using the + * suballocator. + * Returns 0 on success, error on failure.
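Taken together, the reworked IB API makes a typical submission look roughly like this; ring index, packet contents and error handling are illustrative only:

        struct radeon_ib ib;
        int r;

        r = radeon_ib_get(rdev, RADEON_RING_TYPE_GFX_INDEX, &ib, 64);
        if (r)
                return r;

        ib.ptr[0] = 0x80000000;         /* placeholder NOP dword */
        ib.length_dw = 1;
        /* radeon_ib_get() cleared ib.sync_to[]; fill in fences from
         * other rings this work depends on, if any */

        r = radeon_ib_schedule(rdev, &ib, NULL); /* no const IB here */
        if (!r)
                r = radeon_fence_wait(ib.fence, false);
        radeon_ib_free(rdev, &ib);
        return r;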
+ */ int radeon_ib_get(struct radeon_device *rdev, int ring, struct radeon_ib *ib, unsigned size) { - int r; + int i, r; r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true); if (r) { dev_err(rdev->dev, "failed to get a new IB (%d)\n", r); return r; } - r = radeon_fence_create(rdev, &ib->fence, ring); + + r = radeon_semaphore_create(rdev, &ib->semaphore); if (r) { - dev_err(rdev->dev, "failed to create fence for new IB (%d)\n", r); - radeon_sa_bo_free(rdev, &ib->sa_bo, NULL); return r; } + ib->ring = ring; + ib->fence = NULL; ib->ptr = radeon_sa_bo_cpu_addr(ib->sa_bo); ib->gpu_addr = radeon_sa_bo_gpu_addr(ib->sa_bo); ib->vm_id = 0; ib->is_const_ib = false; - ib->semaphore = NULL; + for (i = 0; i < RADEON_NUM_RINGS; ++i) + ib->sync_to[i] = NULL; return 0; } +/** + * radeon_ib_free - free an IB (Indirect Buffer) + * + * @rdev: radeon_device pointer + * @ib: IB object to free + * + * Free an IB (all asics). + */ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib *ib) { - radeon_semaphore_free(rdev, ib->semaphore, ib->fence); + radeon_semaphore_free(rdev, &ib->semaphore, ib->fence); radeon_sa_bo_free(rdev, &ib->sa_bo, ib->fence); radeon_fence_unref(&ib->fence); } -int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) +/** + * radeon_ib_schedule - schedule an IB (Indirect Buffer) on the ring + * + * @rdev: radeon_device pointer + * @ib: IB object to schedule + * @const_ib: Const IB to schedule (SI only) + * + * Schedule an IB on the associated ring (all asics). + * Returns 0 on success, error on failure. + * + * On SI, there are two parallel engines fed from the primary ring, + * the CE (Constant Engine) and the DE (Drawing Engine). Since + * resource descriptors have moved to memory, the CE allows you to + * prime the caches while the DE is updating register state so that + * the resource descriptors will be already in cache when the draw is + * processed. To accomplish this, the userspace driver submits two + * IBs, one for the CE and one for the DE. If there is a CE IB (called + * a CONST_IB), it will be put on the ring prior to the DE IB. Prior + * to SI there was just a DE IB. + */ +int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib, + struct radeon_ib *const_ib) { - struct radeon_ring *ring = &rdev->ring[ib->fence->ring]; - int r = 0; + struct radeon_ring *ring = &rdev->ring[ib->ring]; + bool need_sync = false; + int i, r = 0; if (!ib->length_dw || !ring->ready) { /* TODO: Nothings in the ib we should report. 
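The CS ioctl is the caller that actually exercises the const_ib parameter; sketched below under the assumption that the parser embeds both IBs the way this series restructures it (parser here is a hypothetical radeon_cs_parser):

        if (rdev->family >= CHIP_TAHITI && parser->const_ib.length_dw) {
                /* CE IB goes on the ring first; one fence covers both */
                r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
        } else {
                r = radeon_ib_schedule(rdev, &parser->ib, NULL);
        }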
*/ @@ -84,17 +134,51 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) } /* 64 dwords should be enough for fence too */ - r = radeon_ring_lock(rdev, ring, 64); + r = radeon_ring_lock(rdev, ring, 64 + RADEON_NUM_RINGS * 8); if (r) { dev_err(rdev->dev, "scheduling IB failed (%d).\n", r); return r; } - radeon_ring_ib_execute(rdev, ib->fence->ring, ib); - radeon_fence_emit(rdev, ib->fence); + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + struct radeon_fence *fence = ib->sync_to[i]; + if (radeon_fence_need_sync(fence, ib->ring)) { + need_sync = true; + radeon_semaphore_sync_rings(rdev, ib->semaphore, + fence->ring, ib->ring); + radeon_fence_note_sync(fence, ib->ring); + } + } + /* immediately free semaphore when we don't need to sync */ + if (!need_sync) { + radeon_semaphore_free(rdev, &ib->semaphore, NULL); + } + if (const_ib) { + radeon_ring_ib_execute(rdev, const_ib->ring, const_ib); + radeon_semaphore_free(rdev, &const_ib->semaphore, NULL); + } + radeon_ring_ib_execute(rdev, ib->ring, ib); + r = radeon_fence_emit(rdev, &ib->fence, ib->ring); + if (r) { + dev_err(rdev->dev, "failed to emit fence for new IB (%d)\n", r); + radeon_ring_unlock_undo(rdev, ring); + return r; + } + if (const_ib) { + const_ib->fence = radeon_fence_ref(ib->fence); + } radeon_ring_unlock_commit(rdev, ring); return 0; } +/** + * radeon_ib_pool_init - Init the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Initialize the suballocator to manage a pool of memory + * for use as IBs (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ib_pool_init(struct radeon_device *rdev) { int r; @@ -108,6 +192,12 @@ int radeon_ib_pool_init(struct radeon_device *rdev) if (r) { return r; } + + r = radeon_sa_bo_manager_start(rdev, &rdev->ring_tmp_bo); + if (r) { + return r; + } + rdev->ib_pool_ready = true; if (radeon_debugfs_sa_init(rdev)) { dev_err(rdev->dev, "failed to register debugfs file for SA\n"); @@ -115,24 +205,33 @@ int radeon_ib_pool_init(struct radeon_device *rdev) return 0; } +/** + * radeon_ib_pool_fini - Free the IB (Indirect Buffer) pool + * + * @rdev: radeon_device pointer + * + * Tear down the suballocator managing the pool of memory + * for use as IBs (all asics). + */ void radeon_ib_pool_fini(struct radeon_device *rdev) { if (rdev->ib_pool_ready) { + radeon_sa_bo_manager_suspend(rdev, &rdev->ring_tmp_bo); radeon_sa_bo_manager_fini(rdev, &rdev->ring_tmp_bo); rdev->ib_pool_ready = false; } } -int radeon_ib_pool_start(struct radeon_device *rdev) -{ - return radeon_sa_bo_manager_start(rdev, &rdev->ring_tmp_bo); -} - -int radeon_ib_pool_suspend(struct radeon_device *rdev) -{ - return radeon_sa_bo_manager_suspend(rdev, &rdev->ring_tmp_bo); -} - +/** + * radeon_ib_ring_tests - test IBs on the rings + * + * @rdev: radeon_device pointer + * + * Test an IB (Indirect Buffer) on each ring. + * If the test fails, disable the ring. + * Returns 0 on success, error if the primary GFX ring + * IB test fails. + */ int radeon_ib_ring_tests(struct radeon_device *rdev) { unsigned i; @@ -164,10 +263,28 @@ int radeon_ib_ring_tests(struct radeon_device *rdev) } /* - * Ring. + * Rings + * Most engines on the GPU are fed via ring buffers. Ring + * buffers are areas of GPU accessible memory that the host + * writes commands into and the GPU reads commands out of. + * There is a rptr (read pointer) that determines where the + * GPU is currently reading, and a wptr (write pointer) + * which determines where the host has written. When the + * pointers are equal, the ring is idle. 
When the host + * writes commands to the ring buffer, it increments the + * wptr. The GPU then starts fetching commands and executes + * them until the pointers are equal again. */ int radeon_debugfs_ring_init(struct radeon_device *rdev, struct radeon_ring *ring); +/** + * radeon_ring_write - write a value to the ring + * + * @ring: radeon_ring structure holding ring information + * @v: dword (dw) value to write + * + * Write a value to the requested ring buffer (all asics). + */ void radeon_ring_write(struct radeon_ring *ring, uint32_t v) { #if DRM_DEBUG_CODE @@ -181,21 +298,37 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v) ring->ring_free_dw--; } -int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *ring) +/** + * radeon_ring_supports_scratch_reg - check if the ring supports + * writing to scratch registers + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Check if a specific ring supports writing to scratch registers (all asics). + * Returns true if the ring supports writing to scratch regs, false if not. + */ +bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev, + struct radeon_ring *ring) { - /* r1xx-r5xx only has CP ring */ - if (rdev->family < CHIP_R600) - return RADEON_RING_TYPE_GFX_INDEX; - - if (rdev->family >= CHIP_CAYMAN) { - if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]) - return CAYMAN_RING_TYPE_CP1_INDEX; - else if (ring == &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]) - return CAYMAN_RING_TYPE_CP2_INDEX; + switch (ring->idx) { + case RADEON_RING_TYPE_GFX_INDEX: + case CAYMAN_RING_TYPE_CP1_INDEX: + case CAYMAN_RING_TYPE_CP2_INDEX: + return true; + default: + return false; } - return RADEON_RING_TYPE_GFX_INDEX; } +/** + * radeon_ring_free_size - update the free size + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Update the free dw slots in the ring buffer (all asics). + */ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) { u32 rptr; @@ -214,7 +347,16 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring) } } - +/** + * radeon_ring_alloc - allocate space on the ring buffer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Allocate @ndw dwords in the ring buffer (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; @@ -227,7 +369,7 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi if (ndw < ring->ring_free_dw) { break; } - r = radeon_fence_wait_next_locked(rdev, radeon_ring_index(rdev, ring)); + r = radeon_fence_wait_next_locked(rdev, ring->idx); if (r) return r; } @@ -236,6 +378,17 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi return 0; } +/** + * radeon_ring_lock - lock the ring and allocate space on it + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ndw: number of dwords to allocate in the ring buffer + * + * Lock the ring and allocate @ndw dwords in the ring buffer + * (all asics). + * Returns 0 on success, error on failure. 
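The canonical produce-and-kick sequence built from these helpers, for reference (a sketch; real callers emit actual packets rather than NOPs):

        r = radeon_ring_lock(rdev, ring, 16);
        if (r)
                return r;
        radeon_ring_write(ring, ring->nop);     /* emit packets here */
        radeon_ring_unlock_commit(rdev, ring);  /* pad, bump wptr, kick the HW */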
+ */ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ndw) { int r; @@ -249,15 +402,20 @@ int radeon_ring_lock(struct radeon_device *rdev, struct radeon_ring *ring, unsig return 0; } +/** + * radeon_ring_commit - tell the GPU to execute the new + * commands on the ring buffer + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Update the wptr (write pointer) to tell the GPU to + * execute new commands on the ring buffer (all asics). + */ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) { - unsigned count_dw_pad; - unsigned i; - /* We pad to match fetch size */ - count_dw_pad = (ring->align_mask + 1) - - (ring->wptr & ring->align_mask); - for (i = 0; i < count_dw_pad; i++) { + while (ring->wptr & ring->align_mask) { radeon_ring_write(ring, ring->nop); } DRM_MEMORYBARRIER(); @@ -265,23 +423,55 @@ void radeon_ring_commit(struct radeon_device *rdev, struct radeon_ring *ring) (void)RREG32(ring->wptr_reg); } +/** + * radeon_ring_unlock_commit - tell the GPU to execute the new + * commands on the ring buffer and unlock it + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Call radeon_ring_commit() then unlock the ring (all asics). + */ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *ring) { radeon_ring_commit(rdev, ring); mutex_unlock(&rdev->ring_lock); } +/** + * radeon_ring_undo - reset the wptr + * + * @ring: radeon_ring structure holding ring information + * + * Reset the driver's copy of the wptr (all asics). + */ void radeon_ring_undo(struct radeon_ring *ring) { ring->wptr = ring->wptr_old; } +/** + * radeon_ring_unlock_undo - reset the wptr and unlock the ring + * + * @ring: radeon_ring structure holding ring information + * + * Call radeon_ring_undo() then unlock the ring (all asics). + */ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *ring) { radeon_ring_undo(ring); mutex_unlock(&rdev->ring_lock); } +/** + * radeon_ring_force_activity - add some nop packets to the ring + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Add some nop packets to the ring to force activity (all asics). + * Used for lockup detection to see if the rptr is advancing. + */ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring) { int r; @@ -296,6 +486,13 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring * } } +/** + * radeon_ring_lockup_update - update lockup variables + * + * @ring: radeon_ring structure holding ring information + * + * Update the last rptr value and timestamp (all asics). + */ void radeon_ring_lockup_update(struct radeon_ring *ring) { ring->last_rptr = ring->rptr; @@ -349,6 +546,116 @@ bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *rin return false; } +/** + * radeon_ring_backup - Back up the content of a ring + * + * @rdev: radeon_device pointer + * @ring: the ring we want to back up + * + * Saves all unprocessed commits from a ring, returns the number of dwords saved.
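The intended consumer of this pair is the GPU reset path: save whatever the CP never fetched, reinitialize the ASIC, then replay. A sketch of the pairing (the reset step in the middle is assumed):

        unsigned ring_size;
        uint32_t *ring_data;

        ring_size = radeon_ring_backup(rdev, ring, &ring_data);
        /* ... hard reset and re-init the ASIC here ... */
        r = radeon_ring_restore(rdev, ring, ring_size, ring_data);
        /* radeon_ring_restore() frees ring_data; a size of 0 is a no-op */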
+ */ +unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring, + uint32_t **data) +{ + unsigned size, ptr, i; + + /* just in case lock the ring */ + mutex_lock(&rdev->ring_lock); + *data = NULL; + + if (ring->ring_obj == NULL) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* it doesn't make sense to save anything if all fences are signaled */ + if (!radeon_fence_count_emitted(rdev, ring->idx)) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* calculate the number of dw on the ring */ + if (ring->rptr_save_reg) + ptr = RREG32(ring->rptr_save_reg); + else if (rdev->wb.enabled) + ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); + else { + /* no way to read back the next rptr */ + mutex_unlock(&rdev->ring_lock); + return 0; + } + + size = ring->wptr + (ring->ring_size / 4); + size -= ptr; + size &= ring->ptr_mask; + if (size == 0) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + + /* and then save the content of the ring */ + *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); + if (!*data) { + mutex_unlock(&rdev->ring_lock); + return 0; + } + for (i = 0; i < size; ++i) { + (*data)[i] = ring->ring[ptr++]; + ptr &= ring->ptr_mask; + } + + mutex_unlock(&rdev->ring_lock); + return size; +} + +/** + * radeon_ring_restore - append saved commands to the ring again + * + * @rdev: radeon_device pointer + * @ring: ring to append commands to + * @size: number of dwords we want to write + * @data: saved commands + * + * Allocates space on the ring and restore the previously saved commands. + */ +int radeon_ring_restore(struct radeon_device *rdev, struct radeon_ring *ring, + unsigned size, uint32_t *data) +{ + int i, r; + + if (!size || !data) + return 0; + + /* restore the saved ring content */ + r = radeon_ring_lock(rdev, ring, size); + if (r) + return r; + + for (i = 0; i < size; ++i) { + radeon_ring_write(ring, data[i]); + } + + radeon_ring_unlock_commit(rdev, ring); + kfree(data); + return 0; +} + +/** + * radeon_ring_init - init driver ring struct. + * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * @ring_size: size of the ring + * @rptr_offs: offset of the rptr writeback location in the WB buffer + * @rptr_reg: MMIO offset of the rptr register + * @wptr_reg: MMIO offset of the wptr register + * @ptr_reg_shift: bit offset of the rptr/wptr values + * @ptr_reg_mask: bit mask of the rptr/wptr values + * @nop: nop packet for this ring + * + * Initialize the driver information for the selected ring (all asics). + * Returns 0 on success, error on failure. + */ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsigned ring_size, unsigned rptr_offs, unsigned rptr_reg, unsigned wptr_reg, u32 ptr_reg_shift, u32 ptr_reg_mask, u32 nop) @@ -391,12 +698,26 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig } ring->ptr_mask = (ring->ring_size / 4) - 1; ring->ring_free_dw = ring->ring_size / 4; + if (rdev->wb.enabled) { + u32 index = RADEON_WB_RING0_NEXT_RPTR + (ring->idx * 4); + ring->next_rptr_gpu_addr = rdev->wb.gpu_addr + index; + ring->next_rptr_cpu_addr = &rdev->wb.wb[index/4]; + } if (radeon_debugfs_ring_init(rdev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } + radeon_ring_lockup_update(ring); return 0; } +/** + * radeon_ring_fini - tear down the driver ring struct. 
+ * + * @rdev: radeon_device pointer + * @ring: radeon_ring structure holding ring information + * + * Tear down the driver information for the selected ring (all asics). + */ void radeon_ring_fini(struct radeon_device *rdev, struct radeon_ring *ring) { int r; @@ -438,6 +759,10 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data) count = (ring->ring_size / 4) - ring->ring_free_dw; seq_printf(m, "wptr(0x%04x): 0x%08x\n", ring->wptr_reg, RREG32(ring->wptr_reg)); seq_printf(m, "rptr(0x%04x): 0x%08x\n", ring->rptr_reg, RREG32(ring->rptr_reg)); + if (ring->rptr_save_reg) { + seq_printf(m, "rptr next(0x%04x): 0x%08x\n", ring->rptr_save_reg, + RREG32(ring->rptr_save_reg)); + } seq_printf(m, "driver's copy of the wptr: 0x%08x\n", ring->wptr); seq_printf(m, "driver's copy of the rptr: 0x%08x\n", ring->rptr); seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c index 32059b745728..4e771240fdd0 100644 --- a/drivers/gpu/drm/radeon/radeon_sa.c +++ b/drivers/gpu/drm/radeon/radeon_sa.c @@ -54,7 +54,7 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev, { int i, r; - spin_lock_init(&sa_manager->lock); + init_waitqueue_head(&sa_manager->wq); sa_manager->bo = NULL; sa_manager->size = size; sa_manager->domain = domain; @@ -211,6 +211,39 @@ static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager, return false; } +/** + * radeon_sa_event - Check if we can stop waiting + * + * @sa_manager: pointer to the sa_manager + * @size: number of bytes we want to allocate + * @align: alignment we need to match + * + * Check if either there is a fence we can wait for or + * enough free memory to satisfy the allocation directly + */ +static bool radeon_sa_event(struct radeon_sa_manager *sa_manager, + unsigned size, unsigned align) +{ + unsigned soffset, eoffset, wasted; + int i; + + for (i = 0; i < RADEON_NUM_RINGS; ++i) { + if (!list_empty(&sa_manager->flist[i])) { + return true; + } + } + + soffset = radeon_sa_bo_hole_soffset(sa_manager); + eoffset = radeon_sa_bo_hole_eoffset(sa_manager); + wasted = (align - (soffset % align)) % align; + + if ((eoffset - soffset) >= (size + wasted)) { + return true; + } + + return false; +} + static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, struct radeon_fence **fences, unsigned *tries) @@ -297,8 +330,8 @@ int radeon_sa_bo_new(struct radeon_device *rdev, INIT_LIST_HEAD(&(*sa_bo)->olist); INIT_LIST_HEAD(&(*sa_bo)->flist); - spin_lock(&sa_manager->lock); - do { + spin_lock(&sa_manager->wq.lock); + while(1) { for (i = 0; i < RADEON_NUM_RINGS; ++i) { fences[i] = NULL; tries[i] = 0; @@ -309,30 +342,34 @@ int radeon_sa_bo_new(struct radeon_device *rdev, if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo, size, align)) { - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); return 0; } /* see if we can skip over some allocations */ } while (radeon_sa_bo_next_hole(sa_manager, fences, tries)); - if (block) { - spin_unlock(&sa_manager->lock); - r = radeon_fence_wait_any(rdev, fences, false); - spin_lock(&sa_manager->lock); - if (r) { - /* if we have nothing to wait for we - are practically out of memory */ - if (r == -ENOENT) { - r = -ENOMEM; - } - goto out_err; - } + if (!block) { + break; + } + + spin_unlock(&sa_manager->wq.lock); + r = radeon_fence_wait_any(rdev, fences, false); + spin_lock(&sa_manager->wq.lock); + /* if we have nothing to wait for block */ + if (r == -ENOENT) { + r = wait_event_interruptible_locked( + 
sa_manager->wq, + radeon_sa_event(sa_manager, size, align) + ); + } + if (r) { + goto out_err; } - } while (block); + }; out_err: - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); kfree(*sa_bo); *sa_bo = NULL; return r; @@ -348,15 +385,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, } sa_manager = (*sa_bo)->manager; - spin_lock(&sa_manager->lock); - if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { + spin_lock(&sa_manager->wq.lock); + if (fence && !radeon_fence_signaled(fence)) { (*sa_bo)->fence = radeon_fence_ref(fence); list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[fence->ring]); } else { radeon_sa_bo_remove_locked(*sa_bo); } - spin_unlock(&sa_manager->lock); + wake_up_all_locked(&sa_manager->wq); + spin_unlock(&sa_manager->wq.lock); *sa_bo = NULL; } @@ -366,7 +404,7 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, { struct radeon_sa_bo *i; - spin_lock(&sa_manager->lock); + spin_lock(&sa_manager->wq.lock); list_for_each_entry(i, &sa_manager->olist, olist) { if (&i->olist == sa_manager->hole) { seq_printf(m, ">"); @@ -381,6 +419,6 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, } seq_printf(m, "\n"); } - spin_unlock(&sa_manager->lock); + spin_unlock(&sa_manager->wq.lock); } #endif diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index e2ace5dce117..7cc78de6ddc3 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -68,70 +68,49 @@ void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, radeon_semaphore_ring_emit(rdev, ring, &rdev->ring[ring], semaphore, true); } +/* caller must hold ring lock */ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, - bool sync_to[RADEON_NUM_RINGS], - int dst_ring) + int signaler, int waiter) { - int i = 0, r; + int r; - mutex_lock(&rdev->ring_lock); - r = radeon_ring_alloc(rdev, &rdev->ring[dst_ring], RADEON_NUM_RINGS * 8); - if (r) { - goto error; + /* no need to signal and wait on the same ring */ + if (signaler == waiter) { + return 0; } - for (i = 0; i < RADEON_NUM_RINGS; ++i) { - /* no need to sync to our own or unused rings */ - if (!sync_to[i] || i == dst_ring) - continue; - - /* prevent GPU deadlocks */ - if (!rdev->ring[i].ready) { - dev_err(rdev->dev, "Trying to sync to a disabled ring!"); - r = -EINVAL; - goto error; - } - - r = radeon_ring_alloc(rdev, &rdev->ring[i], 8); - if (r) { - goto error; - } - - radeon_semaphore_emit_signal(rdev, i, semaphore); - radeon_semaphore_emit_wait(rdev, dst_ring, semaphore); + /* prevent GPU deadlocks */ + if (!rdev->ring[signaler].ready) { + dev_err(rdev->dev, "Trying to sync to a disabled ring!"); + return -EINVAL; + } - radeon_ring_commit(rdev, &rdev->ring[i]); + r = radeon_ring_alloc(rdev, &rdev->ring[signaler], 8); + if (r) { + return r; } + radeon_semaphore_emit_signal(rdev, signaler, semaphore); + radeon_ring_commit(rdev, &rdev->ring[signaler]); - radeon_ring_commit(rdev, &rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); + /* we assume caller has already allocated space on waiters ring */ + radeon_semaphore_emit_wait(rdev, waiter, semaphore); return 0; - -error: - /* unlock all locks taken so far */ - for (--i; i >= 0; --i) { - if (sync_to[i] || i == dst_ring) { - radeon_ring_undo(&rdev->ring[i]); - } - } - radeon_ring_undo(&rdev->ring[dst_ring]); - mutex_unlock(&rdev->ring_lock); - return r; } void 
radeon_semaphore_free(struct radeon_device *rdev, - struct radeon_semaphore *semaphore, + struct radeon_semaphore **semaphore, struct radeon_fence *fence) { - if (semaphore == NULL) { + if (semaphore == NULL || *semaphore == NULL) { return; } - if (semaphore->waiters > 0) { + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," - " hardware lockup imminent!\n", semaphore); + " hardware lockup imminent!\n", *semaphore); } - radeon_sa_bo_free(rdev, &semaphore->sa_bo, fence); - kfree(semaphore); + radeon_sa_bo_free(rdev, &(*semaphore)->sa_bo, fence); + kfree(*semaphore); + *semaphore = NULL; } diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index efff929ea49d..7c16540c10ff 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -106,13 +106,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(gtt_obj[i]); - r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create GTT->VRAM fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, gtt_addr, vram_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed GTT->VRAM copy %d\n", i); goto out_cleanup; @@ -155,13 +149,7 @@ void radeon_test_moves(struct radeon_device *rdev) radeon_bo_kunmap(vram_obj); - r = radeon_fence_create(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX); - if (r) { - DRM_ERROR("Failed to create VRAM->GTT fence %d\n", i); - goto out_cleanup; - } - - r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, fence); + r = radeon_copy(rdev, vram_addr, gtt_addr, size / RADEON_GPU_PAGE_SIZE, &fence); if (r) { DRM_ERROR("Failed VRAM->GTT copy %d\n", i); goto out_cleanup; @@ -241,36 +229,33 @@ void radeon_test_ring_sync(struct radeon_device *rdev, { struct radeon_fence *fence1 = NULL, *fence2 = NULL; struct radeon_semaphore *semaphore = NULL; - int ridxA = radeon_ring_index(rdev, ringA); - int ridxB = radeon_ring_index(rdev, ringB); int r; - r = radeon_fence_create(rdev, &fence1, ridxA); + r = radeon_semaphore_create(rdev, &semaphore); if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); + DRM_ERROR("Failed to create semaphore\n"); goto out_cleanup; } - r = radeon_fence_create(rdev, &fence2, ridxA); + + r = radeon_ring_lock(rdev, ringA, 64); if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); goto out_cleanup; } - - r = radeon_semaphore_create(rdev, &semaphore); + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fence1, ringA->idx); if (r) { - DRM_ERROR("Failed to create semaphore\n"); + DRM_ERROR("Failed to emit fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - - r = radeon_ring_lock(rdev, ringA, 64); + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fence2, ringA->idx); if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ridxA); + DRM_ERROR("Failed to emit fence 2\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence1); - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fence2); radeon_ring_unlock_commit(rdev, ringA); mdelay(1000); @@ -285,7 +270,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", 
ringB); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence1, false); @@ -306,7 +291,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringB); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxB, semaphore); + radeon_semaphore_emit_signal(rdev, ringB->idx, semaphore); radeon_ring_unlock_commit(rdev, ringB); r = radeon_fence_wait(fence2, false); @@ -316,8 +301,7 @@ void radeon_test_ring_sync(struct radeon_device *rdev, } out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL); if (fence1) radeon_fence_unref(&fence1); @@ -336,23 +320,9 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, { struct radeon_fence *fenceA = NULL, *fenceB = NULL; struct radeon_semaphore *semaphore = NULL; - int ridxA = radeon_ring_index(rdev, ringA); - int ridxB = radeon_ring_index(rdev, ringB); - int ridxC = radeon_ring_index(rdev, ringC); bool sigA, sigB; int i, r; - r = radeon_fence_create(rdev, &fenceA, ridxA); - if (r) { - DRM_ERROR("Failed to create sync fence 1\n"); - goto out_cleanup; - } - r = radeon_fence_create(rdev, &fenceB, ridxB); - if (r) { - DRM_ERROR("Failed to create sync fence 2\n"); - goto out_cleanup; - } - r = radeon_semaphore_create(rdev, &semaphore); if (r) { DRM_ERROR("Failed to create semaphore\n"); @@ -361,20 +331,30 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, r = radeon_ring_lock(rdev, ringA, 64); if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ridxA); + DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore); + r = radeon_fence_emit(rdev, &fenceA, ringA->idx); + if (r) { + DRM_ERROR("Failed to emit sync fence 1\n"); + radeon_ring_unlock_undo(rdev, ringA); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxA, semaphore); - radeon_fence_emit(rdev, fenceA); radeon_ring_unlock_commit(rdev, ringA); r = radeon_ring_lock(rdev, ringB, 64); if (r) { - DRM_ERROR("Failed to lock ring B %d\n", ridxB); + DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); + goto out_cleanup; + } + radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore); + r = radeon_fence_emit(rdev, &fenceB, ringB->idx); + if (r) { + DRM_ERROR("Failed to create sync fence 2\n"); + radeon_ring_unlock_undo(rdev, ringB); goto out_cleanup; } - radeon_semaphore_emit_wait(rdev, ridxB, semaphore); - radeon_fence_emit(rdev, fenceB); radeon_ring_unlock_commit(rdev, ringB); mdelay(1000); @@ -393,7 +373,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); for (i = 0; i < 30; ++i) { @@ -419,7 +399,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, DRM_ERROR("Failed to lock ring B %p\n", ringC); goto out_cleanup; } - radeon_semaphore_emit_signal(rdev, ridxC, semaphore); + radeon_semaphore_emit_signal(rdev, ringC->idx, semaphore); radeon_ring_unlock_commit(rdev, ringC); mdelay(1000); @@ -436,8 +416,7 @@ void radeon_test_ring_sync2(struct radeon_device *rdev, } out_cleanup: - if (semaphore) - radeon_semaphore_free(rdev, semaphore, NULL); + radeon_semaphore_free(rdev, &semaphore, NULL); if (fenceA) 
radeon_fence_unref(&fenceA); diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index c94a2257761f..5b71c716d83f 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -222,15 +222,11 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, { struct radeon_device *rdev; uint64_t old_start, new_start; - struct radeon_fence *fence, *old_fence; - struct radeon_semaphore *sem = NULL; - int r; + struct radeon_fence *fence; + int r, ridx; rdev = radeon_get_rdev(bo->bdev); - r = radeon_fence_create(rdev, &fence, radeon_copy_ring_index(rdev)); - if (unlikely(r)) { - return r; - } + ridx = radeon_copy_ring_index(rdev); old_start = old_mem->start << PAGE_SHIFT; new_start = new_mem->start << PAGE_SHIFT; @@ -243,7 +239,6 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } switch (new_mem->mem_type) { @@ -255,46 +250,23 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, break; default: DRM_ERROR("Unknown placement %d\n", old_mem->mem_type); - radeon_fence_unref(&fence); return -EINVAL; } - if (!rdev->ring[radeon_copy_ring_index(rdev)].ready) { + if (!rdev->ring[ridx].ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); - radeon_fence_unref(&fence); return -EINVAL; } BUILD_BUG_ON((PAGE_SIZE % RADEON_GPU_PAGE_SIZE) != 0); /* sync other rings */ - old_fence = bo->sync_obj; - if (old_fence && old_fence->ring != fence->ring - && !radeon_fence_signaled(old_fence)) { - bool sync_to_ring[RADEON_NUM_RINGS] = { }; - sync_to_ring[old_fence->ring] = true; - - r = radeon_semaphore_create(rdev, &sem); - if (r) { - radeon_fence_unref(&fence); - return r; - } - - r = radeon_semaphore_sync_rings(rdev, sem, - sync_to_ring, fence->ring); - if (r) { - radeon_semaphore_free(rdev, sem, NULL); - radeon_fence_unref(&fence); - return r; - } - } - + fence = bo->sync_obj; r = radeon_copy(rdev, old_start, new_start, new_mem->num_pages * (PAGE_SIZE / RADEON_GPU_PAGE_SIZE), /* GPU pages */ - fence); + &fence); /* FIXME: handle copy error */ r = ttm_bo_move_accel_cleanup(bo, (void *)fence, NULL, evict, no_wait_reserve, no_wait_gpu, new_mem); - radeon_semaphore_free(rdev, sem, fence); radeon_fence_unref(&fence); return r; } @@ -762,9 +734,7 @@ int radeon_ttm_init(struct radeon_device *rdev) } DRM_INFO("radeon: %uM of GTT memory ready.\n", (unsigned)(rdev->mc.gtt_size / (1024 * 1024))); - if (unlikely(rdev->mman.bdev.dev_mapping == NULL)) { - rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; - } + rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping; r = radeon_ttm_debugfs_init(rdev); if (r) { @@ -825,9 +795,9 @@ static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf) return VM_FAULT_NOPAGE; } rdev = radeon_get_rdev(bo->bdev); - mutex_lock(&rdev->vram_mutex); + down_read(&rdev->pm.mclk_lock); r = ttm_vm_ops->fault(vma, vmf); - mutex_unlock(&rdev->vram_mutex); + up_read(&rdev->pm.mclk_lock); return r; } diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600 index 5e659b034d9a..f93e45d869f4 100644 --- a/drivers/gpu/drm/radeon/reg_srcs/r600 +++ b/drivers/gpu/drm/radeon/reg_srcs/r600 @@ -744,14 +744,6 @@ r600 0x9400 0x00028C38 CB_CLRCMP_DST 0x00028C3C CB_CLRCMP_MSK 0x00028C34 CB_CLRCMP_SRC -0x00028100 CB_COLOR0_MASK -0x00028104 CB_COLOR1_MASK -0x00028108 CB_COLOR2_MASK -0x0002810C CB_COLOR3_MASK -0x00028110 CB_COLOR4_MASK -0x00028114 CB_COLOR5_MASK 
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index 5e659b034d9a..f93e45d869f4 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -744,14 +744,6 @@ r600 0x9400
 0x00028C38 CB_CLRCMP_DST
 0x00028C3C CB_CLRCMP_MSK
 0x00028C34 CB_CLRCMP_SRC
-0x00028100 CB_COLOR0_MASK
-0x00028104 CB_COLOR1_MASK
-0x00028108 CB_COLOR2_MASK
-0x0002810C CB_COLOR3_MASK
-0x00028110 CB_COLOR4_MASK
-0x00028114 CB_COLOR5_MASK
-0x00028118 CB_COLOR6_MASK
-0x0002811C CB_COLOR7_MASK
 0x00028808 CB_COLOR_CONTROL
 0x0002842C CB_FOG_BLUE
 0x00028428 CB_FOG_GREEN
diff --git a/drivers/gpu/drm/radeon/rs400.c b/drivers/gpu/drm/radeon/rs400.c
index a464eb5e2df2..2752f7f78237 100644
--- a/drivers/gpu/drm/radeon/rs400.c
+++ b/drivers/gpu/drm/radeon/rs400.c
@@ -426,13 +426,11 @@ static int rs400_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	return 0;
 }
@@ -470,7 +468,6 @@ int rs400_resume(struct radeon_device *rdev)
 
 int rs400_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
 	r100_irq_disable(rdev);
@@ -482,7 +479,7 @@ void rs400_fini(struct radeon_device *rdev)
 {
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs400_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -550,20 +547,14 @@ int rs400_init(struct radeon_device *rdev)
 		return r;
 	r300_set_reg_safe(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs400_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs400_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index e95c5e61d4e2..5301b3df8466 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -294,6 +294,7 @@ void rs600_hpd_init(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned enable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -301,26 +302,25 @@ void rs600_hpd_init(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
			       S_007D00_DC_HOT_PLUG_DETECT1_EN(1));
-			rdev->irq.hpd[0] = true;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
			       S_007D10_DC_HOT_PLUG_DETECT2_EN(1));
-			rdev->irq.hpd[1] = true;
 			break;
 		default:
 			break;
 		}
+		enable |= 1 << radeon_connector->hpd.hpd;
 		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
 	}
-	if (rdev->irq.installed)
-		rs600_irq_set(rdev);
+	radeon_irq_kms_enable_hpd(rdev, enable);
 }
 
 void rs600_hpd_fini(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
 	struct drm_connector *connector;
+	unsigned disable = 0;
 
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -328,17 +328,17 @@ void rs600_hpd_fini(struct radeon_device *rdev)
 		case RADEON_HPD_1:
 			WREG32(R_007D00_DC_HOT_PLUG_DETECT1_CONTROL,
			       S_007D00_DC_HOT_PLUG_DETECT1_EN(0));
-			rdev->irq.hpd[0] = false;
 			break;
 		case RADEON_HPD_2:
 			WREG32(R_007D10_DC_HOT_PLUG_DETECT2_CONTROL,
			       S_007D10_DC_HOT_PLUG_DETECT2_EN(0));
-			rdev->irq.hpd[1] = false;
 			break;
 		default:
 			break;
 		}
+		disable |= 1 << radeon_connector->hpd.hpd;
 	}
+	radeon_irq_kms_disable_hpd(rdev, disable);
 }
 
 int rs600_asic_reset(struct radeon_device *rdev)
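rs600_hpd_init() now only accumulates a connector bitmask and hands it to radeon_irq_kms_enable_hpd(), instead of poking rdev->irq.hpd[] and calling the per-ASIC irq_set itself. A plausible sketch of that helper, assuming it flips the hpd flags and reprograms the hardware under the irq lock; the real implementation lives in radeon_irq_kms.c and may differ in detail:

	void radeon_irq_kms_enable_hpd(struct radeon_device *rdev, unsigned hpd_mask)
	{
		unsigned long irqflags;
		int i;

		spin_lock_irqsave(&rdev->irq.lock, irqflags);
		for (i = 0; i < RADEON_MAX_HPD_PINS; i++)
			rdev->irq.hpd[i] |= !!(hpd_mask & (1 << i));
		radeon_irq_set(rdev);	/* push the new mask to the hardware */
		spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
	}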
@@ -564,18 +564,18 @@ int rs600_irq_set(struct radeon_device *rdev)
 		WREG32(R_000040_GEN_INT_CNTL, 0);
 		return -EINVAL;
 	}
-	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		tmp |= S_000040_SW_INT_EN(1);
 	}
 	if (rdev->irq.gui_idle) {
 		tmp |= S_000040_GUI_IDLE(1);
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    rdev->irq.pflip[0]) {
+	    atomic_read(&rdev->irq.pflip[0])) {
 		mode_int |= S_006540_D1MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    rdev->irq.pflip[1]) {
+	    atomic_read(&rdev->irq.pflip[1])) {
 		mode_int |= S_006540_D2MODE_VBLANK_INT_MASK(1);
 	}
 	if (rdev->irq.hpd[0]) {
@@ -686,7 +686,6 @@ int rs600_irq_process(struct radeon_device *rdev)
 	/* GUI idle */
 	if (G_000040_GUI_IDLE(status)) {
 		rdev->irq.gui_idle_acked = true;
-		rdev->pm.gui_idle = true;
 		wake_up(&rdev->irq.idle_queue);
 	}
 	/* Vertical blank interrupts */
@@ -696,7 +695,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[0])
+		if (atomic_read(&rdev->irq.pflip[0]))
 			radeon_crtc_handle_flip(rdev, 0);
 	}
 	if (G_007EDC_LB_D2_VBLANK_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
@@ -705,7 +704,7 @@ int rs600_irq_process(struct radeon_device *rdev)
 			rdev->pm.vblank_sync = true;
 			wake_up(&rdev->irq.vblank_queue);
 		}
-		if (rdev->irq.pflip[1])
+		if (atomic_read(&rdev->irq.pflip[1]))
 			radeon_crtc_handle_flip(rdev, 1);
 	}
 	if (G_007EDC_DC_HOT_PLUG_DETECT1_INTERRUPT(rdev->irq.stat_regs.r500.disp_int)) {
@@ -908,13 +907,11 @@ static int rs600_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
@@ -956,7 +953,6 @@ int rs600_resume(struct radeon_device *rdev)
 
 int rs600_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -970,7 +966,7 @@ void rs600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs600_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -1038,20 +1034,14 @@ int rs600_init(struct radeon_device *rdev)
 		return r;
 	rs600_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs600_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs600_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c
index 159b6a43fda0..3b663fcfe061 100644
--- a/drivers/gpu/drm/radeon/rs690.c
+++ b/drivers/gpu/drm/radeon/rs690.c
@@ -637,13 +637,11 @@ static int rs690_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
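The sw_int flags are gone in favour of atomic reference counts in ring_int[], so fence waiters can enable CP interrupts without taking a lock in the common case. A sketch of the expected get side, assuming the counter gates a single radeon_irq_set() call on the 0 to 1 transition (the put side mirrors it with atomic_dec_and_test()):

	void radeon_irq_kms_sw_irq_get(struct radeon_device *rdev, int ring)
	{
		unsigned long irqflags;

		if (!rdev->ddev->irq_enabled)
			return;

		/* only the first user reprograms the hardware */
		if (atomic_inc_return(&rdev->irq.ring_int[ring]) == 1) {
			spin_lock_irqsave(&rdev->irq.lock, irqflags);
			radeon_irq_set(rdev);
			spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
		}
	}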
@@ -685,7 +683,6 @@ int rs690_resume(struct radeon_device *rdev)
 
 int rs690_suspend(struct radeon_device *rdev)
 {
-	radeon_ib_pool_suspend(rdev);
 	r600_audio_fini(rdev);
 	r100_cp_disable(rdev);
 	radeon_wb_disable(rdev);
@@ -699,7 +696,7 @@ void rs690_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rs400_gart_fini(rdev);
 	radeon_irq_kms_fini(rdev);
@@ -768,20 +765,14 @@ int rs690_init(struct radeon_device *rdev)
 		return r;
 	rs600_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rs690_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		rs400_gart_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rdev->accel_working = false;
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 7f08cedb5333..aa8ef491ef3c 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -281,12 +281,8 @@ int rv515_debugfs_ga_info_init(struct radeon_device *rdev)
 
 void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
 {
-	save->d1vga_control = RREG32(R_000330_D1VGA_CONTROL);
-	save->d2vga_control = RREG32(R_000338_D2VGA_CONTROL);
 	save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
 	save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
-	save->d1crtc_control = RREG32(R_006080_D1CRTC_CONTROL);
-	save->d2crtc_control = RREG32(R_006880_D2CRTC_CONTROL);
 
 	/* Stop all video */
 	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
@@ -311,15 +307,6 @@ void rv515_mc_resume(struct radeon_device *rdev, struct rv515_mc_save *save)
 	/* Unlock host access */
 	WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
 	mdelay(1);
-	/* Restore video state */
-	WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control);
-	WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
-	WREG32(R_006080_D1CRTC_CONTROL, save->d1crtc_control);
-	WREG32(R_006880_D2CRTC_CONTROL, save->d2crtc_control);
-	WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
-	WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
 	WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
 }
 
@@ -408,13 +395,11 @@ static int rv515_startup(struct radeon_device *rdev)
 		return r;
 	}
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	return 0;
 }
@@ -469,7 +454,7 @@ void rv515_fini(struct radeon_device *rdev)
 {
 	r100_cp_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_gem_fini(rdev);
 	rv370_pcie_gart_fini(rdev);
 	radeon_agp_fini(rdev);
@@ -543,20 +528,14 @@ int rv515_init(struct radeon_device *rdev)
 		return r;
 	rv515_set_safe_registers(rdev);
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rv515_startup(rdev);
 	if (r) {
 		/* Something went wrong with the accel init, stop accel */
 		dev_err(rdev->dev, "Disabling GPU acceleration\n");
 		r100_cp_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rv370_pcie_gart_fini(rdev);
 		radeon_agp_fini(rdev);
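Across all of the *_startup() paths in this series, radeon_ib_pool_init() replaces the old split between init in *_init() and start in *_startup(), which means it is now called on every resume as well and must be idempotent. A sketch of the guard this implies, assuming a ready flag on the device (the sa-manager call and sizes are illustrative):

	int radeon_ib_pool_init(struct radeon_device *rdev)
	{
		int r;

		if (rdev->ib_pool_ready)	/* already set up on a prior startup */
			return 0;

		r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
					      RADEON_IB_POOL_SIZE * 64 * 1024,
					      RADEON_GEM_DOMAIN_GTT);
		if (r)
			return r;

		rdev->ib_pool_ready = true;
		return 0;
	}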
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index b4f51c569c36..ca8ffec10ff6 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -358,8 +358,10 @@ static int rv770_cp_load_microcode(struct radeon_device *rdev)
 
 void r700_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r700_cp_stop(rdev);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 /*
@@ -951,13 +953,11 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_pool_start(rdev);
-	if (r)
-		return r;
-
-	r = radeon_ib_ring_tests(rdev);
-	if (r)
+	r = radeon_ib_pool_init(rdev);
+	if (r) {
+		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
 		return r;
+	}
 
 	r = r600_audio_init(rdev);
 	if (r) {
@@ -994,9 +994,6 @@ int rv770_resume(struct radeon_device *rdev)
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
-	radeon_ib_pool_suspend(rdev);
-	r600_blit_suspend(rdev);
-	/* FIXME: we should wait for ring to be empty */
 	r700_cp_stop(rdev);
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
 	r600_irq_suspend(rdev);
@@ -1076,20 +1073,14 @@ int rv770_init(struct radeon_device *rdev)
 	if (r)
 		return r;
 
-	r = radeon_ib_pool_init(rdev);
 	rdev->accel_working = true;
-	if (r) {
-		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
-		rdev->accel_working = false;
-	}
-
 	r = rv770_startup(rdev);
 	if (r) {
 		dev_err(rdev->dev, "disabling GPU acceleration\n");
 		r700_cp_fini(rdev);
 		r600_irq_fini(rdev);
 		radeon_wb_fini(rdev);
-		r100_ib_fini(rdev);
+		radeon_ib_pool_fini(rdev);
 		radeon_irq_kms_fini(rdev);
 		rv770_pcie_gart_fini(rdev);
 		rdev->accel_working = false;
@@ -1104,7 +1095,7 @@ void rv770_fini(struct radeon_device *rdev)
 	r700_cp_fini(rdev);
 	r600_irq_fini(rdev);
 	radeon_wb_fini(rdev);
-	r100_ib_fini(rdev);
+	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
@@ -1121,6 +1112,8 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 {
 	u32 link_width_cntl, lanes, speed_cntl, tmp;
 	u16 link_cntl2;
+	u32 mask;
+	int ret;
 
 	if (radeon_pcie_gen2 == 0)
 		return;
@@ -1135,6 +1128,15 @@ static void rv770_pcie_gen2_enable(struct radeon_device *rdev)
 	if (ASIC_IS_X2(rdev))
 		return;
 
+	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
+	if (ret != 0)
+		return;
+
+	if (!(mask & DRM_PCIE_SPEED_50))
+		return;
+
+	DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
+
 	/* advertise upconfig capability */
 	link_width_cntl = RREG32_PCIE_P(PCIE_LC_LINK_WIDTH_CNTL);
 	link_width_cntl &= ~LC_UPCONFIGURE_DIS;
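r700_cp_fini() now releases ring->rptr_save_reg, which implies the common ring setup path allocates it. A sketch of the assumed allocation side; where exactly it lives (ring init vs. CP resume) is not visible in these hunks:

	/* assumed counterpart during ring setup: grab a scratch register
	 * the CP can write the saved read pointer into */
	r = radeon_scratch_get(rdev, &ring->rptr_save_reg);
	if (r) {
		DRM_ERROR("failed to get scratch reg for rptr save (%d).\n", r);
		return r;
	}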
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 0b0279291a73..0139e227e3c7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -1639,11 +1639,19 @@ static void si_gpu_init(struct radeon_device *rdev)
 		/* XXX what about 12? */
 		rdev->config.si.tile_config |= (3 << 0);
 		break;
-	}
-	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
-		rdev->config.si.tile_config |= 1 << 4;
-	else
+	}
+	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
+	case 0: /* four banks */
 		rdev->config.si.tile_config |= 0 << 4;
+		break;
+	case 1: /* eight banks */
+		rdev->config.si.tile_config |= 1 << 4;
+		break;
+	case 2: /* sixteen banks */
+	default:
+		rdev->config.si.tile_config |= 2 << 4;
+		break;
+	}
 	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
 	rdev->config.si.tile_config |=
@@ -1762,13 +1770,34 @@ void si_fence_ring_emit(struct radeon_device *rdev,
  */
 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 {
-	struct radeon_ring *ring = &rdev->ring[ib->fence->ring];
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
 	u32 header;
 
-	if (ib->is_const_ib)
+	if (ib->is_const_ib) {
+		/* set switch buffer packet before const IB */
+		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+		radeon_ring_write(ring, 0);
+
 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
-	else
+	} else {
+		u32 next_rptr;
+		if (ring->rptr_save_reg) {
+			next_rptr = ring->wptr + 3 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
+			radeon_ring_write(ring, next_rptr);
+		} else if (rdev->wb.enabled) {
+			next_rptr = ring->wptr + 5 + 4 + 8;
+			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+			radeon_ring_write(ring, (1 << 8));
+			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
+			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
+			radeon_ring_write(ring, next_rptr);
+		}
+
 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+	}
 
 	radeon_ring_write(ring, header);
 	radeon_ring_write(ring,
@@ -1779,18 +1808,20 @@ void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
			  upper_32_bits(ib->gpu_addr) & 0xFFFF);
 	radeon_ring_write(ring, ib->length_dw | (ib->vm_id << 24));
 
-	/* flush read cache over gart for this vmid */
-	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
-	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
-	radeon_ring_write(ring, ib->vm_id);
-	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
-	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
-			  PACKET3_TC_ACTION_ENA |
-			  PACKET3_SH_KCACHE_ACTION_ENA |
-			  PACKET3_SH_ICACHE_ACTION_ENA);
-	radeon_ring_write(ring, 0xFFFFFFFF);
-	radeon_ring_write(ring, 0);
-	radeon_ring_write(ring, 10); /* poll interval */
+	if (!ib->is_const_ib) {
+		/* flush read cache over gart for this vmid */
+		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
+		radeon_ring_write(ring, ib->vm_id);
+		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
+		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
+				  PACKET3_TC_ACTION_ENA |
+				  PACKET3_SH_KCACHE_ACTION_ENA |
+				  PACKET3_SH_ICACHE_ACTION_ENA);
+		radeon_ring_write(ring, 0xFFFFFFFF);
+		radeon_ring_write(ring, 0);
+		radeon_ring_write(ring, 10); /* poll interval */
+	}
 }
 
 /*
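The magic offsets in the next_rptr computation above are packet lengths in dwords. A worked breakdown, under the assumption that nothing else is emitted between these packets:

	/* next_rptr must point past everything this function emits:
	 *   SET_CONFIG_REG path: 3 dwords (header + reg offset + value)
	 *   WRITE_DATA path:     5 dwords (header + dst sel + addr lo/hi + value)
	 *   INDIRECT_BUFFER:     4 dwords (header + addr lo/hi + length)
	 *   GART cache flush:    8 dwords (SET_CONFIG_REG: 3 + SURFACE_SYNC: 5)
	 * hence wptr + 3 + 4 + 8 and wptr + 5 + 4 + 8 respectively.
	 */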
@@ -1917,10 +1948,20 @@ static int si_cp_start(struct radeon_device *rdev)
 
 static void si_cp_fini(struct radeon_device *rdev)
 {
+	struct radeon_ring *ring;
 	si_cp_enable(rdev, false);
-	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
-	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
+
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
+
+	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
+	radeon_ring_fini(rdev, ring);
+	radeon_scratch_free(rdev, ring->rptr_save_reg);
 }
 
 static int si_cp_resume(struct radeon_device *rdev)
@@ -2702,7 +2743,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 	if (ib->is_const_ib)
 		ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
 	else {
-		switch (ib->fence->ring) {
+		switch (ib->ring) {
 		case RADEON_RING_TYPE_GFX_INDEX:
 			ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
 			break;
@@ -2711,7 +2752,7 @@ int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
 			ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
 			break;
 		default:
-			dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->fence->ring);
+			dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
 			ret = -EINVAL;
 			break;
 		}
@@ -2942,7 +2983,6 @@ static void si_disable_interrupts(struct radeon_device *rdev)
 	WREG32(IH_RB_RPTR, 0);
 	WREG32(IH_RB_WPTR, 0);
 	rdev->ih.enabled = false;
-	rdev->ih.wptr = 0;
 	rdev->ih.rptr = 0;
 }
 
@@ -3093,45 +3133,45 @@ int si_irq_set(struct radeon_device *rdev)
 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
 
 	/* enable CP interrupts on all rings */
-	if (rdev->irq.sw_int[RADEON_RING_TYPE_GFX_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int gfx\n");
 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
 	}
-	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP1_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int cp1\n");
 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
 	}
-	if (rdev->irq.sw_int[CAYMAN_RING_TYPE_CP2_INDEX]) {
+	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
 		DRM_DEBUG("si_irq_set: sw int cp2\n");
 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
 	}
 	if (rdev->irq.crtc_vblank_int[0] ||
-	    rdev->irq.pflip[0]) {
+	    atomic_read(&rdev->irq.pflip[0])) {
 		DRM_DEBUG("si_irq_set: vblank 0\n");
 		crtc1 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[1] ||
-	    rdev->irq.pflip[1]) {
+	    atomic_read(&rdev->irq.pflip[1])) {
 		DRM_DEBUG("si_irq_set: vblank 1\n");
 		crtc2 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[2] ||
-	    rdev->irq.pflip[2]) {
+	    atomic_read(&rdev->irq.pflip[2])) {
 		DRM_DEBUG("si_irq_set: vblank 2\n");
 		crtc3 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[3] ||
-	    rdev->irq.pflip[3]) {
+	    atomic_read(&rdev->irq.pflip[3])) {
 		DRM_DEBUG("si_irq_set: vblank 3\n");
 		crtc4 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[4] ||
-	    rdev->irq.pflip[4]) {
+	    atomic_read(&rdev->irq.pflip[4])) {
 		DRM_DEBUG("si_irq_set: vblank 4\n");
 		crtc5 |= VBLANK_INT_MASK;
 	}
 	if (rdev->irq.crtc_vblank_int[5] ||
-	    rdev->irq.pflip[5]) {
+	    atomic_read(&rdev->irq.pflip[5])) {
 		DRM_DEBUG("si_irq_set: vblank 5\n");
 		crtc6 |= VBLANK_INT_MASK;
 	}
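The pflip[] flags have likewise become atomic counters, read directly in the interrupt paths above without holding a lock. A sketch of the matching put side used when a flip retires, following the same convention as the ring counters (details assumed, complementing the get side shown earlier):

	void radeon_irq_kms_pflip_irq_put(struct radeon_device *rdev, int crtc)
	{
		unsigned long irqflags;

		if (crtc < 0 || crtc >= rdev->num_crtc)
			return;

		/* last flip retired: let radeon_irq_set() drop the vblank irq */
		if (atomic_dec_and_test(&rdev->irq.pflip[crtc])) {
			spin_lock_irqsave(&rdev->irq.lock, irqflags);
			radeon_irq_set(rdev);
			spin_unlock_irqrestore(&rdev->irq.lock, irqflags);
		}
	}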
@@ -3359,29 +3399,27 @@ int si_irq_process(struct radeon_device *rdev)
 	u32 rptr;
 	u32 src_id, src_data, ring_id;
 	u32 ring_index;
-	unsigned long flags;
 	bool queue_hotplug = false;
 
 	if (!rdev->ih.enabled || rdev->shutdown)
 		return IRQ_NONE;
 
 	wptr = si_get_ih_wptr(rdev);
+
+restart_ih:
+	/* is somebody else already processing irqs? */
+	if (atomic_xchg(&rdev->ih.lock, 1))
+		return IRQ_NONE;
+
 	rptr = rdev->ih.rptr;
 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
 
-	spin_lock_irqsave(&rdev->ih.lock, flags);
-	if (rptr == wptr) {
-		spin_unlock_irqrestore(&rdev->ih.lock, flags);
-		return IRQ_NONE;
-	}
-restart_ih:
 	/* Order reading of wptr vs. reading of IH ring data */
 	rmb();
 
 	/* display interrupts */
 	si_irq_ack(rdev);
 
-	rdev->ih.wptr = wptr;
 	while (rptr != wptr) {
 		/* wptr/rptr are in bytes! */
 		ring_index = rptr / 4;
@@ -3399,7 +3437,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[0])
+				if (atomic_read(&rdev->irq.pflip[0]))
 					radeon_crtc_handle_flip(rdev, 0);
 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D1 vblank\n");
@@ -3425,7 +3463,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[1])
+				if (atomic_read(&rdev->irq.pflip[1]))
 					radeon_crtc_handle_flip(rdev, 1);
 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D2 vblank\n");
@@ -3451,7 +3489,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[2])
+				if (atomic_read(&rdev->irq.pflip[2]))
 					radeon_crtc_handle_flip(rdev, 2);
 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D3 vblank\n");
@@ -3477,7 +3515,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[3])
+				if (atomic_read(&rdev->irq.pflip[3]))
 					radeon_crtc_handle_flip(rdev, 3);
 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D4 vblank\n");
@@ -3503,7 +3541,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[4])
+				if (atomic_read(&rdev->irq.pflip[4]))
 					radeon_crtc_handle_flip(rdev, 4);
 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D5 vblank\n");
@@ -3529,7 +3567,7 @@ restart_ih:
 					rdev->pm.vblank_sync = true;
 					wake_up(&rdev->irq.vblank_queue);
 				}
-				if (rdev->irq.pflip[5])
+				if (atomic_read(&rdev->irq.pflip[5]))
 					radeon_crtc_handle_flip(rdev, 5);
 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
 				DRM_DEBUG("IH: D6 vblank\n");
@@ -3620,7 +3658,6 @@ restart_ih:
 			break;
 		case 233: /* GUI IDLE */
 			DRM_DEBUG("IH: GUI idle\n");
-			rdev->pm.gui_idle = true;
 			wake_up(&rdev->irq.idle_queue);
 			break;
 		default:
@@ -3632,15 +3669,17 @@ restart_ih:
 		rptr += 16;
 		rptr &= rdev->ih.ptr_mask;
 	}
-	/* make sure wptr hasn't changed while processing */
-	wptr = si_get_ih_wptr(rdev);
-	if (wptr != rdev->ih.wptr)
-		goto restart_ih;
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
 	rdev->ih.rptr = rptr;
 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
-	spin_unlock_irqrestore(&rdev->ih.lock, flags);
+	atomic_set(&rdev->ih.lock, 0);
+
+	/* make sure wptr hasn't changed while processing */
+	wptr = si_get_ih_wptr(rdev);
+	if (wptr != rptr)
+		goto restart_ih;
+
 	return IRQ_HANDLED;
 }
 
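The reworked si_irq_process() replaces the spinlock with an ownership flag: whoever wins the atomic_xchg() drains the ring, and the final re-read of wptr closes the race where new entries land after the flag is dropped. The pattern in isolation, as a generic sketch not tied to radeon internals (types and accessors hypothetical):

	/* generic single-consumer drain; 'lock' is an atomic_t ownership flag */
	static irqreturn_t drain_ring(struct ih_ring *ih)
	{
		u32 wptr = read_hw_wptr(ih);	/* hypothetical accessor */
		u32 rptr;

	restart:
		if (atomic_xchg(&ih->lock, 1))
			return IRQ_NONE;	/* another CPU owns the ring */
		rptr = ih->rptr;
		while (rptr != wptr)
			rptr = handle_one_entry(ih, rptr);	/* hypothetical */
		ih->rptr = rptr;
		atomic_set(&ih->lock, 0);

		/* entries queued after we released ownership? take it back */
		wptr = read_hw_wptr(ih);
		if (wptr != rptr)
			goto restart;
		return IRQ_HANDLED;
	}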
DRM_ERROR("radeon: failed testing IB (%d) on CP ring 1\n", r); - rdev->accel_working = false; + dev_err(rdev->dev, "IB initialization failed (%d).\n", r); return r; } - r = radeon_ib_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]); + r = radeon_vm_manager_init(rdev); if (r) { - DRM_ERROR("radeon: failed testing IB (%d) on CP ring 2\n", r); - rdev->accel_working = false; + dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); return r; } - r = radeon_vm_manager_start(rdev); - if (r) - return r; - return 0; } @@ -3809,12 +3831,6 @@ int si_resume(struct radeon_device *rdev) int si_suspend(struct radeon_device *rdev) { - /* FIXME: we should wait for ring to be empty */ - radeon_ib_pool_suspend(rdev); - radeon_vm_manager_suspend(rdev); -#if 0 - r600_blit_suspend(rdev); -#endif si_cp_enable(rdev, false); rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false; rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false; @@ -3903,17 +3919,7 @@ int si_init(struct radeon_device *rdev) if (r) return r; - r = radeon_ib_pool_init(rdev); rdev->accel_working = true; - if (r) { - dev_err(rdev->dev, "IB initialization failed (%d).\n", r); - rdev->accel_working = false; - } - r = radeon_vm_manager_init(rdev); - if (r) { - dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r); - } - r = si_startup(rdev); if (r) { dev_err(rdev->dev, "disabling GPU acceleration\n"); @@ -3921,7 +3927,7 @@ int si_init(struct radeon_device *rdev) si_irq_fini(rdev); si_rlc_fini(rdev); radeon_wb_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_vm_manager_fini(rdev); radeon_irq_kms_fini(rdev); si_pcie_gart_fini(rdev); @@ -3950,7 +3956,7 @@ void si_fini(struct radeon_device *rdev) si_rlc_fini(rdev); radeon_wb_fini(rdev); radeon_vm_manager_fini(rdev); - r100_ib_fini(rdev); + radeon_ib_pool_fini(rdev); radeon_irq_kms_fini(rdev); si_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); @@ -3962,3 +3968,22 @@ void si_fini(struct radeon_device *rdev) rdev->bios = NULL; } +/** + * si_get_gpu_clock - return GPU clock counter snapshot + * + * @rdev: radeon_device pointer + * + * Fetches a GPU clock counter snapshot (SI). + * Returns the 64 bit clock counter snapshot. + */ +uint64_t si_get_gpu_clock(struct radeon_device *rdev) +{ + uint64_t clock; + + mutex_lock(&rdev->gpu_clock_mutex); + WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + mutex_unlock(&rdev->gpu_clock_mutex); + return clock; +} diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h index db4067962868..ef4815c27b1c 100644 --- a/drivers/gpu/drm/radeon/sid.h +++ b/drivers/gpu/drm/radeon/sid.h @@ -698,6 +698,9 @@ #define RLC_UCODE_ADDR 0xC32C #define RLC_UCODE_DATA 0xC330 +#define RLC_GPU_CLOCK_COUNT_LSB 0xC338 +#define RLC_GPU_CLOCK_COUNT_MSB 0xC33C +#define RLC_CAPTURE_GPU_CLOCK_COUNT 0xC340 #define RLC_MC_CNTL 0xC344 #define RLC_UCODE_CNTL 0xC348 @@ -901,5 +904,6 @@ #define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 #define PACKET3_SET_CE_DE_COUNTERS 0x89 #define PACKET3_WAIT_ON_AVAIL_BUFFER 0x8A +#define PACKET3_SWITCH_BUFFER 0x8B #endif |