546 files changed, 19020 insertions, 7031 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da3d02154fa6..8ebc5f1eb4c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -85,8 +85,12 @@ extern int amdgpu_vm_debug;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
+extern int amdgpu_powercontainment;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_cg_mask;
+extern unsigned amdgpu_pg_mask;
+extern char *amdgpu_disable_cu;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -183,6 +187,10 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type);
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type);
 
 struct amdgpu_ip_block_version {
 	enum amd_ip_block_type type;
@@ -298,13 +306,16 @@ struct amdgpu_ring_funcs {
 				uint32_t oa_base, uint32_t oa_size);
 	/* testing functions */
 	int (*test_ring)(struct amdgpu_ring *ring);
-	int (*test_ib)(struct amdgpu_ring *ring);
+	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
 	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+	/* note usage for clock and power gating */
+	void (*begin_use)(struct amdgpu_ring *ring);
+	void (*end_use)(struct amdgpu_ring *ring);
 };
 
 /*
@@ -594,11 +605,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
@@ -754,12 +763,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
+void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-void amdgpu_job_free_func(struct kref *refcount);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
-void amdgpu_job_timeout_func(struct work_struct *work);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -767,12 +775,9 @@ struct amdgpu_ring {
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;
 
-	spinlock_t              fence_lock;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
-	u64			next_rptr_gpu_addr;
-	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
@@ -791,14 +796,16 @@ struct amdgpu_ring {
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
-	unsigned		next_rptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	enum amdgpu_ring_type	type;
 	char			name[16];
 	unsigned		cond_exe_offs;
-	u64				cond_exe_gpu_addr;
-	volatile u32	*cond_exe_cpu_addr;
+	u64			cond_exe_gpu_addr;
+	volatile u32		*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
 };
 
 /*
@@ -861,6 +868,7 @@ struct amdgpu_vm {
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
 	struct fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;
 
 	/* array of page tables, one for each page directory entry */
 	struct amdgpu_vm_pt	*page_tables;
@@ -883,13 +891,14 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
-	struct amdgpu_ring      *last_user;
 	atomic64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;
 
+	uint32_t                current_gpu_reset_count;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;
@@ -905,6 +914,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
 
+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
@@ -926,17 +939,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr);
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
@@ -1142,6 +1152,12 @@ struct amdgpu_cu_info {
 	uint32_t bitmap[4][4];
 };
 
+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+};
+
 struct amdgpu_gfx {
 	struct mutex			gpu_clock_mutex;
 	struct amdgpu_gca_config	config;
@@ -1178,6 +1194,7 @@ struct amdgpu_gfx {
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1195,10 +1212,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data);
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, u32 nop, u32 align_mask,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
@@ -1250,6 +1263,7 @@ struct amdgpu_job {
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		ctx;
+	bool                    vm_needs_flush;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1257,8 +1271,7 @@ struct amdgpu_job {
 	uint32_t		oa_base, oa_size;
 
 	/* user fence handling */
-	struct amdgpu_bo	*uf_bo;
-	uint32_t		uf_offset;
+	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
 
 };
@@ -1560,6 +1573,12 @@ struct amdgpu_dpm_funcs {
 	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
 	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
 	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
 };
 
 struct amdgpu_dpm {
@@ -1662,6 +1681,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
+	bool			use_ctx_buf;
 	struct amd_sched_entity entity;
 };
 
@@ -1683,6 +1703,7 @@ struct amdgpu_vce {
 	struct drm_file		*filp[AMDGPU_MAX_VCE_HANDLES];
 	uint32_t		img_size[AMDGPU_MAX_VCE_HANDLES];
 	struct delayed_work	idle_work;
+	struct mutex		idle_mutex;
 	const struct firmware	*fw;	/* VCE firmware */
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
@@ -1767,6 +1788,8 @@ int amdgpu_debugfs_init(struct drm_minor *minor);
 void amdgpu_debugfs_cleanup(struct drm_minor *minor);
 #endif
 
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+
 /*
  * amdgpu smumgr functions
  */
@@ -1811,15 +1834,13 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
-	/* wait for mc_idle */
-	int (*wait_for_mc_idle)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+	/* query virtual capabilities */
+	u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1914,8 +1935,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
 
 
 /* GPU virtualization */
+#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
+#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 struct amdgpu_virtualization {
 	bool supports_sr_iov;
+	bool is_virtual;
+	u32 caps;
 };
 
 /*
@@ -1997,6 +2022,10 @@ struct amdgpu_device {
 	spinlock_t didt_idx_lock;
 	amdgpu_rreg_t			didt_rreg;
 	amdgpu_wreg_t			didt_wreg;
+	/* protects concurrent gc_cac register access */
+	spinlock_t gc_cac_idx_lock;
+	amdgpu_rreg_t			gc_cac_rreg;
+	amdgpu_wreg_t			gc_cac_wreg;
 	/* protects concurrent ENDPOINT (audio) register access */
 	spinlock_t audio_endpt_idx_lock;
 	amdgpu_block_rreg_t		audio_endpt_rreg;
@@ -2022,6 +2051,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;
 
 	/* display */
@@ -2125,6 +2155,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
 #define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)				\
@@ -2200,11 +2232,10 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
-#define amdgpu_asic_wait_for_mc_idle(adev) (adev)->asic_funcs->wait_for_mc_idle((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
-#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
+#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
@@ -2215,7 +2246,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
+#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@@ -2257,6 +2288,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
 #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 
 #define amdgpu_dpm_get_temperature(adev) \
 	((adev)->pp_enabled ?						\
@@ -2335,6 +2368,18 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_force_clock_level(adev, type, level) \
 		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
 
+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2376,9 +2421,13 @@ bool amdgpu_device_is_px(struct drm_device *dev);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
 void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 32809f749903..d080d0807a5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -240,8 +240,8 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
 
-	if (rdev->asic_funcs->get_gpu_clock_counter)
-		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	if (rdev->gfx.funcs->get_gpu_clock_counter)
+		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9df1bcb35bf0..983175363b06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -551,28 +551,19 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 		    le16_to_cpu(firmware_info->info.usReferenceClock);
 		ppll->reference_div = 0;
 
-		if (crev < 2)
-			ppll->pll_out_min =
-				le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
-		else
-			ppll->pll_out_min =
-				le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+		ppll->pll_out_min =
+			le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
 		ppll->pll_out_max =
 		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
 
-		if (crev >= 4) {
-			ppll->lcd_pll_out_min =
-				le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_min == 0)
-				ppll->lcd_pll_out_min = ppll->pll_out_min;
-			ppll->lcd_pll_out_max =
-				le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_max == 0)
-				ppll->lcd_pll_out_max = ppll->pll_out_max;
-		} else {
+		ppll->lcd_pll_out_min =
+			le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_min == 0)
 			ppll->lcd_pll_out_min = ppll->pll_out_min;
+		ppll->lcd_pll_out_max =
+			le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_max == 0)
 			ppll->lcd_pll_out_max = ppll->pll_out_max;
-		}
 
 		if (ppll->pll_out_min == 0)
 			ppll->pll_out_min = 64800;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 35a1248aaa77..49de92600074 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "amd_acpi.h"
 
@@ -27,6 +28,7 @@ struct amdgpu_atpx_functions {
 struct amdgpu_atpx {
 	acpi_handle handle;
 	struct amdgpu_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct amdgpu_atpx_priv {
@@ -63,6 +65,14 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void) {
+	return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,18 +152,12 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -172,19 +176,43 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -259,6 +287,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
@@ -507,7 +539,6 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.switchto = amdgpu_atpx_switchto,
 	.power_state = amdgpu_atpx_power_state,
-	.init = amdgpu_atpx_init,
 	.get_client_id = amdgpu_atpx_get_client_id,
 };
 
@@ -542,6 +573,7 @@ static bool amdgpu_atpx_detect(void)
 		printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		amdgpu_atpx_priv.atpx_detected = true;
+		amdgpu_atpx_init();
 		return true;
 	}
 	return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index 99ca75baa47d..2b6afe123f3d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -352,22 +352,22 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
 	uint16_t tmp, bios_header_start;
 
 	r = amdgpu_atrm_get_bios(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_acpi_vfct_bios(adev);
-	if (r == false)
+	if (!r)
 		r = igp_read_bios_from_vram(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_read_bios(adev);
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_bios_from_rom(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_disabled_bios(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_platform_bios(adev);
 	}
-	if (r == false || adev->bios == NULL) {
+	if (!r || adev->bios == NULL) {
 		DRM_ERROR("Unable to locate a BIOS ROM\n");
 		adev->bios = NULL;
 		return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 823bf5e0b0c8..651115dcce12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -94,6 +94,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
+	unsigned long total_size = 0;
 
 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@@ -140,6 +141,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;
 
+		total_size += amdgpu_bo_size(entry->robj);
 		trace_amdgpu_bo_list_set(list, entry->robj);
 	}
 
@@ -155,6 +157,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->array = array;
 	list->num_entries = num_entries;
 
+	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 	return 0;
 
 error_free:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 199f76baf22c..bc0440f7a31d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -312,6 +312,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
 		return RREG32_UVD_CTX(index);
 	case CGS_IND_REG__DIDT:
 		return RREG32_DIDT(index);
+	case CGS_IND_REG_GC_CAC:
+		return RREG32_GC_CAC(index);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return 0;
@@ -336,6 +338,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 		return WREG32_UVD_CTX(index, value);
 	case CGS_IND_REG__DIDT:
 		return WREG32_DIDT(index, value);
+	case CGS_IND_REG_GC_CAC:
+		return WREG32_GC_CAC(index, value);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return;
@@ -696,6 +700,17 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 	return result;
 }
 
+static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
+{
+	CGS_FUNC_ADEV;
+	if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
+		release_firmware(adev->pm.fw);
+		return 0;
+	}
+	/* cannot release other firmware because they are not created by cgs */
+	return -EINVAL;
+}
+
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type,
 					struct cgs_firmware_info *info)
@@ -737,6 +752,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 
 		if (!adev->pm.fw) {
 			switch (adev->asic_type) {
+			case CHIP_TOPAZ:
+				strcpy(fw_name, "amdgpu/topaz_smc.bin");
+				break;
 			case CHIP_TONGA:
 				strcpy(fw_name, "amdgpu/tonga_smc.bin");
 				break;
@@ -776,6 +794,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 		}
 
 		hdr = (const struct smc_firmware_header_v1_0 *)	adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(&hdr->header);
 		adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
 		ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
@@ -784,13 +803,14 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 
 		info->version = adev->pm.fw_version;
 		info->image_size = ucode_size;
+		info->ucode_start_address = ucode_start_address;
 		info->kptr = (void *)src;
 	}
 	return 0;
 }
 
 static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
-				struct cgs_system_info *sys_info)
+					struct cgs_system_info *sys_info)
 {
 	CGS_FUNC_ADEV;
 
@@ -810,6 +830,12 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_PCIE_MLW:
 		sys_info->value = adev->pm.pcie_mlw_mask;
 		break;
+	case CGS_SYSTEM_INFO_PCIE_DEV:
+		sys_info->value = adev->pdev->device;
+		break;
+	case CGS_SYSTEM_INFO_PCIE_REV:
+		sys_info->value = adev->pdev->revision;
+		break;
 	case CGS_SYSTEM_INFO_CG_FLAGS:
 		sys_info->value = adev->cg_flags;
 		break;
@@ -819,6 +845,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_GFX_CU_INFO:
 		sys_info->value = adev->gfx.cu_info.number;
 		break;
+	case CGS_SYSTEM_INFO_GFX_SE_INFO:
+		sys_info->value = adev->gfx.config.max_shader_engines;
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -892,14 +921,12 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	acpi_handle handle;
 	struct acpi_object_list input;
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *params = NULL;
-	union acpi_object *obj = NULL;
+	union acpi_object *params, *obj;
 	uint8_t name[5] = {'\0'};
-	struct cgs_acpi_method_argument *argument = NULL;
+	struct cgs_acpi_method_argument *argument;
 	uint32_t i, count;
 	acpi_status status;
 	int result;
-	uint32_t func_no = 0xFFFFFFFF;
 
 	handle = ACPI_HANDLE(&adev->pdev->dev);
 	if (!handle)
@@ -916,7 +943,6 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if (info->pinput_argument == NULL)
 			return -EINVAL;
 		argument = info->pinput_argument;
-		func_no = argument->value;
 		for (i = 0; i < info->input_count; i++) {
 			if (((argument->type == ACPI_TYPE_STRING) ||
 			     (argument->type == ACPI_TYPE_BUFFER)) &&
@@ -961,11 +987,11 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 				params->integer.value = argument->value;
 				break;
 			case ACPI_TYPE_STRING:
-				params->string.length = argument->method_length;
+				params->string.length = argument->data_length;
 				params->string.pointer = argument->pointer;
 				break;
 			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->method_length;
+				params->buffer.length = argument->data_length;
 				params->buffer.pointer = argument->pointer;
 				break;
 			default:
@@ -985,7 +1011,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 
 	if (ACPI_FAILURE(status)) {
 		result = -EIO;
-		goto error;
+		goto free_input;
 	}
 
 	/* return the output info */
@@ -995,7 +1021,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if ((obj->type != ACPI_TYPE_PACKAGE) ||
 			(obj->package.count != count)) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		params = obj->package.elements;
 	} else
@@ -1003,13 +1029,13 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 
 	if (params == NULL) {
 		result = -EIO;
-		goto error;
+		goto free_obj;
 	}
 
 	for (i = 0; i < count; i++) {
 		if (argument->type != params->type) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		switch (params->type) {
 		case ACPI_TYPE_INTEGER:
@@ -1019,7 +1045,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 			if ((params->string.length != argument->data_length) ||
 				(params->string.pointer == NULL)) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			strncpy(argument->pointer,
 				params->string.pointer,
@@ -1028,7 +1054,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		case ACPI_TYPE_BUFFER:
 			if (params->buffer.pointer == NULL) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			memcpy(argument->pointer,
 				params->buffer.pointer,
@@ -1041,9 +1067,10 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		params++;
 	}
 
-error:
-	if (obj != NULL)
-		kfree(obj);
+	result = 0;
+free_obj:
+	kfree(obj);
+free_input:
 	kfree((void *)input.pointer);
 	return result;
 }
@@ -1055,7 +1082,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 }
 #endif
 
-int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
+static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 					uint32_t acpi_method,
 					uint32_t acpi_function,
 					void *pinput, void *poutput,
@@ -1068,17 +1095,14 @@ int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_info info = {0};
 
 	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].method_length = sizeof(uint32_t);
 	acpi_input[0].data_length = sizeof(uint32_t);
 	acpi_input[0].value = acpi_function;
 
 	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_input[1].data_length = input_size;
 	acpi_input[1].pointer = pinput;
 
 	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_output.data_length = output_size;
 	acpi_output.pointer = poutput;
 
@@ -1125,6 +1149,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	amdgpu_cgs_pm_query_clock_limits,
 	amdgpu_cgs_set_camera_voltages,
 	amdgpu_cgs_get_firmware_info,
+	amdgpu_cgs_rel_firmware,
 	amdgpu_cgs_set_powergating_state,
 	amdgpu_cgs_set_clockgating_state,
 	amdgpu_cgs_get_active_displays_info,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index cb07da41152b..ff0b55a65ca3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 						   DRM_MODE_SCALE_NONE);
 			/* no HPD on analog connectors */
 			amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 	}
 
 	if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9bc8f1d99733..0307ff5887c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -216,11 +216,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
-	if (p->uf_entry.robj) {
-		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
-		p->job->uf_offset = uf_offset;
-	}
-
+	if (p->uf_entry.robj)
+		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 	return 0;
 
@@ -459,7 +456,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
 
 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
@@ -472,6 +469,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;
 
+	fpriv->vm.last_eviction_counter =
+		atomic64_read(&p->adev->num_evictions);
+
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -499,6 +499,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}
 
+	if (p->uf_entry.robj)
+		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+
 error_validate:
 	if (r) {
 		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
@@ -653,18 +656,21 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
+		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	}
+	} else {
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 
-	r = amdgpu_bo_vm_update_pte(p, vm);
-	if (!r)
-		amdgpu_cs_sync_rings(p);
+		r = amdgpu_bo_vm_update_pte(p, vm);
+		if (r)
+			return r;
+	}
 
-	return r;
+	return amdgpu_cs_sync_rings(p);
 }
 
 static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
@@ -761,7 +767,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_bo && (
+	if (parser->job->uf_addr && (
 	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
 	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
@@ -830,17 +836,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_ring *ring = p->job->ring;
 	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	struct fence *fence;
 	struct amdgpu_job *job;
 	int r;
 
 	job = p->job;
 	p->job = NULL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func,
-			       p->filp, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		return r;
@@ -848,9 +850,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	job->owner = p->filp;
 	job->ctx = entity->fence_context;
-	p->fence = fence_get(fence);
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	p->fence = fence_get(&job->base.s_fence->finished);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
+	amdgpu_job_free_resources(job);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index bb8b149786d7..df7ab2458e50 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -35,6 +36,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"
@@ -79,24 +81,27 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			bool always_indirect)
 {
+	uint32_t ret;
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
-		return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
 		unsigned long flags;
-		uint32_t ret;
 
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-
-		return ret;
 	}
+	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+	return ret;
 }
 
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    bool always_indirect)
 {
+	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+	
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
@@ -827,8 +832,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
  */
 static void amdgpu_atombios_fini(struct amdgpu_device *adev)
 {
-	if (adev->mode_info.atom_context)
+	if (adev->mode_info.atom_context) {
 		kfree(adev->mode_info.atom_context->scratch);
+		kfree(adev->mode_info.atom_context->iio);
+	}
 	kfree(adev->mode_info.atom_context);
 	adev->mode_info.atom_context = NULL;
 	kfree(adev->mode_info.atom_card_info);
@@ -1068,11 +1075,14 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
@@ -1085,16 +1095,53 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
 }
 
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type) {
+			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+			if (r)
+				return r;
+			break;
+		}
+	}
+	return 0;
+
+}
+
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type)
+{
+	int i;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type)
+			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+	}
+	return true;
+
+}
+
 const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
 					struct amdgpu_device *adev,
 					enum amd_ip_block_type type)
@@ -1207,6 +1254,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		}
 	}
 
+	adev->cg_flags &= amdgpu_cg_mask;
+	adev->pg_flags &= amdgpu_pg_mask;
+
 	return 0;
 }
 
@@ -1325,6 +1375,11 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 		adev->ip_block_status[i].valid = false;
 	}
 
+	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+		if (adev->ip_blocks[i].funcs->late_fini)
+			adev->ip_blocks[i].funcs->late_fini((void *)adev);
+	}
+
 	return 0;
 }
 
@@ -1378,6 +1433,15 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static bool amdgpu_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -1424,9 +1488,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
+	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
 
+
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
@@ -1451,6 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
+	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 
 	adev->rmmio_base = pci_resource_start(adev->pdev, 5);
@@ -1495,29 +1563,38 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
+	if (!amdgpu_get_bios(adev)) {
+		r = -EINVAL;
+		goto failed;
+	}
 	/* Must be an ATOMBIOS */
 	if (!adev->is_atom_bios) {
 		dev_err(adev->dev, "Expecting atombios for GPU\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto failed;
 	}
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* See if the asic supports SR-IOV */
 	adev->virtualization.supports_sr_iov =
 		amdgpu_atombios_has_gpu_virtualization_table(adev);
 
+	/* Check if we are executing in a virtualized environment */
+	adev->virtualization.is_virtual = amdgpu_device_is_virtual();
+	adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev);
+
 	/* Post card if necessary */
 	if (!amdgpu_card_posted(adev) ||
-	    adev->virtualization.supports_sr_iov) {
+	    (adev->virtualization.is_virtual &&
+	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto failed;
 		}
 		DRM_INFO("GPU not posted. posting now...\n");
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1527,7 +1604,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_get_clock_info(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-		return r;
+		goto failed;
 	}
 	/* init i2c buses */
 	amdgpu_atombios_i2c_init(adev);
@@ -1536,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* init the mode config */
@@ -1546,7 +1623,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		amdgpu_fini(adev);
-		return r;
+		goto failed;
 	}
 
 	adev->accel_working = true;
@@ -1556,7 +1633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		return r;
+		goto failed;
 	}
 
 	r = amdgpu_ib_ring_tests(adev);
@@ -1573,6 +1650,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 	}
 
+	r = amdgpu_debugfs_firmware_init(adev);
+	if (r) {
+		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+		return r;
+	}
+
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
 			amdgpu_test_moves(adev);
@@ -1598,10 +1681,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	return r;
 }
 
 static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev);
@@ -1624,6 +1712,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
+	drm_crtc_force_disable_all(adev->ddev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
 	kfree(adev->ip_block_status);
@@ -1635,6 +1724,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
 	vga_switcheroo_unregister_client(adev->pdev);
+	if (adev->flags & AMD_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
 		pci_iounmap(adev->pdev, adev->rio_mem);
@@ -1820,7 +1911,23 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	drm_kms_helper_poll_enable(dev);
+
+	/*
+	 * Most of the connector probing functions try to acquire runtime pm
+	 * refs to ensure that the GPU is powered on when connector polling is
+	 * performed. Since we're calling this from a runtime PM callback,
+	 * trying to acquire rpm refs will cause us to deadlock.
+	 *
+	 * Since we're guaranteed to be holding the rpm lock, it's safe to
+	 * temporarily disable the rpm helpers so this doesn't deadlock us.
+	 */
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth++;
+#endif
 	drm_helper_hpd_irq_event(dev);
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth--;
+#endif
 
 	if (fbcon) {
 		amdgpu_fbdev_set_suspend(adev, 0);
@@ -1840,11 +1947,6 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
  */
 int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
-	unsigned ring_sizes[AMDGPU_MAX_RINGS];
-	uint32_t *ring_data[AMDGPU_MAX_RINGS];
-
-	bool saved = false;
-
 	int i, r;
 	int resched;
 
@@ -1853,22 +1955,30 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	r = amdgpu_suspend(adev);
-
+	/* block scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring)
 			continue;
-
-		ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);
-		if (ring_sizes[i]) {
-			saved = true;
-			dev_info(adev->dev, "Saved %d dwords of commands "
-				 "on ring %d.\n", ring_sizes[i], i);
-		}
+		kthread_park(ring->sched.thread);
+		amd_sched_hw_job_reset(&ring->sched);
 	}
+	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+	amdgpu_fence_driver_force_completion(adev);
+
+	/* save scratch */
+	amdgpu_atombios_scratch_regs_save(adev);
+	r = amdgpu_suspend(adev);
 
 retry:
+	/* Disable fb access */
+	if (adev->mode_info.num_crtc) {
+		struct amdgpu_mode_mc_save save;
+		amdgpu_display_stop_mc_access(adev, &save);
+		amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
+	}
+
 	r = amdgpu_asic_reset(adev);
 	/* post card */
 	amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1877,32 +1987,29 @@ retry:
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
 	}
-
+	/* restore scratch */
+	amdgpu_atombios_scratch_regs_restore(adev);
 	if (!r) {
+		r = amdgpu_ib_ring_tests(adev);
+		if (r) {
+			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+			r = amdgpu_suspend(adev);
+			goto retry;
+		}
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
-
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = amdgpu_ib_ring_tests(adev);
-		if (r) {
-			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				r = amdgpu_suspend(adev);
-				goto retry;
-			}
+			amd_sched_job_recovery(&ring->sched);
+			kthread_unpark(ring->sched.thread);
 		}
 	} else {
-		amdgpu_fence_driver_force_completion(adev);
+		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i])
-				kfree(ring_data[i]);
+			if (adev->rings[i]) {
+				kthread_unpark(adev->rings[i]->sched.thread);
+			}
 		}
 	}
 
@@ -1913,13 +2020,11 @@ retry:
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 	}
+	amdgpu_irq_gpu_reset_resume_helper(adev);
 
 	return r;
 }
 
-#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
-#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
-
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
@@ -2073,20 +2178,43 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool use_bank;
+	unsigned instance_bank, sh_bank, se_bank;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+		use_bank = 1;
+		*pos &= 0xFFFFFF;
+	} else {
+		use_bank = 0;
+	}
+
+	if (use_bank) {
+		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
+		    se_bank >= adev->gfx.config.max_shader_engines)
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
 	while (size) {
 		uint32_t value;
 
 		if (*pos > adev->rmmio_size)
-			return result;
+			goto end;
 
 		value = RREG32(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
+		if (r) {
+			result = r;
+			goto end;
+		}
 
 		result += 4;
 		buf += 4;
@@ -2094,6 +2222,12 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
 	return result;
 }
 
@@ -2293,6 +2427,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	return result;
 }
 
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	ssize_t result = 0;
+	int r;
+	uint32_t *config, no_regs = 0;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	config = kmalloc(256 * sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	/* version, increment each time something is added */
+	config[no_regs++] = 0;
+	config[no_regs++] = adev->gfx.config.max_shader_engines;
+	config[no_regs++] = adev->gfx.config.max_tile_pipes;
+	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+	config[no_regs++] = adev->gfx.config.max_sh_per_se;
+	config[no_regs++] = adev->gfx.config.max_backends_per_se;
+	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+	config[no_regs++] = adev->gfx.config.max_gprs;
+	config[no_regs++] = adev->gfx.config.max_gs_threads;
+	config[no_regs++] = adev->gfx.config.max_hw_contexts;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.num_tile_pipes;
+	config[no_regs++] = adev->gfx.config.backend_enable_mask;
+	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+	config[no_regs++] = adev->gfx.config.num_gpus;
+	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+	config[no_regs++] = adev->gfx.config.gb_addr_config;
+	config[no_regs++] = adev->gfx.config.num_rbs;
+
+	while (size && (*pos < no_regs * 4)) {
+		uint32_t value;
+
+		value = config[*pos >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			kfree(config);
+			return r;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	kfree(config);
+	return result;
+}
+
+
 static const struct file_operations amdgpu_debugfs_regs_fops = {
 	.owner = THIS_MODULE,
 	.read = amdgpu_debugfs_regs_read,
@@ -2318,11 +2514,18 @@ static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gca_config_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs_didt_fops,
 	&amdgpu_debugfs_regs_pcie_fops,
 	&amdgpu_debugfs_regs_smc_fops,
+	&amdgpu_debugfs_gca_config_fops,
 };
 
 static const char *debugfs_regs_names[] = {
@@ -2330,6 +2533,7 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_regs_didt",
 	"amdgpu_regs_pcie",
 	"amdgpu_regs_smc",
+	"amdgpu_gca_config",
 };
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index a6eecf6f9065..76f96028313d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -122,7 +122,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
-	};
+	}
 
 	if (!repcnt)
 		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
@@ -220,19 +220,17 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 
 	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto cleanup;
+		goto unreserve;
 	}
 
 	r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
 					      &work->shared_count,
 					      &work->shared);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		DRM_ERROR("failed to get fences for buffer\n");
-		goto cleanup;
+		goto unpin;
 	}
 
 	amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
@@ -275,9 +273,11 @@ pflip_cleanup:
 		DRM_ERROR("failed to reserve new rbo in error path\n");
 		goto cleanup;
 	}
+unpin:
 	if (unlikely(amdgpu_bo_unpin(new_rbo) != 0)) {
 		DRM_ERROR("failed to unpin new rbo in error path\n");
 	}
+unreserve:
 	amdgpu_bo_unreserve(new_rbo);
 
 cleanup:
@@ -516,9 +516,7 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);
 
-	if (amdgpu_fb->obj) {
-		drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
-	}
+	drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(amdgpu_fb);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f888c015f76c..9aa533cf4ad1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -52,9 +52,10 @@
  * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
  * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
  *           at the end of IBs.
+ * - 3.3.0 - Add VM support for UVD on supported hardware.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	2
+#define KMS_DRIVER_MINOR	3
 #define KMS_DRIVER_PATCHLEVEL	0
 
 int amdgpu_vram_limit = 0;
@@ -82,8 +83,12 @@ int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
+int amdgpu_powercontainment = 1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_cg_mask = 0xffffffff;
+unsigned amdgpu_pg_mask = 0xffffffff;
+char *amdgpu_disable_cu = NULL;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -160,6 +165,9 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
+
+MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
+module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
 #endif
 
 MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
@@ -168,6 +176,15 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
 MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
 module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
 
+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
@@ -413,7 +430,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (amdgpu_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -430,7 +450,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (amdgpu_is_atpx_hybrid() ||
+	    !amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
@@ -515,7 +537,7 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
 	.dev_priv_size = 0,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
@@ -590,7 +612,6 @@ static int __init amdgpu_init(void)
 	DRM_INFO("amdgpu kernel modesetting enabled.\n");
 	driver = &kms_driver;
 	pdriver = &amdgpu_kms_pci_driver;
-	driver->driver_features |= DRIVER_MODESET;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 	/* let modprobe override vga console setting */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d1558768cfb7..0b109aebfec6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -204,16 +204,25 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);
 
-	while (last_seq != seq) {
+	if (unlikely(seq == last_seq))
+		return;
+
+	last_seq &= drv->num_fences_mask;
+	seq &= drv->num_fences_mask;
+
+	do {
 		struct fence *fence, **ptr;
 
-		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+		++last_seq;
+		last_seq &= drv->num_fences_mask;
+		ptr = &drv->fences[last_seq];
 
 		/* There is always exactly one thread signaling this fence slot */
 		fence = rcu_dereference_protected(*ptr, 1);
 		RCU_INIT_POINTER(*ptr, NULL);
 
-		BUG_ON(!fence);
+		if (!fence)
+			continue;
 
 		r = fence_signal(fence);
 		if (!r)
@@ -222,7 +231,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 			BUG();
 
 		fence_put(fence);
-	}
+	} while (last_seq != seq);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8fab6486064f..88fbed2389c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -503,7 +503,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;
 
-	amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 9f95da4f0536..a074edd95c70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -70,3 +70,47 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
 		}
 	}
 }
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+{
+	unsigned se, sh, cu;
+	const char *p;
+
+	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+		return;
+
+	p = amdgpu_disable_cu;
+	for (;;) {
+		char *next;
+		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+		if (ret < 3) {
+			DRM_ERROR("amdgpu: could not parse disable_cu\n");
+			return;
+		}
+
+		if (se < max_se && sh < max_sh && cu < 16) {
+			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+			mask[se * max_sh + sh] |= 1u << cu;
+		} else {
+			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+				  se, sh, cu);
+		}
+
+		next = strchr(p, ',');
+		if (!next)
+			break;
+		p = next + 1;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index dc06cbda7be6..51321e154c09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -27,4 +27,6 @@
 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
 
+unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e35423b78e..a31d7ef3032c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -33,6 +33,8 @@
 #include "amdgpu.h"
 #include "atom.h"
 
+#define AMDGPU_IB_TEST_TIMEOUT	msecs_to_jiffies(1000)
+
 /*
  * IB
  * IBs (Indirect Buffers) and areas of GPU accessible memory where
@@ -122,7 +124,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
-	struct fence *hwf;
 	uint64_t ctx;
 
 	unsigned i;
@@ -160,10 +161,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
 	if (vm) {
-		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
-				    job->gds_base, job->gds_size,
-				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size);
+		r = amdgpu_vm_flush(ring, job);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -193,7 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_invalidate)
 		amdgpu_ring_emit_hdp_invalidate(ring);
 
-	r = amdgpu_fence_emit(ring, &hwf);
+	r = amdgpu_fence_emit(ring, f);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
@@ -203,17 +201,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}
 
 	/* wrap the last IB with fence */
-	if (job && job->uf_bo) {
-		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
-
-		addr += job->uf_offset;
-		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	if (f)
-		*f = fence_get(hwf);
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -296,7 +288,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring || !ring->ready)
 			continue;
 
-		r = amdgpu_ring_test_ib(ring);
+		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
 		if (r) {
 			ring->ready = false;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 835a3fa8d8df..278708f5a744 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -383,6 +383,18 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
 	return r;
 }
 
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+	int i, j;
+	for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
+		struct amdgpu_irq_src *src = adev->irq.sources[i];
+		if (!src)
+			continue;
+		for (j = 0; j < src->num_types; j++)
+			amdgpu_irq_update(adev, src, j);
+	}
+}
+
 /**
  * amdgpu_irq_get - enable interrupt
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e124b59f39c1..7ef09352e534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -94,6 +94,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
 bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 			unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
 
 int amdgpu_irq_add_domain(struct amdgpu_device *adev);
 void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index f0dafa514fe4..6674d40eb3ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,21 +28,15 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_free_handler(struct work_struct *ws)
+static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 {
-	struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job);
-	amd_sched_job_put(&job->base);
-}
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-void amdgpu_job_timeout_func(struct work_struct *work)
-{
-	struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
 	DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
-				job->base.sched->name,
-				(uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
-				job->ring->fence_drv.sync_seq);
-
-	amd_sched_job_put(&job->base);
+		  job->base.sched->name,
+		  atomic_read(&job->ring->fence_drv.last_seq),
+		  job->ring->fence_drv.sync_seq);
+	amdgpu_gpu_reset(job->adev);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -63,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
-	INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -86,27 +79,33 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 	return r;
 }
 
-void amdgpu_job_free(struct amdgpu_job *job)
+void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	unsigned i;
 	struct fence *f;
+	unsigned i;
+
 	/* use sched fence if available */
-	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+	f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
 
 	for (i = 0; i < job->num_ibs; ++i)
-		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
-	fence_put(job->fence);
+		amdgpu_ib_free(job->adev, &job->ibs[i], f);
+}
 
-	amdgpu_bo_unref(&job->uf_bo);
-	amdgpu_sync_free(&job->sync);
+void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+{
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-	if (!job->base.use_sched)
-		kfree(job);
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
 }
 
-void amdgpu_job_free_func(struct kref *refcount)
+void amdgpu_job_free(struct amdgpu_job *job)
 {
-	struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount);
+	amdgpu_job_free_resources(job);
+
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
 
@@ -114,22 +113,20 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f)
 {
-	struct fence *fence;
 	int r;
 	job->ring = ring;
 
 	if (!f)
 		return -EINVAL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func, owner, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
 	if (r)
 		return r;
 
 	job->owner = owner;
 	job->ctx = entity->fence_context;
-	*f = fence_get(fence);
+	*f = fence_get(&job->base.s_fence->finished);
+	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
@@ -147,8 +144,8 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		int r;
 
 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-				      &job->base.s_fence->base,
-				      &job->vm_id, &job->vm_pd_addr);
+				      &job->base.s_fence->finished,
+				      job);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);
 
@@ -170,29 +167,24 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 	job = to_amdgpu_job(sched_job);
 
-	r = amdgpu_sync_wait(&job->sync);
-	if (r) {
-		DRM_ERROR("failed to sync wait (%d)\n", r);
-		return NULL;
-	}
+	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
 			       job->sync.last_vm_update, job, &fence);
-	if (r) {
+	if (r)
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
-		goto err;
-	}
 
-err:
-	job->fence = fence;
-	amdgpu_job_free(job);
+	/* if gpu reset, hw fence will be replaced here */
+	fence_put(job->fence);
+	job->fence = fence_get(fence);
+	amdgpu_job_free_resources(job);
 	return fence;
 }
 
 const struct amd_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
-	.begin_job = amd_sched_job_begin,
-	.finish_job = amd_sched_job_finish,
+	.timedout_job = amdgpu_job_timedout,
+	.free_job = amdgpu_job_free_cb
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 40a23704a981..d942654a1de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -60,7 +60,10 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
 	if (adev->rmmio == NULL)
 		goto done_free;
 
-	pm_runtime_get_sync(dev->dev);
+	if (amdgpu_device_is_px(dev)) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
 
 	amdgpu_amdkfd_device_fini(adev);
 
@@ -135,13 +138,75 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	}
 
 out:
-	if (r)
+	if (r) {
+		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
+		if (adev->rmmio && amdgpu_device_is_px(dev))
+			pm_runtime_put_noidle(dev->dev);
 		amdgpu_driver_unload_kms(dev);
-
+	}
 
 	return r;
 }
 
+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+				struct drm_amdgpu_query_fw *query_fw,
+				struct amdgpu_device *adev)
+{
+	switch (query_fw->fw_type) {
+	case AMDGPU_INFO_FW_VCE:
+		fw_info->ver = adev->vce.fw_version;
+		fw_info->feature = adev->vce.fb_version;
+		break;
+	case AMDGPU_INFO_FW_UVD:
+		fw_info->ver = adev->uvd.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GMC:
+		fw_info->ver = adev->mc.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GFX_ME:
+		fw_info->ver = adev->gfx.me_fw_version;
+		fw_info->feature = adev->gfx.me_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_PFP:
+		fw_info->ver = adev->gfx.pfp_fw_version;
+		fw_info->feature = adev->gfx.pfp_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_CE:
+		fw_info->ver = adev->gfx.ce_fw_version;
+		fw_info->feature = adev->gfx.ce_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC:
+		fw_info->ver = adev->gfx.rlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_MEC:
+		if (query_fw->index == 0) {
+			fw_info->ver = adev->gfx.mec_fw_version;
+			fw_info->feature = adev->gfx.mec_feature_version;
+		} else if (query_fw->index == 1) {
+			fw_info->ver = adev->gfx.mec2_fw_version;
+			fw_info->feature = adev->gfx.mec2_feature_version;
+		} else
+			return -EINVAL;
+		break;
+	case AMDGPU_INFO_FW_SMC:
+		fw_info->ver = adev->pm.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_SDMA:
+		if (query_fw->index >= adev->sdma.num_instances)
+			return -EINVAL;
+		fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+		fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
  * Userspace get information ioctl
  */
@@ -288,67 +353,20 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
 	}
 	case AMDGPU_INFO_TIMESTAMP:
-		ui64 = amdgpu_asic_get_gpu_clock_counter(adev);
+		ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_FW_VERSION: {
 		struct drm_amdgpu_info_firmware fw_info;
+		int ret;
 
 		/* We only support one instance of each IP block right now. */
 		if (info->query_fw.ip_instance != 0)
 			return -EINVAL;
 
-		switch (info->query_fw.fw_type) {
-		case AMDGPU_INFO_FW_VCE:
-			fw_info.ver = adev->vce.fw_version;
-			fw_info.feature = adev->vce.fb_version;
-			break;
-		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = adev->uvd.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GMC:
-			fw_info.ver = adev->mc.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GFX_ME:
-			fw_info.ver = adev->gfx.me_fw_version;
-			fw_info.feature = adev->gfx.me_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_PFP:
-			fw_info.ver = adev->gfx.pfp_fw_version;
-			fw_info.feature = adev->gfx.pfp_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_CE:
-			fw_info.ver = adev->gfx.ce_fw_version;
-			fw_info.feature = adev->gfx.ce_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_RLC:
-			fw_info.ver = adev->gfx.rlc_fw_version;
-			fw_info.feature = adev->gfx.rlc_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_MEC:
-			if (info->query_fw.index == 0) {
-				fw_info.ver = adev->gfx.mec_fw_version;
-				fw_info.feature = adev->gfx.mec_feature_version;
-			} else if (info->query_fw.index == 1) {
-				fw_info.ver = adev->gfx.mec2_fw_version;
-				fw_info.feature = adev->gfx.mec2_feature_version;
-			} else
-				return -EINVAL;
-			break;
-		case AMDGPU_INFO_FW_SMC:
-			fw_info.ver = adev->pm.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_SDMA:
-			if (info->query_fw.index >= adev->sdma.num_instances)
-				return -EINVAL;
-			fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
-			fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
-			break;
-		default:
-			return -EINVAL;
-		}
+		ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+		if (ret)
+			return ret;
+
 		return copy_to_user(out, &fw_info,
 				    min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
 	}
@@ -447,7 +465,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+			adev->gfx.config.max_shader_engines;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
@@ -565,6 +584,9 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 
 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);
 
+	amdgpu_uvd_free_handles(adev, file_priv);
+	amdgpu_vce_free_handles(adev, file_priv);
+
 	amdgpu_vm_fini(adev, &fpriv->vm);
 
 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@@ -589,10 +611,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 void amdgpu_driver_preclose_kms(struct drm_device *dev,
 				struct drm_file *file_priv)
 {
-	struct amdgpu_device *adev = dev->dev_private;
-
-	amdgpu_uvd_free_handles(adev, file_priv);
-	amdgpu_vce_free_handles(adev, file_priv);
 }
 
 /*
@@ -755,3 +773,130 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_amdgpu_info_firmware fw_info;
+	struct drm_amdgpu_query_fw query_fw;
+	int ret, i;
+
+	/* VCE */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* UVD */
+	query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* GMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* ME */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* PFP */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* CE */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+	query_fw.index = 0;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC2 */
+	if (adev->asic_type == CHIP_KAVERI ||
+	    (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
+		query_fw.index = 1;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+			   fw_info.feature, fw_info.ver);
+	}
+
+	/* SMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* SDMA */
+	query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		query_fw.index = i;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+			   i, fw_info.feature, fw_info.ver);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_firmware_info_list[] = {
+	{"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
+};
+#endif
+
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
+					ARRAY_SIZE(amdgpu_firmware_info_list));
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7ecea83ce453..6f0873c75a25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -589,6 +589,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_bo *rbo;
+	struct ttm_mem_reg *old_mem = &bo->mem;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
@@ -602,6 +603,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	/* move_notify is called before move happens */
 	amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);
+
+	trace_amdgpu_ttm_bo_move(rbo, new_mem->mem_type, old_mem->mem_type);
 }
 
 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 589b36e8c5cf..ff63b88b0ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type state = 0;
-	long idx;
+	unsigned long idx;
 	int ret;
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else {
-		ret = kstrtol(buf, 0, &idx);
+	else if (adev->pp_enabled) {
+		struct pp_states_info data;
 
-		if (ret) {
+		ret = kstrtoul(buf, 0, &idx);
+		if (ret || idx >= ARRAY_SIZE(data.states)) {
 			count = -EINVAL;
 			goto fail;
 		}
 
-		if (adev->pp_enabled) {
-			struct pp_states_info data;
-			amdgpu_dpm_get_pp_num_states(adev, &data);
-			state = data.states[idx];
-			/* only set user selected power states */
-			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
-				state != POWER_STATE_TYPE_DEFAULT) {
-				amdgpu_dpm_dispatch_task(adev,
-						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
-				adev->pp_force_state_enabled = true;
-			}
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+		state = data.states[idx];
+		/* only set user selected power states */
+		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+		    state != POWER_STATE_TYPE_DEFAULT) {
+			amdgpu_dpm_dispatch_task(adev,
+					AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+			adev->pp_force_state_enabled = true;
 		}
 	}
 fail:
@@ -349,6 +347,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf);
 
 	return size;
 }
@@ -365,7 +365,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -379,6 +381,8 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
 fail:
 	return count;
 }
@@ -393,6 +397,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);
 
 	return size;
 }
@@ -409,7 +415,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -423,6 +431,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
 fail:
 	return count;
 }
@@ -437,6 +447,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);
 
 	return size;
 }
@@ -453,7 +465,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -467,6 +481,100 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_sclk_od(adev);
+	else if (adev->pm.funcs->get_sclk_od)
+		value = adev->pm.funcs->get_sclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_sclk_od) {
+		adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_mclk_od(adev);
+	else if (adev->pm.funcs->get_mclk_od)
+		value = adev->pm.funcs->get_mclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_mclk_od) {
+		adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
 fail:
 	return count;
 }
@@ -492,6 +600,12 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_dpm_pcie,
 		amdgpu_set_pp_dpm_pcie);
+static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_sclk_od,
+		amdgpu_set_pp_sclk_od);
+static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_mclk_od,
+		amdgpu_set_pp_mclk_od);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -1110,22 +1224,34 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 			DRM_ERROR("failed to create device file pp_table\n");
 			return ret;
 		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_sclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_mclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_pcie\n");
-			return ret;
-		}
 	}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_sclk_od\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_mclk_od\n");
+		return ret;
+	}
+
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1148,10 +1274,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
 		device_remove_file(adev->dev, &dev_attr_pp_force_state);
 		device_remove_file(adev->dev, &dev_attr_pp_table);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
 	}
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
+	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 }
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 6bd961fb43dc..c5738a22b690 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -52,6 +52,7 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
 		pp_init->chip_family = adev->family;
 		pp_init->chip_id = adev->asic_type;
 		pp_init->device = amdgpu_cgs_create_device(adev);
+		pp_init->powercontainment_enabled = amdgpu_powercontainment;
 
 		ret = amd_powerplay_init(pp_init, amd_pp);
 		kfree(pp_init);
@@ -183,13 +184,6 @@ static int amdgpu_pp_sw_fini(void *handle)
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
-		amdgpu_pm_sysfs_fini(adev);
-		amd_powerplay_fini(adev->powerplay.pp_handle);
-	}
-#endif
-
 	return ret;
 }
 
@@ -223,6 +217,22 @@ static int amdgpu_pp_hw_fini(void *handle)
 	return ret;
 }
 
+static void amdgpu_pp_late_fini(void *handle)
+{
+#ifdef CONFIG_DRM_AMD_POWERPLAY
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->pp_enabled) {
+		amdgpu_pm_sysfs_fini(adev);
+		amd_powerplay_fini(adev->powerplay.pp_handle);
+	}
+
+	if (adev->powerplay.ip_funcs->late_fini)
+		adev->powerplay.ip_funcs->late_fini(
+			  adev->powerplay.pp_handle);
+#endif
+}
+
 static int amdgpu_pp_suspend(void *handle)
 {
 	int ret = 0;
@@ -311,6 +321,7 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
 	.sw_fini = amdgpu_pp_sw_fini,
 	.hw_init = amdgpu_pp_hw_init,
 	.hw_fini = amdgpu_pp_hw_fini,
+	.late_fini = amdgpu_pp_late_fini,
 	.suspend = amdgpu_pp_suspend,
 	.resume = amdgpu_pp_resume,
 	.is_idle = amdgpu_pp_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 3b02272db678..85aeb0a804bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -28,6 +28,7 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -48,6 +49,7 @@
  */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
 
 /**
  * amdgpu_ring_alloc - allocate space on the ring buffer
@@ -73,6 +75,10 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
 
 	ring->count_dw = ndw;
 	ring->wptr_old = ring->wptr;
+
+	if (ring->funcs->begin_use)
+		ring->funcs->begin_use(ring);
+
 	return 0;
 }
 
@@ -125,6 +131,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 
 	mb();
 	amdgpu_ring_set_wptr(ring);
+
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }
 
 /**
@@ -137,78 +146,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 void amdgpu_ring_undo(struct amdgpu_ring *ring)
 {
 	ring->wptr = ring->wptr_old;
-}
-
-/**
- * amdgpu_ring_backup - Back up the content of a ring
- *
- * @ring: the ring we want to back up
- *
- * Saves all unprocessed commits from a ring, returns the number of dwords saved.
- */
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data)
-{
-	unsigned size, ptr, i;
-
-	*data = NULL;
-
-	if (ring->ring_obj == NULL)
-		return 0;
-
-	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring))
-		return 0;
-
-	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	size = ring->wptr + (ring->ring_size / 4);
-	size -= ptr;
-	size &= ring->ptr_mask;
-	if (size == 0)
-		return 0;
-
-	/* and then save the content of the ring */
-	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data)
-		return 0;
-	for (i = 0; i < size; ++i) {
-		(*data)[i] = ring->ring[ptr++];
-		ptr &= ring->ptr_mask;
-	}
-
-	return size;
-}
-
-/**
- * amdgpu_ring_restore - append saved commands to the ring again
- *
- * @ring: ring to append commands to
- * @size: number of dwords we want to write
- * @data: saved commands
- *
- * Allocates space on the ring and restore the previously saved commands.
- */
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data)
-{
-	int i, r;
-
-	if (!size || !data)
-		return 0;
-
-	/* restore the saved ring content */
-	r = amdgpu_ring_alloc(ring, size);
-	if (r)
-		return r;
-
-	for (i = 0; i < size; ++i) {
-		amdgpu_ring_write(ring, data[i]);
-	}
 
-	amdgpu_ring_commit(ring);
-	kfree(data);
-	return 0;
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }
 
 /**
@@ -260,14 +200,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
-	if (r) {
-		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
-		return r;
-	}
-	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
-	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
-
 	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
@@ -276,7 +208,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
 	ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];
 
-	spin_lock_init(&ring->fence_lock);
 	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
 	if (r) {
 		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
@@ -310,6 +241,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 		r = amdgpu_bo_kmap(ring->ring_obj,
 				       (void **)&ring->ring);
+
+		memset((void *)ring->ring, 0, ring->ring_size);
+
 		amdgpu_bo_unreserve(ring->ring_obj);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring map failed\n", r);
@@ -343,10 +277,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	ring->ring = NULL;
 	ring->ring_obj = NULL;
 
+	amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
 	amdgpu_wb_free(ring->adev, ring->wptr_offs);
-	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);
 
 	if (ring_obj) {
 		r = amdgpu_bo_reserve(ring_obj, false);
@@ -357,6 +291,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		}
 		amdgpu_bo_unref(&ring_obj);
 	}
+	amdgpu_debugfs_ring_fini(ring);
 }
 
 /*
@@ -364,57 +299,62 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
  */
 #if defined(CONFIG_DEBUG_FS)
 
-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
+/* Layout of file is 12 bytes consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n-words of ring data
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	int roffset = (unsigned long)node->info_ent->data;
-	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
-	uint32_t rptr, wptr, rptr_next;
-	unsigned i;
-
-	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
-
-	rptr = amdgpu_ring_get_rptr(ring);
-	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
-
-	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
-		   ring->wptr, ring->wptr);
-
-	if (!ring->ready)
-		return 0;
-
-	/* print 8 dw before current rptr as often it's the last executed
-	 * packet that is the root issue
-	 */
-	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	while (i != rptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+	struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
+	int r, i;
+	uint32_t value, result, early[3];
+
+	if (*pos & 3 || size & 3)
+		return -EINVAL;
+
+	result = 0;
+
+	if (*pos < 12) {
+		early[0] = amdgpu_ring_get_rptr(ring);
+		early[1] = amdgpu_ring_get_wptr(ring);
+		early[2] = ring->wptr;
+		for (i = *pos / 4; i < 3 && size; i++) {
+			r = put_user(early[i], (uint32_t *)buf);
+			if (r)
+				return r;
+			buf += 4;
+			result += 4;
+			size -= 4;
+			*pos += 4;
+		}
 	}
-	while (i != wptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+
+	while (size) {
+		if (*pos >= (ring->ring_size + 12))
+			return result;
+			
+		value = ring->ring[(*pos - 12)/4];
+		r = put_user(value, (uint32_t*)buf);
+		if (r)
+			return r;
+		buf += 4;
+		result += 4;
+		size -= 4;
+		*pos += 4;
 	}
-	return 0;
+
+	return result;
 }
 
-static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS];
-static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_ring_read,
+	.llseek = default_llseek
+};
 
 #endif
 
@@ -422,28 +362,27 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring)
 {
 #if defined(CONFIG_DEBUG_FS)
-	unsigned offset = (uint8_t*)ring - (uint8_t*)adev;
-	unsigned i;
-	struct drm_info_list *info;
-	char *name;
-
-	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
-		info = &amdgpu_debugfs_ring_info_list[i];
-		if (!info->data)
-			break;
-	}
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *ent, *root = minor->debugfs_root;
+	char name[32];
 
-	if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list))
-		return -ENOSPC;
-
-	name = &amdgpu_debugfs_ring_names[i][0];
 	sprintf(name, "amdgpu_ring_%s", ring->name);
-	info->name = name;
-	info->show = amdgpu_debugfs_ring_info;
-	info->driver_features = 0;
-	info->data = (void*)(uintptr_t)offset;
 
-	return amdgpu_debugfs_add_files(adev, info, 1);
+	ent = debugfs_create_file(name,
+				  S_IFREG | S_IRUGO, root,
+				  ring, &amdgpu_debugfs_ring_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	i_size_write(ent->d_inode, ring->ring_size + 12);
+	ring->ent = ent;
 #endif
 	return 0;
 }
+
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove(ring->ent);
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index b16366c2b4a0..d8af37a845f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 		return r;
 	}
 	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
+	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
 	amdgpu_bo_unreserve(sa_manager->bo);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 34a92808bbd4..5c8d3022fb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -223,13 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
  * @sync: the sync object
+ * @ring: optional ring to use for test
  *
- * Returns true if all fences in the sync object are signaled.
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
  */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -237,6 +240,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		struct fence *f = e->fence;
+		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+		if (ring && s_fence) {
+			/* For fences from the same ring it is sufficient
+			 * when they are scheduled.
+			 */
+			if (s_fence->sched == &ring->sched) {
+				if (fence_is_signaled(&s_fence->scheduled))
+					continue;
+
+				return &s_fence->scheduled;
+			}
+		}
 
 		if (fence_is_signaled(f)) {
 			hash_del(&e->node);
@@ -245,58 +261,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 			continue;
 		}
 
-		return false;
+		return f;
 	}
 
-	return true;
+	return NULL;
 }
 
 /**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
+ * amdgpu_sync_get_fence - get the next fence from the sync object
  *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
+ * @sync: sync object to use
  *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
+ * Get and removes the next fence from the sync object not signaled yet.
  */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence)
-{
-	struct amdgpu_sync_entry *e, *newone;
-	struct hlist_node *tmp;
-	int i;
-
-	/* Allocate the new entry before moving the old ones */
-	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-	if (!newone)
-		return -ENOMEM;
-
-	hash_for_each_safe(src->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
-
-		hash_del(&e->node);
-		if (fence_is_signaled(f)) {
-			fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		if (amdgpu_sync_add_later(dst, f)) {
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		hash_add(dst->fences, &e->node, f->context);
-	}
-
-	hash_add(src->fences, &newone->node, fence->context);
-	newone->fence = fence_get(fence);
-
-	return 0;
-}
-
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
@@ -319,25 +296,6 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 }
 
-int amdgpu_sync_wait(struct amdgpu_sync *sync)
-{
-	struct amdgpu_sync_entry *e;
-	struct hlist_node *tmp;
-	int i, r;
-
-	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		r = fence_wait(e->fence, false);
-		if (r)
-			return r;
-
-		hash_del(&e->node);
-		fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
-	}
-
-	return 0;
-}
-
 /**
  * amdgpu_sync_free - free the sync object
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 26a5f4acf584..0d8d65eb46cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -11,19 +11,68 @@
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace
 
+TRACE_EVENT(amdgpu_mm_rreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_mm_wreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
 TRACE_EVENT(amdgpu_bo_create,
 	    TP_PROTO(struct amdgpu_bo *bo),
 	    TP_ARGS(bo),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo *, bo)
 			     __field(u32, pages)
+			     __field(u32, type)
+			     __field(u32, prefer)
+			     __field(u32, allow)
+			     __field(u32, visible)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->bo = bo;
 			   __entry->pages = bo->tbo.num_pages;
+			   __entry->type = bo->tbo.mem.mem_type;
+			   __entry->prefer = bo->prefered_domains;
+			   __entry->allow = bo->allowed_domains;
+			   __entry->visible = bo->flags;
 			   ),
-	    TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages)
+
+	    TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+		       __entry->bo, __entry->pages, __entry->type,
+		       __entry->prefer, __entry->allow, __entry->visible)
 );
 
 TRACE_EVENT(amdgpu_cs,
@@ -64,7 +113,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -89,7 +138,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -100,24 +149,26 @@ TRACE_EVENT(amdgpu_sched_run_job,
 
 
 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
-		     uint64_t pd_addr),
-	    TP_ARGS(vm, ring, vmid, pd_addr),
+	    TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job),
+	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
 			     __field(u32, vmid)
 			     __field(u64, pd_addr)
+			     __field(u32, needs_flush)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->vm = vm;
 			   __entry->ring = ring;
-			   __entry->vmid = vmid;
-			   __entry->pd_addr = pd_addr;
+			   __entry->vmid = job->vm_id;
+			   __entry->pd_addr = job->vm_pd_addr;
+			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
-		      __entry->ring, __entry->vmid, __entry->pd_addr)
+	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->vm, __entry->ring, __entry->vmid,
+		      __entry->pd_addr, __entry->needs_flush)
 );
 
 TRACE_EVENT(amdgpu_vm_bo_map,
@@ -244,13 +295,55 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo_list *, list)
 			     __field(struct amdgpu_bo *, bo)
+			     __field(u64, bo_size)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->list = list;
 			   __entry->bo = bo;
+			   __entry->bo_size = amdgpu_bo_size(bo);
 			   ),
-	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+	    TP_printk("list=%p, bo=%p, bo_size = %Ld",
+		      __entry->list,
+		      __entry->bo,
+		      __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_cs_bo_status,
+	    TP_PROTO(uint64_t total_bo, uint64_t total_size),
+	    TP_ARGS(total_bo, total_size),
+	    TP_STRUCT__entry(
+			__field(u64, total_bo)
+			__field(u64, total_size)
+			),
+
+	    TP_fast_assign(
+			__entry->total_bo = total_bo;
+			__entry->total_size = total_size;
+			),
+	    TP_printk("total bo size = %Ld, total bo count = %Ld",
+			__entry->total_bo, __entry->total_size)
+);
+
+TRACE_EVENT(amdgpu_ttm_bo_move,
+	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+	    TP_ARGS(bo, new_placement, old_placement),
+	    TP_STRUCT__entry(
+			__field(struct amdgpu_bo *, bo)
+			__field(u64, bo_size)
+			__field(u32, new_placement)
+			__field(u32, old_placement)
+			),
+
+	    TP_fast_assign(
+			__entry->bo      = bo;
+			__entry->bo_size = amdgpu_bo_size(bo);
+			__entry->new_placement = new_placement;
+			__entry->old_placement = old_placement;
+			),
+	    TP_printk("bo=%p from:%d to %d with size = %Ld",
+			__entry->bo, __entry->old_placement,
+			__entry->new_placement, __entry->bo_size)
 );
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b9053af4762..b7742e62972a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
 			       new_mem->num_pages * PAGE_SIZE, /* bytes */
 			       bo->resv, &fence);
-	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, fence,
-				      evict, no_wait_gpu, new_mem);
+	if (r)
+		return r;
+
+	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	fence_put(fence);
 	return r;
 }
@@ -396,6 +397,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 		return -EINVAL;
 
 	adev = amdgpu_get_adev(bo->bdev);
+
+	/* remember the eviction */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
 		return 0;
@@ -429,7 +435,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 01abfc21b4a2..b11f4e8868d7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -40,9 +40,16 @@
 #include "uvd/uvd_4_2_d.h"
 
 /* 1 second timeout */
-#define UVD_IDLE_TIMEOUT_MS	1000
+#define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+/* Firmware versions for VI */
+#define FW_1_65_10	((1 << 24) | (65 << 16) | (10 << 8))
+#define FW_1_87_11	((1 << 24) | (87 << 16) | (11 << 8))
+#define FW_1_87_12	((1 << 24) | (87 << 16) | (12 << 8))
+#define FW_1_37_15	((1 << 24) | (37 << 16) | (15 << 8))
+
 /* Polaris10/11 firmware version */
-#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+#define FW_1_66_16	((1 << 24) | (66 << 16) | (16 << 8))
 
 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -92,7 +99,6 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);
 
-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
@@ -246,6 +252,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;
 
+	switch (adev->asic_type) {
+	case CHIP_TONGA:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
+		break;
+	case CHIP_CARRIZO:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
+		break;
+	case CHIP_FIJI:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
+		break;
+	case CHIP_STONEY:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
+		break;
+	default:
+		adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
+	}
+
 	return 0;
 }
 
@@ -253,19 +276,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return 0;
+	kfree(adev->uvd.saved_bo);
 
 	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
 
-	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
-	if (!r) {
-		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
-		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
-		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
-	}
+	if (adev->uvd.vcpu_bo) {
+		r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
+		if (!r) {
+			amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
+			amdgpu_bo_unpin(adev->uvd.vcpu_bo);
+			amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
+		}
 
-	amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+	}
 
 	amdgpu_ring_fini(&adev->uvd.ring);
 
@@ -345,8 +369,6 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 		if (handle != 0 && adev->uvd.filp[i] == filp) {
 			struct fence *fence;
 
-			amdgpu_uvd_note_usage(adev);
-
 			r = amdgpu_uvd_get_destroy_msg(ring, handle,
 						       false, &fence);
 			if (r) {
@@ -437,7 +459,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 	unsigned fs_in_mb = width_in_mb * height_in_mb;
 
 	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
-	unsigned min_ctx_size = 0;
+	unsigned min_ctx_size = ~0;
 
 	image_size = width * height;
 	image_size += image_size / 2;
@@ -556,7 +578,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 		/* reference picture buffer */
 		min_dpb_size = image_size * num_dpb_buffer;
 
-		if (adev->asic_type < CHIP_POLARIS10){
+		if (!adev->uvd.use_ctx_buf){
 			/* macroblock context buffer */
 			min_dpb_size +=
 				width_in_mb * height_in_mb * num_dpb_buffer * 192;
@@ -661,7 +683,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 		}
 
 		DRM_ERROR("No more free UVD handles!\n");
-		return -EINVAL;
+		return -ENOSPC;
 
 	case 1:
 		/* it's a decode msg, calc buffer sizes */
@@ -912,8 +934,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		return -EINVAL;
 	}
 
-	amdgpu_uvd_note_usage(ctx.parser->adev);
-
 	return 0;
 }
 
@@ -967,7 +987,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err_free;
 
@@ -1109,16 +1129,14 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->uvd.idle_work,
-				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 	}
 }
 
-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
 	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
-					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
 
 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
@@ -1128,3 +1146,48 @@ static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
 		}
 	}
 }
+
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_uvd_ring_test_ib - test ib execution
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully execute an IB
+ */
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct fence *fence;
+	long r;
+
+	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		goto error;
+	}
+
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		goto error;
+	}
+
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	} else {
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+		r = 0;
+	}
+
+error:
+	fence_put(fence);
+	return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 9a3b449081a7..c850009602d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -35,5 +35,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 875626a2eccb..05865ce35351 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -36,7 +36,7 @@
 #include "cikd.h"
 
 /* 1 second timeout */
-#define VCE_IDLE_TIMEOUT_MS	1000
+#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
 
 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -85,8 +85,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	unsigned ucode_version, version_major, version_minor, binary_id;
 	int i, r;
 
-	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
-
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_BONAIRE:
@@ -197,6 +195,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 		adev->vce.filp[i] = NULL;
 	}
 
+	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
+	mutex_init(&adev->vce.idle_mutex);
+
 	return 0;
 }
 
@@ -220,6 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	amdgpu_ring_fini(&adev->vce.ring[1]);
 
 	release_firmware(adev->vce.fw);
+	mutex_destroy(&adev->vce.idle_mutex);
 
 	return 0;
 }
@@ -310,37 +312,44 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->vce.idle_work,
-				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 	}
 }
 
 /**
- * amdgpu_vce_note_usage - power up VCE
+ * amdgpu_vce_ring_begin_use - power up VCE
  *
- * @adev: amdgpu_device pointer
+ * @ring: amdgpu ring
  *
  * Make sure VCE is powerd up when we want to use it
  */
-static void amdgpu_vce_note_usage(struct amdgpu_device *adev)
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
 {
-	bool streams_changed = false;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->vce.idle_work,
-					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
-
-	if (adev->pm.dpm_enabled) {
-		/* XXX figure out if the streams changed */
-		streams_changed = false;
-	}
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks;
 
-	if (set_clocks || streams_changed) {
+	mutex_lock(&adev->vce.idle_mutex);
+	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
+	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_vce(adev, true);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
 		}
 	}
+	mutex_unlock(&adev->vce.idle_mutex);
+}
+
+/**
+ * amdgpu_vce_ring_end_use - power VCE down
+ *
+ * @ring: amdgpu ring
+ *
+ * Schedule work to power VCE down again
+ */
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 }
 
 /**
@@ -357,11 +366,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	int i, r;
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		uint32_t handle = atomic_read(&adev->vce.handles[i]);
+
 		if (!handle || adev->vce.filp[i] != filp)
 			continue;
 
-		amdgpu_vce_note_usage(adev);
-
 		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
 		if (r)
 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
@@ -437,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;
 
 	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-	job->fence = f;
+	job->fence = fence_get(f);
 	if (r)
 		goto err;
 
@@ -469,7 +477,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
-	uint64_t dummy;
 	int i, r;
 
 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@@ -477,7 +484,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 		return r;
 
 	ib = &job->ibs[0];
-	dummy = ib->gpu_addr + 1024;
 
 	/* stitch together an VCE destroy msg */
 	ib->length_dw = 0;
@@ -485,11 +491,14 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
 	ib->ptr[ib->length_dw++] = handle;
 
-	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
-	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
-	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-	ib->ptr[ib->length_dw++] = dummy;
-	ib->ptr[ib->length_dw++] = 0x00000001;
+	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
+	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
+	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
+	ib->ptr[ib->length_dw++] = 0x00000000;
 
 	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
 	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
@@ -499,7 +508,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 
 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err;
 
@@ -580,12 +589,10 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
  * we we don't have another free session index.
  */
 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
-				      uint32_t handle, bool *allocated)
+				      uint32_t handle, uint32_t *allocated)
 {
 	unsigned i;
 
-	*allocated = false;
-
 	/* validate the handle */
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
@@ -602,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
 		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
 			p->adev->vce.filp[i] = p->filp;
 			p->adev->vce.img_size[i] = 0;
-			*allocated = true;
+			*allocated |= 1 << i;
 			return i;
 		}
 	}
@@ -622,15 +629,13 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
 	unsigned fb_idx = 0, bs_idx = 0;
 	int session_idx = -1;
-	bool destroyed = false;
-	bool created = false;
-	bool allocated = false;
+	uint32_t destroyed = 0;
+	uint32_t created = 0;
+	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
 	int i, r = 0, idx = 0;
 
-	amdgpu_vce_note_usage(p->adev);
-
 	while (idx < ib->length_dw) {
 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
@@ -641,30 +646,30 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 			goto out;
 		}
 
-		if (destroyed) {
-			DRM_ERROR("No other command allowed after destroy!\n");
-			r = -EINVAL;
-			goto out;
-		}
-
 		switch (cmd) {
-		case 0x00000001: // session
+		case 0x00000001: /* session */
 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
 			session_idx = amdgpu_vce_validate_handle(p, handle,
 								 &allocated);
-			if (session_idx < 0)
-				return session_idx;
+			if (session_idx < 0) {
+				r = session_idx;
+				goto out;
+			}
 			size = &p->adev->vce.img_size[session_idx];
 			break;
 
-		case 0x00000002: // task info
+		case 0x00000002: /* task info */
 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
 			break;
 
-		case 0x01000001: // create
-			created = true;
-			if (!allocated) {
+		case 0x01000001: /* create */
+			created |= 1 << session_idx;
+			if (destroyed & (1 << session_idx)) {
+				destroyed &= ~(1 << session_idx);
+				allocated |= 1 << session_idx;
+
+			} else if (!(allocated & (1 << session_idx))) {
 				DRM_ERROR("Handle already in use!\n");
 				r = -EINVAL;
 				goto out;
@@ -675,16 +680,16 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				8 * 3 / 2;
 			break;
 
-		case 0x04000001: // config extension
-		case 0x04000002: // pic control
-		case 0x04000005: // rate control
-		case 0x04000007: // motion estimation
-		case 0x04000008: // rdo
-		case 0x04000009: // vui
-		case 0x05000002: // auxiliary buffer
+		case 0x04000001: /* config extension */
+		case 0x04000002: /* pic control */
+		case 0x04000005: /* rate control */
+		case 0x04000007: /* motion estimation */
+		case 0x04000008: /* rdo */
+		case 0x04000009: /* vui */
+		case 0x05000002: /* auxiliary buffer */
 			break;
 
-		case 0x03000001: // encode
+		case 0x03000001: /* encode */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
 						*size, 0);
 			if (r)
@@ -696,18 +701,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;
 
-		case 0x02000001: // destroy
-			destroyed = true;
+		case 0x02000001: /* destroy */
+			destroyed |= 1 << session_idx;
 			break;
 
-		case 0x05000001: // context buffer
+		case 0x05000001: /* context buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						*size * 2, 0);
 			if (r)
 				goto out;
 			break;
 
-		case 0x05000004: // video bitstream buffer
+		case 0x05000004: /* video bitstream buffer */
 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						tmp, bs_idx);
@@ -715,7 +720,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;
 
-		case 0x05000005: // feedback buffer
+		case 0x05000005: /* feedback buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						4096, fb_idx);
 			if (r)
@@ -737,21 +742,24 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 		idx += len / 4;
 	}
 
-	if (allocated && !created) {
+	if (allocated & ~created) {
 		DRM_ERROR("New session without create command!\n");
 		r = -ENOENT;
 	}
 
 out:
-	if ((!r && destroyed) || (r && allocated)) {
-		/*
-		 * IB contains a destroy msg or we have allocated an
-		 * handle and got an error, anyway free the handle
-		 */
-		for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
-			atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
+	if (!r) {
+		/* No error, free all destroyed handle slots */
+		tmp = destroyed;
+	} else {
+		/* Error during parsing, free all allocated handle slots */
+		tmp = allocated;
 	}
 
+	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+		if (tmp & (1 << i))
+			atomic_set(&p->adev->vce.handles[i], 0);
+
 	return r;
 }
 
@@ -837,10 +845,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
  * @ring: the engine to test on
  *
  */
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct fence *fence = NULL;
-	int r;
+	long r;
 
 	/* skip vce ring1 ib test for now, since it's not reliable */
 	if (ring == &ring->adev->vce.ring[1])
@@ -848,21 +856,25 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
 
 	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
 		goto error;
 	}
 
 	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
 		goto error;
 	}
 
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 	} else {
 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	}
 error:
 	fence_put(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index f40cf761c66f..63f83d0d985c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -39,6 +39,8 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring);
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9f36ed30ba11..8e642fc48df4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -114,16 +115,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
  *
+ * @adev: amdgpu device pointer
  * @vm: vm providing the BOs
  * @duplicates: head of duplicates list
  *
  * Add the page directory to the BO duplicates list
  * for command submission.
  */
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates)
 {
+	uint64_t num_evictions;
 	unsigned i;
 
+	/* We only need to validate the page tables
+	 * if they aren't already valid.
+	 */
+	num_evictions = atomic64_read(&adev->num_evictions);
+	if (num_evictions == vm->last_eviction_counter)
+		return;
+
 	/* add the vm page table to the list */
 	for (i = 0; i <= vm->max_pde_used; ++i) {
 		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
@@ -162,6 +173,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 	spin_unlock(&glob->lru_lock);
 }
 
+static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
+			      struct amdgpu_vm_id *id)
+{
+	return id->current_gpu_reset_count !=
+		atomic_read(&adev->gpu_reset_counter) ? true : false;
+}
+
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
@@ -174,18 +192,67 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr)
+		      struct amdgpu_job *job)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
+	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id;
-	unsigned i = ring->idx;
-	int r;
+	struct amdgpu_vm_id *id, *idle;
+	struct fence **fences;
+	unsigned i;
+	int r = 0;
+
+	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+			       GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
 
 	mutex_lock(&adev->vm_manager.lock);
 
+	/* Check if we have an idle VMID */
+	i = 0;
+	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
+			break;
+		++i;
+	}
+
+	/* If we can't find a idle VMID to use, wait till one becomes available */
+	if (&idle->list == &adev->vm_manager.ids_lru) {
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			fence_get(fences[j]);
+
+		array = fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+		fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
+	i = ring->idx;
 	do {
 		struct fence *flushed;
 
@@ -196,67 +263,52 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		/* Check all the prerequisites to using this VMID */
 		if (!id)
 			continue;
+		if (amdgpu_vm_is_gpu_reset(adev, id))
+			continue;
 
 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
-		if (pd_addr != id->pd_gpu_addr)
+		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (id->last_user != ring &&
-		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+		if (!id->last_flush)
 			continue;
 
-		flushed  = id->flushed_updates;
-		if (updates && (!flushed || fence_is_later(updates, flushed)))
+		if (id->last_flush->context != fence_context &&
+		    !fence_is_signaled(id->last_flush))
 			continue;
 
-		/* Good we can use this VMID */
-		if (id->last_user == ring) {
-			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->first);
-			if (r)
-				goto error;
-		}
+		flushed  = id->flushed_updates;
+		if (updates &&
+		    (!flushed || fence_is_later(updates, flushed)))
+			continue;
 
-		/* And remember this submission as user of the VMID */
+		/* Good we can use this VMID. Remember this submission as
+		 * user of the VMID.
+		 */
 		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 		if (r)
 			goto error;
 
+		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;
 
-		*vm_id = id - adev->vm_manager.ids;
-		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+		job->vm_id = id - adev->vm_manager.ids;
+		job->vm_needs_flush = false;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;
 
 	} while (i != ring->idx);
 
-	id = list_first_entry(&adev->vm_manager.ids_lru,
-			      struct amdgpu_vm_id,
-			      list);
-
-	if (!amdgpu_sync_is_idle(&id->active)) {
-		struct list_head *head = &adev->vm_manager.ids_lru;
-		struct amdgpu_vm_id *tmp;
+	/* Still no ID to use? Then use the idle one found earlier */
+	id = idle;
 
-		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
-					 list) {
-			if (amdgpu_sync_is_idle(&id->active)) {
-				list_move(&id->list, head);
-				head = &id->list;
-			}
-		}
-		id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_id,
-				      list);
-	}
-
-	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 	if (r)
 		goto error;
 
@@ -269,22 +321,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);
 
-	id->pd_gpu_addr = pd_addr;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	id->last_user = ring;
 	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
-	*vm_id = id - adev->vm_manager.ids;
-	*vm_pd_addr = pd_addr;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+	job->vm_id = id - adev->vm_manager.ids;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, job);
 
 error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }
 
+static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct amdgpu_ip_block_version *ip_block;
+
+	if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+		/* only compute rings */
+		return false;
+
+	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	if (!ip_block)
+		return false;
+
+	if (ip_block->major <= 7) {
+		/* gfx7 has no workaround */
+		return true;
+	} else if (ip_block->major == 8) {
+		if (adev->gfx.mec_fw_version >= 673)
+			/* gfx8 is fixed in MEC firmware 673 */
+			return false;
+		else
+			return true;
+	}
+	return false;
+}
+
 /**
  * amdgpu_vm_flush - hardware flush the vm
  *
@@ -294,59 +370,52 @@ error:
  *
  * Emit a VM flush when it is necessary.
  */
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != gds_base ||
-		id->gds_size != gds_size ||
-		id->gws_base != gws_base ||
-		id->gws_size != gws_size ||
-		id->oa_base != oa_base ||
-		id->oa_size != oa_size);
+		id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size);
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
+	    job->vm_needs_flush || gds_switch_needed ||
+	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush &&
-	    pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+	    amdgpu_vm_is_gpu_reset(adev, id))) {
 		struct fence *fence;
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
-		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
-			r = amdgpu_fence_emit(ring, &fence);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
-			fence_put(id->last_flush);
-			id->last_flush = fence;
-		}
+		fence_put(id->last_flush);
+		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
 
 	if (gds_switch_needed) {
-		id->gds_base = gds_base;
-		id->gds_size = gds_size;
-		id->gws_base = gws_base;
-		id->gws_size = gws_size;
-		id->oa_base = oa_base;
-		id->oa_size = oa_size;
-		amdgpu_ring_emit_gds_switch(ring, vm_id,
-					    gds_base, gds_size,
-					    gws_base, gws_size,
-					    oa_base, oa_size);
+		id->gds_base = job->gds_base;
+		id->gds_size = job->gds_size;
+		id->gws_base = job->gws_base;
+		id->gws_size = job->gws_size;
+		id->oa_base = job->oa_base;
+		id->oa_size = job->oa_size;
+		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+					    job->gds_base, job->gds_size,
+					    job->gws_base, job->gws_size,
+					    job->oa_base, job->oa_size);
 	}
 
 	return 0;
@@ -723,7 +792,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
- * @dst: destination address to map to
+ * @dst: destination address to map to, the next dst inside the function
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
@@ -737,49 +806,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
-	uint64_t addr;
+	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t addr; /* next GPU address to be updated */
+	uint64_t pt_idx;
+	struct amdgpu_bo *pt;
+	unsigned nptes; /* next number of ptes to be updated */
+	uint64_t next_pe_start;
+
+	/* initialize the variables */
+	addr = start;
+	pt_idx = addr >> amdgpu_vm_block_size;
+	pt = vm->page_tables[pt_idx].entry.robj;
+
+	if ((addr & ~mask) == (end & ~mask))
+		nptes = end - addr;
+	else
+		nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+
+	cur_pe_start = amdgpu_bo_gpu_offset(pt);
+	cur_pe_start += (addr & mask) * 8;
+	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_dst = dst;
+
+	/* for next ptb*/
+	addr += nptes;
+	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 
 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
-		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
-		unsigned nptes;
-		uint64_t pe_start;
+	while (addr < end) {
+		pt_idx = addr >> amdgpu_vm_block_size;
+		pt = vm->page_tables[pt_idx].entry.robj;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pe_start = amdgpu_bo_gpu_offset(pt);
-		pe_start += (addr & mask) * 8;
-
-		if (last_pe_end != pe_start) {
+		next_pe_start = amdgpu_bo_gpu_offset(pt);
+		next_pe_start += (addr & mask) * 8;
 
+		if (cur_pe_end == next_pe_start) {
+			/* The next ptb is consecutive to current ptb.
+			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Will update two ptbs together in future.
+			*/
+			cur_pe_end += 8 * nptes;
+		} else {
 			amdgpu_vm_frag_ptes(adev, vm_update_params,
-					    last_pe_start, last_pe_end,
-					    last_dst, flags);
+					    cur_pe_start, cur_pe_end,
+					    cur_dst, flags);
 
-			last_pe_start = pe_start;
-			last_pe_end = pe_start + 8 * nptes;
-			last_dst = dst;
-		} else {
-			last_pe_end += 8 * nptes;
+			cur_pe_start = next_pe_start;
+			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_dst = dst;
 		}
 
+		/* for next ptb*/
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
-			    last_pe_end, last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
+			    cur_pe_end, cur_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -793,6 +888,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
@@ -853,6 +949,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	vm_update_params.ib = &job->ibs[0];
 
+	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+	if (r)
+		goto error_free;
+
 	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
 			     owner);
 	if (r)
@@ -889,6 +989,7 @@ error_free:
  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -902,6 +1003,7 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+				      struct fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
@@ -932,7 +1034,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	addr += mapping->offset;
 
 	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		return amdgpu_vm_bo_update_mapping(adev, exclusive,
+						   src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
 
@@ -940,7 +1043,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;
 
 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
+						src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -973,6 +1077,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
+	struct fence *exclusive;
 	uint64_t addr;
 	int r;
 
@@ -994,8 +1099,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		default:
 			break;
 		}
+
+		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 	} else {
 		addr = 0;
+		exclusive = NULL;
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
@@ -1007,7 +1115,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
+					       gtt_flags, pages_addr, vm,
 					       mapping, flags, addr,
 					       &bo_va->last_pt_update);
 		if (r)
@@ -1054,7 +1163,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
+		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
 					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
@@ -1445,6 +1554,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unreserve(vm->page_directory);
 	if (r)
 		goto error_free_page_directory;
+	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 
 	return 0;
 
@@ -1516,6 +1626,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}
 
+	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 48b6bd671cda..c32eca26155c 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -98,6 +98,7 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 			if (dig->backlight_level == 0)
 				amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
 								       ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
index 13cdb01e9b45..bc56c8a181e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.c
@@ -156,3 +156,18 @@ u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap)
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev, u8 slave_addr, u8 line_number, u8 offset, u8 data)
+{
+	PROCESS_I2C_CHANNEL_TRANSACTION_PS_ALLOCATION args;
+	int index = GetIndexIntoMasterTable(COMMAND, ProcessI2cChannelTransaction);
+
+	args.ucRegIndex = offset;
+	args.lpI2CDataOut = data;
+	args.ucFlag = 1;
+	args.ucI2CSpeed = TARGET_HW_I2C_CLOCK;
+	args.ucTransBytes = 1;
+	args.ucSlaveAddr = slave_addr;
+	args.ucLineNumber = line_number;
+
+	amdgpu_atom_execute_table(adev->mode_info.atom_context, index, (uint32_t *)&args);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
index d6128d9de56e..251aaf41f65d 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_i2c.h
@@ -27,5 +27,7 @@
 int amdgpu_atombios_i2c_xfer(struct i2c_adapter *i2c_adap,
 		      struct i2c_msg *msgs, int num);
 u32 amdgpu_atombios_i2c_func(struct i2c_adapter *adap);
+void amdgpu_atombios_i2c_channel_trans(struct amdgpu_device* adev,
+		u8 slave_addr, u8 line_number, u8 offset, u8 data);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index ea407db1fbcf..e2f0e5d58d5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -50,7 +50,9 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
@@ -84,12 +86,14 @@ static const struct ci_pt_defaults defaults_bonaire_xt =
 	{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 }
 };
 
+#if 0
 static const struct ci_pt_defaults defaults_bonaire_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062,
 	{ 0x8C,  0x23F, 0x244, 0xA6,  0x83,  0x85,  0x86,  0x86,  0x83,  0xDB,  0xDB,  0xDA,  0x67,  0x60,  0x5F  },
 	{ 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB }
 };
+#endif
 
 static const struct ci_pt_defaults defaults_saturn_xt =
 {
@@ -98,12 +102,14 @@ static const struct ci_pt_defaults defaults_saturn_xt =
 	{ 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 }
 };
 
+#if 0
 static const struct ci_pt_defaults defaults_saturn_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000,
 	{ 0x96,  0x21D, 0x23B, 0xA1,  0x85,  0x87,  0x83,  0x84,  0x81,  0xE6,  0xE6,  0xE6,  0x71,  0x6A,  0x6A  },
 	{ 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 }
 };
+#endif
 
 static const struct ci_pt_config_reg didt_config_ci[] =
 {
@@ -736,19 +742,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
 
 	if (pi->caps_sq_ramping || pi->caps_db_ramping ||
 	    pi->caps_td_ramping || pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = ci_program_pt_config_registers(adev, didt_config_ci);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		ci_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
@@ -3030,7 +3036,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
 
 	if (pi->mclk_stutter_mode_threshold &&
 	    (memory_clock <= pi->mclk_stutter_mode_threshold) &&
-	    (pi->uvd_enabled == false) &&
+	    (!pi->uvd_enabled) &&
 	    (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) &&
 	    (adev->pm.dpm.new_active_crtc_count <= 2))
 		memory_level->StutterEnable = true;
@@ -3636,6 +3642,10 @@ static int ci_setup_default_dpm_tables(struct amdgpu_device *adev)
 
 	ci_setup_default_pcie_tables(adev);
 
+	/* save a copy of the default DPM table */
+	memcpy(&(pi->golden_dpm_table), &(pi->dpm_table),
+			sizeof(struct ci_dpm_table));
+
 	return 0;
 }
 
@@ -5754,10 +5764,18 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		chip_name = "bonaire";
+		if ((adev->pdev->revision == 0x80) ||
+		    (adev->pdev->revision == 0x81) ||
+		    (adev->pdev->device == 0x665f))
+			chip_name = "bonaire_k";
+		else
+			chip_name = "bonaire";
 		break;
 	case CHIP_HAWAII:
-		chip_name = "hawaii";
+		if (adev->pdev->revision == 0x80)
+			chip_name = "hawaii_k";
+		else
+			chip_name = "hawaii";
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
@@ -6221,6 +6239,9 @@ static int ci_dpm_sw_fini(void *handle)
 	ci_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
@@ -6401,6 +6422,186 @@ static int ci_dpm_set_powergating_state(void *handle,
 	return 0;
 }
 
+static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
+		enum pp_clock_type type, char *buf)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
+	struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
+	struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
+
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = ci_get_current_pcie_speed(adev);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
+		enum pp_clock_type type, uint32_t mask)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+
+	if (adev->pm.dpm.forced_level
+			!= AMDGPU_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!pi->sclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_MCLK:
+		if (!pi->mclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_PCIE:
+	{
+		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+		uint32_t level = 0;
+
+		while (tmp >>= 1)
+			level++;
+
+		if (!pi->pcie_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					level);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].sclk =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].mclk =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
@@ -6435,6 +6636,12 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
 	.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
 	.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
 	.get_fan_speed_percent = &ci_dpm_get_fan_speed_percent,
+	.print_clock_levels = ci_dpm_print_clock_levels,
+	.force_clock_level = ci_dpm_force_clock_level,
+	.get_sclk_od = ci_dpm_get_sclk_od,
+	.set_sclk_od = ci_dpm_set_sclk_od,
+	.get_mclk_od = ci_dpm_get_mclk_od,
+	.set_mclk_od = ci_dpm_set_mclk_od,
 };
 
 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index faccc30c93bf..91be2996ae7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -193,6 +193,7 @@ struct ci_pt_defaults {
 
 struct ci_power_info {
 	struct ci_dpm_table dpm_table;
+	struct ci_dpm_table golden_dpm_table;
 	u32 voltage_control;
 	u32 mvdd_control;
 	u32 vddci_control;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07bc795a4ca9..4efc901f658c 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -879,7 +879,7 @@ static void cik_vga_set_state(struct amdgpu_device *adev, bool state)
 	uint32_t tmp;
 
 	tmp = RREG32(mmCONFIG_CNTL);
-	if (state == false)
+	if (!state)
 		tmp |= CONFIG_CNTL__VGA_DIS_MASK;
 	else
 		tmp &= ~CONFIG_CNTL__VGA_DIS_MASK;
@@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 cik_get_virtual_caps(struct amdgpu_device *adev)
+{
+	/* CIK does not support SR-IOV */
+	return 0;
+}
+
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 	{mmGRBM_STATUS, false},
 	{mmGB_ADDR_CONFIG, false},
@@ -1029,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -1152,10 +1158,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
 	WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
 }
 
-static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	struct kv_reset_save_regs kv_save = { 0 };
 	u32 i;
+	int r = -EINVAL;
 
 	dev_info(adev->dev, "GPU pci config reset\n");
 
@@ -1171,14 +1178,20 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			r = 0;
 			break;
+		}
 		udelay(1);
 	}
 
 	/* does asic init need to be run first??? */
 	if (adev->flags & AMD_IS_APU)
 		kv_restore_regs_for_reset(adev, &kv_save);
+
+	return r;
 }
 
 static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -1204,13 +1217,14 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
  */
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
 	cik_set_bios_scratch_engine_hung(adev, true);
 
-	cik_gpu_pci_config_reset(adev);
+	r = cik_gpu_pci_config_reset(adev);
 
 	cik_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -2007,9 +2021,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_xclk = &cik_get_xclk,
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle,
+	.get_virtual_caps = &cik_get_virtual_caps,
 };
 
 static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 518dca43b133..ee6466912497 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -66,6 +66,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
 
 u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
 
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+			release_firmware(adev->sdma.instance[i].fw);
+			adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /*
  * sDMA - System DMA
  * Starting with CIK, the GPU has new asynchronous
@@ -214,17 +224,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 				  unsigned vm_id, bool ctx_switch)
 {
 	u32 extra_bits = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 4)
-		next_rptr++;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
@@ -355,7 +354,7 @@ static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
 	u32 me_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		cik_sdma_gfx_stop(adev);
 		cik_sdma_rlc_stop(adev);
 	}
@@ -419,6 +418,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -446,7 +447,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
+
+	cik_sdma_enable(adev, true);
 
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -529,8 +535,8 @@ static int cik_sdma_start(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	/* unhalt the MEs */
-	cik_sdma_enable(adev, true);
+	/* halt the engine before programing */
+	cik_sdma_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = cik_sdma_gfx_resume(adev);
@@ -611,20 +617,19 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (CIK).
  * Returns 0 on success, error on failure.
  */
-static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -634,11 +639,12 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
-	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 	ib.ptr[1] = lower_32_bits(gpu_addr);
 	ib.ptr[2] = upper_32_bits(gpu_addr);
 	ib.ptr[3] = 1;
@@ -648,28 +654,25 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -998,6 +1001,7 @@ static int cik_sdma_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	cik_sdma_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 933e425a8154..2a11413ed54a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 	pi->mgcg_cgtt_local1 = 0x0;
 	pi->clock_slow_down_step = 25000;
 	pi->skip_clock_slow_down = 1;
-	pi->enable_nb_ps_policy = 0;
+	pi->enable_nb_ps_policy = false;
 	pi->caps_power_containment = true;
 	pi->caps_cac = true;
 	pi->didt_enabled = false;
@@ -2219,6 +2219,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 			}
 		}
 	} else { /*pi->caps_vce_pg*/
+		pi->vce_power_gated = gate;
 		cz_update_vce_dpm(adev);
 		cz_enable_vce_dpm(adev, !gate);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index c90408bc0fde..d4bf133908b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -307,11 +307,10 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
 	u32 tmp;
 
-	/* flip at hsync for async, default is vsync */
-	/* use UPDATE_IMMEDIATE_EN instead for async? */
+	/* flip immediate for async, default is vsync */
 	tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
 	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
-			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+			    GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
 	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
 	/* update the scanout addresses */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 300ff4aab0fd..4fdfab1e9200 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -526,36 +526,16 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 		crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
 					     CRTC_CONTROL, CRTC_MASTER_EN);
 		if (crtc_enabled) {
-#if 0
-			u32 frame_count;
-			int j;
-
+#if 1
 			save->crtc_enabled[i] = true;
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-				amdgpu_display_vblank_wait(adev, i);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+				/*it is correct only for RGB ; black is 0*/
+				WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
 				tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
 				WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			}
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
 			}
+			mdelay(20);
 #else
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
@@ -575,55 +555,22 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
 				      struct amdgpu_mode_mc_save *save)
 {
-	u32 tmp, frame_count;
-	int i, j;
+	u32 tmp;
+	int i;
 
 	/* update crtc base addresses */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
 		       upper_32_bits(adev->mc.vram_start));
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-		       upper_32_bits(adev->mc.vram_start));
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
 		       (u32)adev->mc.vram_start);
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-		       (u32)adev->mc.vram_start);
 
 		if (save->crtc_enabled[i]) {
-			tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 3) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 3);
-				WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-			}
-			for (j = 0; j < adev->usec_timeout; j++) {
-				tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-				if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0)
-					break;
-				udelay(1);
-			}
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
 		}
+		mdelay(20);
 	}
 
 	WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
index 245cabf06575..ed03b75175d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
@@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle)
 
 static int fiji_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index b336c918d6a7..b3e19ba4c57f 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -173,7 +173,7 @@ static int fiji_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
 {
 	if (!fiji_is_smc_ram_running(adev))
 	{
-		return -EINVAL;;
+		return -EINVAL;
 	}
 
 	if (wait_smu_response(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7f18a53ab53a..d869d058ef24 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -991,6 +991,22 @@ out:
 	return err;
 }
 
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+}
+
 /**
  * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
  *
@@ -1567,9 +1583,15 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
  * registers are instanced per SE or SH.  0xffffffff means
  * broadcast to all SEs or SHs (CIK).
  */
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
 		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
@@ -1643,13 +1665,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v7_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -1730,7 +1752,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2034,17 +2056,6 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -2073,22 +2084,9 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	control |= INDIRECT_BUFFER_VALID;
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
-	control |= ib->length_dw | (vm_id << 24);
-
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -2107,26 +2105,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
  * Provides a basic gfx ring test to verify that IBs are working.
  * Returns 0 on success, error on failure.
  */
-static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
+static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i;
-	int r;
+	long r;
 
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err1;
 	}
 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
@@ -2138,21 +2135,19 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err2;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err2;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err2;
+	}
+	tmp = RREG32(scratch);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
@@ -2160,7 +2155,6 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	}
 
 err2:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err1:
@@ -3205,7 +3199,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		}
 	}
 	adev->gfx.rlc.cs_data = ci_cs_data;
-	adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
 
 	src_ptr = adev->gfx.rlc.reg_list;
 	dws = adev->gfx.rlc.reg_list_size;
@@ -3363,7 +3358,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3371,7 +3366,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3418,7 +3413,7 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
 	return orig;
 }
 
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp, i, mask;
 
@@ -3440,7 +3435,7 @@ void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp;
 
@@ -3455,7 +3450,7 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
  *
  * Halt the RLC ME (MicroEngine) (CIK).
  */
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
 {
 	WREG32(mmRLC_CNTL, 0);
 
@@ -3531,7 +3526,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
 	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 	WREG32(mmRLC_LB_PARAMS, 0x00600408);
 	WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3571,7 +3566,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3622,7 +3617,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3673,7 +3668,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3851,6 +3846,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 	}
 }
 
+static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 data;
+
+	if (!bitmap)
+		return;
+
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
 static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -4107,7 +4116,7 @@ static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
  * Fetches a GPU clock counter snapshot (SI).
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -4167,12 +4176,24 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v7_0_select_se_sh,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
+	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
+	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
+};
+
 static int gfx_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
+	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
 	gfx_v7_0_set_irq_funcs(adev);
 	gfx_v7_0_set_gds_init(adev);
@@ -4489,6 +4510,7 @@ static int gfx_v7_0_sw_fini(void *handle)
 	gfx_v7_0_cp_compute_fini(adev);
 	gfx_v7_0_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
+	gfx_v7_0_free_microcode(adev);
 
 	return 0;
 }
@@ -4816,7 +4838,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
 	case 2:
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			ring = &adev->gfx.compute_ring[i];
-			if ((ring->me == me_id) & (ring->pipe == pipe_id))
+			if ((ring->me == me_id) && (ring->pipe == pipe_id))
 				amdgpu_fence_process(ring);
 		}
 		break;
@@ -5015,16 +5037,22 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v7_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -5040,7 +5068,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index e747aa935c88..94e3ea147c26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gfx_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev);
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index f19bab68fd83..bff8668e9e6d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -28,6 +28,7 @@
 #include "vid.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_atombios.h"
+#include "atombios_i2c.h"
 #include "clearstate_vi.h"
 
 #include "gmc/gmc_8_2_d.h"
@@ -47,6 +48,8 @@
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
+#include "smu/smu_7_1_3_d.h"
+
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
 
@@ -282,6 +285,7 @@ static const u32 golden_settings_polaris11_a11[] =
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
 	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris11_golden_common_all[] =
@@ -297,7 +301,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -311,6 +316,7 @@ static const u32 golden_settings_polaris10_a11[] =
 	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
 	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 polaris10_golden_common_all[] =
@@ -692,6 +698,11 @@ static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
 		amdgpu_program_register_sequence(adev,
 						 polaris10_golden_common_all,
 						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
+		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
+		if (adev->pdev->revision == 0xc7) {
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
+			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
+		}
 		break;
 	case CHIP_CARRIZO:
 		amdgpu_program_register_sequence(adev,
@@ -776,26 +787,25 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
 	return r;
 }
 
-static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
+static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
 	uint32_t scratch;
 	uint32_t tmp = 0;
-	unsigned i;
-	int r;
+	long r;
 
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err1;
 	}
 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
@@ -807,28 +817,25 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err2;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
 		goto err2;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = RREG32(scratch);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err2;
+	}
+	tmp = RREG32(scratch);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
 		r = -EINVAL;
 	}
 err2:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err1:
@@ -836,6 +843,26 @@ err1:
 	return r;
 }
 
+
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	if ((adev->asic_type != CHIP_STONEY) &&
+	    (adev->asic_type != CHIP_TOPAZ))
+		release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -1129,6 +1156,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
 	buffer[count++] = cpu_to_le32(0);
 }
 
+static void cz_init_cp_jump_table(struct amdgpu_device *adev)
+{
+	const __le32 *fw_data;
+	volatile u32 *dst_ptr;
+	int me, i, max_me = 4;
+	u32 bo_offset = 0;
+	u32 table_offset, table_size;
+
+	if (adev->asic_type == CHIP_CARRIZO)
+		max_me = 5;
+
+	/* write the cp table buffer */
+	dst_ptr = adev->gfx.rlc.cp_table_ptr;
+	for (me = 0; me < max_me; me++) {
+		if (me == 0) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.ce_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 1) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.pfp_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 2) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.me_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else if (me == 3) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		} else  if (me == 4) {
+			const struct gfx_firmware_header_v1_0 *hdr =
+				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
+			fw_data = (const __le32 *)
+				(adev->gfx.mec2_fw->data +
+				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+			table_offset = le32_to_cpu(hdr->jt_offset);
+			table_size = le32_to_cpu(hdr->jt_size);
+		}
+
+		for (i = 0; i < table_size; i ++) {
+			dst_ptr[bo_offset + i] =
+				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+		}
+
+		bo_offset += table_size;
+	}
+}
+
 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 {
 	int r;
@@ -1144,6 +1236,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
 		adev->gfx.rlc.clear_state_obj = NULL;
 	}
+
+	/* jump table block */
+	if (adev->gfx.rlc.cp_table_obj) {
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
+		adev->gfx.rlc.cp_table_obj = NULL;
+	}
 }
 
 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
@@ -1200,6 +1304,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 	}
 
+	if ((adev->asic_type == CHIP_CARRIZO) ||
+	    (adev->asic_type == CHIP_STONEY)) {
+		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+		if (adev->gfx.rlc.cp_table_obj == NULL) {
+			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
+					     AMDGPU_GEM_DOMAIN_VRAM,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     NULL, NULL,
+					     &adev->gfx.rlc.cp_table_obj);
+			if (r) {
+				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
+				return r;
+			}
+		}
+
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0)) {
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
+				  &adev->gfx.rlc.cp_table_gpu_addr);
+		if (r) {
+			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
+		if (r) {
+			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
+			return r;
+		}
+
+		cz_init_cp_jump_table(adev);
+
+		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+	}
+
 	return 0;
 }
 
@@ -1581,7 +1725,6 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 		RREG32(sec_ded_counter_registers[i]);
 
 fail:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 
@@ -1983,7 +2126,7 @@ static int gfx_v8_0_sw_fini(void *handle)
 
 	gfx_v8_0_rlc_fini(adev);
 
-	kfree(adev->gfx.rlc.register_list_format);
+	gfx_v8_0_free_microcode(adev);
 
 	return 0;
 }
@@ -3308,9 +3451,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
@@ -3360,13 +3509,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v8_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -3470,7 +3619,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3493,7 +3642,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3501,7 +3650,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3662,13 +3811,13 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
 	WREG32(mmRLC_SRM_CNTL, data);
 }
 
-static void polaris11_init_power_gating(struct amdgpu_device *adev)
+static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
 {
 	uint32_t data;
 
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
-			AMD_PG_SUPPORT_GFX_SMG |
-			AMD_PG_SUPPORT_GFX_DMG)) {
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG)) {
 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
@@ -3693,6 +3842,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
 	}
 }
 
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+						  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+	else
+		data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 {
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
@@ -3705,8 +3901,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
 
-		if (adev->asic_type == CHIP_POLARIS11)
-			polaris11_init_power_gating(adev);
+		if ((adev->asic_type == CHIP_CARRIZO) ||
+		    (adev->asic_type == CHIP_STONEY)) {
+			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+			gfx_v8_0_init_power_gating(adev);
+			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+				cz_enable_sck_slow_down_on_power_up(adev, true);
+				cz_enable_sck_slow_down_on_power_down(adev, true);
+			} else {
+				cz_enable_sck_slow_down_on_power_up(adev, false);
+				cz_enable_sck_slow_down_on_power_down(adev, false);
+			}
+			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+				cz_enable_cp_power_gating(adev, true);
+			else
+				cz_enable_cp_power_gating(adev, false);
+		} else if (adev->asic_type == CHIP_POLARIS11) {
+			gfx_v8_0_init_power_gating(adev);
+		}
 	}
 }
 
@@ -3974,11 +4187,15 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 		amdgpu_ring_write(ring, 0x3a00161a);
 		amdgpu_ring_write(ring, 0x0000002e);
 		break;
-	case CHIP_TOPAZ:
 	case CHIP_CARRIZO:
 		amdgpu_ring_write(ring, 0x00000002);
 		amdgpu_ring_write(ring, 0x00000000);
 		break;
+	case CHIP_TOPAZ:
+		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
+				0x00000000 : 0x00000002);
+		amdgpu_ring_write(ring, 0x00000000);
+		break;
 	case CHIP_STONEY:
 		amdgpu_ring_write(ring, 0x00000000);
 		amdgpu_ring_write(ring, 0x00000000);
@@ -4941,7 +5158,7 @@ static int gfx_v8_0_soft_reset(void *handle)
  * Fetches a GPU clock counter snapshot.
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -5001,12 +5218,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
+	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
+	.select_se_sh = &gfx_v8_0_select_se_sh,
+};
+
 static int gfx_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
 	gfx_v8_0_set_gds_init(adev);
@@ -5039,51 +5262,43 @@ static int gfx_v8_0_late_init(void *handle)
 	return 0;
 }
 
-static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t data, temp;
 
-	/* Send msg to SMU via Powerplay */
-	amdgpu_set_powergating_state(adev,
-			AMD_IP_BLOCK_TYPE_SMC,
-			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+	if (adev->asic_type == CHIP_POLARIS11)
+		/* Send msg to SMU via Powerplay */
+		amdgpu_set_powergating_state(adev,
+					     AMD_IP_BLOCK_TYPE_SMC,
+					     enable ?
+					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
 
-	if (enable) {
-		/* Enable static MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable static MGPG */
+	if (enable)
 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
-static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+							bool enable)
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable dynamic MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable dynamic MGPG */
+	if (enable)
 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
@@ -5091,19 +5306,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable quick PG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data |= 0x100000;
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable quick PG */
+	if (enable)
+		data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data &= ~0x100000;
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
+
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	u32 data, orig;
+
+	orig = data = RREG32(mmRLC_PG_CNTL);
+
+	if (enable)
+		data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+
+	if (orig != data)
+		WREG32(mmRLC_PG_CNTL, data);
+
+	/* Read any GFX register to wake up GFX. */
+	if (!enable)
+		data = RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
+		cz_enable_gfx_cg_power_gating(adev, true);
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+			cz_enable_gfx_pipeline_power_gating(adev, true);
+	} else {
+		cz_enable_gfx_cg_power_gating(adev, false);
+		cz_enable_gfx_pipeline_power_gating(adev, false);
 	}
 }
 
@@ -5111,21 +5370,42 @@ static int gfx_v8_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
 
 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
 		return 0;
 
 	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+			cz_update_gfx_cg_power_gating(adev, enable);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+		break;
 	case CHIP_POLARIS11:
-		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
-			polaris11_enable_gfx_static_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
-		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
-			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
 		else
-			polaris11_enable_gfx_quick_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
+		else
+			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
 		break;
 	default:
 		break;
@@ -5139,7 +5419,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
 {
 	uint32_t data;
 
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5527,6 +5807,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 	}
 
+	gfx_v8_0_wait_for_rlc_serdes(adev);
+
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 }
 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -5652,17 +5934,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -5691,23 +5962,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	control |= INDIRECT_BUFFER_VALID;
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
-	control |= ib->length_dw | (vm_id << 24);
-
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -6160,9 +6417,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-	case CHIP_STONEY:
 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 		break;
+	case CHIP_STONEY:
 	case CHIP_CARRIZO:
 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
 		break;
@@ -6200,6 +6457,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	}
 }
 
+static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 data;
+
+	if (!bitmap)
+		return;
+
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
+
 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -6220,16 +6491,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v8_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -6245,7 +6522,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 16a49f53a2fa..bc82c794312c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -26,7 +26,6 @@
 
 extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
 
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 1feb6439cb0b..d24a82bd0c7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -39,6 +39,7 @@
 
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v7_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
@@ -73,39 +74,15 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc7_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & 0x1F00;
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v7_0_wait_for_idle((void *)adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -120,8 +97,8 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -311,7 +288,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v7_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -331,7 +308,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v7_0_mc_resume(adev, &save);
@@ -1137,7 +1114,7 @@ static int gmc_v7_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v7_0_mc_stop(adev, &save);
-		if (gmc_v7_0_wait_for_idle(adev)) {
+		if (gmc_v7_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
index 36fcbbc46ada..0b386b5d2f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9945d5bbf1fe..717359d3ba8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -41,6 +41,7 @@
 
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v8_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -147,44 +148,15 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__VMC_BUSY_MASK |
-					       SRBM_STATUS__MCB_BUSY_MASK |
-					       SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-					       SRBM_STATUS__MCC_BUSY_MASK |
-					       SRBM_STATUS__MCD_BUSY_MASK |
-					       SRBM_STATUS__VMC1_BUSY_MASK);
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v8_0_wait_for_idle(adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -199,8 +171,8 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -393,7 +365,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v8_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -413,7 +385,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v8_0_mc_resume(adev, &save);
@@ -1140,7 +1112,7 @@ static int gmc_v8_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v8_0_mc_stop(adev, &save);
-		if (gmc_v8_0_wait_for_idle(adev)) {
+		if (gmc_v8_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
index 973436086b38..fc5001a8119d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v8_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
index 460bc8ad37e6..2f078ad6095c 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
@@ -24,7 +24,7 @@
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
-#include "iceland_smumgr.h"
+#include "iceland_smum.h"
 
 MODULE_FIRMWARE("amdgpu/topaz_smc.bin");
 
@@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle)
 
 static int iceland_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 52ee08193295..211839913728 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -25,7 +25,7 @@
 #include "drmP.h"
 #include "amdgpu.h"
 #include "ppsmc.h"
-#include "iceland_smumgr.h"
+#include "iceland_smum.h"
 #include "smu_ucode_xfer_vi.h"
 #include "amdgpu_ucode.h"
 
@@ -211,7 +211,7 @@ static int iceland_send_msg_to_smc_without_waiting(struct amdgpu_device *adev,
 						   PPSMC_Msg msg)
 {
 	if (!iceland_is_smc_ram_running(adev))
-		return -EINVAL;;
+		return -EINVAL;
 
 	if (wait_smu_response(adev)) {
 		DRM_ERROR("Failed to send previous message\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smumgr.h b/drivers/gpu/drm/amd/amdgpu/iceland_smum.h
index 1e0769e110fa..5983e3150cc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smumgr.h
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smum.h
@@ -21,8 +21,8 @@
  *
  */
 
-#ifndef ICELAND_SMUMGR_H
-#define ICELAND_SMUMGR_H
+#ifndef ICELAND_SMUM_H
+#define ICELAND_SMUM_H
 
 #include "ppsmc.h"
 
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index a789a863d677..a845e883f5fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -191,6 +191,7 @@ static void sumo_construct_vid_mapping_table(struct amdgpu_device *adev,
 	vid_mapping_table->num_entries = i;
 }
 
+#if 0
 static const struct kv_lcac_config_values sx_local_cac_cfg_kv[] =
 {
 	{  0,       4,        1    },
@@ -289,6 +290,7 @@ static const struct kv_lcac_config_reg cpl_cac_config_reg[] =
 {
 	{ 0xc0400d80, 0x003e0000, 17, 0x3fc00000, 22, 0x0001fffe, 1, 0x00000001, 0 }
 };
+#endif
 
 static const struct kv_pt_config_reg didt_config_kv[] =
 {
@@ -507,19 +509,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
 	    pi->caps_db_ramping ||
 	    pi->caps_td_ramping ||
 	    pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = kv_program_pt_config_registers(adev, didt_config_kv);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		kv_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/ppsmc.h b/drivers/gpu/drm/amd/amdgpu/ppsmc.h
index 7837f2ecc357..8463245f424f 100644
--- a/drivers/gpu/drm/amd/amdgpu/ppsmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/ppsmc.h
@@ -90,7 +90,9 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_StartFanControl               ((uint8_t)0x5B)
 #define PPSMC_StopFanControl                ((uint8_t)0x5C)
 #define PPSMC_MSG_NoDisplay                 ((uint8_t)0x5D)
+#define PPSMC_NoDisplay                     ((uint8_t)0x5D)
 #define PPSMC_MSG_HasDisplay                ((uint8_t)0x5E)
+#define PPSMC_HasDisplay                    ((uint8_t)0x5E)
 #define PPSMC_MSG_UVDPowerOFF               ((uint8_t)0x60)
 #define PPSMC_MSG_UVDPowerON                ((uint8_t)0x61)
 #define PPSMC_MSG_EnableULV                 ((uint8_t)0x62)
@@ -108,6 +110,7 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_MSG_DisableDTE                ((uint8_t)0x88)
 #define PPSMC_MSG_ThrottleOVRDSCLKDS        ((uint8_t)0x96)
 #define PPSMC_MSG_CancelThrottleOVRDSCLKDS  ((uint8_t)0x97)
+#define PPSMC_MSG_EnableACDCGPIOInterrupt   ((uint16_t) 0x149)
 
 /* CI/KV/KB */
 #define PPSMC_MSG_UVDDPM_SetEnabledMask       ((uint16_t) 0x12D)
@@ -161,6 +164,7 @@ typedef uint8_t PPSMC_Result;
 #define PPSMC_MSG_MASTER_DeepSleep_OFF        ((uint16_t) 0x190)
 #define PPSMC_MSG_Remove_DC_Clamp             ((uint16_t) 0x191)
 #define PPSMC_MSG_SetFanPwmMax                ((uint16_t) 0x19A)
+#define PPSMC_MSG_SetFanRpmMax                ((uint16_t) 0x205)
 
 #define PPSMC_MSG_ENABLE_THERMAL_DPM          ((uint16_t) 0x19C)
 #define PPSMC_MSG_DISABLE_THERMAL_DPM         ((uint16_t) 0x19D)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f4c3130d3fdb..1351c7e834a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -105,6 +105,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v2_4_init_microcode - load ucode images from disk
  *
@@ -246,19 +255,6 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -397,7 +393,7 @@ static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
 	u32 f32_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		sdma_v2_4_gfx_stop(adev);
 		sdma_v2_4_rlc_stop(adev);
 	}
@@ -461,6 +457,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -489,7 +487,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
 
+	sdma_v2_4_enable(adev, true);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -565,23 +567,25 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (!adev->firmware.smu_load) {
-		r = sdma_v2_4_load_microcode(adev);
-		if (r)
-			return r;
-	} else {
-		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-						AMDGPU_UCODE_ID_SDMA0);
-		if (r)
-			return -EINVAL;
-		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
-						AMDGPU_UCODE_ID_SDMA1);
-		if (r)
-			return -EINVAL;
+	if (!adev->pp_enabled) {
+		if (!adev->firmware.smu_load) {
+			r = sdma_v2_4_load_microcode(adev);
+			if (r)
+				return r;
+		} else {
+			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+							AMDGPU_UCODE_ID_SDMA0);
+			if (r)
+				return -EINVAL;
+			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
+							AMDGPU_UCODE_ID_SDMA1);
+			if (r)
+				return -EINVAL;
+		}
 	}
 
-	/* unhalt the MEs */
-	sdma_v2_4_enable(adev, true);
+	/* halt the engine before programing */
+	sdma_v2_4_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v2_4_gfx_resume(adev);
@@ -664,20 +668,19 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (VI).
  * Returns 0 on success, error on failure.
  */
-static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
+static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -687,7 +690,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
@@ -706,28 +709,25 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -1012,6 +1012,7 @@ static int sdma_v2_4_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v2_4_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 31d99b0010f7..653ce5ed55ae 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -236,6 +236,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v3_0_init_microcode - load ucode images from disk
  *
@@ -406,18 +415,6 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -607,7 +604,7 @@ static void sdma_v3_0_enable(struct amdgpu_device *adev, bool enable)
 	u32 f32_cntl;
 	int i;
 
-	if (enable == false) {
+	if (!enable) {
 		sdma_v3_0_gfx_stop(adev);
 		sdma_v3_0_rlc_stop(adev);
 	}
@@ -672,6 +669,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -711,7 +710,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
+
+	/* unhalt the MEs */
+	sdma_v3_0_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v3_0_ctx_switch_enable(adev, true);
 
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -804,10 +811,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 		}
 	}
 
-	/* unhalt the MEs */
-	sdma_v3_0_enable(adev, true);
-	/* enable sdma ring preemption */
-	sdma_v3_0_ctx_switch_enable(adev, true);
+	/* disble sdma engine before programing it */
+	sdma_v3_0_ctx_switch_enable(adev, false);
+	sdma_v3_0_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v3_0_gfx_resume(adev);
@@ -890,20 +896,19 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
  * Test a simple IB in the DMA ring (VI).
  * Returns 0 on success, error on failure.
  */
-static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
+static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;
 
 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}
 
@@ -913,7 +918,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}
 
@@ -932,27 +937,24 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;
 
-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
 		goto err1;
-	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
+	}
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}
 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
@@ -1247,6 +1249,7 @@ static int sdma_v3_0_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v3_0_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index b7615cefcac4..f06f6f4dc3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle)
 
 static int tonga_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 083893dd68c0..940de1836f8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -173,7 +173,7 @@ static int tonga_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
 {
 	if (!tonga_is_smc_ram_running(adev))
 	{
-		return -EINVAL;;
+		return -EINVAL;
 	}
 
 	if (wait_smu_response(adev)) {
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index f07551476a70..132e613ed674 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -34,6 +34,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "bif/bif_4_1_d.h"
+
 static void uvd_v4_2_mc_resume(struct amdgpu_device *adev);
 static void uvd_v4_2_init_cg(struct amdgpu_device *adev);
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
@@ -439,6 +441,32 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v4_2_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v4_2_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -499,49 +527,6 @@ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
 }
 
 /**
- * uvd_v4_2_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	struct fence *fence = NULL;
-	int r;
-
-	r = amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to raise UVD clocks (%d).\n", r);
-		return r;
-	}
-
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
-
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
-
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-	return r;
-}
-
-/**
  * uvd_v4_2_mc_resume - memory controller programming
  *
  * @adev: amdgpu_device pointer
@@ -763,10 +748,14 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v4_2_ring_emit_ib,
 	.emit_fence = uvd_v4_2_ring_emit_fence,
+	.emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v4_2_ring_test_ring,
-	.test_ib = uvd_v4_2_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e0a76a883d46..101de136ba63 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -31,6 +31,7 @@
 #include "uvd/uvd_5_0_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
+#include "bif/bif_5_0_d.h"
 #include "vi.h"
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -489,6 +490,32 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v5_0_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v5_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -550,49 +577,6 @@ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
-/**
- * uvd_v5_0_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
-{
-	struct amdgpu_device *adev = ring->adev;
-	struct fence *fence = NULL;
-	int r;
-
-	r = amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to raise UVD clocks (%d).\n", r);
-		return r;
-	}
-
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
-
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
-
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	amdgpu_asic_set_uvd_clocks(adev, 0, 0);
-	return r;
-}
-
 static bool uvd_v5_0_is_idle(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -815,10 +799,14 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v5_0_ring_emit_ib,
 	.emit_fence = uvd_v5_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v5_0_ring_test_ring,
-	.test_ib = uvd_v5_0_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9929d665c01..7f21102bfb99 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -33,6 +33,8 @@
 #include "oss/oss_2_0_sh_mask.h"
 #include "smu/smu_7_1_3_d.h"
 #include "smu/smu_7_1_3_sh_mask.h"
+#include "bif/bif_5_1_d.h"
+#include "gmc/gmc_8_1_d.h"
 #include "vi.h"
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -385,8 +387,8 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	uint32_t mp_swap_cntl;
 	int i, j, r;
 
-	/*disable DPG */
-	WREG32_P(mmUVD_POWER_STATUS, 0, ~(1 << 2));
+	/* disable DPG */
+	WREG32_P(mmUVD_POWER_STATUS, 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
 
 	/* disable byte swapping */
 	lmi_swap_cntl = 0;
@@ -405,17 +407,21 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	}
 
 	/* disable interupt */
-	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+	WREG32_P(mmUVD_MASTINT_EN, 0, ~UVD_MASTINT_EN__VCPU_EN_MASK);
 
 	/* stall UMC and register bus before resetting VCPU */
-	WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 	mdelay(1);
 
 	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+	WREG32(mmUVD_SOFT_RESET,
+		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
 		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
 	mdelay(5);
 
@@ -424,8 +430,13 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* initialize UVD memory controller */
-	WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-			     (1 << 21) | (1 << 9) | (1 << 20));
+	WREG32(mmUVD_LMI_CTRL,
+		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__DISABLE_ON_FWV_FAIL_MASK);
 
 #ifdef __BIG_ENDIAN
 	/* swap (8 in 32) RB and IB */
@@ -447,10 +458,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* enable VCPU clock */
-	WREG32(mmUVD_VCPU_CNTL,  1 << 9);
+	WREG32(mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
 
 	/* enable UMC */
-	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	/* boot up the VCPU */
 	WREG32(mmUVD_SOFT_RESET, 0);
@@ -484,10 +495,12 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 		return r;
 	}
 	/* enable master interrupt */
-	WREG32_P(mmUVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+	WREG32_P(mmUVD_MASTINT_EN,
+		(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+		~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
 
 	/* clear the bit 4 of UVD_STATUS */
-	WREG32_P(mmUVD_STATUS, 0, ~(2 << 1));
+	WREG32_P(mmUVD_STATUS, 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
 
 	rb_bufsz = order_base_2(ring->ring_size);
 	tmp = 0;
@@ -581,6 +594,32 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v6_0_ring_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v6_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -634,6 +673,9 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 				  struct amdgpu_ib *ib,
 				  unsigned vm_id, bool ctx_switch)
 {
+	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0));
+	amdgpu_ring_write(ring, vm_id);
+
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
 	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
 	amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH, 0));
@@ -642,39 +684,55 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, ib->length_dw);
 }
 
-/**
- * uvd_v6_0_ring_test_ib - test ib execution
- *
- * @ring: amdgpu_ring pointer
- *
- * Test if we can successfully execute an IB
- */
-static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
+static void uvd_v6_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+					 unsigned vm_id, uint64_t pd_addr)
 {
-	struct fence *fence = NULL;
-	int r;
+	uint32_t reg;
 
-	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
-		goto error;
-	}
+	if (vm_id < 8)
+		reg = mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id;
+	else
+		reg = mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8;
 
-	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
-	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
-		goto error;
-	}
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, reg << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, pd_addr >> 12);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0x8);
 
-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
-		goto error;
-	}
-	DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
-error:
-	fence_put(fence);
-	return r;
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, 1 << vm_id);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0x8);
+
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, 0);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+	amdgpu_ring_write(ring, 1 << vm_id); /* mask */
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0xC);
+}
+
+static void uvd_v6_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+	uint32_t seq = ring->fence_drv.sync_seq;
+	uint64_t addr = ring->fence_drv.gpu_addr;
+
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_DATA1, 0));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH8, 0));
+	amdgpu_ring_write(ring, 0xffffffff); /* mask */
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GP_SCRATCH9, 0));
+	amdgpu_ring_write(ring, seq);
+	amdgpu_ring_write(ring, PACKET0(mmUVD_GPCOM_VCPU_CMD, 0));
+	amdgpu_ring_write(ring, 0xE);
 }
 
 static bool uvd_v6_0_is_idle(void *handle)
@@ -847,7 +905,8 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	static int curstate = -1;
 
-	if (adev->asic_type == CHIP_FIJI)
+	if (adev->asic_type == CHIP_FIJI ||
+			adev->asic_type == CHIP_POLARIS10)
 		uvd_v6_set_bypass_mode(adev, enable);
 
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
@@ -912,22 +971,51 @@ const struct amd_ip_funcs uvd_v6_0_ip_funcs = {
 	.set_powergating_state = uvd_v6_0_set_powergating_state,
 };
 
-static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_phys_funcs = {
 	.get_rptr = uvd_v6_0_ring_get_rptr,
 	.get_wptr = uvd_v6_0_ring_get_wptr,
 	.set_wptr = uvd_v6_0_ring_set_wptr,
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v6_0_ring_emit_ib,
 	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
+	.test_ring = uvd_v6_0_ring_test_ring,
+	.test_ib = amdgpu_uvd_ring_test_ib,
+	.insert_nop = amdgpu_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
+};
+
+static const struct amdgpu_ring_funcs uvd_v6_0_ring_vm_funcs = {
+	.get_rptr = uvd_v6_0_ring_get_rptr,
+	.get_wptr = uvd_v6_0_ring_get_wptr,
+	.set_wptr = uvd_v6_0_ring_set_wptr,
+	.parse_cs = NULL,
+	.emit_ib = uvd_v6_0_ring_emit_ib,
+	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_vm_flush = uvd_v6_0_ring_emit_vm_flush,
+	.emit_pipeline_sync = uvd_v6_0_ring_emit_pipeline_sync,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v6_0_ring_test_ring,
-	.test_ib = uvd_v6_0_ring_test_ib,
+	.test_ib = amdgpu_uvd_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_uvd_ring_begin_use,
+	.end_use = amdgpu_uvd_ring_end_use,
 };
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
 {
-	adev->uvd.ring.funcs = &uvd_v6_0_ring_funcs;
+	if (adev->asic_type >= CHIP_POLARIS10) {
+		adev->uvd.ring.funcs = &uvd_v6_0_ring_vm_funcs;
+		DRM_INFO("UVD is enabled in VM mode\n");
+	} else {
+		adev->uvd.ring.funcs = &uvd_v6_0_ring_phys_funcs;
+		DRM_INFO("UVD is enabled in physical mode\n");
+	}
 }
 
 static const struct amdgpu_irq_src_funcs uvd_v6_0_irq_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 45d92aceb485..80a37a602181 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -594,6 +594,8 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vce_ring_begin_use,
+	.end_use = amdgpu_vce_ring_end_use,
 };
 
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 30e8099e94c5..c271abffd8dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -43,6 +43,7 @@
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR0	0x8616
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR1	0x8617
 #define mmVCE_LMI_VCPU_CACHE_40BIT_BAR2	0x8618
+#define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
 
 #define VCE_V3_0_FW_SIZE	(384 * 1024)
 #define VCE_V3_0_STACK_SIZE	(64 * 1024)
@@ -51,6 +52,7 @@
 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx);
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev);
+static int vce_v3_0_wait_for_idle(void *handle);
 
 /**
  * vce_v3_0_ring_get_rptr - get read pointer
@@ -205,6 +207,32 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 	vce_v3_0_override_vce_clock_gating(adev, false);
 }
 
+static int vce_v3_0_firmware_loaded(struct amdgpu_device *adev)
+{
+	int i, j;
+
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			uint32_t status = RREG32(mmVCE_STATUS);
+
+			if (status & VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK)
+				return 0;
+			mdelay(10);
+		}
+
+		DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
+		WREG32_P(mmVCE_SOFT_RESET,
+			VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+		mdelay(10);
+		WREG32_P(mmVCE_SOFT_RESET, 0,
+			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+		mdelay(10);
+	}
+
+	return -ETIMEDOUT;
+}
+
 /**
  * vce_v3_0_start - start VCE block
  *
@@ -215,11 +243,24 @@ static void vce_v3_0_set_vce_sw_clock_gating(struct amdgpu_device *adev,
 static int vce_v3_0_start(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *ring;
-	int idx, i, j, r;
+	int idx, r;
+
+	ring = &adev->vce.ring[0];
+	WREG32(mmVCE_RB_RPTR, ring->wptr);
+	WREG32(mmVCE_RB_WPTR, ring->wptr);
+	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
+	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
+	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+
+	ring = &adev->vce.ring[1];
+	WREG32(mmVCE_RB_RPTR2, ring->wptr);
+	WREG32(mmVCE_RB_WPTR2, ring->wptr);
+	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
+	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
+	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (idx = 0; idx < 2; ++idx) {
-
 		if (adev->vce.harvest_config & (1 << idx))
 			continue;
 
@@ -233,48 +274,24 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 
 		vce_v3_0_mc_resume(adev, idx);
 
-		/* set BUSY flag */
-		WREG32_P(mmVCE_STATUS, 1, ~1);
+		WREG32_P(mmVCE_STATUS, VCE_STATUS__JOB_BUSY_MASK,
+		         ~VCE_STATUS__JOB_BUSY_MASK);
+
 		if (adev->asic_type >= CHIP_STONEY)
 			WREG32_P(mmVCE_VCPU_CNTL, 1, ~0x200001);
 		else
 			WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK,
 				~VCE_VCPU_CNTL__CLK_EN_MASK);
 
-		WREG32_P(mmVCE_SOFT_RESET,
-			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
-			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-
-		mdelay(100);
-
 		WREG32_P(mmVCE_SOFT_RESET, 0,
 			~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
 
-		for (i = 0; i < 10; ++i) {
-			uint32_t status;
-			for (j = 0; j < 100; ++j) {
-				status = RREG32(mmVCE_STATUS);
-				if (status & 2)
-					break;
-				mdelay(10);
-			}
-			r = 0;
-			if (status & 2)
-				break;
-
-			DRM_ERROR("VCE not responding, trying to reset the ECPU!!!\n");
-			WREG32_P(mmVCE_SOFT_RESET,
-				VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-			mdelay(10);
-			WREG32_P(mmVCE_SOFT_RESET, 0,
-				~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-			mdelay(10);
-			r = -1;
-		}
+		mdelay(100);
+
+		r = vce_v3_0_firmware_loaded(adev);
 
 		/* clear BUSY flag */
-		WREG32_P(mmVCE_STATUS, 0, ~1);
+		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
 
 		/* Set Clock-Gating off */
 		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
@@ -290,19 +307,46 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
-	ring = &adev->vce.ring[0];
-	WREG32(mmVCE_RB_RPTR, ring->wptr);
-	WREG32(mmVCE_RB_WPTR, ring->wptr);
-	WREG32(mmVCE_RB_BASE_LO, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE, ring->ring_size / 4);
+	return 0;
+}
 
-	ring = &adev->vce.ring[1];
-	WREG32(mmVCE_RB_RPTR2, ring->wptr);
-	WREG32(mmVCE_RB_WPTR2, ring->wptr);
-	WREG32(mmVCE_RB_BASE_LO2, ring->gpu_addr);
-	WREG32(mmVCE_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
-	WREG32(mmVCE_RB_SIZE2, ring->ring_size / 4);
+static int vce_v3_0_stop(struct amdgpu_device *adev)
+{
+	int idx;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (idx = 0; idx < 2; ++idx) {
+		if (adev->vce.harvest_config & (1 << idx))
+			continue;
+
+		if (idx == 0)
+			WREG32_P(mmGRBM_GFX_INDEX, 0,
+				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+		else
+			WREG32_P(mmGRBM_GFX_INDEX,
+				GRBM_GFX_INDEX__VCE_INSTANCE_MASK,
+				~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+
+		if (adev->asic_type >= CHIP_STONEY)
+			WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x200001);
+		else
+			WREG32_P(mmVCE_VCPU_CNTL, 0,
+				~VCE_VCPU_CNTL__CLK_EN_MASK);
+		/* hold on ECPU */
+		WREG32_P(mmVCE_SOFT_RESET,
+			 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
+			 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
+
+		/* clear BUSY flag */
+		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
+
+		/* Set Clock-Gating off */
+		if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
+			vce_v3_0_set_vce_sw_clock_gating(adev, false);
+	}
+
+	WREG32_P(mmGRBM_GFX_INDEX, 0, ~GRBM_GFX_INDEX__VCE_INSTANCE_MASK);
+	mutex_unlock(&adev->grbm_idx_mutex);
 
 	return 0;
 }
@@ -441,7 +485,14 @@ static int vce_v3_0_hw_init(void *handle)
 
 static int vce_v3_0_hw_fini(void *handle)
 {
-	return 0;
+	int r;
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	r = vce_v3_0_wait_for_idle(handle);
+	if (r)
+		return r;
+
+	return vce_v3_0_stop(adev);
 }
 
 static int vce_v3_0_suspend(void *handle)
@@ -604,6 +655,18 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev,
 	return 0;
 }
 
+static void vce_v3_set_bypass_mode(struct amdgpu_device *adev, bool enable)
+{
+	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
+
+	if (enable)
+		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+	else
+		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
+
+	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
+}
+
 static int vce_v3_0_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
@@ -611,6 +674,9 @@ static int vce_v3_0_set_clockgating_state(void *handle,
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	int i;
 
+	if (adev->asic_type == CHIP_POLARIS10)
+		vce_v3_set_bypass_mode(adev, enable);
+
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
 		return 0;
 
@@ -701,6 +767,8 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
 	.test_ib = amdgpu_vce_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.begin_use = amdgpu_vce_ring_begin_use,
+	.end_use = amdgpu_vce_ring_end_use,
 };
 
 static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 2c88d0b66cf3..03a31c53aec3 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -203,6 +203,29 @@ static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
 }
 
+static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	r = RREG32(mmGC_CAC_IND_DATA);
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+	return r;
+}
+
+static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	WREG32(mmGC_CAC_IND_DATA, (v));
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+}
+
+
 static const u32 tonga_mgcg_cgcg_init[] =
 {
 	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
@@ -421,6 +444,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 vi_get_virtual_caps(struct amdgpu_device *adev)
+{
+	u32 caps = 0;
+	u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
+		caps |= AMDGPU_VIRT_CAPS_SRIOV_EN;
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
+		caps |= AMDGPU_VIRT_CAPS_IS_VF;
+
+	return caps;
+}
+
 static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
 	{mmGB_MACROTILE_MODE7, true},
 };
@@ -519,12 +556,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -583,7 +620,7 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	u32 i;
 
@@ -598,11 +635,14 @@ static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
-			break;
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			return 0;
+		}
 		udelay(1);
 	}
-
+	return -EINVAL;
 }
 
 static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -628,13 +668,15 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
  */
 static int vi_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
+
 	vi_set_bios_scratch_engine_hung(adev, true);
 
-	vi_gpu_pci_config_reset(adev);
+	r = vi_gpu_pci_config_reset(adev);
 
 	vi_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -1118,9 +1160,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_xclk = &vi_get_xclk,
 	.set_uvd_clocks = &vi_set_uvd_clocks,
 	.set_vce_clocks = &vi_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle,
+	.get_virtual_caps = &vi_get_virtual_caps,
 };
 
 static int vi_common_early_init(void *handle)
@@ -1141,6 +1181,8 @@ static int vi_common_early_init(void *handle)
 	adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg;
 	adev->didt_rreg = &vi_didt_rreg;
 	adev->didt_wreg = &vi_didt_wreg;
+	adev->gc_cac_rreg = &vi_gc_cac_rreg;
+	adev->gc_cac_wreg = &vi_gc_cac_wreg;
 
 	adev->asic_funcs = &vi_asic_funcs;
 
@@ -1214,12 +1256,18 @@ static int vi_common_early_init(void *handle)
 		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
 			AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_RLC_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
 			AMD_CG_SUPPORT_BIF_LS |
 			AMD_CG_SUPPORT_HDP_MGCG |
 			AMD_CG_SUPPORT_HDP_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ac005796b71c..4f3849ac8c07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -63,13 +63,12 @@ static struct kfd_process *create_process(const struct task_struct *thread);
 void kfd_process_create_wq(void)
 {
 	if (!kfd_process_wq)
-		kfd_process_wq = create_workqueue("kfd_process_wq");
+		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 }
 
 void kfd_process_destroy_wq(void)
 {
 	if (kfd_process_wq) {
-		flush_workqueue(kfd_process_wq);
 		destroy_workqueue(kfd_process_wq);
 		kfd_process_wq = NULL;
 	}
@@ -242,13 +241,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should reset all wavefronts */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+	 * if we should delete debug managers and reset all wavefronts
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if ((pdd->dev->dbgmgr) &&
+				(pdd->dev->dbgmgr->pasid == p->pasid))
+			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
+
 		if (pdd->reset_wavefronts) {
 			pr_warn("amdkfd: Resetting all wave fronts\n");
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 			pdd->reset_wavefronts = false;
 		}
+	}
 
 	mutex_unlock(&p->mutex);
 
@@ -324,6 +329,7 @@ err_process_pqm_init:
 	synchronize_rcu();
 	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
 err_mmu_notifier:
+	mutex_destroy(&process->mutex);
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
 	kfree(process->queues);
@@ -404,42 +410,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 
 	idx = srcu_read_lock(&kfd_processes_srcu);
 
+	/*
+	 * Look for the process that matches the pasid. If there is no such
+	 * process, we either released it in amdkfd's own notifier, or there
+	 * is a bug. Unfortunately, there is no way to tell...
+	 */
 	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
-		if (p->pasid == pasid)
-			break;
+		if (p->pasid == pasid) {
 
-	srcu_read_unlock(&kfd_processes_srcu, idx);
+			srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	BUG_ON(p->pasid != pasid);
+			pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-	mutex_lock(&p->mutex);
+			mutex_lock(&p->mutex);
 
-	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
-		kfd_dbgmgr_destroy(dev->dbgmgr);
+			if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+				kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
+			pqm_uninit(&p->pqm);
 
-	pdd = kfd_get_process_device_data(dev, p);
+			pdd = kfd_get_process_device_data(dev, p);
 
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
+			if (!pdd) {
+				mutex_unlock(&p->mutex);
+				return;
+			}
 
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+			if (pdd->reset_wavefronts) {
+				dbgdev_wave_reset_wavefronts(pdd->dev, p);
+				pdd->reset_wavefronts = false;
+			}
 
-	/*
-	 * Just mark pdd as unbound, because we still need it to call
-	 * amd_iommu_unbind_pasid() in when the process exits.
-	 * We don't call amd_iommu_unbind_pasid() here
-	 * because the IOMMU called us.
-	 */
-	pdd->bound = false;
+			/*
+			 * Just mark pdd as unbound, because we still need it
+			 * to call amd_iommu_unbind_pasid() in when the
+			 * process exits.
+			 * We don't call amd_iommu_unbind_pasid() here
+			 * because the IOMMU called us.
+			 */
+			pdd->bound = false;
 
-	mutex_unlock(&p->mutex);
+			mutex_unlock(&p->mutex);
+
+			return;
+		}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 74909e72a009..884c96f50c3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.simd_count);
 
 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
 				dev->node_props.mem_banks_count,
 				dev->mem_bank_count);
 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h
index 7c2a916c1e63..5eb895fd98bf 100644
--- a/drivers/gpu/drm/amd/include/amd_pcie.h
+++ b/drivers/gpu/drm/amd/include/amd_pcie.h
@@ -37,6 +37,13 @@
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_MASK   0x0000FFFF
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_SHIFT  0
 
+/* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
+
 /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1          0x00010000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2          0x00020000
@@ -47,4 +54,11 @@
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32         0x00400000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT       16
 
+/* 1/2/4/8/16 lanes */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK (CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 6080951d539d..a74a0d2ff1ca 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -26,15 +26,6 @@
 #define AMD_MAX_USEC_TIMEOUT		100000  /* 100 ms */
 
 /*
-* Supported GPU families (aligned with amdgpu_drm.h)
-*/
-#define AMD_FAMILY_UNKNOWN              0
-#define AMD_FAMILY_CI                   120 /* Bonaire, Hawaii */
-#define AMD_FAMILY_KV                   125 /* Kaveri, Kabini, Mullins */
-#define AMD_FAMILY_VI                   130 /* Iceland, Tonga */
-#define AMD_FAMILY_CZ                   135 /* Carrizo */
-
-/*
  * Supported ASIC types
  */
 enum amd_asic_type {
@@ -120,6 +111,8 @@ enum amd_powergating_state {
 #define AMD_PG_SUPPORT_SDMA			(1 << 8)
 #define AMD_PG_SUPPORT_ACP			(1 << 9)
 #define AMD_PG_SUPPORT_SAMU			(1 << 10)
+#define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
+#define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
 
 enum amd_pm_state_type {
 	/* not used for dpm */
@@ -157,6 +150,7 @@ struct amd_ip_funcs {
 	int (*hw_init)(void *handle);
 	/* tears down the hw state */
 	int (*hw_fini)(void *handle);
+	void (*late_fini)(void *handle);
 	/* handles IP specific hw/sw changes for suspend */
 	int (*suspend)(void *handle);
 	/* handles IP specific hw/sw changes for resume */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
index 293329719bba..809759f7bb81 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/bif/bif_5_0_d.h
@@ -27,6 +27,7 @@
 #define mmMM_INDEX                                                              0x0
 #define mmMM_INDEX_HI                                                           0x6
 #define mmMM_DATA                                                               0x1
+#define mmCC_BIF_BX_STRAP2							0x152A
 #define mmBIF_MM_INDACCESS_CNTL                                                 0x1500
 #define mmBIF_DOORBELL_APER_EN                                                  0x1501
 #define mmBUS_CNTL                                                              0x1508
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
index ebaf67bb1589..90ff7c8a6011 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
@@ -2823,4 +2823,7 @@
 #define mmDC_EDC_CSINVOC_CNT                                                    0x3192
 #define mmDC_EDC_RESTORE_CNT                                                    0x3193
 
+#define mmGC_CAC_IND_INDEX                                                      0x129a
+#define mmGC_CAC_IND_DATA                                                       0x129b
+
 #endif /* GFX_8_0_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
index 7d722458d9f5..4070ca3a68eb 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
@@ -8730,8 +8730,6 @@
 #define RLC_GPM_STAT__DYN_CU_POWERING_DOWN__SHIFT 0x10
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE_MASK 0x20000
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE__SHIFT 0x11
-#define RLC_GPM_STAT__RESERVED_MASK 0xfc0000
-#define RLC_GPM_STAT__RESERVED__SHIFT 0x12
 #define RLC_GPM_STAT__PG_ERROR_STATUS_MASK 0xff000000
 #define RLC_GPM_STAT__PG_ERROR_STATUS__SHIFT 0x18
 #define RLC_GPU_CLOCK_32_RES_SEL__RES_SEL_MASK 0x3f
@@ -8764,8 +8762,10 @@
 #define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE__SHIFT 0x12
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE_MASK 0x80000
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE__SHIFT 0x13
-#define RLC_PG_CNTL__RESERVED1_MASK 0xf00000
-#define RLC_PG_CNTL__RESERVED1__SHIFT 0x14
+#define RLC_PG_CNTL__QUICK_PG_ENABLE_MASK 0x100000
+#define RLC_PG_CNTL__QUICK_PG_ENABLE__SHIFT 0x14
+#define RLC_PG_CNTL__RESERVED1_MASK 0xe00000
+#define RLC_PG_CNTL__RESERVED1__SHIFT 0x15
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY_MASK 0xff
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY__SHIFT 0x0
 #define RLC_GPM_THREAD_PRIORITY__THREAD1_PRIORITY_MASK 0xff00
@@ -9102,8 +9102,6 @@
 #define RLC_GPM_LOG_CONT__CONT__SHIFT 0x0
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK 0xff
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT 0x0
-#define RLC_PG_DELAY_3__RESERVED_MASK 0xffffff00
-#define RLC_PG_DELAY_3__RESERVED__SHIFT 0x8
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE_MASK 0xffffffff
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE__SHIFT 0x0
 #define RLC_GPM_INT_DISABLE_TH1__DISABLE_MASK 0xffffffff
@@ -9124,14 +9122,8 @@
 #define RLC_SRM_DEBUG_SELECT__RESERVED__SHIFT 0x8
 #define RLC_SRM_DEBUG__DATA_MASK 0xffffffff
 #define RLC_SRM_DEBUG__DATA__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_ARAM_ADDR__ADDR__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__RESERVED_MASK 0xfffffc00
-#define RLC_SRM_ARAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_ARAM_DATA__DATA_MASK 0xffffffff
 #define RLC_SRM_ARAM_DATA__DATA__SHIFT 0x0
-#define RLC_SRM_DRAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_DRAM_ADDR__ADDR__SHIFT 0x0
 #define RLC_SRM_DRAM_ADDR__RESERVED_MASK 0xfffffc00
 #define RLC_SRM_DRAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_DRAM_DATA__DATA_MASK 0xffffffff
@@ -17946,8 +17938,6 @@
 #define VGT_TESS_DISTRIBUTION__ACCUM_TRI__SHIFT 0x8
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD_MASK 0xff0000
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD__SHIFT 0x10
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT_MASK 0xff000000
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT__SHIFT 0x18
 #define VGT_TF_RING_SIZE__SIZE_MASK 0xffff
 #define VGT_TF_RING_SIZE__SIZE__SHIFT 0x0
 #define VGT_SYS_CONFIG__DUAL_CORE_EN_MASK 0x1
@@ -20502,8 +20492,6 @@
 #define DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_SQ_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_SQ_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_SQ_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_SQ_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20558,8 +20546,6 @@
 #define DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DB_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DB_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DB_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DB_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DB_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20614,8 +20600,6 @@
 #define DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TD_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TD_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TD_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TD_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TD_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20670,8 +20654,6 @@
 #define DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TCP_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TCP_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TCP_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TCP_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20726,8 +20708,6 @@
 #define DIDT_DBR_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DBR_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DBR_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DBR_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DBR_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DBR_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20773,4 +20753,84 @@
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11_MASK 0xff000000
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11__SHIFT 0x18
 
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007e
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007eL
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK   0x00000001L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT 0x00000000
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK      0x0000007eL
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK      0x00001f80L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT    0x00000001
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT    0x00000007
+
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK  0x1fffe000L
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_TCP_STALL_CTRL__UNUSED_0_MASK                 0xe0000000L
+#define DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT               0x0000001d
+
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK      0x00000001L
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK      0x00007ffeL
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK      0x1fff8000L
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT    0x00000000
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT    0x00000001
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT    0x0000000f
+
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK    0x00000fc0L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK    0x0003f000L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT  0x00000006
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT  0x0000000c
+
 #endif /* GFX_8_0_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
index 6f6fb34742d2..ec69869c55ff 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/uvd/uvd_6_0_d.h
@@ -111,6 +111,8 @@
 #define mmUVD_MIF_RECON1_ADDR_CONFIG                                            0x39c5
 #define ixUVD_MIF_SCLR_ADDR_CONFIG                                              0x4
 #define mmUVD_JPEG_ADDR_CONFIG                                                  0x3a1f
+#define mmUVD_GP_SCRATCH8                                                       0x3c0a
+#define mmUVD_GP_SCRATCH9                                                       0x3c0b
 #define mmUVD_GP_SCRATCH4                                                       0x3d38
 
 #endif /* UVD_6_0_D_H */
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 32f3e345de08..3493da5c8f0e 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5538,6 +5538,78 @@ typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_5
   ULONG  ulReserved[12];
 }ATOM_ASIC_PROFILING_INFO_V3_5;
 
+/* for Polars10/11 AVFS parameters */
+typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_6
+{
+  ATOM_COMMON_TABLE_HEADER         asHeader;
+  ULONG  ulMaxVddc;
+  ULONG  ulMinVddc;
+  USHORT usLkgEuseIndex;
+  UCHAR  ucLkgEfuseBitLSB;
+  UCHAR  ucLkgEfuseLength;
+  ULONG  ulLkgEncodeLn_MaxDivMin;
+  ULONG  ulLkgEncodeMax;
+  ULONG  ulLkgEncodeMin;
+  EFUSE_LINEAR_FUNC_PARAM sRoFuse;
+  ULONG  ulEvvDefaultVddc;
+  ULONG  ulEvvNoCalcVddc;
+  ULONG  ulSpeed_Model;
+  ULONG  ulSM_A0;
+  ULONG  ulSM_A1;
+  ULONG  ulSM_A2;
+  ULONG  ulSM_A3;
+  ULONG  ulSM_A4;
+  ULONG  ulSM_A5;
+  ULONG  ulSM_A6;
+  ULONG  ulSM_A7;
+  UCHAR  ucSM_A0_sign;
+  UCHAR  ucSM_A1_sign;
+  UCHAR  ucSM_A2_sign;
+  UCHAR  ucSM_A3_sign;
+  UCHAR  ucSM_A4_sign;
+  UCHAR  ucSM_A5_sign;
+  UCHAR  ucSM_A6_sign;
+  UCHAR  ucSM_A7_sign;
+  ULONG  ulMargin_RO_a;
+  ULONG  ulMargin_RO_b;
+  ULONG  ulMargin_RO_c;
+  ULONG  ulMargin_fixed;
+  ULONG  ulMargin_Fmax_mean;
+  ULONG  ulMargin_plat_mean;
+  ULONG  ulMargin_Fmax_sigma;
+  ULONG  ulMargin_plat_sigma;
+  ULONG  ulMargin_DC_sigma;
+  ULONG  ulLoadLineSlop;
+  ULONG  ulaTDClimitPerDPM[8];
+  ULONG  ulaNoCalcVddcPerDPM[8];
+  ULONG  ulAVFS_meanNsigma_Acontant0;
+  ULONG  ulAVFS_meanNsigma_Acontant1;
+  ULONG  ulAVFS_meanNsigma_Acontant2;
+  USHORT usAVFS_meanNsigma_DC_tol_sigma;
+  USHORT usAVFS_meanNsigma_Platform_mean;
+  USHORT usAVFS_meanNsigma_Platform_sigma;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a2;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSON_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_b;
+  USHORT usMaxVoltage_0_25mv;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSON;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSON;
+  USHORT usPSM_Age_ComFactor;
+  UCHAR  ucEnableApplyAVFS_CKS_OFF_Voltage;
+  UCHAR  ucReserved;
+}ATOM_ASIC_PROFILING_INFO_V3_6;
+
 
 typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
   ULONG  ulMaxSclkFreq;
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index a461e155a160..b86aba9d019f 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -49,6 +49,7 @@ enum cgs_ind_reg {
 	CGS_IND_REG__SMC,
 	CGS_IND_REG__UVD_CTX,
 	CGS_IND_REG__DIDT,
+	CGS_IND_REG_GC_CAC,
 	CGS_IND_REG__AUDIO_ENDPT
 };
 
@@ -112,20 +113,23 @@ enum cgs_system_info_id {
 	CGS_SYSTEM_INFO_ADAPTER_BDF_ID = 1,
 	CGS_SYSTEM_INFO_PCIE_GEN_INFO,
 	CGS_SYSTEM_INFO_PCIE_MLW,
+	CGS_SYSTEM_INFO_PCIE_DEV,
+	CGS_SYSTEM_INFO_PCIE_REV,
 	CGS_SYSTEM_INFO_CG_FLAGS,
 	CGS_SYSTEM_INFO_PG_FLAGS,
 	CGS_SYSTEM_INFO_GFX_CU_INFO,
+	CGS_SYSTEM_INFO_GFX_SE_INFO,
 	CGS_SYSTEM_INFO_ID_MAXIMUM,
 };
 
 struct cgs_system_info {
-	uint64_t       size;
-	uint64_t       info_id;
+	uint64_t			size;
+	enum cgs_system_info_id		info_id;
 	union {
-		void           *ptr;
-		uint64_t        value;
+		void			*ptr;
+		uint64_t		value;
 	};
-	uint64_t               padding[13];
+	uint64_t			padding[13];
 };
 
 /*
@@ -158,6 +162,10 @@ struct cgs_firmware_info {
 	uint16_t		feature_version;
 	uint32_t		image_size;
 	uint64_t		mc_addr;
+
+	/* only for smc firmware */
+	uint32_t		ucode_start_address;
+
 	void			*kptr;
 };
 
@@ -189,7 +197,6 @@ typedef unsigned long cgs_handle_t;
 
 struct cgs_acpi_method_argument {
 	uint32_t type;
-	uint32_t method_length;
 	uint32_t data_length;
 	union{
 		uint32_t value;
@@ -581,6 +588,9 @@ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
 				     enum cgs_ucode_id type,
 				     struct cgs_firmware_info *info);
 
+typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device,
+					 enum cgs_ucode_id type);
+
 typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
@@ -645,6 +655,7 @@ struct cgs_ops {
 	cgs_set_camera_voltages_t set_camera_voltages;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
+	cgs_rel_firmware rel_firmware;
 	/* cg pg interface*/
 	cgs_set_powergating_state set_powergating_state;
 	cgs_set_clockgating_state set_clockgating_state;
@@ -738,6 +749,8 @@ struct cgs_device
 	CGS_CALL(set_camera_voltages,dev,mask,voltages)
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
+#define cgs_rel_firmware(dev, type)	\
+	CGS_CALL(rel_firmware, dev, type)
 #define cgs_set_powergating_state(dev, block_type, state)	\
 	CGS_CALL(set_powergating_state, dev, block_type, state)
 #define cgs_set_clockgating_state(dev, block_type, state)	\
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8e345bfddb69..abbb658bdc1e 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -73,11 +73,14 @@ static int pp_sw_init(void *handle)
 
 	ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 	if (ret)
-		goto err;
+		goto err1;
 
 	pr_info("amdgpu: powerplay initialized\n");
 
 	return 0;
+err1:
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
 err:
 	pr_err("amdgpu: powerplay initialization failed\n");
 	return ret;
@@ -100,6 +103,9 @@ static int pp_sw_fini(void *handle)
 	if (hwmgr->hwmgr_func->backend_fini != NULL)
 		ret = hwmgr->hwmgr_func->backend_fini(hwmgr);
 
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
+
 	return ret;
 }
 
@@ -170,7 +176,7 @@ static int pp_hw_fini(void *handle)
 
 static bool pp_is_idle(void *handle)
 {
-	return 0;
+	return false;
 }
 
 static int pp_wait_for_idle(void *handle)
@@ -530,6 +536,10 @@ int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id, void *input,
 	case AMD_PP_EVENT_COMPLETE_INIT:
 		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
 		break;
+	case AMD_PP_EVENT_READJUST_POWER_STATE:
+		pp_handle->hwmgr->current_ps = pp_handle->hwmgr->boot_ps;
+		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	default:
 		break;
 	}
@@ -734,12 +744,12 @@ static int pp_dpm_get_pp_table(void *handle, char **table)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->get_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
-	}
+	if (!hwmgr->soft_pp_table)
+		return -EINVAL;
 
-	return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
+	*table = (char *)hwmgr->soft_pp_table;
+
+	return hwmgr->soft_pp_table_size;
 }
 
 static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
@@ -753,12 +763,23 @@ static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->set_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
+	if (!hwmgr->hardcode_pp_table) {
+		hwmgr->hardcode_pp_table =
+				kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
+
+		if (!hwmgr->hardcode_pp_table)
+			return -ENOMEM;
+
+		/* to avoid powerplay crash when hardcode pptable is empty */
+		memcpy(hwmgr->hardcode_pp_table, hwmgr->soft_pp_table,
+				hwmgr->soft_pp_table_size);
 	}
 
-	return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
+	memcpy(hwmgr->hardcode_pp_table, buf, size);
+
+	hwmgr->soft_pp_table = hwmgr->hardcode_pp_table;
+
+	return amd_powerplay_reset(handle);
 }
 
 static int pp_dpm_force_clock_level(void *handle,
@@ -800,6 +821,82 @@ static int pp_dpm_print_clock_levels(void *handle,
 	return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
 }
 
+static int pp_dpm_get_sclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_sclk_od(hwmgr);
+}
+
+static int pp_dpm_set_sclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_sclk_od(hwmgr, value);
+}
+
+static int pp_dpm_get_mclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_mclk_od(hwmgr);
+}
+
+static int pp_dpm_set_mclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_mclk_od(hwmgr, value);
+}
+
 const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.get_temperature = pp_dpm_get_temperature,
 	.load_firmware = pp_dpm_load_fw,
@@ -822,6 +919,10 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.set_pp_table = pp_dpm_set_pp_table,
 	.force_clock_level = pp_dpm_force_clock_level,
 	.print_clock_levels = pp_dpm_print_clock_levels,
+	.get_sclk_od = pp_dpm_get_sclk_od,
+	.set_sclk_od = pp_dpm_set_sclk_od,
+	.get_mclk_od = pp_dpm_get_mclk_od,
+	.set_mclk_od = pp_dpm_set_mclk_od,
 };
 
 static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -903,6 +1004,44 @@ int amd_powerplay_fini(void *handle)
 	return 0;
 }
 
+int amd_powerplay_reset(void *handle)
+{
+	struct pp_instance *instance = (struct pp_instance *)handle;
+	struct pp_eventmgr *eventmgr;
+	struct pem_event_data event_data = { {0} };
+	int ret;
+
+	if (instance == NULL)
+		return -EINVAL;
+
+	eventmgr = instance->eventmgr;
+	if (!eventmgr || !eventmgr->pp_eventmgr_fini)
+		return -EINVAL;
+
+	eventmgr->pp_eventmgr_fini(eventmgr);
+
+	ret = pp_sw_fini(handle);
+	if (ret)
+		return ret;
+
+	kfree(instance->hwmgr->ps);
+
+	ret = pp_sw_init(handle);
+	if (ret)
+		return ret;
+
+	hw_init_power_state_table(instance->hwmgr);
+
+	if (eventmgr == NULL || eventmgr->pp_eventmgr_init == NULL)
+		return -EINVAL;
+
+	ret = eventmgr->pp_eventmgr_init(eventmgr);
+	if (ret)
+		return ret;
+
+	return pem_handle_event(eventmgr, AMD_PP_EVENT_COMPLETE_INIT, &event_data);
+}
+
 /* export this function to DAL */
 
 int amd_powerplay_display_configuration_change(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
index d6635cc4b0fc..635fc4b48184 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventactionchains.c
@@ -30,7 +30,6 @@ static const pem_event_action * const initialize_event[] = {
 	system_config_tasks,
 	setup_asic_tasks,
 	enable_dynamic_state_management_tasks,
-	enable_clock_power_gatings_tasks,
 	get_2d_performance_state_tasks,
 	set_performance_state_tasks,
 	initialize_thermal_controller_tasks,
@@ -140,7 +139,6 @@ static const pem_event_action * const resume_event[] = {
 	setup_asic_tasks,
 	enable_stutter_mode_tasks, /*must do this in boot state and before SMC is started */
 	enable_dynamic_state_management_tasks,
-	enable_clock_power_gatings_tasks,
 	enable_disable_bapm_tasks,
 	initialize_thermal_controller_tasks,
 	get_2d_performance_state_tasks,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 46410e3c7349..fb88e4e5d625 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
@@ -58,9 +58,6 @@ static void pem_fini(struct pp_eventmgr *eventmgr)
 	pem_unregister_interrupts(eventmgr);
 
 	pem_handle_event(eventmgr, AMD_PP_EVENT_UNINITIALIZE, &event_data);
-
-	if (eventmgr != NULL)
-		kfree(eventmgr);
 }
 
 int eventmgr_init(struct pp_instance *handle)
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index 5cd123472db4..b6f45fd01fa6 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -132,8 +132,7 @@ int pem_task_enable_dynamic_state_management(struct pp_eventmgr *eventmgr, struc
 
 int pem_task_disable_dynamic_state_management(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
 {
-	/* TODO */
-	return 0;
+	return phm_disable_dynamic_state_management(eventmgr->hwmgr);
 }
 
 int pem_task_enable_clock_power_gatings_tasks(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index 436fc16dabb6..2028980f1ed4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -177,12 +177,12 @@ int cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 		cz_dpm_powerdown_uvd(hwmgr);
 	} else {
 		cz_dpm_powerup_uvd(hwmgr);
-		cgs_set_clockgating_state(hwmgr->device,
-						AMD_IP_BLOCK_TYPE_UVD,
-						AMD_PG_STATE_GATE);
 		cgs_set_powergating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_CG_STATE_UNGATE);
+		cgs_set_clockgating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_UVD,
+						AMD_PG_STATE_GATE);
 		cz_dpm_update_uvd_dpm(hwmgr, false);
 	}
 
@@ -206,25 +206,26 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 							AMD_IP_BLOCK_TYPE_VCE,
 							AMD_PG_STATE_GATE);
 				cz_enable_disable_vce_dpm(hwmgr, false);
-			/* TODO: to figure out why vce can't be poweroff*/
+				cz_dpm_powerdown_vce(hwmgr);
 				cz_hwmgr->vce_power_gated = true;
 			} else {
 				cz_dpm_powerup_vce(hwmgr);
 				cz_hwmgr->vce_power_gated = false;
-				cgs_set_clockgating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_GATE);
 				cgs_set_powergating_state(
 							hwmgr->device,
 							AMD_IP_BLOCK_TYPE_VCE,
 							AMD_CG_STATE_UNGATE);
+				cgs_set_clockgating_state(
+							hwmgr->device,
+							AMD_IP_BLOCK_TYPE_VCE,
+							AMD_PG_STATE_GATE);
 				cz_dpm_update_vce_dpm(hwmgr);
 				cz_enable_disable_vce_dpm(hwmgr, true);
 				return 0;
 			}
 		}
 	} else {
+		cz_hwmgr->vce_power_gated = bgate;
 		cz_dpm_update_vce_dpm(hwmgr);
 		cz_enable_disable_vce_dpm(hwmgr, !bgate);
 		return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index 1f14c477d15d..8cc0df9b534a 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -1167,9 +1167,9 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
 	cz_ps->action = cz_current_ps->action;
 
-	if ((force_high == false) && (cz_ps->action == FORCE_HIGH))
+	if (!force_high && (cz_ps->action == FORCE_HIGH))
 		cz_ps->action = CANCEL_FORCE_HIGH;
-	else if ((force_high == true) && (cz_ps->action != FORCE_HIGH))
+	else if (force_high && (cz_ps->action != FORCE_HIGH))
 		cz_ps->action = FORCE_HIGH;
 	else
 		cz_ps->action = DO_NOTHING;
@@ -1180,6 +1180,13 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
+	struct cz_hwmgr *data;
+
+	data = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
 
 	result = cz_initialize_dpm_defaults(hwmgr);
 	if (result != 0) {
@@ -1649,7 +1656,7 @@ static void cz_hw_print_display_cfg(
 	struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend);
 	uint32_t data = 0;
 
-	if (hw_data->cc6_settings.cc6_setting_changed == true) {
+	if (hw_data->cc6_settings.cc6_setting_changed) {
 
 		hw_data->cc6_settings.cc6_setting_changed = false;
 
@@ -1909,15 +1916,7 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
 
 int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct cz_hwmgr *cz_hwmgr;
-	int ret = 0;
-
-	cz_hwmgr = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
-	if (cz_hwmgr == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = cz_hwmgr;
 	hwmgr->hwmgr_func = &cz_hwmgr_funcs;
 	hwmgr->pptable_func = &pptable_funcs;
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
index e1b649bd5344..5afe82068b29 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_clockpowergating.c
@@ -56,7 +56,7 @@ int fiji_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 		fiji_update_uvd_dpm(hwmgr, false);
 		cgs_set_clockgating_state(hwmgr->device,
 					  AMD_IP_BLOCK_TYPE_UVD,
-					  AMD_PG_STATE_UNGATE);
+					  AMD_CG_STATE_UNGATE);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 24a16e49b571..120a9e2c3152 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -581,25 +581,24 @@ static int fiji_patch_boot_state(struct pp_hwmgr *hwmgr,
 
 static int fiji_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
 static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_hwmgr *data;
 	uint32_t i;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	bool stay_in_boot;
 	int result;
 
+	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 
@@ -633,6 +632,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE;
 
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2;
@@ -697,7 +698,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	if (0 == result) {
 		struct cgs_system_info sys_info = {0};
 
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 				FIJI_MAX_HARDWARE_POWERLEVELS;
 		hwmgr->platform_descriptor.hardwarePerformanceLevels = 2;
@@ -732,7 +733,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -741,7 +742,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -1234,6 +1235,34 @@ static int fiji_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_0, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_1, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_2, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_3, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_4, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_5, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_6, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_7, 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -1361,6 +1390,17 @@ static int fiji_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 }
 
 /**
+* Call SMC to reset S0/S1 to S1 and Reset SMIO to initial value
+*
+* @param    hwmgr  the address of the powerplay hardware manager.
+* @return   if success then 0;
+*/
+static int fiji_reset_to_default(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
+/**
 * Initial switch from ARB F0->F1
 *
 * @param    hwmgr  the address of the powerplay hardware manager.
@@ -1373,6 +1413,21 @@ static int fiji_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+static int fiji_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t tmp;
+
+	tmp = (cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9) &
+			0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return fiji_copy_and_switch_arb_sets(hwmgr,
+			tmp, MC_CG_ARB_FREQ_F0);
+}
+
 static int fiji_reset_single_dpm_table(struct pp_hwmgr *hwmgr,
 		struct fiji_single_dpm_table *dpm_table, uint32_t count)
 {
@@ -1395,7 +1450,7 @@ static void fiji_setup_pcie_table_entry(
 {
 	dpm_table->dpm_levels[index].value = pcie_gen;
 	dpm_table->dpm_levels[index].param1 = pcie_lanes;
-	dpm_table->dpm_levels[index].enabled = 1;
+	dpm_table->dpm_levels[index].enabled = true;
 }
 
 static int fiji_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
@@ -1607,7 +1662,6 @@ static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr,
 {
 	uint32_t count;
 	uint8_t index;
-	int result = 0;
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
@@ -1629,7 +1683,7 @@ static int fiji_populate_cac_table(struct pp_hwmgr *hwmgr,
 						VOLTAGE_SCALE)) / 25);
 	}
 
-	return result;
+	return 0;
 }
 
 /**
@@ -1830,7 +1884,7 @@ static uint16_t fiji_find_closest_vddci(struct pp_hwmgr *hwmgr, uint16_t vddci)
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
@@ -3175,6 +3229,17 @@ static int fiji_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_ulv_parm *ulv = &(data->ulv);
+
+	if (ulv->ulv_supported)
+		return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+
+	return 0;
+}
+
 static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -3195,6 +3260,21 @@ static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep)) {
+		if (smum_send_msg_to_smc(hwmgr->smumgr,
+				PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to disable Master Deep Sleep switch failed!",
+					return -1);
+		}
+	}
+
+	return 0;
+}
+
 static int fiji_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -3355,6 +3435,70 @@ static int fiji_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable SCLK dpm */
+	if (!data->sclk_dpm_key_disabled)
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_DPM_Disable) == 0),
+				"Failed to disable SCLK DPM!",
+				return -1);
+
+	/* disable MCLK dpm */
+	if (!data->mclk_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_MCLKDPM_SetEnabledMask, 1) == 0),
+				"Failed to force MCLK DPM0!",
+				return -1);
+
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_MCLKDPM_Disable) == 0),
+				"Failed to disable MCLK DPM!",
+				return -1);
+	}
+
+	return 0;
+}
+
+static int fiji_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable general power management */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	/* disable sclk deep sleep */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (fiji_disable_sclk_mclk_dpm(hwmgr)) {
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
+		return -1;
+	}
+
+	PP_ASSERT_WITH_CODE(
+			(smum_send_msg_to_smc(hwmgr->smumgr,
+					PPSMC_MSG_Voltage_Cntl_Disable) == 0),
+			"Failed to disable voltage DPM during DPM Stop Function!",
+			return -1);
+
+	return 0;
+}
+
 static void fiji_set_dpm_event_sources(struct pp_hwmgr *hwmgr,
 		uint32_t sources)
 {
@@ -3413,6 +3557,23 @@ static int fiji_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return fiji_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+static int fiji_disable_auto_throttle_source(struct pp_hwmgr *hwmgr,
+		PHM_AutoThrottleSource source)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	if (data->active_auto_throttle_sources & (1 << source)) {
+		data->active_auto_throttle_sources &= ~(1 << source);
+		fiji_set_dpm_event_sources(hwmgr, data->active_auto_throttle_sources);
+	}
+	return 0;
+}
+
+static int fiji_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{
+	return fiji_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
 	int tmp_result, result = 0;
@@ -3527,6 +3688,64 @@ static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+static int fiji_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+	int tmp_result, result = 0;
+
+	tmp_result = (fiji_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	tmp_result = fiji_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = fiji_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable SMC CAC!", result = tmp_result);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	tmp_result = fiji_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable thermal auto throttle!", result = tmp_result);
+
+	tmp_result = fiji_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	tmp_result = fiji_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable deep sleep master switch!", result = tmp_result);
+
+	tmp_result = fiji_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable ULV!", result = tmp_result);
+
+	tmp_result = fiji_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to clear voting clients!", result = tmp_result);
+
+	tmp_result = fiji_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to reset to default!", result = tmp_result);
+
+	tmp_result = fiji_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to force to switch arbf0!", result = tmp_result);
+
+	return result;
+}
+
 static int fiji_force_dpm_highest(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -4169,8 +4388,9 @@ static int fiji_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to freeze SCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_FreezeLevel),
 				"Failed to freeze SCLK DPM during FreezeSclkMclkDPM Function!",
@@ -4180,8 +4400,9 @@ static int fiji_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to freeze MCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_MCLKDPM_FreezeLevel),
 				"Failed to freeze MCLK DPM during FreezeSclkMclkDPM Function!",
@@ -4351,7 +4572,6 @@ static int fiji_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 static int fiji_trim_dpm_states(struct pp_hwmgr *hwmgr,
 		const struct fiji_power_state *fiji_ps)
 {
-	int result = 0;
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -4371,7 +4591,7 @@ static int fiji_trim_dpm_states(struct pp_hwmgr *hwmgr,
 			fiji_ps->performance_levels[0].memory_clock,
 			fiji_ps->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int fiji_generate_dpm_level_enable_mask(
@@ -4630,8 +4850,9 @@ static int fiji_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to Unfreeze SCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
 			"Failed to unfreeze SCLK DPM during UnFreezeSclkMclkDPM Function!",
@@ -4641,8 +4862,9 @@ static int fiji_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(true == fiji_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",);
+		PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
+				    );
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
 		    "Failed to unfreeze MCLK DPM during UnFreezeSclkMclkDPM Function!",
@@ -5069,42 +5291,6 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -5274,12 +5460,96 @@ bool fiji_check_smc_update_required_for_display_configuration(struct pp_hwmgr *h
 	return is_update_required;
 }
 
+static int fiji_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct fiji_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int fiji_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct fiji_power_state  *fiji_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	fiji_ps = cast_phw_fiji_power_state(&ps->hardware);
+
+	fiji_ps->performance_levels[fiji_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int fiji_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct fiji_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int fiji_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct fiji_power_state  *fiji_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	fiji_ps = cast_phw_fiji_power_state(&ps->hardware);
+
+	fiji_ps->performance_levels[fiji_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
 
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &fiji_hwmgr_backend_fini,
 	.asic_setup = &fiji_setup_asic_task,
 	.dynamic_state_management_enable = &fiji_enable_dpm_tasks,
+	.dynamic_state_management_disable = &fiji_disable_dpm_tasks,
 	.force_dpm_level = &fiji_dpm_force_dpm_level,
 	.get_num_of_pp_table_entries = &tonga_get_number_of_powerplay_table_entries,
 	.get_power_state_size = &fiji_get_power_state_size,
@@ -5312,24 +5582,18 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.get_fan_control_mode = fiji_get_fan_control_mode,
 	.check_states_equal = fiji_check_states_equal,
 	.check_smc_update_required_for_display_configuration = fiji_check_smc_update_required_for_display_configuration,
-	.get_pp_table = fiji_get_pp_table,
-	.set_pp_table = fiji_set_pp_table,
 	.force_clock_level = fiji_force_clock_level,
 	.print_clock_levels = fiji_print_clock_levels,
+	.get_sclk_od = fiji_get_sclk_od,
+	.set_sclk_od = fiji_set_sclk_od,
+	.get_mclk_od = fiji_get_mclk_od,
+	.set_mclk_od = fiji_set_mclk_od,
 };
 
 int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr  *data;
-	int ret = 0;
-
-	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &fiji_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_fiji_thermal_initialize(hwmgr);
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 170edf5a772d..bf67c2a92c68 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -302,9 +302,6 @@ struct fiji_hwmgr {
 	bool                           pg_acp_init;
 	bool                           frtc_enabled;
 	bool                           frtc_status_changed;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
index db23a4068baf..44658451a8d2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
@@ -73,17 +73,18 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
 
 	if (!tmp) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_PowerContainment);
-
-		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_CAC);
 
 		fiji_hwmgr->fast_watermark_threshold = 100;
 
-		tmp = 1;
-		fiji_hwmgr->enable_dte_feature = tmp ? false : true;
-		fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
-		fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		if (hwmgr->powercontainment_enabled) {
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				    PHM_PlatformCaps_PowerContainment);
+			tmp = 1;
+			fiji_hwmgr->enable_dte_feature = tmp ? false : true;
+			fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
+			fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		}
 	}
 }
 
@@ -459,6 +460,23 @@ int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) && data->cac_enabled) {
+		int smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableCac));
+		PP_ASSERT_WITH_CODE((smc_result == 0),
+				"Failed to disable CAC in SMC.", result = -1);
+
+		data->cac_enabled = false;
+	}
+	return result;
+}
+
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -528,6 +546,48 @@ int fiji_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) &&
+			data->power_containment_features) {
+		int smc_result;
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_TDCLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable TDCLimit in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_DTE) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_DisableDTE));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable DTE in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable PkgPwrTracking in SMC.",
+					result = smc_result);
+		}
+		data->power_containment_features = 0;
+	}
+
+	return result;
+}
+
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
index 55e58200f33a..fec772421733 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
@@ -36,6 +36,19 @@ enum fiji_pt_config_reg_type {
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
 #define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK             0xffffffc0
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TD_CTRL0__UNUSED_0_MASK             0xffffffc0
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK            0xffffffc0
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT          0x6
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK                0xe0000000
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT              0x0000001d
+
 struct fiji_pt_config_reg {
 	uint32_t                           offset;
 	uint32_t                           mask;
@@ -58,7 +71,9 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int fiji_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int fiji_enable_power_containment(struct pp_hwmgr *hwmgr);
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr);
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
index 7a705cee0cc2..a6abe81bc843 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
@@ -59,8 +59,8 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 		       struct phm_runtime_table_header *rt_table,
 		       void *input, void *output)
 {
-	int result = 0;
-	void *temp_storage = NULL;
+	int result;
+	void *temp_storage;
 
 	if (hwmgr == NULL || rt_table == NULL) {
 		printk(KERN_ERR "[ powerplay ] Invalid Parameter!\n");
@@ -73,12 +73,13 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
 			printk(KERN_ERR "[ powerplay ] Could not allocate table temporary storage\n");
 			return -ENOMEM;
 		}
+	} else {
+		temp_storage = NULL;
 	}
 
 	result = phm_run_table(hwmgr, rt_table, input, output, temp_storage);
 
-	if (NULL != temp_storage)
-		kfree(temp_storage);
+	kfree(temp_storage);
 
 	return result;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index fa208ada6892..789f98ad2615 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -154,6 +154,30 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	return ret;
 }
 
+int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
+{
+	int ret = -1;
+	bool enabled;
+
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+		PHM_PlatformCaps_TablelessHardwareInterface)) {
+		if (hwmgr->hwmgr_func->dynamic_state_management_disable)
+			ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr);
+	} else {
+		ret = phm_dispatch_table(hwmgr,
+				&(hwmgr->disable_dynamic_state_management),
+				NULL, NULL);
+	}
+
+	enabled = ret == 0 ? false : true;
+
+	cgs_notify_dpm_enabled(hwmgr->device, enabled);
+
+	return ret;
+}
+
 int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level)
 {
 	PHM_FUNC_CHECK(hwmgr);
@@ -306,11 +330,15 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 {
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+	if (display_config == NULL)
 		return -EINVAL;
 
 	hwmgr->display_config = *display_config;
-	/* to do pass other display configuration in furture */
+
+	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+		return -EINVAL;
+
+	/* TODO: pass other display configuration in the future */
 
 	if (hwmgr->hwmgr_func->store_cc6_data)
 		hwmgr->hwmgr_func->store_cc6_data(hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 1c48917da3cf..27e07624ac28 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "cgs_common.h"
 #include "power_state.h"
 #include "hwmgr.h"
@@ -58,12 +59,13 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	hwmgr->hw_revision = pp_init->rev_id;
 	hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT;
 	hwmgr->power_source = PP_PowerSource_AC;
+	hwmgr->powercontainment_enabled = pp_init->powercontainment_enabled;
 
 	switch (hwmgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_hwmgr_init(hwmgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (hwmgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_hwmgr_init(hwmgr);
@@ -93,6 +95,15 @@ int hwmgr_fini(struct pp_hwmgr *hwmgr)
 	if (hwmgr == NULL || hwmgr->ps == NULL)
 		return -EINVAL;
 
+	/* do hwmgr finish*/
+	kfree(hwmgr->hardcode_pp_table);
+
+	kfree(hwmgr->backend);
+
+	kfree(hwmgr->start_thermal_controller.function_list);
+
+	kfree(hwmgr->set_temperature_range.function_list);
+
 	kfree(hwmgr->ps);
 	kfree(hwmgr);
 	return 0;
@@ -462,7 +473,7 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 int phm_find_boot_level(void *table,
@@ -523,7 +534,7 @@ int phm_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwmgr
 
 	/* initialize vddc_dep_on_dal_pwrl table */
 	table_size = sizeof(uint32_t) + 4 * sizeof(struct phm_clock_voltage_dependency_record);
-	table_clk_vlt = (struct phm_clock_voltage_dependency_table *)kzalloc(table_size, GFP_KERNEL);
+	table_clk_vlt = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table_clk_vlt) {
 		printk(KERN_ERR "[ powerplay ] Can not allocate space for vddc_dep_on_dal_pwrl! \n");
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
index 347fef127ce9..2930a3355948 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
@@ -39,6 +39,7 @@ struct phm_ppt_v1_clock_voltage_dependency_record {
 	uint8_t phases;
 	uint8_t cks_enable;
 	uint8_t cks_voffset;
+	uint32_t sclk_offset;
 };
 
 typedef struct phm_ppt_v1_clock_voltage_dependency_record phm_ppt_v1_clock_voltage_dependency_record;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
index 8f142a74ad08..b5edb5105986 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
@@ -106,11 +106,17 @@ int polaris10_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_GATE);
 		polaris10_update_uvd_dpm(hwmgr, true);
 		polaris10_phm_powerdown_uvd(hwmgr);
 	} else {
 		polaris10_phm_powerup_uvd(hwmgr);
 		polaris10_update_uvd_dpm(hwmgr, false);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_UNGATE);
 	}
 
 	return 0;
@@ -125,11 +131,19 @@ int polaris10_phm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 
 	data->vce_power_gated = bgate;
 
-	if (bgate)
+	if (bgate) {
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_VCE,
+				AMD_CG_STATE_GATE);
+		polaris10_update_vce_dpm(hwmgr, true);
 		polaris10_phm_powerdown_vce(hwmgr);
-	else
+	} else {
 		polaris10_phm_powerup_vce(hwmgr);
-
+		polaris10_update_vce_dpm(hwmgr, false);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_VCE,
+				AMD_CG_STATE_UNGATE);
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index aa6be033f21b..769636a0c5b5 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -389,6 +389,34 @@ static int polaris10_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_0, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_1, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_2, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_3, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_4, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_5, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_6, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_7, 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -515,6 +543,11 @@ static int polaris10_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+static int polaris10_reset_to_default(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
 /**
 * Initial switch from ARB F0->F1
 *
@@ -528,6 +561,21 @@ static int polaris10_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+static int polaris10_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t tmp;
+
+	tmp = (cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9) &
+			0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return polaris10_copy_and_switch_arb_sets(hwmgr,
+			tmp, MC_CG_ARB_FREQ_F0);
+}
+
 static int polaris10_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -732,7 +780,7 @@ static int polaris10_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
 			table->Smio[level] |=
 				data->mvdd_voltage_table.entries[level].smio_low;
 		}
-		table->SmioMask2 = data->vddci_voltage_table.mask_low;
+		table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
 		table->MvddLevelCount = (uint32_t) PP_HOST_TO_SMC_UL(count);
 	}
@@ -999,7 +1047,7 @@ static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
 				vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
 						(dep_table->entries[i].vddc -
 								(uint16_t)data->vddc_vddci_delta));
-				*voltage |= (vddci * VOLTAGE_SCALE) <<	VDDCI_SHIFT;
+				*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 			}
 
 			if (POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control)
@@ -1296,7 +1344,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	}
 
 	mem_level->MclkFrequency = clock;
-	mem_level->StutterEnable = 0;
 	mem_level->EnabledForThrottle = 1;
 	mem_level->EnabledForActivity = 0;
 	mem_level->UpHyst = 0;
@@ -1304,7 +1351,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->VoltageDownHyst = 0;
 	mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target;
 	mem_level->StutterEnable = false;
-
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	data->display_timing.num_existing_displays = info.display_count;
@@ -1358,12 +1404,12 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
 			return result;
 	}
 
-	/* in order to prevent MC activity from stutter mode to push DPM up.
+	/* In order to prevent MC activity from stutter mode to push DPM up,
 	 * the UVD change complements this by putting the MCLK in
-	 * a higher state by default such that we are not effected by
+	 * a higher state by default such that we are not affected by
 	 * up threshold or and MCLK DPM latency.
 	 */
-	levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target;
+	levels[0].ActivityLevel = 0x1f;
 	CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel);
 
 	data->smc_state_table.MemoryDpmLevelCount =
@@ -1424,22 +1470,19 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 
 	table->ACPILevel.Flags &= ~PPSMC_SWSTATE_FLAG_DC;
 
-	if (!data->sclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0,
-		 * already converted to SMC_UL */
-		sclk_frequency = data->dpm_table.sclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_sclk,
-				table->ACPILevel.SclkFrequency,
-				&table->ACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDC voltage value "
-				"in Clock Dependency Table", );
-	} else {
-		sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
-		table->ACPILevel.MinVoltage =
-				data->vbios_boot_state.vddc_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0,
+	 * already converted to SMC_UL */
+	sclk_frequency = data->vbios_boot_state.sclk_bootup_value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_sclk,
+			sclk_frequency,
+			&table->ACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDC voltage value "
+			"in Clock Dependency Table",
+			);
+
 
 	result = polaris10_calculate_sclk_params(hwmgr, sclk_frequency,  &(table->ACPILevel.SclkSetting));
 	PP_ASSERT_WITH_CODE(result == 0, "Error retrieving Engine Clock dividers from VBIOS.", return result);
@@ -1464,24 +1507,17 @@ static int polaris10_populate_smc_acpi_level(struct pp_hwmgr *hwmgr,
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Fcw1_frac);
 	CONVERT_FROM_HOST_TO_SMC_US(table->ACPILevel.SclkSetting.Sclk_ss_slew_rate);
 
-	if (!data->mclk_dpm_key_disabled) {
-		/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
-		table->MemoryACPILevel.MclkFrequency =
-				data->dpm_table.mclk_table.dpm_levels[0].value;
-		result = polaris10_get_dependency_volt_by_clk(hwmgr,
-				table_info->vdd_dep_on_mclk,
-				table->MemoryACPILevel.MclkFrequency,
-				&table->MemoryACPILevel.MinVoltage, &mvdd);
-		PP_ASSERT_WITH_CODE((0 == result),
-				"Cannot find ACPI VDDCI voltage value "
-				"in Clock Dependency Table",
-				);
-	} else {
-		table->MemoryACPILevel.MclkFrequency =
-				data->vbios_boot_state.mclk_bootup_value;
-		table->MemoryACPILevel.MinVoltage =
-				data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE;
-	}
+
+	/* Get MinVoltage and Frequency from DPM0, already converted to SMC_UL */
+	table->MemoryACPILevel.MclkFrequency = data->vbios_boot_state.mclk_bootup_value;
+	result = polaris10_get_dependency_volt_by_clk(hwmgr,
+			table_info->vdd_dep_on_mclk,
+			table->MemoryACPILevel.MclkFrequency,
+			&table->MemoryACPILevel.MinVoltage, &mvdd);
+	PP_ASSERT_WITH_CODE((0 == result),
+			"Cannot find ACPI VDDCI voltage value "
+			"in Clock Dependency Table",
+			);
 
 	us_mvdd = 0;
 	if ((POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control) ||
@@ -1526,6 +1562,7 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->VceLevelCount = (uint8_t)(mm_table->count);
 	table->VceBootLevel = 0;
@@ -1535,9 +1572,18 @@ static int polaris10_populate_smc_vce_level(struct pp_hwmgr *hwmgr,
 		table->VceLevel[count].MinVoltage = 0;
 		table->VceLevel[count].MinVoltage |=
 				(mm_table->entries[count].vddc * VOLTAGE_SCALE) << VDDC_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+
 		table->VceLevel[count].MinVoltage |=
-				((mm_table->entries[count].vddc - data->vddc_vddci_delta) *
-						VOLTAGE_SCALE) << VDDCI_SHIFT;
+				(vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->VceLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/*retrieve divider value for VBIOS */
@@ -1566,6 +1612,7 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->SamuBootLevel = 0;
 	table->SamuLevelCount = (uint8_t)(mm_table->count);
@@ -1576,8 +1623,16 @@ static int polaris10_populate_smc_samu_level(struct pp_hwmgr *hwmgr,
 		table->SamuLevel[count].Frequency = mm_table->entries[count].samclock;
 		table->SamuLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->SamuLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->SamuLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->SamuLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1660,6 +1715,7 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 	struct phm_ppt_v1_mm_clock_voltage_dependency_table *mm_table =
 			table_info->mm_dep_table;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t vddci;
 
 	table->UvdLevelCount = (uint8_t)(mm_table->count);
 	table->UvdBootLevel = 0;
@@ -1670,8 +1726,16 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		table->UvdLevel[count].DclkFrequency = mm_table->entries[count].dclk;
 		table->UvdLevel[count].MinVoltage |= (mm_table->entries[count].vddc *
 				VOLTAGE_SCALE) << VDDC_SHIFT;
-		table->UvdLevel[count].MinVoltage |= ((mm_table->entries[count].vddc -
-				data->vddc_vddci_delta) * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->vddci_control)
+			vddci = (uint32_t)phm_find_closest_vddci(&(data->vddci_voltage_table),
+						mm_table->entries[count].vddc - VDDC_VDDCI_DELTA);
+		else if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->vddci_control)
+			vddci = mm_table->entries[count].vddc - VDDC_VDDCI_DELTA;
+		else
+			vddci = (data->vbios_boot_state.vddci_bootup_value * VOLTAGE_SCALE) << VDDCI_SHIFT;
+
+		table->UvdLevel[count].MinVoltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 		table->UvdLevel[count].MinVoltage |= 1 << PHASES_SHIFT;
 
 		/* retrieve divider value for VBIOS */
@@ -1692,8 +1756,8 @@ static int polaris10_populate_smc_uvd_level(struct pp_hwmgr *hwmgr,
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].VclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].DclkFrequency);
 		CONVERT_FROM_HOST_TO_SMC_UL(table->UvdLevel[count].MinVoltage);
-
 	}
+
 	return result;
 }
 
@@ -1761,12 +1825,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr)
 
 static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 {
-	uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks,
-			volt_with_cks, value;
-	uint16_t clock_freq_u16;
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2,
-			volt_offset = 0;
+	uint8_t i, stretch_amount, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1778,56 +1839,47 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	 * if the part is SS or FF. if RO >= 1660MHz, part is FF.
 	 */
 	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (146 * 4));
-	efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (148 * 4));
+			ixSMU_EFUSE_0 + (67 * 4));
 	efuse &= 0xFF000000;
 	efuse = efuse >> 24;
-	efuse2 &= 0xF;
 
-	if (efuse2 == 1)
-		ro = (2300 - 1350) * efuse / 255 + 1350;
-	else
-		ro = (2500 - 1000) * efuse / 255 + 1000;
-
-	if (ro >= 1660)
-		type = 0;
-	else
-		type = 1;
+	if (hwmgr->chip_id == CHIP_POLARIS10) {
+		min = 1000;
+		max = 2300;
+	} else {
+		min = 1100;
+		max = 2100;
+	}
 
-	/* Populate Stretch amount */
-	data->smc_state_table.ClockStretcherAmount = stretch_amount;
+	ro = efuse * (max -min)/255 + min;
 
 	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-		volt_without_cks = (uint32_t)((14041 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 /
-			(4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000)));
-		volt_with_cks = (uint32_t)((13946 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 /
-			(3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000)));
+		if (hwmgr->chip_id == CHIP_POLARIS10) {
+			volt_without_cks = (uint32_t)((2753594000U + (sclk_table->entries[i].clk/100) * 136418 -(ro - 70) * 1000000) / \
+						(2424180 - (sclk_table->entries[i].clk/100) * 1132925/1000));
+			volt_with_cks = (uint32_t)((2797202000U + sclk_table->entries[i].clk/100 * 3232 - (ro - 65) * 1000000) / \
+					(2522480 - sclk_table->entries[i].clk/100 * 115764/100));
+		} else {
+			volt_without_cks = (uint32_t)((2416794800U + (sclk_table->entries[i].clk/100) * 1476925/10 -(ro - 50) * 1000000) / \
+						(2625416 - (sclk_table->entries[i].clk/100) * (12586807/10000)));
+			volt_with_cks = (uint32_t)((2999656000U - sclk_table->entries[i].clk/100 * 392803 - (ro - 44) * 1000000) / \
+					(3422454 - sclk_table->entries[i].clk/100 * (18886376/10000)));
+		}
+
 		if (volt_without_cks >= volt_with_cks)
 			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
-					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+					sclk_table->entries[i].cks_voffset) * 100 + 624) / 625);
+
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			STRETCH_ENABLE, 0x0);
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x1);
-	/* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x0);
-
+	data->smc_state_table.LdoRefSel = (table_info->cac_dtp_table->ucCKS_LDO_REFSEL != 0) ? table_info->cac_dtp_table->ucCKS_LDO_REFSEL : 6;
 	/* Populate CKS Lookup Table */
-	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
-		stretch_amount2 = 0;
-	else if (stretch_amount == 3 || stretch_amount == 4)
-		stretch_amount2 = 1;
-	else {
+	if (stretch_amount != 1 && stretch_amount != 2 && stretch_amount != 3 &&
+			stretch_amount != 4 && stretch_amount != 5) {
 		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_ClockStretcher);
 		PP_ASSERT_WITH_CODE(false,
@@ -1835,69 +1887,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 				return -EINVAL);
 	}
 
-	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL);
-	value &= 0xFFC2FF87;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][0];
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][1];
-	clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table.
-			GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100);
-	if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16
-	&& polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) {
-		/* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16;
-		/* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18;
-		/* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */
-		value |= (polaris10_clock_stretch_amount_conversion
-				[polaris10_clock_stretcher_lookup_table[stretch_amount2][3]]
-				 [stretch_amount]) << 3;
-	}
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq);
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq);
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |=
-			(polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7;
-
-	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL, value);
-
-	/* Populate DDT Lookup Table */
-	for (i = 0; i < 4; i++) {
-		/* Assign the minimum and maximum VID stored
-		 * in the last row of Clock Stretcher Voltage Table.
-		 */
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2];
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3];
-		/* Loop through each SCLK and check the frequency
-		 * to see if it lies within the frequency for clock stretcher.
-		 */
-		for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) {
-			cks_setting = 0;
-			clock_freq = PP_SMC_TO_HOST_UL(
-					data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency);
-			/* Check the allowed frequency against the sclk level[j].
-			 *  Sclk's endianness has already been converted,
-			 *  and it's in 10Khz unit,
-			 *  as opposed to Data table, which is in Mhz unit.
-			 */
-			if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) {
-				cks_setting |= 0x2;
-				if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100)
-					cks_setting |= 0x1;
-			}
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting
-							|= cks_setting << (j * 2);
-		}
-		CONVERT_FROM_HOST_TO_SMC_US(
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting);
-	}
-
 	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
 	value &= 0xFFFFFFFE;
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
@@ -1945,9 +1934,8 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) {
 		config = VR_SVI2_PLANE_2;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
-	} else if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
-		config = VR_SMIO_PATTERN_2;
-		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start +
+			offsetof(SMU74_SoftRegisters, AllowMvddSwitch), 0x1);
 	} else {
 		config = VR_STATIC_VOLTAGE;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
@@ -1956,6 +1944,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+
+int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	SMU74_Discrete_DpmTable  *table = &(data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
+	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
+		return result;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+
+	if (0 == result) {
+		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_VDROOP_TABLE[0].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[0].m2_shift  = 12;
+		table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_VDROOP_TABLE[1].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[1].m2_shift  = 12;
+		table->MaxVoltage                = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma      = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+		for (i = 0; i < NUM_VFT_COLUMNS; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma),
+				&tmp, data->sram_end);
+
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_meanNsigma,
+					sizeof(AVFS_meanNsigma_t),
+					data->sram_end);
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, data->sram_end);
+		polaris10_copy_bytes_to_smc(smumgr,
+					tmp,
+					(uint8_t *)&AVFS_SclkOffset,
+					sizeof(AVFS_Sclk_Offset_t),
+					data->sram_end);
+
+		data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+	}
+	return result;
+}
+
+
 /**
 * Initializes the SMC table and uploads it
 *
@@ -2056,6 +2128,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr)
 				"Failed to populate Clock Stretcher Data Table!",
 				return result);
 	}
+
+	result = polaris10_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;);
+
 	table->CurrSclkPllRange = 0xff;
 	table->GraphicsVoltageChangeEnable  = 1;
 	table->GraphicsThermThrottleEnable  = 1;
@@ -2229,6 +2305,17 @@ static int polaris10_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_ulv_parm *ulv = &(data->ulv);
+
+	if (ulv->ulv_supported)
+		return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+
+	return 0;
+}
+
 static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -2249,9 +2336,27 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep)) {
+		if (smum_send_msg_to_smc(hwmgr->smumgr,
+				PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to disable Master Deep Sleep switch failed!",
+					return -1);
+		}
+	}
+
+	return 0;
+}
+
 static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ offsetof(SMU74_SoftRegisters, HandshakeDisables);
 
 	/* enable SCLK dpm */
 	if (!data->sclk_dpm_key_disabled)
@@ -2262,6 +2367,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
+/* Disable UVD - SMU handshake for MCLK. */
+		soft_register_value = cgs_read_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC, handshake_disables_offset);
+		soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE;
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				handshake_disables_offset, soft_register_value);
 
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -2269,7 +2380,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -1);
 
-
 		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5);
@@ -2338,6 +2448,58 @@ static int polaris10_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	/* disable SCLK dpm */
+	if (!data->sclk_dpm_key_disabled)
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_DPM_Disable) == 0),
+				"Failed to disable SCLK DPM!",
+				return -1);
+
+	/* disable MCLK dpm */
+	if (!data->mclk_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_MCLKDPM_Disable) == 0),
+				"Failed to disable MCLK DPM!",
+				return -1);
+	}
+
+	return 0;
+}
+
+static int polaris10_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	/* disable general power management */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	/* disable sclk deep sleep */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (polaris10_disable_sclk_mclk_dpm(hwmgr)) {
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
+		return -1;
+	}
+
+	return 0;
+}
+
 static void polaris10_set_dpm_event_sources(struct pp_hwmgr *hwmgr, uint32_t sources)
 {
 	bool protection;
@@ -2395,6 +2557,23 @@ static int polaris10_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return polaris10_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+static int polaris10_disable_auto_throttle_source(struct pp_hwmgr *hwmgr,
+		PHM_AutoThrottleSource source)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	if (data->active_auto_throttle_sources & (1 << source)) {
+		data->active_auto_throttle_sources &= ~(1 << source);
+		polaris10_set_dpm_event_sources(hwmgr, data->active_auto_throttle_sources);
+	}
+	return 0;
+}
+
+static int polaris10_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{
+	return polaris10_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 int polaris10_pcie_performance_request(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -2471,6 +2650,8 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable VR hot GPIO interrupt!", result = tmp_result);
 
+	smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay);
+
 	tmp_result = polaris10_enable_sclk_control(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable SCLK control!", result = tmp_result);
@@ -2487,6 +2668,10 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to enable deep sleep master switch!", result = tmp_result);
 
+	tmp_result = polaris10_enable_didt_config(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to enable deep sleep master switch!", result = tmp_result);
+
 	tmp_result = polaris10_start_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to start DPM!", result = tmp_result);
@@ -2516,8 +2701,60 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
+	int tmp_result, result = 0;
 
-	return 0;
+	tmp_result = (polaris10_is_dpm_running(hwmgr)) ? 0 : -1;
+	PP_ASSERT_WITH_CODE(tmp_result == 0,
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	tmp_result = polaris10_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable power containment!", result = tmp_result);
+
+	tmp_result = polaris10_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable SMC CAC!", result = tmp_result);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	tmp_result = polaris10_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable thermal auto throttle!", result = tmp_result);
+
+	tmp_result = polaris10_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to stop DPM!", result = tmp_result);
+
+	tmp_result = polaris10_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable deep sleep master switch!", result = tmp_result);
+
+	tmp_result = polaris10_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to disable ULV!", result = tmp_result);
+
+	tmp_result = polaris10_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to clear voting clients!", result = tmp_result);
+
+	tmp_result = polaris10_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to reset to default!", result = tmp_result);
+
+	tmp_result = polaris10_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE((tmp_result == 0),
+			"Failed to force to switch arbf0!", result = tmp_result);
+
+	return result;
 }
 
 int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
@@ -2528,13 +2765,6 @@ int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -2581,17 +2811,22 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 					PHM_PlatformCaps_DynamicUVDState);
 
 	/* power tune caps Assume disabled */
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_SQRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_DBRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_TDRamping);
-	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_TCPRamping);
 
-	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-					PHM_PlatformCaps_PowerContainment);
+	if (hwmgr->powercontainment_enabled)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+	else
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 							PHM_PlatformCaps_CAC);
 
@@ -2606,6 +2841,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
 	if (hwmgr->chip_id == CHIP_POLARIS11)
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
@@ -2638,7 +2874,7 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint16_t vv_id;
-	uint16_t vddc = 0;
+	uint32_t vddc = 0;
 	uint16_t i, j;
 	uint32_t sclk = 0;
 	struct phm_ppt_v1_information *table_info =
@@ -2662,15 +2898,16 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 				}
 			}
 
+			if (atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
+						VOLTAGE_TYPE_VDDC,
+						sclk, vv_id, &vddc) != 0) {
+				printk(KERN_WARNING "failed to retrieving EVV voltage!\n");
+				continue;
+			}
 
-			PP_ASSERT_WITH_CODE(0 == atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
-							VOLTAGE_TYPE_VDDC, sclk, vv_id, &vddc),
-						"Error retrieving EVV voltage value!",
-						continue);
-
-
-			/* need to make sure vddc is less than 2v or else, it could burn the ASIC. */
-			PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0),
+			/* need to make sure vddc is less than 2v or else, it could burn the ASIC.
+			 * real voltage level in unit of 0.01mv */
+			PP_ASSERT_WITH_CODE((vddc < 200000 && vddc != 0),
 					"Invalid VDDC value", result = -EINVAL;);
 
 			/* the voltage should not be zero nor equal to leakage ID */
@@ -2896,15 +3133,46 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+int polaris10_patch_voltage_workaround(struct pp_hwmgr *hwmgr)
+{
+	struct phm_ppt_v1_information *table_info =
+		       (struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct phm_ppt_v1_clock_voltage_dependency_table *dep_mclk_table =
+			table_info->vdd_dep_on_mclk;
+	struct phm_ppt_v1_voltage_lookup_table *lookup_table =
+			table_info->vddc_lookup_table;
+	uint32_t i;
+
+	if (hwmgr->chip_id == CHIP_POLARIS10 && hwmgr->hw_revision == 0xC7) {
+		if (lookup_table->entries[dep_mclk_table->entries[dep_mclk_table->count-1].vddInd].us_vdd >= 1000)
+			return 0;
+
+		for (i = 0; i < lookup_table->count; i++) {
+			if (lookup_table->entries[i].us_vdd < 0xff01 && lookup_table->entries[i].us_vdd >= 1000) {
+				dep_mclk_table->entries[dep_mclk_table->count-1].vddInd = (uint8_t) i;
+				return 0;
+			}
+		}
+	}
+	return 0;
+}
+
+
 int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_hwmgr *data;
 	struct pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	uint32_t temp_reg;
 	int result;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
+	data = kzalloc(sizeof(struct polaris10_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 	data->mclk_dpm0_activity_target = 0xa;
@@ -2938,6 +3206,11 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 
+	data->enable_tdc_limit_feature = true;
+	data->enable_pkg_pwr_tracking_feature = true;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+	data->mclk_stutter_mode_threshold = 40000;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
@@ -2962,8 +3235,13 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 			data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
 	}
 
+	if (table_info->cac_dtp_table->usClockStretchAmount != 0)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_ClockStretcher);
+
 	polaris10_set_features_platform_caps(hwmgr);
 
+	polaris10_patch_voltage_workaround(hwmgr);
 	polaris10_init_dpm_defaults(hwmgr);
 
 	/* Get leakage voltage based on leakage ID. */
@@ -2983,7 +3261,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	if (0 == result) {
 		struct cgs_system_info sys_info = {0};
 
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 							POLARIS10_MAX_HARDWARE_POWERLEVELS;
@@ -3068,7 +3346,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -3077,7 +3355,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 
@@ -3366,6 +3644,7 @@ static int polaris10_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 				    hwmgr->platform_descriptor.platformCaps,
 				    PHM_PlatformCaps_DisableMclkSwitchingForFrameLock);
 
+
 	disable_mclk_switching = (1 < info.display_count) ||
 				    disable_mclk_switching_for_frame_lock;
 
@@ -3520,10 +3799,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	ATOM_Tonga_State *state_entry = (ATOM_Tonga_State *)state;
 	ATOM_Tonga_POWERPLAYTABLE *powerplay_table =
 			(ATOM_Tonga_POWERPLAYTABLE *)pp_table;
-	ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-			(ATOM_Tonga_SCLK_Dependency_Table *)
+	PPTable_Generic_SubTable_Header *sclk_dep_table =
+			(PPTable_Generic_SubTable_Header *)
 			(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
+
 	ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 			(ATOM_Tonga_MCLK_Dependency_Table *)
 			(((unsigned long)powerplay_table) +
@@ -3575,7 +3855,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	/* Performance levels are arranged from low to high. */
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexLow].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexLow].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexLow].ulSclk;
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenLow);
@@ -3586,8 +3870,14 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 			[polaris10_power_state->performance_level_count++]);
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexHigh].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexHigh].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexHigh].ulSclk;
+
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenHigh);
 	performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap,
@@ -3645,7 +3935,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 		switch (state->classification.ui_label) {
 		case PP_StateUILabel_Performance:
 			data->use_pcie_performance_levels = true;
-
 			for (i = 0; i < ps->performance_level_count; i++) {
 				if (data->pcie_gen_performance.max <
 						ps->performance_levels[i].pcie_gen)
@@ -3661,7 +3950,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.max =
 							ps->performance_levels[i].pcie_lane;
-
 				if (data->pcie_lane_performance.min >
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.min =
@@ -3861,8 +4149,8 @@ static int polaris10_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 			(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to freeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_FreezeLevel),
@@ -3873,8 +4161,8 @@ static int polaris10_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to freeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_MCLKDPM_FreezeLevel),
@@ -4034,7 +4322,6 @@ static int polaris10_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 static int polaris10_trim_dpm_states(struct pp_hwmgr *hwmgr,
 		const struct polaris10_power_state *polaris10_ps)
 {
-	int result = 0;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -4054,7 +4341,7 @@ static int polaris10_trim_dpm_states(struct pp_hwmgr *hwmgr,
 			polaris10_ps->performance_levels[0].memory_clock,
 			polaris10_ps->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int polaris10_generate_dpm_level_enable_mask(
@@ -4137,25 +4424,20 @@ int polaris10_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 	return polaris10_enable_disable_uvd_dpm(hwmgr, !bgate);
 }
 
-static int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input)
+int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
-	const struct phm_set_power_state_input *states =
-			(const struct phm_set_power_state_input *)input;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	const struct polaris10_power_state *polaris10_nps =
-			cast_const_phw_polaris10_power_state(states->pnew_state);
-	const struct polaris10_power_state *polaris10_cps =
-			cast_const_phw_polaris10_power_state(states->pcurrent_state);
-
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
-	if (polaris10_nps->vce_clks.evclk > 0 &&
-	(polaris10_cps == NULL || polaris10_cps->vce_clks.evclk == 0)) {
-
-		data->smc_state_table.VceBootLevel =
+	if (!bgate) {
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+						PHM_PlatformCaps_StablePState))
+			data->smc_state_table.VceBootLevel =
 				(uint8_t) (table_info->mm_dep_table->count - 1);
+		else
+			data->smc_state_table.VceBootLevel = 0;
 
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, VceBootLevel);
@@ -4168,18 +4450,14 @@ static int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input)
 		cgs_write_ind_register(hwmgr->device,
 				CGS_IND_REG__SMC, mm_boot_level_offset, mm_boot_level_value);
 
-		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) {
+		if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
 			smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
 					PPSMC_MSG_VCEDPM_SetEnabledMask,
 					(uint32_t)1 << data->smc_state_table.VceBootLevel);
-
-			polaris10_enable_disable_vce_dpm(hwmgr, true);
-		} else if (polaris10_nps->vce_clks.evclk == 0 &&
-				polaris10_cps != NULL &&
-				polaris10_cps->vce_clks.evclk > 0)
-			polaris10_enable_disable_vce_dpm(hwmgr, false);
 	}
 
+	polaris10_enable_disable_vce_dpm(hwmgr, !bgate);
+
 	return 0;
 }
 
@@ -4187,12 +4465,9 @@ int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
 	if (!bgate) {
-		data->smc_state_table.SamuBootLevel =
-				(uint8_t) (table_info->mm_dep_table->count - 1);
+		data->smc_state_table.SamuBootLevel = 0;
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, SamuBootLevel);
 		mm_boot_level_offset /= 4;
@@ -4267,8 +4542,8 @@ static int polaris10_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to Unfreeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
@@ -4279,8 +4554,8 @@ static int polaris10_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(true == polaris10_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(0 == smum_send_msg_to_smc(hwmgr->smumgr,
 				PPSMC_MSG_SCLKDPM_UnfreezeLevel),
@@ -4327,6 +4602,17 @@ static int polaris10_notify_link_speed_change_after_state_change(
 	return 0;
 }
 
+static int polaris10_notify_smc_display(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+		(PPSMC_Msg)PPSMC_MSG_SetVBITimeout, data->frame_time_x2);
+	return (smum_send_msg_to_smc(hwmgr->smumgr, (PPSMC_Msg)PPSMC_HasDisplay) == 0) ?  0 : -EINVAL;
+}
+
+
+
 static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *input)
 {
 	int tmp_result, result = 0;
@@ -4360,11 +4646,6 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to generate DPM level enabled mask!",
 			result = tmp_result);
 
-	tmp_result = polaris10_update_vce_dpm(hwmgr, input);
-	PP_ASSERT_WITH_CODE((0 == tmp_result),
-			"Failed to update VCE DPM!",
-			result = tmp_result);
-
 	tmp_result = polaris10_update_sclk_threshold(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to update SCLK threshold!",
@@ -4375,6 +4656,11 @@ static int polaris10_set_power_state_tasks(struct pp_hwmgr *hwmgr, const void *i
 			"Failed to program memory timing parameters!",
 			result = tmp_result);
 
+	tmp_result = polaris10_notify_smc_display(hwmgr);
+	PP_ASSERT_WITH_CODE((0 == tmp_result),
+			"Failed to notify smc display settings!",
+			result = tmp_result);
+
 	tmp_result = polaris10_unfreeze_sclk_mclk_dpm(hwmgr);
 	PP_ASSERT_WITH_CODE((0 == tmp_result),
 			"Failed to unfreeze SCLK MCLK DPM!",
@@ -4409,6 +4695,7 @@ static int polaris10_set_max_fan_pwm_output(struct pp_hwmgr *hwmgr, uint16_t us_
 			PPSMC_MSG_SetFanPwmMax, us_max_fan_pwm);
 }
 
+
 int polaris10_notify_smc_display_change(struct pp_hwmgr *hwmgr, bool has_display)
 {
 	PPSMC_Msg msg = has_display ? (PPSMC_Msg)PPSMC_HasDisplay : (PPSMC_Msg)PPSMC_NoDisplay;
@@ -4428,8 +4715,7 @@ int polaris10_notify_smc_display_config_after_ps_adjustment(struct pp_hwmgr *hwm
 
 	if (num_active_displays > 1)  /* to do && (pHwMgr->pPECI->displayConfiguration.bMultiMonitorInSync != TRUE)) */
 		polaris10_notify_smc_display_change(hwmgr, false);
-	else
-		polaris10_notify_smc_display_change(hwmgr, true);
+
 
 	return 0;
 }
@@ -4470,6 +4756,8 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 	frame_time_in_us = 1000000 / refresh_rate;
 
 	pre_vbi_time_in_us = frame_time_in_us - 200 - mode_info.vblank_time_us;
+	data->frame_time_x2 = frame_time_in_us * 2 / 100;
+
 	display_gap2 = pre_vbi_time_in_us * (ref_clock / 100);
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixCG_DISPLAY_GAP_CNTL2, display_gap2);
@@ -4478,7 +4766,6 @@ int polaris10_program_display_gap(struct pp_hwmgr *hwmgr)
 
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start + offsetof(SMU74_SoftRegisters, VBlankTimeout), (frame_time_in_us - pre_vbi_time_in_us));
 
-	polaris10_notify_smc_display_change(hwmgr, num_active_displays != 0);
 
 	return 0;
 }
@@ -4591,7 +4878,7 @@ int polaris10_upload_mc_firmware(struct pp_hwmgr *hwmgr)
 		return 0;
 	}
 
-	data->need_long_memory_training = true;
+	data->need_long_memory_training = false;
 
 /*
  *	PPMCME_FirmwareDescriptorEntry *pfd = NULL;
@@ -4721,42 +5008,6 @@ int polaris10_setup_asic_task(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
-static int polaris10_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int polaris10_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int polaris10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -4899,6 +5150,89 @@ static int polaris10_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
+static int polaris10_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct polaris10_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int polaris10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct polaris10_power_state  *polaris10_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	polaris10_ps = cast_phw_polaris10_power_state(&ps->hardware);
+
+	polaris10_ps->performance_levels[polaris10_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int polaris10_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct polaris10_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int polaris10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct polaris10_power_state  *polaris10_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	polaris10_ps = cast_phw_polaris10_power_state(&ps->hardware);
+
+	polaris10_ps->performance_levels[polaris10_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
 static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.backend_init = &polaris10_hwmgr_backend_init,
 	.backend_fini = &polaris10_hwmgr_backend_fini,
@@ -4937,22 +5271,17 @@ static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.check_states_equal = polaris10_check_states_equal,
 	.set_fan_control_mode = polaris10_set_fan_control_mode,
 	.get_fan_control_mode = polaris10_get_fan_control_mode,
-	.get_pp_table = polaris10_get_pp_table,
-	.set_pp_table = polaris10_set_pp_table,
 	.force_clock_level = polaris10_force_clock_level,
 	.print_clock_levels = polaris10_print_clock_levels,
 	.enable_per_cu_power_gating = polaris10_phm_enable_per_cu_power_gating,
+	.get_sclk_od = polaris10_get_sclk_od,
+	.set_sclk_od = polaris10_set_sclk_od,
+	.get_mclk_od = polaris10_get_mclk_od,
+	.set_mclk_od = polaris10_set_mclk_od,
 };
 
 int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr  *data;
-
-	data = kzalloc (sizeof(struct polaris10_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &polaris10_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_polaris10_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index beedf35cbfa6..33c33947e827 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -309,9 +309,9 @@ struct polaris10_hwmgr {
 	uint32_t                           up_hyst;
 	uint32_t disable_dpm_mask;
 	bool apply_optimized_settings;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
+	uint32_t                              avfs_vdroop_override_setting;
+	bool                                  apply_avfs_cks_off_voltage;
+	uint32_t                              frame_time_x2;
 };
 
 /* To convert to Q8.8 format for firmware */
@@ -352,6 +352,6 @@ int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr);
 int polaris10_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 int polaris10_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable);
-
+int polaris10_update_vce_dpm(struct pp_hwmgr *hwmgr, bool bgate);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
index 0b99ab3ba0c5..b9cb240a135d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
@@ -28,10 +28,360 @@
 #include "polaris10_smumgr.h"
 #include "smu74_discrete.h"
 #include "pp_debug.h"
+#include "gca/gfx_8_0_d.h"
+#include "gca/gfx_8_0_sh_mask.h"
+#include "oss/oss_3_0_sh_mask.h"
 
 #define VOLTAGE_SCALE  4
 #define POWERTUNE_DEFAULT_SET_MAX    1
 
+uint32_t DIDTBlock_Info = SQ_IR_MASK | TCP_IR_MASK | TD_PCC_MASK;
+
+struct polaris10_pt_config_reg GCCACConfig_Polaris10[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03860013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x04060013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x000E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x008E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x010E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x018E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x020E0013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02100013, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02900013, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg GCCACConfig_Polaris11[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x03860011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x04060011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x000E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x008E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x010E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x018E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x020E0011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x00900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x01900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02100011, POLARIS10_CONFIGREG_GC_CAC_IND },
+	{   ixGC_CAC_CNTL,                     0xFFFFFFFF,                                         0,                                                  0x02900011, POLARIS10_CONFIGREG_GC_CAC_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg DIDTConfig_Polaris10[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0009,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0009,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__UNUSED_0_MASK,                       DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   0xFFFFFFFF  }
+};
+
+struct polaris10_pt_config_reg DIDTConfig_Polaris11[] = {
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x0073,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00ab,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0067,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x0084,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0027,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x00aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x005a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x0ebb,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x3853,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x3153,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0008,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0008,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__UNUSED_0_MASK,                       DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                     0x0000,     POLARIS10_CONFIGREG_DIDT_IND },
+	{   0xFFFFFFFF  }
+};
+
 static const struct polaris10_pt_defaults polaris10_power_tune_data_set_array[POWERTUNE_DEFAULT_SET_MAX] = {
 	/* sviLoadLIneEn, SviLoadLineVddC, TDC_VDDC_ThrottleReleaseLimitPerc, TDC_MAWt,
 	 * TdcWaterfallCtl, DTEAmbientTempBase, DisplayCac, BAPM_TEMP_GRADIENT */
@@ -209,6 +559,187 @@ static int polaris10_min_max_vgnb_lpml_id_from_bapm_vddc(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int polaris10_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
+{
+
+	uint32_t en = enable ? 1 : 0;
+	int32_t result = 0;
+	uint32_t data;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0);
+		data &= ~DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_SQ_CTRL0, data);
+		DIDTBlock_Info &= ~SQ_Enable_MASK;
+		DIDTBlock_Info |= en << SQ_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0);
+		data &= ~DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_DB_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_DB_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_DB_CTRL0, data);
+		DIDTBlock_Info &= ~DB_Enable_MASK;
+		DIDTBlock_Info |= en << DB_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0);
+		data &= ~DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TD_CTRL0, data);
+		DIDTBlock_Info &= ~TD_Enable_MASK;
+		DIDTBlock_Info |= en << TD_Enable_SHIFT;
+	}
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+		data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0);
+		data &= ~DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK;
+		data |= ((en << DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT) & DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, ixDIDT_TCP_CTRL0, data);
+		DIDTBlock_Info &= ~TCP_Enable_MASK;
+		DIDTBlock_Info |= en << TCP_Enable_SHIFT;
+	}
+
+	if (enable)
+		result = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, PPSMC_MSG_Didt_Block_Function, DIDTBlock_Info);
+
+	return result;
+}
+
+static int polaris10_program_pt_config_registers(struct pp_hwmgr *hwmgr,
+				struct polaris10_pt_config_reg *cac_config_regs)
+{
+	struct polaris10_pt_config_reg *config_regs = cac_config_regs;
+	uint32_t cache = 0;
+	uint32_t data = 0;
+
+	PP_ASSERT_WITH_CODE((config_regs != NULL), "Invalid config register table.", return -EINVAL);
+
+	while (config_regs->offset != 0xFFFFFFFF) {
+		if (config_regs->type == POLARIS10_CONFIGREG_CACHE)
+			cache |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+		else {
+			switch (config_regs->type) {
+			case POLARIS10_CONFIGREG_SMC_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, config_regs->offset);
+				break;
+
+			case POLARIS10_CONFIGREG_DIDT_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset);
+				break;
+
+			case POLARIS10_CONFIGREG_GC_CAC_IND:
+				data = cgs_read_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset);
+				break;
+
+			default:
+				data = cgs_read_register(hwmgr->device, config_regs->offset);
+				break;
+			}
+
+			data &= ~config_regs->mask;
+			data |= ((config_regs->value << config_regs->shift) & config_regs->mask);
+			data |= cache;
+
+			switch (config_regs->type) {
+			case POLARIS10_CONFIGREG_SMC_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, config_regs->offset, data);
+				break;
+
+			case POLARIS10_CONFIGREG_DIDT_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG__DIDT, config_regs->offset, data);
+				break;
+
+			case POLARIS10_CONFIGREG_GC_CAC_IND:
+				cgs_write_ind_register(hwmgr->device, CGS_IND_REG_GC_CAC, config_regs->offset, data);
+				break;
+
+			default:
+				cgs_write_register(hwmgr->device, config_regs->offset, data);
+				break;
+			}
+			cache = 0;
+		}
+
+		config_regs++;
+	}
+
+	return 0;
+}
+
+int polaris10_enable_didt_config(struct pp_hwmgr *hwmgr)
+{
+	int result;
+	uint32_t num_se = 0;
+	uint32_t count, value, value2;
+	struct cgs_system_info sys_info = {0};
+
+	sys_info.size = sizeof(struct cgs_system_info);
+	sys_info.info_id = CGS_SYSTEM_INFO_GFX_SE_INFO;
+	result = cgs_query_system_info(hwmgr->device, &sys_info);
+
+
+	if (result == 0)
+		num_se = sys_info.value;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+
+		/* TO DO Pre DIDT disable clock gating */
+		value = 0;
+		value2 = cgs_read_register(hwmgr->device, mmGRBM_GFX_INDEX);
+		for (count = 0; count < num_se; count++) {
+			value = SYS_GRBM_GFX_INDEX_DATA__INSTANCE_BROADCAST_WRITES_MASK
+				| SYS_GRBM_GFX_INDEX_DATA__SH_BROADCAST_WRITES_MASK
+				| (count << SYS_GRBM_GFX_INDEX_DATA__SE_INDEX__SHIFT);
+			cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value);
+
+			if (hwmgr->chip_id == CHIP_POLARIS10) {
+				result = polaris10_program_pt_config_registers(hwmgr, GCCACConfig_Polaris10);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				result = polaris10_program_pt_config_registers(hwmgr, DIDTConfig_Polaris10);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+			} else if (hwmgr->chip_id == CHIP_POLARIS11) {
+				result = polaris10_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+				result = polaris10_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
+				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
+			}
+		}
+		cgs_write_register(hwmgr->device, mmGRBM_GFX_INDEX, value2);
+
+		result = polaris10_enable_didt(hwmgr, true);
+		PP_ASSERT_WITH_CODE((result == 0), "EnableDiDt failed.", return result);
+
+		/* TO DO Post DIDT enable clock gating */
+	}
+
+	return 0;
+}
+
+int polaris10_disable_didt_config(struct pp_hwmgr *hwmgr)
+{
+	int result;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_SQRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DBRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TDRamping) ||
+		phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_TCPRamping)) {
+		/* TO DO Pre DIDT disable clock gating */
+
+		result = polaris10_enable_didt(hwmgr, false);
+		PP_ASSERT_WITH_CODE((result == 0), "Post DIDT enable clock gating failed.", return result);
+		/* TO DO Post DIDT enable clock gating */
+	}
+
+	return 0;
+}
+
+
 static int polaris10_populate_bapm_vddc_base_leakage_sidd(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -286,7 +817,7 @@ int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr)
 
 		if (polaris10_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset,
 				(uint8_t *)&data->power_tune_table,
-				sizeof(struct SMU74_Discrete_PmFuses), data->sram_end))
+				(sizeof(struct SMU74_Discrete_PmFuses) - 92), data->sram_end))
 			PP_ASSERT_WITH_CODE(false,
 					"Attempt to download PmFuseTable Failed!",
 					return -EINVAL);
@@ -312,6 +843,23 @@ int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) && data->cac_enabled) {
+		int smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableCac));
+		PP_ASSERT_WITH_CODE((smc_result == 0),
+				"Failed to disable CAC in SMC.", result = -1);
+
+		data->cac_enabled = false;
+	}
+	return result;
+}
+
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -373,6 +921,48 @@ int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int result = 0;
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) &&
+			data->power_containment_features) {
+		int smc_result;
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_TDCLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable TDCLimit in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_DTE) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_DisableDTE));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable DTE in SMC.",
+					result = smc_result);
+		}
+
+		if (data->power_containment_features &
+				POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+			smc_result = smum_send_msg_to_smc(hwmgr->smumgr,
+					(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+			PP_ASSERT_WITH_CODE((smc_result == 0),
+					"Failed to disable PkgPwrTracking in SMC.",
+					result = smc_result);
+		}
+		data->power_containment_features = 0;
+	}
+
+	return result;
+}
+
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
index 68bc1cb6d40c..bc78e28f010d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
@@ -27,15 +27,37 @@ enum polaris10_pt_config_reg_type {
 	POLARIS10_CONFIGREG_MMR = 0,
 	POLARIS10_CONFIGREG_SMC_IND,
 	POLARIS10_CONFIGREG_DIDT_IND,
+	POLARIS10_CONFIGREG_GC_CAC_IND,
 	POLARIS10_CONFIGREG_CACHE,
 	POLARIS10_CONFIGREG_MAX
 };
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK    0xfffc0000
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT  0x12
+#define DIDT_TD_CTRL0__UNUSED_0_MASK    0xfffc0000
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT  0x12
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK   0xfffc0000
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT 0x12
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK                 0xc0000000
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001e
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK                 0xc0000000
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001e
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK                0xc0000000
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT              0x0000001e
+
 /* PowerContainment Features */
 #define POWERCONTAINMENT_FEATURE_DTE             0x00000001
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
 #define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
 
+#define ixGC_CAC_CNTL 0x0000
+#define ixDIDT_SQ_STALL_CTRL 0x0004
+#define  ixDIDT_SQ_TUNING_CTRL 0x0005
+#define ixDIDT_TD_STALL_CTRL 0x0044
+#define ixDIDT_TD_TUNING_CTRL 0x0045
+#define ixDIDT_TCP_STALL_CTRL 0x0064
+#define ixDIDT_TCP_TUNING_CTRL 0x0065
+
 struct polaris10_pt_config_reg {
 	uint32_t                           offset;
 	uint32_t                           mask;
@@ -62,9 +84,11 @@ void polaris10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int polaris10_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr);
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr);
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr);
-
+int polaris10_enable_didt_config(struct pp_hwmgr *hwmgr);
 #endif  /* POLARIS10_POWERTUNE_H */
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
index aba167f7d167..b206632d4650 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
@@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr,
 	int ret;
 	struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 
-	if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS)
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
 		return 0;
 
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+			PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+
 	ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ?
 			0 : -1;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index 58742e0d1492..1944d289f846 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
 	return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
 }
 
+bool acpi_atcs_notify_pcie_device_ready(void *device)
+{
+	int32_t temp_buffer = 1;
+
+	return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
+				ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
+						&temp_buffer,
+						NULL,
+						0,
+						sizeof(temp_buffer),
+						0);
+}
+
+
 int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 {
 	struct atcs_pref_req_input atcs_input;
@@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 	int result;
 	struct cgs_system_info info = {0};
 
-	if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
+	if (acpi_atcs_notify_pcie_device_ready(device))
 		return -EINVAL;
 
 	info.size = sizeof(struct cgs_system_info);
@@ -77,7 +91,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 						ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
 						&atcs_input,
 						&atcs_output,
-						0,
+						1,
 						sizeof(atcs_input),
 						sizeof(atcs_output));
 		if (result != 0)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index da9f5f1b6dc2..26f3e30d0fef 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -179,13 +179,12 @@ int atomctrl_set_engine_dram_timings_rv770(
 
 	/* They are both in 10KHz Units. */
 	engine_clock_parameters.ulTargetEngineClock =
-		(uint32_t) engine_clock & SET_CLOCK_FREQ_MASK;
-	engine_clock_parameters.ulTargetEngineClock |=
-		(COMPUTE_ENGINE_PLL_PARAM << 24);
+		cpu_to_le32((engine_clock & SET_CLOCK_FREQ_MASK) |
+			    ((COMPUTE_ENGINE_PLL_PARAM << 24)));
 
 	/* in 10 khz units.*/
 	engine_clock_parameters.sReserved.ulClock =
-		(uint32_t) memory_clock & SET_CLOCK_FREQ_MASK;
+		cpu_to_le32(memory_clock & SET_CLOCK_FREQ_MASK);
 	return cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, DynamicMemorySettings),
 			&engine_clock_parameters);
@@ -252,7 +251,7 @@ int atomctrl_get_memory_pll_dividers_si(
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_1 mpll_parameters;
 	int result;
 
-	mpll_parameters.ulClock = (uint32_t) clock_value;
+	mpll_parameters.ulClock = cpu_to_le32(clock_value);
 	mpll_parameters.ucInputFlag = (uint8_t)((strobe_mode) ? 1 : 0);
 
 	result = cgs_atom_exec_cmd_table
@@ -262,9 +261,9 @@ int atomctrl_get_memory_pll_dividers_si(
 
 	if (0 == result) {
 		mpll_param->mpll_fb_divider.clk_frac =
-			mpll_parameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(mpll_parameters.ulFbDiv.usFbDivFrac);
 		mpll_param->mpll_fb_divider.cl_kf =
-			mpll_parameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(mpll_parameters.ulFbDiv.usFbDiv);
 		mpll_param->mpll_post_divider =
 			(uint32_t)mpll_parameters.ucPostDiv;
 		mpll_param->vco_mode =
@@ -300,7 +299,7 @@ int atomctrl_get_memory_pll_dividers_vi(struct pp_hwmgr *hwmgr,
 	COMPUTE_MEMORY_CLOCK_PARAM_PARAMETERS_V2_2 mpll_parameters;
 	int result;
 
-	mpll_parameters.ulClock.ulClock = (uint32_t)clock_value;
+	mpll_parameters.ulClock.ulClock = cpu_to_le32(clock_value);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, ComputeMemoryClockParam),
@@ -320,7 +319,7 @@ int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 	COMPUTE_MEMORY_ENGINE_PLL_PARAMETERS_V4 pll_parameters;
 	int result;
 
-	pll_parameters.ulClock = clock_value;
+	pll_parameters.ulClock = cpu_to_le32(clock_value);
 
 	result = cgs_atom_exec_cmd_table
 		(hwmgr->device,
@@ -329,7 +328,7 @@ int atomctrl_get_engine_pll_dividers_kong(struct pp_hwmgr *hwmgr,
 
 	if (0 == result) {
 		dividers->pll_post_divider = pll_parameters.ucPostDiv;
-		dividers->real_clock = pll_parameters.ulClock;
+		dividers->real_clock = le32_to_cpu(pll_parameters.ulClock);
 	}
 
 	return result;
@@ -343,7 +342,7 @@ int atomctrl_get_engine_pll_dividers_vi(
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
 	result = cgs_atom_exec_cmd_table
@@ -355,12 +354,12 @@ int atomctrl_get_engine_pll_dividers_vi(
 		dividers->pll_post_divider =
 			pll_patameters.ulClock.ucPostDiv;
 		dividers->real_clock =
-			pll_patameters.ulClock.ulClock;
+			le32_to_cpu(pll_patameters.ulClock.ulClock);
 
 		dividers->ul_fb_div.ul_fb_div_frac =
-			pll_patameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDivFrac);
 		dividers->ul_fb_div.ul_fb_div =
-			pll_patameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDiv);
 
 		dividers->uc_pll_ref_div =
 			pll_patameters.ucPllRefDiv;
@@ -380,7 +379,7 @@ int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr,
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_7 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv = COMPUTE_GPUCLK_INPUT_FLAG_SCLK;
 
 	result = cgs_atom_exec_cmd_table
@@ -412,7 +411,7 @@ int atomctrl_get_dfs_pll_dividers_vi(
 	COMPUTE_GPU_CLOCK_OUTPUT_PARAMETERS_V1_6 pll_patameters;
 	int result;
 
-	pll_patameters.ulClock.ulClock = clock_value;
+	pll_patameters.ulClock.ulClock = cpu_to_le32(clock_value);
 	pll_patameters.ulClock.ucPostDiv =
 		COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK;
 
@@ -425,12 +424,12 @@ int atomctrl_get_dfs_pll_dividers_vi(
 		dividers->pll_post_divider =
 			pll_patameters.ulClock.ucPostDiv;
 		dividers->real_clock =
-			pll_patameters.ulClock.ulClock;
+			le32_to_cpu(pll_patameters.ulClock.ulClock);
 
 		dividers->ul_fb_div.ul_fb_div_frac =
-			pll_patameters.ulFbDiv.usFbDivFrac;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDivFrac);
 		dividers->ul_fb_div.ul_fb_div =
-			pll_patameters.ulFbDiv.usFbDiv;
+			le16_to_cpu(pll_patameters.ulFbDiv.usFbDiv);
 
 		dividers->uc_pll_ref_div =
 			pll_patameters.ucPllRefDiv;
@@ -519,13 +518,13 @@ int atomctrl_get_voltage_table_v3(
 
 	for (i = 0; i < voltage_object->asGpioVoltageObj.ucGpioEntryNum; i++) {
 		voltage_table->entries[i].value =
-			voltage_object->asGpioVoltageObj.asVolGpioLut[i].usVoltageValue;
+			le16_to_cpu(voltage_object->asGpioVoltageObj.asVolGpioLut[i].usVoltageValue);
 		voltage_table->entries[i].smio_low =
-			voltage_object->asGpioVoltageObj.asVolGpioLut[i].ulVoltageId;
+			le32_to_cpu(voltage_object->asGpioVoltageObj.asVolGpioLut[i].ulVoltageId);
 	}
 
 	voltage_table->mask_low    =
-		voltage_object->asGpioVoltageObj.ulGpioMaskVal;
+		le32_to_cpu(voltage_object->asGpioVoltageObj.ulGpioMaskVal);
 	voltage_table->count      =
 		voltage_object->asGpioVoltageObj.ucGpioEntryNum;
 	voltage_table->phase_delay =
@@ -552,13 +551,13 @@ static bool atomctrl_lookup_gpio_pin(
 				pin_assignment->ucGpioPinBitShift;
 			gpio_pin_assignment->us_gpio_pin_aindex =
 				le16_to_cpu(pin_assignment->usGpioPin_AIndex);
-			return false;
+			return true;
 		}
 
 		offset += offsetof(ATOM_GPIO_PIN_ASSIGNMENT, ucGPIO_ID) + 1;
 	}
 
-	return true;
+	return false;
 }
 
 /**
@@ -592,12 +591,12 @@ bool atomctrl_get_pp_assign_pin(
 		const uint32_t pinId,
 		pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment)
 {
-	bool bRet = 0;
+	bool bRet = false;
 	ATOM_GPIO_PIN_LUT *gpio_lookup_table =
 		get_gpio_lookup_table(hwmgr->device);
 
 	PP_ASSERT_WITH_CODE((NULL != gpio_lookup_table),
-			"Could not find GPIO lookup Table in BIOS.", return -1);
+			"Could not find GPIO lookup Table in BIOS.", return false);
 
 	bRet = atomctrl_lookup_gpio_pin(gpio_lookup_table, pinId,
 		gpio_pin_assignment);
@@ -650,8 +649,8 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 		return -1;
 
 	if (getASICProfilingInfo->asHeader.ucTableFormatRevision < 3 ||
-			(getASICProfilingInfo->asHeader.ucTableFormatRevision == 3 &&
-			getASICProfilingInfo->asHeader.ucTableContentRevision < 4))
+	    (getASICProfilingInfo->asHeader.ucTableFormatRevision == 3 &&
+	     getASICProfilingInfo->asHeader.ucTableContentRevision < 4))
 		return -1;
 
 	/*-----------------------------------------------------------
@@ -662,37 +661,37 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 
 	switch (dpm_level) {
 	case 1:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm1);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM1, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm1));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM1), 1000);
 		break;
 	case 2:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm2);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM2, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm2));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM2), 1000);
 		break;
 	case 3:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm3);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM3, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm3));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM3), 1000);
 		break;
 	case 4:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm4);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM4, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm4));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM4), 1000);
 		break;
 	case 5:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm5);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM5, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm5));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM5), 1000);
 		break;
 	case 6:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm6);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM6, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm6));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM6), 1000);
 		break;
 	case 7:
-		fPowerDPMx = Convert_ULONG_ToFraction(getASICProfilingInfo->usPowerDpm7);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM7, 1000);
+		fPowerDPMx = Convert_ULONG_ToFraction(le16_to_cpu(getASICProfilingInfo->usPowerDpm7));
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM7), 1000);
 		break;
 	default:
 		printk(KERN_ERR "DPM Level not supported\n");
 		fPowerDPMx = Convert_ULONG_ToFraction(1);
-		fDerateTDP = GetScaledFraction(getASICProfilingInfo->ulTdpDerateDPM0, 1000);
+		fDerateTDP = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulTdpDerateDPM0), 1000);
 	}
 
 	/*-------------------------
@@ -716,9 +715,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 		return result;
 
 	/* Finally, the actual fuse value */
-	ul_RO_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sRO_fuse.ulEfuseMin, 1);
-	fRange = GetScaledFraction(sRO_fuse.ulEfuseEncodeRange, 1);
+	ul_RO_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sRO_fuse.ulEfuseMin), 1);
+	fRange = GetScaledFraction(le32_to_cpu(sRO_fuse.ulEfuseEncodeRange), 1);
 	fRO_fused = fDecodeLinearFuse(ul_RO_fused, fMin, fRange, sRO_fuse.ucEfuseLength);
 
 	sCACm_fuse = getASICProfilingInfo->sCACm;
@@ -736,9 +735,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_CACm_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sCACm_fuse.ulEfuseMin, 1000);
-	fRange = GetScaledFraction(sCACm_fuse.ulEfuseEncodeRange, 1000);
+	ul_CACm_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sCACm_fuse.ulEfuseMin), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sCACm_fuse.ulEfuseEncodeRange), 1000);
 
 	fCACm_fused = fDecodeLinearFuse(ul_CACm_fused, fMin, fRange, sCACm_fuse.ucEfuseLength);
 
@@ -756,9 +755,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_CACb_fused = sOutput_FuseValues.ulEfuseValue;
-	fMin = GetScaledFraction(sCACb_fuse.ulEfuseMin, 1000);
-	fRange = GetScaledFraction(sCACb_fuse.ulEfuseEncodeRange, 1000);
+	ul_CACb_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fMin = GetScaledFraction(le32_to_cpu(sCACb_fuse.ulEfuseMin), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sCACb_fuse.ulEfuseEncodeRange), 1000);
 
 	fCACb_fused = fDecodeLinearFuse(ul_CACb_fused, fMin, fRange, sCACb_fuse.ucEfuseLength);
 
@@ -777,9 +776,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kt_Beta_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKt_Beta_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction(sKt_Beta_fuse.ulEfuseEncodeRange, 1000);
+	ul_Kt_Beta_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKt_Beta_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sKt_Beta_fuse.ulEfuseEncodeRange), 1000);
 
 	fKt_Beta_fused = fDecodeLogisticFuse(ul_Kt_Beta_fused,
 			fAverage, fRange, sKt_Beta_fuse.ucEfuseLength);
@@ -798,9 +797,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kv_m_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKv_m_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction((sKv_m_fuse.ulEfuseEncodeRange & 0x7fffffff), 1000);
+	ul_Kv_m_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKv_m_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction((le32_to_cpu(sKv_m_fuse.ulEfuseEncodeRange) & 0x7fffffff), 1000);
 	fRange = fMultiply(fRange, ConvertToFraction(-1));
 
 	fKv_m_fused = fDecodeLogisticFuse(ul_Kv_m_fused,
@@ -820,9 +819,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_Kv_b_fused = sOutput_FuseValues.ulEfuseValue;
-	fAverage = GetScaledFraction(sKv_b_fuse.ulEfuseEncodeAverage, 1000);
-	fRange = GetScaledFraction(sKv_b_fuse.ulEfuseEncodeRange, 1000);
+	ul_Kv_b_fused = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fAverage = GetScaledFraction(le32_to_cpu(sKv_b_fuse.ulEfuseEncodeAverage), 1000);
+	fRange = GetScaledFraction(le32_to_cpu(sKv_b_fuse.ulEfuseEncodeRange), 1000);
 
 	fKv_b_fused = fDecodeLogisticFuse(ul_Kv_b_fused,
 			fAverage, fRange, sKv_b_fuse.ucEfuseLength);
@@ -851,9 +850,9 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	if (result)
 		return result;
 
-	ul_FT_Lkg_V0NORM = sOutput_FuseValues.ulEfuseValue;
-	fLn_MaxDivMin = GetScaledFraction(getASICProfilingInfo->ulLkgEncodeLn_MaxDivMin, 10000);
-	fMin = GetScaledFraction(getASICProfilingInfo->ulLkgEncodeMin, 10000);
+	ul_FT_Lkg_V0NORM = le32_to_cpu(sOutput_FuseValues.ulEfuseValue);
+	fLn_MaxDivMin = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLkgEncodeLn_MaxDivMin), 10000);
+	fMin = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLkgEncodeMin), 10000);
 
 	fFT_Lkg_V0NORM = fDecodeLeakageID(ul_FT_Lkg_V0NORM,
 			fLn_MaxDivMin, fMin, getASICProfilingInfo->ucLkgEfuseLength);
@@ -863,40 +862,40 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	 * PART 2 - Grabbing all required values
 	 *-------------------------------------------
 	 */
-	fSM_A0 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A0, 1000000),
+	fSM_A0 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A0), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A0_sign)));
-	fSM_A1 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A1, 1000000),
+	fSM_A1 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A1), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A1_sign)));
-	fSM_A2 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A2, 100000),
+	fSM_A2 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A2), 100000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A2_sign)));
-	fSM_A3 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A3, 1000000),
+	fSM_A3 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A3), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A3_sign)));
-	fSM_A4 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A4, 1000000),
+	fSM_A4 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A4), 1000000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A4_sign)));
-	fSM_A5 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A5, 1000),
+	fSM_A5 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A5), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A5_sign)));
-	fSM_A6 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A6, 1000),
+	fSM_A6 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A6), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A6_sign)));
-	fSM_A7 = fMultiply(GetScaledFraction(getASICProfilingInfo->ulSM_A7, 1000),
+	fSM_A7 = fMultiply(GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulSM_A7), 1000),
 			ConvertToFraction(uPow(-1, getASICProfilingInfo->ucSM_A7_sign)));
 
-	fMargin_RO_a = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_a);
-	fMargin_RO_b = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_b);
-	fMargin_RO_c = ConvertToFraction(getASICProfilingInfo->ulMargin_RO_c);
+	fMargin_RO_a = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_a));
+	fMargin_RO_b = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_b));
+	fMargin_RO_c = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_RO_c));
 
-	fMargin_fixed = ConvertToFraction(getASICProfilingInfo->ulMargin_fixed);
+	fMargin_fixed = ConvertToFraction(le32_to_cpu(getASICProfilingInfo->ulMargin_fixed));
 
 	fMargin_FMAX_mean = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_Fmax_mean, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_Fmax_mean), 10000);
 	fMargin_Plat_mean = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_plat_mean, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_plat_mean), 10000);
 	fMargin_FMAX_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_Fmax_sigma, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_Fmax_sigma), 10000);
 	fMargin_Plat_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_plat_sigma, 10000);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_plat_sigma), 10000);
 
 	fMargin_DC_sigma = GetScaledFraction(
-			getASICProfilingInfo->ulMargin_DC_sigma, 100);
+		le32_to_cpu(getASICProfilingInfo->ulMargin_DC_sigma), 100);
 	fMargin_DC_sigma = fDivide(fMargin_DC_sigma, ConvertToFraction(1000));
 
 	fCACm_fused = fDivide(fCACm_fused, ConvertToFraction(100));
@@ -908,14 +907,14 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	fSclk = GetScaledFraction(sclk, 100);
 
 	fV_max = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulMaxVddc, 1000), ConvertToFraction(4));
-	fT_prod = GetScaledFraction(getASICProfilingInfo->ulBoardCoreTemp, 10);
-	fLKG_Factor = GetScaledFraction(getASICProfilingInfo->ulEvvLkgFactor, 100);
-	fT_FT = GetScaledFraction(getASICProfilingInfo->ulLeakageTemp, 10);
+				 le32_to_cpu(getASICProfilingInfo->ulMaxVddc), 1000), ConvertToFraction(4));
+	fT_prod = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulBoardCoreTemp), 10);
+	fLKG_Factor = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulEvvLkgFactor), 100);
+	fT_FT = GetScaledFraction(le32_to_cpu(getASICProfilingInfo->ulLeakageTemp), 10);
 	fV_FT = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulLeakageVoltage, 1000), ConvertToFraction(4));
+				le32_to_cpu(getASICProfilingInfo->ulLeakageVoltage), 1000), ConvertToFraction(4));
 	fV_min = fDivide(GetScaledFraction(
-			getASICProfilingInfo->ulMinVddc, 1000), ConvertToFraction(4));
+				 le32_to_cpu(getASICProfilingInfo->ulMinVddc), 1000), ConvertToFraction(4));
 
 	/*-----------------------
 	 * PART 3
@@ -925,7 +924,7 @@ int atomctrl_calculate_voltage_evv_on_sclk(
 	fA_Term = fAdd(fMargin_RO_a, fAdd(fMultiply(fSM_A4, fSclk), fSM_A5));
 	fB_Term = fAdd(fAdd(fMultiply(fSM_A2, fSclk), fSM_A6), fMargin_RO_b);
 	fC_Term = fAdd(fMargin_RO_c,
-			fAdd(fMultiply(fSM_A0,fLkg_FT),
+			fAdd(fMultiply(fSM_A0, fLkg_FT),
 			fAdd(fMultiply(fSM_A1, fMultiply(fLkg_FT, fSclk)),
 			fAdd(fMultiply(fSM_A3, fSclk),
 			fSubtract(fSM_A7, fRO_fused)))));
@@ -1063,9 +1062,9 @@ int atomctrl_get_voltage_evv_on_sclk(
 	get_voltage_info_param_space.ucVoltageMode   =
 		ATOM_GET_VOLTAGE_EVV_VOLTAGE;
 	get_voltage_info_param_space.usVoltageLevel  =
-		virtual_voltage_Id;
+		cpu_to_le16(virtual_voltage_Id);
 	get_voltage_info_param_space.ulSCLKFreq      =
-		sclk;
+		cpu_to_le32(sclk);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
@@ -1074,8 +1073,54 @@ int atomctrl_get_voltage_evv_on_sclk(
 	if (0 != result)
 		return result;
 
-	*voltage = ((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
-			(&get_voltage_info_param_space))->usVoltageLevel;
+	*voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+				(&get_voltage_info_param_space))->usVoltageLevel);
+
+	return result;
+}
+
+/**
+ * atomctrl_get_voltage_evv gets voltage via call to ATOM COMMAND table.
+ * @param hwmgr	input: pointer to hwManager
+ * @param virtual_voltage_id      input: voltage id which match per voltage DPM state: 0xff01, 0xff02.. 0xff08
+ * @param voltage		       output: real voltage level in unit of mv
+ */
+int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr,
+			     uint16_t virtual_voltage_id,
+			     uint16_t *voltage)
+{
+	int result;
+	int entry_id;
+	GET_VOLTAGE_INFO_INPUT_PARAMETER_V1_2 get_voltage_info_param_space;
+
+	/* search for leakage voltage ID 0xff01 ~ 0xff08 and sckl */
+	for (entry_id = 0; entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count; entry_id++) {
+		if (hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].v == virtual_voltage_id) {
+			/* found */
+			break;
+		}
+	}
+
+	PP_ASSERT_WITH_CODE(entry_id < hwmgr->dyn_state.vddc_dependency_on_sclk->count,
+	        "Can't find requested voltage id in vddc_dependency_on_sclk table!",
+	        return -EINVAL;
+	);
+
+	get_voltage_info_param_space.ucVoltageType = VOLTAGE_TYPE_VDDC;
+	get_voltage_info_param_space.ucVoltageMode = ATOM_GET_VOLTAGE_EVV_VOLTAGE;
+	get_voltage_info_param_space.usVoltageLevel = virtual_voltage_id;
+	get_voltage_info_param_space.ulSCLKFreq =
+		cpu_to_le32(hwmgr->dyn_state.vddc_dependency_on_sclk->entries[entry_id].clk);
+
+	result = cgs_atom_exec_cmd_table(hwmgr->device,
+			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
+			&get_voltage_info_param_space);
+
+	if (0 != result)
+		return result;
+
+	*voltage = le16_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_2 *)
+				(&get_voltage_info_param_space))->usVoltageLevel);
 
 	return result;
 }
@@ -1165,8 +1210,8 @@ static int asic_internal_ss_get_ss_asignment(struct pp_hwmgr *hwmgr,
 
 	if (entry_found) {
 		ssEntry->speed_spectrum_percentage =
-			ssInfo->usSpreadSpectrumPercentage;
-		ssEntry->speed_spectrum_rate = ssInfo->usSpreadRateInKhz;
+			le16_to_cpu(ssInfo->usSpreadSpectrumPercentage);
+		ssEntry->speed_spectrum_rate = le16_to_cpu(ssInfo->usSpreadRateInKhz);
 
 		if (((GET_DATA_TABLE_MAJOR_REVISION(table) == 2) &&
 			(GET_DATA_TABLE_MINOR_REVISION(table) >= 2)) ||
@@ -1222,7 +1267,7 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
 	int result;
 	READ_EFUSE_VALUE_PARAMETER efuse_param;
 
-	efuse_param.sEfuse.usEfuseIndex = (start_index / 32) * 4;
+	efuse_param.sEfuse.usEfuseIndex = cpu_to_le16((start_index / 32) * 4);
 	efuse_param.sEfuse.ucBitShift = (uint8_t)
 			(start_index - ((start_index / 32) * 32));
 	efuse_param.sEfuse.ucBitLength  = (uint8_t)
@@ -1232,19 +1277,21 @@ int atomctrl_read_efuse(void *device, uint16_t start_index,
 			GetIndexIntoMasterTable(COMMAND, ReadEfuseValue),
 			&efuse_param);
 	if (!result)
-		*efuse = efuse_param.ulEfuseValue & mask;
+		*efuse = le32_to_cpu(efuse_param.ulEfuseValue) & mask;
 
 	return result;
 }
 
 int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
-								uint8_t level)
+			      uint8_t level)
 {
 	DYNAMICE_MEMORY_SETTINGS_PARAMETER_V2_1 memory_clock_parameters;
 	int result;
 
-	memory_clock_parameters.asDPMMCReg.ulClock.ulClockFreq = memory_clock & SET_CLOCK_FREQ_MASK;
-	memory_clock_parameters.asDPMMCReg.ulClock.ulComputeClockFlag = ADJUST_MC_SETTING_PARAM;
+	memory_clock_parameters.asDPMMCReg.ulClock.ulClockFreq =
+		memory_clock & SET_CLOCK_FREQ_MASK;
+	memory_clock_parameters.asDPMMCReg.ulClock.ulComputeClockFlag =
+		ADJUST_MC_SETTING_PARAM;
 	memory_clock_parameters.asDPMMCReg.ucMclkDPMState = level;
 
 	result = cgs_atom_exec_cmd_table
@@ -1256,7 +1303,7 @@ int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 }
 
 int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage)
+				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage)
 {
 
 	int result;
@@ -1264,8 +1311,8 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
 
 	get_voltage_info_param_space.ucVoltageType = voltage_type;
 	get_voltage_info_param_space.ucVoltageMode = ATOM_GET_VOLTAGE_EVV_VOLTAGE;
-	get_voltage_info_param_space.usVoltageLevel = virtual_voltage_Id;
-	get_voltage_info_param_space.ulSCLKFreq = sclk;
+	get_voltage_info_param_space.usVoltageLevel = cpu_to_le16(virtual_voltage_Id);
+	get_voltage_info_param_space.ulSCLKFreq = cpu_to_le32(sclk);
 
 	result = cgs_atom_exec_cmd_table(hwmgr->device,
 			GetIndexIntoMasterTable(COMMAND, GetVoltageInfo),
@@ -1274,7 +1321,7 @@ int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_
 	if (0 != result)
 		return result;
 
-	*voltage = get_voltage_info_param_space.usVoltageLevel;
+	*voltage = le32_to_cpu(((GET_EVV_VOLTAGE_INFO_OUTPUT_PARAMETER_V1_3 *)(&get_voltage_info_param_space))->ulVoltageLevel);
 
 	return result;
 }
@@ -1295,10 +1342,57 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
 	for (i = 0; i < psmu_info->ucSclkEntryNum; i++) {
 		table->entry[i].ucVco_setting = psmu_info->asSclkFcwRangeEntry[i].ucVco_setting;
 		table->entry[i].ucPostdiv = psmu_info->asSclkFcwRangeEntry[i].ucPostdiv;
-		table->entry[i].usFcw_pcc = psmu_info->asSclkFcwRangeEntry[i].ucFcw_pcc;
-		table->entry[i].usFcw_trans_upper = psmu_info->asSclkFcwRangeEntry[i].ucFcw_trans_upper;
-		table->entry[i].usRcw_trans_lower = psmu_info->asSclkFcwRangeEntry[i].ucRcw_trans_lower;
+		table->entry[i].usFcw_pcc =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucFcw_pcc);
+		table->entry[i].usFcw_trans_upper =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucFcw_trans_upper);
+		table->entry[i].usRcw_trans_lower =
+			le16_to_cpu(psmu_info->asSclkFcwRangeEntry[i].ucRcw_trans_lower);
 	}
 
 	return 0;
 }
+
+int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
+				  struct pp_atom_ctrl__avfs_parameters *param)
+{
+	ATOM_ASIC_PROFILING_INFO_V3_6 *profile = NULL;
+
+	if (param == NULL)
+		return -EINVAL;
+
+	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)
+			cgs_atom_get_data_table(hwmgr->device,
+					GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo),
+					NULL, NULL, NULL);
+	if (!profile)
+		return -1;
+
+	param->ulAVFS_meanNsigma_Acontant0 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant0);
+	param->ulAVFS_meanNsigma_Acontant1 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant1);
+	param->ulAVFS_meanNsigma_Acontant2 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant2);
+	param->usAVFS_meanNsigma_DC_tol_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_DC_tol_sigma);
+	param->usAVFS_meanNsigma_Platform_mean = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_mean);
+	param->usAVFS_meanNsigma_Platform_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_sigma);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a0);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a1);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a2);
+	param->ulGB_VDROOP_TABLE_CKSON_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a0);
+	param->ulGB_VDROOP_TABLE_CKSON_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a1);
+	param->ulGB_VDROOP_TABLE_CKSON_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSON_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSON_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_b);
+	param->usMaxVoltage_0_25mv = le16_to_cpu(profile->usMaxVoltage_0_25mv);
+	param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF;
+	param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON;
+	param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF;
+	param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON;
+	param->usPSM_Age_ComFactor = le16_to_cpu(profile->usPSM_Age_ComFactor);
+	param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index d24ebb566905..fc898afce002 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -250,8 +250,38 @@ struct pp_atomctrl_gpio_pin_assignment {
 };
 typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment;
 
+struct pp_atom_ctrl__avfs_parameters {
+	uint32_t  ulAVFS_meanNsigma_Acontant0;
+	uint32_t  ulAVFS_meanNsigma_Acontant1;
+	uint32_t  ulAVFS_meanNsigma_Acontant2;
+	uint16_t usAVFS_meanNsigma_DC_tol_sigma;
+	uint16_t usAVFS_meanNsigma_Platform_mean;
+	uint16_t usAVFS_meanNsigma_Platform_sigma;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a2;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSON_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_b;
+	uint16_t  usMaxVoltage_0_25mv;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSON;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSON;
+	uint16_t usPSM_Age_ComFactor;
+	uint8_t  ucEnableApplyAVFS_CKS_OFF_Voltage;
+	uint8_t  ucReserved;
+};
+
 extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment);
 extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
+extern int atomctrl_get_voltage_evv(struct pp_hwmgr *hwmgr, uint16_t virtual_voltage_id, uint16_t *voltage);
 extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr);
 extern int atomctrl_get_memory_clock_spread_spectrum(struct pp_hwmgr *hwmgr, const uint32_t memory_clock, pp_atomctrl_internal_ss_info *ssInfo);
 extern int atomctrl_get_engine_clock_spread_spectrum(struct pp_hwmgr *hwmgr, const uint32_t engine_clock, pp_atomctrl_internal_ss_info *ssInfo);
@@ -276,7 +306,10 @@ extern int atomctrl_get_engine_pll_dividers_ai(struct pp_hwmgr *hwmgr, uint32_t
 extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clock,
 								uint8_t level);
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
-				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
+				uint32_t sclk, uint16_t virtual_voltage_Id, uint32_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
+
+extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
index 009bd5963ed8..8f50a038396c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
@@ -50,55 +50,45 @@ typedef union _fInt {
  * Function Declarations
  *  -------------------------------------------------------------------------------
  */
-fInt ConvertToFraction(int);                       /* Use this to convert an INT to a FINT */
-fInt Convert_ULONG_ToFraction(uint32_t);              /* Use this to convert an uint32_t to a FINT */
-fInt GetScaledFraction(int, int);                  /* Use this to convert an INT to a FINT after scaling it by a factor */
-int ConvertBackToInteger(fInt);                    /* Convert a FINT back to an INT that is scaled by 1000 (i.e. last 3 digits are the decimal digits) */
-
-fInt fNegate(fInt);                                /* Returns -1 * input fInt value */
-fInt fAdd (fInt, fInt);                            /* Returns the sum of two fInt numbers */
-fInt fSubtract (fInt A, fInt B);                   /* Returns A-B - Sometimes easier than Adding negative numbers */
-fInt fMultiply (fInt, fInt);                       /* Returns the product of two fInt numbers */
-fInt fDivide (fInt A, fInt B);                     /* Returns A/B */
-fInt fGetSquare(fInt);                             /* Returns the square of a fInt number */
-fInt fSqrt(fInt);                                  /* Returns the Square Root of a fInt number */
-
-int uAbs(int);                                     /* Returns the Absolute value of the Int */
-fInt fAbs(fInt);                                   /* Returns the Absolute value of the fInt */
-int uPow(int base, int exponent);                  /* Returns base^exponent an INT */
-
-void SolveQuadracticEqn(fInt, fInt, fInt, fInt[]); /* Returns the 2 roots via the array */
-bool Equal(fInt, fInt);                         /* Returns true if two fInts are equal to each other */
-bool GreaterThan(fInt A, fInt B);               /* Returns true if A > B */
-
-fInt fExponential(fInt exponent);                  /* Can be used to calculate e^exponent */
-fInt fNaturalLog(fInt value);                      /* Can be used to calculate ln(value) */
+static fInt ConvertToFraction(int);                       /* Use this to convert an INT to a FINT */
+static fInt Convert_ULONG_ToFraction(uint32_t);           /* Use this to convert an uint32_t to a FINT */
+static fInt GetScaledFraction(int, int);                  /* Use this to convert an INT to a FINT after scaling it by a factor */
+static int ConvertBackToInteger(fInt);                    /* Convert a FINT back to an INT that is scaled by 1000 (i.e. last 3 digits are the decimal digits) */
+
+static fInt fNegate(fInt);                                /* Returns -1 * input fInt value */
+static fInt fAdd (fInt, fInt);                            /* Returns the sum of two fInt numbers */
+static fInt fSubtract (fInt A, fInt B);                   /* Returns A-B - Sometimes easier than Adding negative numbers */
+static fInt fMultiply (fInt, fInt);                       /* Returns the product of two fInt numbers */
+static fInt fDivide (fInt A, fInt B);                     /* Returns A/B */
+static fInt fGetSquare(fInt);                             /* Returns the square of a fInt number */
+static fInt fSqrt(fInt);                                  /* Returns the Square Root of a fInt number */
+
+static int uAbs(int);                                     /* Returns the Absolute value of the Int */
+static int uPow(int base, int exponent);                  /* Returns base^exponent an INT */
+
+static void SolveQuadracticEqn(fInt, fInt, fInt, fInt[]); /* Returns the 2 roots via the array */
+static bool Equal(fInt, fInt);                            /* Returns true if two fInts are equal to each other */
+static bool GreaterThan(fInt A, fInt B);                  /* Returns true if A > B */
+
+static fInt fExponential(fInt exponent);                  /* Can be used to calculate e^exponent */
+static fInt fNaturalLog(fInt value);                      /* Can be used to calculate ln(value) */
 
 /* Fuse decoding functions
  * -------------------------------------------------------------------------------------
  */
-fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength);
-fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength);
-fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength);
+static fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength);
+static fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength);
+static fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength);
 
 /* Internal Support Functions - Use these ONLY for testing or adding to internal functions
  * -------------------------------------------------------------------------------------
  * Some of the following functions take two INTs as their input - This is unsafe for a variety of reasons.
  */
-fInt Add (int, int);                               /* Add two INTs and return Sum as FINT */
-fInt Multiply (int, int);                          /* Multiply two INTs and return Product as FINT */
-fInt Divide (int, int);                            /* You get the idea... */
-fInt fNegate(fInt);
+static fInt Divide (int, int);                            /* Divide two INTs and return result as FINT */
+static fInt fNegate(fInt);
 
-int uGetScaledDecimal (fInt);                      /* Internal function */
-int GetReal (fInt A);                              /* Internal function */
-
-/* Future Additions and Incomplete Functions
- * -------------------------------------------------------------------------------------
- */
-int GetRoundedValue(fInt);                         /* Incomplete function - Useful only when Precision is lacking */
-                                                   /* Let us say we have 2.126 but can only handle 2 decimal points. We could */
-                                                   /* either chop of 6 and keep 2.12 or use this function to get 2.13, which is more accurate */
+static int uGetScaledDecimal (fInt);                      /* Internal function */
+static int GetReal (fInt A);                              /* Internal function */
 
 /* -------------------------------------------------------------------------------------
  * TROUBLESHOOTING INFORMATION
@@ -115,7 +105,7 @@ int GetRoundedValue(fInt);                         /* Incomplete function - Usef
  * START OF CODE
  * -------------------------------------------------------------------------------------
  */
-fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
+static fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
 {
 	uint32_t i;
 	bool bNegated = false;
@@ -154,7 +144,7 @@ fInt fExponential(fInt exponent)        /*Can be used to calculate e^exponent*/
 	return solution;
 }
 
-fInt fNaturalLog(fInt value)
+static fInt fNaturalLog(fInt value)
 {
 	uint32_t i;
 	fInt upper_bound = Divide(8, 1000);
@@ -179,7 +169,7 @@ fInt fNaturalLog(fInt value)
 	return (fAdd(solution, error_term));
 }
 
-fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength)
+static fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t bitlength)
 {
 	fInt f_fuse_value = Convert_ULONG_ToFraction(fuse_value);
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -194,7 +184,7 @@ fInt fDecodeLinearFuse(uint32_t fuse_value, fInt f_min, fInt f_range, uint32_t b
 }
 
 
-fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength)
+static fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint32_t bitlength)
 {
 	fInt f_fuse_value = Convert_ULONG_ToFraction(fuse_value);
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -212,7 +202,7 @@ fInt fDecodeLogisticFuse(uint32_t fuse_value, fInt f_average, fInt f_range, uint
 	return f_decoded_value;
 }
 
-fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength)
+static fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min, uint32_t bitlength)
 {
 	fInt fLeakage;
 	fInt f_bit_max_value = Convert_ULONG_ToFraction((uPow(2, bitlength)) - 1);
@@ -225,7 +215,7 @@ fInt fDecodeLeakageID (uint32_t leakageID_fuse, fInt ln_max_div_min, fInt f_min,
 	return fLeakage;
 }
 
-fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to make fInt a private declaration? */
+static fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to make fInt a private declaration? */
 {
 	fInt temp;
 
@@ -237,13 +227,13 @@ fInt ConvertToFraction(int X) /*Add all range checking here. Is it possible to m
 	return temp;
 }
 
-fInt fNegate(fInt X)
+static fInt fNegate(fInt X)
 {
 	fInt CONSTANT_NEGONE = ConvertToFraction(-1);
 	return (fMultiply(X, CONSTANT_NEGONE));
 }
 
-fInt Convert_ULONG_ToFraction(uint32_t X)
+static fInt Convert_ULONG_ToFraction(uint32_t X)
 {
 	fInt temp;
 
@@ -255,7 +245,7 @@ fInt Convert_ULONG_ToFraction(uint32_t X)
 	return temp;
 }
 
-fInt GetScaledFraction(int X, int factor)
+static fInt GetScaledFraction(int X, int factor)
 {
 	int times_shifted, factor_shifted;
 	bool bNEGATED;
@@ -304,7 +294,7 @@ fInt GetScaledFraction(int X, int factor)
 }
 
 /* Addition using two fInts */
-fInt fAdd (fInt X, fInt Y)
+static fInt fAdd (fInt X, fInt Y)
 {
 	fInt Sum;
 
@@ -314,7 +304,7 @@ fInt fAdd (fInt X, fInt Y)
 }
 
 /* Addition using two fInts */
-fInt fSubtract (fInt X, fInt Y)
+static fInt fSubtract (fInt X, fInt Y)
 {
 	fInt Difference;
 
@@ -323,7 +313,7 @@ fInt fSubtract (fInt X, fInt Y)
 	return Difference;
 }
 
-bool Equal(fInt A, fInt B)
+static bool Equal(fInt A, fInt B)
 {
 	if (A.full == B.full)
 		return true;
@@ -331,7 +321,7 @@ bool Equal(fInt A, fInt B)
 		return false;
 }
 
-bool GreaterThan(fInt A, fInt B)
+static bool GreaterThan(fInt A, fInt B)
 {
 	if (A.full > B.full)
 		return true;
@@ -339,7 +329,7 @@ bool GreaterThan(fInt A, fInt B)
 		return false;
 }
 
-fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
+static fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
 {
 	fInt Product;
 	int64_t tempProduct;
@@ -363,7 +353,7 @@ fInt fMultiply (fInt X, fInt Y) /* Uses 64-bit integers (int64_t) */
 	return Product;
 }
 
-fInt fDivide (fInt X, fInt Y)
+static fInt fDivide (fInt X, fInt Y)
 {
 	fInt fZERO, fQuotient;
 	int64_t longlongX, longlongY;
@@ -384,7 +374,7 @@ fInt fDivide (fInt X, fInt Y)
 	return fQuotient;
 }
 
-int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to check with the Golden settings table*/
+static int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to check with the Golden settings table*/
 {
 	fInt fullNumber, scaledDecimal, scaledReal;
 
@@ -397,13 +387,13 @@ int ConvertBackToInteger (fInt A) /*THIS is the function that will be used to ch
 	return fullNumber.full;
 }
 
-fInt fGetSquare(fInt A)
+static fInt fGetSquare(fInt A)
 {
 	return fMultiply(A,A);
 }
 
 /* x_new = x_old - (x_old^2 - C) / (2 * x_old) */
-fInt fSqrt(fInt num)
+static fInt fSqrt(fInt num)
 {
 	fInt F_divide_Fprime, Fprime;
 	fInt test;
@@ -460,7 +450,7 @@ fInt fSqrt(fInt num)
 	return (x_new);
 }
 
-void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
+static void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
 {
 	fInt *pRoots = &Roots[0];
 	fInt temp, root_first, root_second;
@@ -498,52 +488,13 @@ void SolveQuadracticEqn(fInt A, fInt B, fInt C, fInt Roots[])
  * -----------------------------------------------------------------------------
  */
 
-/* Addition using two normal ints - Temporary - Use only for testing purposes?. */
-fInt Add (int X, int Y)
-{
-	fInt A, B, Sum;
-
-	A.full = (X << SHIFT_AMOUNT);
-	B.full = (Y << SHIFT_AMOUNT);
-
-	Sum.full = A.full + B.full;
-
-	return Sum;
-}
-
 /* Conversion Functions */
-int GetReal (fInt A)
+static int GetReal (fInt A)
 {
 	return (A.full >> SHIFT_AMOUNT);
 }
 
-/* Temporarily Disabled */
-int GetRoundedValue(fInt A) /*For now, round the 3rd decimal place */
-{
-	/* ROUNDING TEMPORARLY DISABLED
-	int temp = A.full;
-	int decimal_cutoff, decimal_mask = 0x000001FF;
-	decimal_cutoff = temp & decimal_mask;
-	if (decimal_cutoff > 0x147) {
-		temp += 673;
-	}*/
-
-	return ConvertBackToInteger(A)/10000; /*Temporary - in case this was used somewhere else */
-}
-
-fInt Multiply (int X, int Y)
-{
-	fInt A, B, Product;
-
-	A.full = X << SHIFT_AMOUNT;
-	B.full = Y << SHIFT_AMOUNT;
-
-	Product = fMultiply(A, B);
-
-	return Product;
-}
-
-fInt Divide (int X, int Y)
+static fInt Divide (int X, int Y)
 {
 	fInt A, B, Quotient;
 
@@ -555,7 +506,7 @@ fInt Divide (int X, int Y)
 	return Quotient;
 }
 
-int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole integers - Costly function */
+static int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole integers - Costly function */
 {
 	int dec[PRECISION];
 	int i, scaledDecimal = 0, tmp = A.partial.decimal;
@@ -570,7 +521,7 @@ int uGetScaledDecimal (fInt A) /*Converts the fractional portion to whole intege
 	return scaledDecimal;
 }
 
-int uPow(int base, int power)
+static int uPow(int base, int power)
 {
 	if (power == 0)
 		return 1;
@@ -578,15 +529,7 @@ int uPow(int base, int power)
 		return (base)*uPow(base, power - 1);
 }
 
-fInt fAbs(fInt A)
-{
-	if (A.partial.real < 0)
-		return (fMultiply(A, ConvertToFraction(-1)));
-	else
-		return A;
-}
-
-int uAbs(int X)
+static int uAbs(int X)
 {
 	if (X < 0)
 		return (X * -1);
@@ -594,7 +537,7 @@ int uAbs(int X)
 		return X;
 }
 
-fInt fRoundUpByStepSize(fInt A, fInt fStepSize, bool error_term)
+static fInt fRoundUpByStepSize(fInt A, fInt fStepSize, bool error_term)
 {
 	fInt solution;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 2f1a14fe05b1..6c321b0d8a1e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -794,19 +794,35 @@ static const ATOM_PPLIB_STATE_V2 *get_state_entry_v2(
 static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table(
 				     struct pp_hwmgr *hwmgr)
 {
-	const void *table_addr = NULL;
+	const void *table_addr = hwmgr->soft_pp_table;
 	uint8_t frev, crev;
 	uint16_t size;
 
-	table_addr = cgs_atom_get_data_table(hwmgr->device,
-			GetIndexIntoMasterTable(DATA, PowerPlayInfo),
-			&size, &frev, &crev);
+	if (!table_addr) {
+		table_addr = cgs_atom_get_data_table(hwmgr->device,
+				GetIndexIntoMasterTable(DATA, PowerPlayInfo),
+				&size, &frev, &crev);
 
-	hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table_size = size;
+	}
 
 	return (const ATOM_PPLIB_POWERPLAYTABLE *)table_addr;
 }
 
+int pp_tables_get_response_times(struct pp_hwmgr *hwmgr,
+				uint32_t *vol_rep_time, uint32_t *bb_rep_time)
+{
+	const ATOM_PPLIB_POWERPLAYTABLE *powerplay_tab = get_powerplay_table(hwmgr);
+
+	PP_ASSERT_WITH_CODE(NULL != powerplay_tab,
+			    "Missing PowerPlay Table!", return -EINVAL);
+
+	*vol_rep_time = (uint32_t)le16_to_cpu(powerplay_tab->usVoltageTime);
+	*bb_rep_time = (uint32_t)le16_to_cpu(powerplay_tab->usBackbiasTime);
+
+	return 0;
+}
 
 int pp_tables_get_num_of_entries(struct pp_hwmgr *hwmgr,
 				     unsigned long *num_of_entries)
@@ -1499,7 +1515,7 @@ int get_number_of_vce_state_table_entries(
 	const ATOM_PPLIB_VCE_State_Table *vce_table =
 				    get_vce_state_table(hwmgr, table);
 
-	if (vce_table > 0)
+	if (vce_table)
 		return vce_table->numEntries;
 
 	return 0;
@@ -1589,11 +1605,6 @@ static int pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 static int pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
 	if (NULL != hwmgr->dyn_state.vddc_dependency_on_sclk) {
 		kfree(hwmgr->dyn_state.vddc_dependency_on_sclk);
 		hwmgr->dyn_state.vddc_dependency_on_sclk = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
index 30434802417e..baddaa75693b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.h
@@ -32,16 +32,19 @@ struct pp_hw_power_state;
 extern const struct pp_table_func pptable_funcs;
 
 typedef int (*pp_tables_hw_clock_info_callback)(struct pp_hwmgr *hwmgr,
-					struct pp_hw_power_state *hw_ps,
-							unsigned int index,
-						 const void *clock_info);
+						struct pp_hw_power_state *hw_ps,
+						unsigned int index,
+						const void *clock_info);
 
 int pp_tables_get_num_of_entries(struct pp_hwmgr *hwmgr,
-				     unsigned long *num_of_entries);
+				 unsigned long *num_of_entries);
 
 int pp_tables_get_entry(struct pp_hwmgr *hwmgr,
-						unsigned long entry_index,
-						struct pp_power_state *ps,
-				pp_tables_hw_clock_info_callback func);
+			unsigned long entry_index,
+			struct pp_power_state *ps,
+			pp_tables_hw_clock_info_callback func);
+
+int pp_tables_get_response_times(struct pp_hwmgr *hwmgr,
+				 uint32_t *vol_rep_time, uint32_t *bb_rep_time);
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 16fed487973b..c7dc111221c2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -571,7 +571,7 @@ int tonga_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->sclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable SCLK DPM when DPM is disabled",
 				return -1
 				);
@@ -587,7 +587,7 @@ int tonga_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->mclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable MCLK DPM when DPM is disabled",
 				return -1
 				);
@@ -614,7 +614,7 @@ int tonga_stop_dpm(struct pp_hwmgr *hwmgr)
 	if (0 == data->pcie_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 		PP_ASSERT_WITH_CODE(
-				(0 == tonga_is_dpm_running(hwmgr)),
+				!tonga_is_dpm_running(hwmgr),
 				"Trying to Disable PCIE DPM when DPM is disabled",
 				return -1
 				);
@@ -630,7 +630,7 @@ int tonga_stop_dpm(struct pp_hwmgr *hwmgr)
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
 	PP_ASSERT_WITH_CODE(
-			(0 == tonga_is_dpm_running(hwmgr)),
+			!tonga_is_dpm_running(hwmgr),
 			"Trying to Disable Voltage CNTL when DPM is disabled",
 			return -1
 			);
@@ -688,8 +688,9 @@ int tonga_dpm_force_state(struct pp_hwmgr *hwmgr, uint32_t n)
 	uint32_t level_mask = 1 << n;
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to force SCLK when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to force SCLK when DPM is disabled",
+			    return -1;);
 	if (0 == data->sclk_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 							     hwmgr->smumgr,
@@ -712,8 +713,9 @@ int tonga_dpm_force_state_mclk(struct pp_hwmgr *hwmgr, uint32_t n)
 	uint32_t level_mask = 1 << n;
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message. */
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Force MCLK when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to Force MCLK when DPM is disabled",
+			    return -1;);
 	if (0 == data->mclk_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 								hwmgr->smumgr,
@@ -735,8 +737,9 @@ int tonga_dpm_force_state_pcie(struct pp_hwmgr *hwmgr, uint32_t n)
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
 	/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-	PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Force PCIE level when DPM is disabled", return -1;);
+	PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+			    "Trying to Force PCIE level when DPM is disabled",
+			    return -1;);
 	if (0 == data->pcie_dpm_key_disabled)
 		return (0 == smum_send_msg_to_smc_with_parameter(
 							     hwmgr->smumgr,
@@ -774,7 +777,7 @@ int tonga_process_firmware_header(struct pp_hwmgr *hwmgr)
 
 	uint32_t tmp;
 	int result;
-	bool error = 0;
+	bool error = false;
 
 	result = tonga_read_smc_sram_dword(hwmgr->smumgr,
 				SMU72_FIRMWARE_HEADER_LOCATION +
@@ -933,11 +936,11 @@ int tonga_init_power_gate_state(struct pp_hwmgr *hwmgr)
 {
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
-	data->uvd_power_gated = 0;
-	data->vce_power_gated = 0;
-	data->samu_power_gated = 0;
-	data->acp_power_gated = 0;
-	data->pg_acp_init = 1;
+	data->uvd_power_gated = false;
+	data->vce_power_gated = false;
+	data->samu_power_gated = false;
+	data->acp_power_gated = false;
+	data->pg_acp_init = true;
 
 	return 0;
 }
@@ -955,7 +958,7 @@ int tonga_check_for_dpm_running(struct pp_hwmgr *hwmgr)
 	 * because we may have test scenarios that need us intentionly disable SCLK/MCLK DPM,
 	 * whereas voltage control is a fundemental change that will not be disabled
 	 */
-	return (0 == tonga_is_dpm_running(hwmgr) ? 0 : 1);
+	return (!tonga_is_dpm_running(hwmgr) ? 0 : 1);
 }
 
 /**
@@ -968,7 +971,7 @@ int tonga_check_for_dpm_stopped(struct pp_hwmgr *hwmgr)
 {
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 
-	if (0 != tonga_is_dpm_running(hwmgr)) {
+	if (tonga_is_dpm_running(hwmgr)) {
 		/* If HW Virtualization is enabled, dpm_table_start will not have a valid value */
 		if (!data->dpm_table_start) {
 			return 1;
@@ -991,7 +994,7 @@ static int tonga_trim_voltage_table(struct pp_hwmgr *hwmgr,
 {
 	uint32_t table_size, i, j;
 	uint16_t vvalue;
-	bool bVoltageFound = 0;
+	bool bVoltageFound = false;
 	pp_atomctrl_voltage_table *table;
 
 	PP_ASSERT_WITH_CODE((NULL != voltage_table), "Voltage Table empty.", return -1;);
@@ -1007,11 +1010,11 @@ static int tonga_trim_voltage_table(struct pp_hwmgr *hwmgr,
 
 	for (i = 0; i < voltage_table->count; i++) {
 		vvalue = voltage_table->entries[i].value;
-		bVoltageFound = 0;
+		bVoltageFound = false;
 
 		for (j = 0; j < table->count; j++) {
 			if (vvalue == table->entries[j].value) {
-				bVoltageFound = 1;
+				bVoltageFound = true;
 				break;
 			}
 		}
@@ -1302,7 +1305,7 @@ static int tonga_populate_smc_mvdd_table(struct pp_hwmgr *hwmgr,
 			table->Smio[count] |=
 				data->mvdd_voltage_table.entries[count].smio_low;
 		}
-		table->SmioMask2 = data->vddci_voltage_table.mask_low;
+		table->SmioMask2 = data->mvdd_voltage_table.mask_low;
 
 		CONVERT_FROM_HOST_TO_SMC_UL(table->MvddLevelCount);
 	}
@@ -1331,7 +1334,6 @@ static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr,
 {
 	uint32_t count;
 	uint8_t index;
-	int result = 0;
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_voltage_lookup_table *vddgfx_lookup_table = pptable_info->vddgfx_lookup_table;
@@ -1378,7 +1380,7 @@ static int tonga_populate_cac_tables(struct pp_hwmgr *hwmgr,
 		}
 	}
 
-	return result;
+	return 0;
 }
 
 
@@ -2042,7 +2044,7 @@ static int tonga_populate_single_memory_level(
 
 	if ((data->mclk_stutter_mode_threshold != 0) &&
 	    (memory_clock <= data->mclk_stutter_mode_threshold) &&
-	    (data->is_uvd_enabled == 0)
+	    (!data->is_uvd_enabled)
 	    && (PHM_READ_FIELD(hwmgr->device, DPG_PIPE_STUTTER_CONTROL, STUTTER_ENABLE) & 0x1)
 	    && (data->display_timing.num_existing_displays <= 2)
 	    && (data->display_timing.num_existing_displays != 0))
@@ -2705,7 +2707,7 @@ static int tonga_reset_single_dpm_table(
 
 	dpm_table->count = count;
 	for (i = 0; i < MAX_REGULAR_DPM_NUMBER; i++) {
-		dpm_table->dpm_levels[i].enabled = 0;
+		dpm_table->dpm_levels[i].enabled = false;
 	}
 
 	return 0;
@@ -2718,7 +2720,7 @@ static void tonga_setup_pcie_table_entry(
 {
 	dpm_table->dpm_levels[index].value = pcie_gen;
 	dpm_table->dpm_levels[index].param1 = pcie_lanes;
-	dpm_table->dpm_levels[index].enabled = 1;
+	dpm_table->dpm_levels[index].enabled = true;
 }
 
 static int tonga_setup_default_pcie_tables(struct pp_hwmgr *hwmgr)
@@ -2828,7 +2830,7 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 				allowed_vdd_sclk_table->entries[i].clk) {
 			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].value =
 				allowed_vdd_sclk_table->entries[i].clk;
-			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; to do */
+			data->dpm_table.sclk_table.dpm_levels[data->dpm_table.sclk_table.count].enabled = true; /*(i==0) ? 1 : 0; to do */
 			data->dpm_table.sclk_table.count++;
 		}
 	}
@@ -2842,32 +2844,11 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 			allowed_vdd_mclk_table->entries[i].clk) {
 			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].value =
 				allowed_vdd_mclk_table->entries[i].clk;
-			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].enabled = 1; /*(i==0) ? 1 : 0; */
+			data->dpm_table.mclk_table.dpm_levels[data->dpm_table.mclk_table.count].enabled = true; /*(i==0) ? 1 : 0; */
 			data->dpm_table.mclk_table.count++;
 		}
 	}
 
-	/* Initialize Vddc DPM table based on allow Vddc values.  And populate corresponding std values. */
-	for (i = 0; i < allowed_vdd_sclk_table->count; i++) {
-		data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddc;
-		/* tonga_hwmgr->dpm_table.VddcTable.dpm_levels[i].param1 = stdVoltageTable->entries[i].Leakage; */
-		/* param1 is for corresponding std voltage */
-		data->dpm_table.vddc_table.dpm_levels[i].enabled = 1;
-	}
-	data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count;
-
-	if (NULL != allowed_vdd_mclk_table) {
-		/* Initialize Vddci DPM table based on allow Mclk values */
-		for (i = 0; i < allowed_vdd_mclk_table->count; i++) {
-			data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddci;
-			data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1;
-			data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].mvdd;
-			data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1;
-		}
-		data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count;
-		data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count;
-	}
-
 	/* setup PCIE gen speed levels*/
 	tonga_setup_default_pcie_tables(hwmgr);
 
@@ -3047,8 +3028,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			VDDC_VRHOT_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_RegulatorHot);
@@ -3061,8 +3042,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	/* ACDC Switch GPIO */
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_AutomaticDCTransition);
@@ -3084,8 +3065,7 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	}
 
 	reg_value = 0;
-	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
+	if ((0 == reg_value) && (atomctrl_get_pp_assign_pin(hwmgr,
 			THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin_assignment))) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_ThermalOutGPIO);
@@ -3156,7 +3136,7 @@ int tonga_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr)
 
 	if (0 == data->sclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-		if (0 != tonga_is_dpm_running(hwmgr))
+		if (tonga_is_dpm_running(hwmgr))
 			printk(KERN_ERR "[ powerplay ] Trying to set Enable Mask when DPM is disabled \n");
 
 		if (0 != data->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -3171,7 +3151,7 @@ int tonga_upload_dpm_level_enable_mask(struct pp_hwmgr *hwmgr)
 
 	if (0 == data->mclk_dpm_key_disabled) {
 		/* Checking if DPM is running.  If we discover hang because of this, we should skip this message.*/
-		if (0 != tonga_is_dpm_running(hwmgr))
+		if (tonga_is_dpm_running(hwmgr))
 			printk(KERN_ERR "[ powerplay ] Trying to set Enable Mask when DPM is disabled \n");
 
 		if (0 != data->dpm_level_enable_mask.mclk_dpm_enable_mask) {
@@ -3282,7 +3262,7 @@ int tonga_initializa_dynamic_state_adjustment_rule_settings(struct pp_hwmgr *hwm
 
 	/* initialize vddc_dep_on_dal_pwrl table */
 	table_size = sizeof(uint32_t) + 4 * sizeof(struct phm_clock_voltage_dependency_record);
-	table_clk_vlt = (struct phm_clock_voltage_dependency_table *)kzalloc(table_size, GFP_KERNEL);
+	table_clk_vlt = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table_clk_vlt) {
 		printk(KERN_ERR "[ powerplay ] Can not allocate space for vddc_dep_on_dal_pwrl! \n");
@@ -3357,9 +3337,9 @@ int tonga_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
 	tonga_hwmgr *data = (tonga_hwmgr *)(hwmgr->backend);
 	int result = 1;
 
-	PP_ASSERT_WITH_CODE (0 == tonga_is_dpm_running(hwmgr),
-		"Trying to Unforce DPM when DPM is disabled. Returning without sending SMC message.",
-							return result);
+	PP_ASSERT_WITH_CODE (!tonga_is_dpm_running(hwmgr),
+			     "Trying to Unforce DPM when DPM is disabled. Returning without sending SMC message.",
+			     return result);
 
 	if (0 == data->pcie_dpm_key_disabled) {
 		PP_ASSERT_WITH_CODE((0 == smum_send_msg_to_smc(
@@ -3763,7 +3743,7 @@ uint8_t tonga_get_memory_modile_index(struct pp_hwmgr *hwmgr)
 
 bool tonga_check_s0_mc_reg_index(uint16_t inReg, uint16_t *outReg)
 {
-	bool result = 1;
+	bool result = true;
 
 	switch (inReg) {
 	case  mmMC_SEQ_RAS_TIMING:
@@ -3847,7 +3827,7 @@ bool tonga_check_s0_mc_reg_index(uint16_t inReg, uint16_t *outReg)
 		break;
 
 	default:
-		result = 0;
+		result = false;
 		break;
 	}
 
@@ -4443,13 +4423,6 @@ int tonga_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int tonga_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -4463,7 +4436,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
 	SMU72_Discrete_DpmTable  *table = NULL;
-	tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	tonga_hwmgr *data;
 	pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	phw_tonga_ulv_parm *ulv;
@@ -4472,7 +4445,13 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((NULL != hwmgr),
 		"Invalid Parameter!", return -1;);
 
-	data->dll_defaule_on = 0;
+	data = kzalloc(sizeof(struct tonga_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
+	data->dll_defaule_on = false;
 	data->sram_end = SMC_RAM_END;
 
 	data->activity_target[0] = PPTONGA_TARGETACTIVITY_DFLT;
@@ -4510,6 +4489,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 				VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) {
@@ -4577,13 +4557,13 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	/* ULV Support*/
 	ulv = &(data->ulv);
-	ulv->ulv_supported = 0;
+	ulv->ulv_supported = false;
 
 	/* Initalize Dynamic State Adjustment Rule Settings*/
 	result = tonga_initializa_dynamic_state_adjustment_rule_settings(hwmgr);
 	if (result)
 		printk(KERN_ERR "[ powerplay ] tonga_initializa_dynamic_state_adjustment_rule_settings failed!\n");
-	data->uvd_enabled = 0;
+	data->uvd_enabled = false;
 
 	table = &(data->smc_state_table);
 
@@ -4591,7 +4571,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	* if ucGPIO_ID=VDDC_PCC_GPIO_PINID in GPIO_LUTable,
 	* Peak Current Control feature is enabled and we should program PCC HW register
 	*/
-	if (0 == atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
+	if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
 		uint32_t temp_reg = cgs_read_ind_register(hwmgr->device,
 										CGS_IND_REG__SMC, ixCNB_PWRMGT_CNTL);
 
@@ -4630,7 +4610,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 		PHM_PlatformCaps_SMU7);
 
-	data->vddc_phase_shed_control = 0;
+	data->vddc_phase_shed_control = false;
 
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 		      PHM_PlatformCaps_UVDPowerGating);
@@ -4649,7 +4629,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	}
 
 	if (0 == result) {
-		data->is_tlu_enabled = 0;
+		data->is_tlu_enabled = false;
 		hwmgr->platform_descriptor.hardwareActivityPerformanceLevels =
 			TONGA_MAX_HARDWARE_POWERLEVELS;
 		hwmgr->platform_descriptor.hardwarePerformanceLevels = 2;
@@ -4659,7 +4639,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -4668,7 +4648,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -5330,9 +5310,8 @@ static int tonga_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->sclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
-		PP_ASSERT_WITH_CODE(
-			0 == tonga_is_dpm_running(hwmgr),
-			"Trying to freeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to freeze SCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5344,8 +5323,8 @@ static int tonga_freeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table &
 		 DPMTABLE_OD_UPDATE_MCLK)) {
-		PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to freeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to freeze MCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5480,7 +5459,6 @@ static	int tonga_trim_single_dpm_states(struct pp_hwmgr *hwmgr,
 
 static int tonga_trim_dpm_states(struct pp_hwmgr *hwmgr, const struct tonga_power_state *hw_state)
 {
-	int result = 0;
 	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
 	uint32_t high_limit_count;
 
@@ -5500,7 +5478,7 @@ static int tonga_trim_dpm_states(struct pp_hwmgr *hwmgr, const struct tonga_powe
 						hw_state->performance_levels[0].memory_clock,
 						hw_state->performance_levels[high_limit_count].memory_clock);
 
-	return result;
+	return 0;
 }
 
 static int tonga_generate_dpm_level_enable_mask(struct pp_hwmgr *hwmgr, const void *input)
@@ -5647,8 +5625,8 @@ static int tonga_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 		(data->need_update_smu7_dpm_table &
 		(DPMTABLE_OD_UPDATE_SCLK + DPMTABLE_UPDATE_SCLK))) {
 
-		PP_ASSERT_WITH_CODE(0 == tonga_is_dpm_running(hwmgr),
-			"Trying to Unfreeze SCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze SCLK DPM when DPM is disabled",
 			);
 		PP_ASSERT_WITH_CODE(
 			 0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -5660,9 +5638,8 @@ static int tonga_unfreeze_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 	if ((0 == data->mclk_dpm_key_disabled) &&
 		(data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK)) {
 
-		PP_ASSERT_WITH_CODE(
-				0 == tonga_is_dpm_running(hwmgr),
-				"Trying to Unfreeze MCLK DPM when DPM is disabled",
+		PP_ASSERT_WITH_CODE(!tonga_is_dpm_running(hwmgr),
+				    "Trying to Unfreeze MCLK DPM when DPM is disabled",
 				);
 		PP_ASSERT_WITH_CODE(
 			 0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -6051,42 +6028,6 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -6194,11 +6135,96 @@ static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
 	return size;
 }
 
+static int tonga_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+	struct tonga_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int tonga_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden_sclk_table =
+			&(data->golden_dpm_table.sclk_table);
+	struct pp_power_state  *ps;
+	struct tonga_power_state  *tonga_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	tonga_ps = cast_phw_tonga_power_state(&ps->hardware);
+
+	tonga_ps->performance_levels[tonga_ps->performance_level_count - 1].engine_clock =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int tonga_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+	struct tonga_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int tonga_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden_mclk_table =
+			&(data->golden_dpm_table.mclk_table);
+	struct pp_power_state  *ps;
+	struct tonga_power_state  *tonga_ps;
+
+	if (value > 20)
+		value = 20;
+
+	ps = hwmgr->request_ps;
+
+	if (ps == NULL)
+		return -EINVAL;
+
+	tonga_ps = cast_phw_tonga_power_state(&ps->hardware);
+
+	tonga_ps->performance_levels[tonga_ps->performance_level_count - 1].memory_clock =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.backend_init = &tonga_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
 	.asic_setup = &tonga_setup_asic_task,
 	.dynamic_state_management_enable = &tonga_enable_dpm_tasks,
+	.dynamic_state_management_disable = &tonga_disable_dpm_tasks,
 	.apply_state_adjust_rules = tonga_apply_state_adjust_rules,
 	.force_dpm_level = &tonga_force_dpm_level,
 	.power_state_set = tonga_set_power_state_tasks,
@@ -6232,22 +6258,16 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.check_states_equal = tonga_check_states_equal,
 	.set_fan_control_mode = tonga_set_fan_control_mode,
 	.get_fan_control_mode = tonga_get_fan_control_mode,
-	.get_pp_table = tonga_get_pp_table,
-	.set_pp_table = tonga_set_pp_table,
 	.force_clock_level = tonga_force_clock_level,
 	.print_clock_levels = tonga_print_clock_levels,
+	.get_sclk_od = tonga_get_sclk_od,
+	.set_sclk_od = tonga_set_sclk_od,
+	.get_mclk_od = tonga_get_mclk_od,
+	.set_mclk_od = tonga_set_mclk_od,
 };
 
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	tonga_hwmgr  *data;
-
-	data = kzalloc (sizeof(tonga_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-	memset(data, 0x00, sizeof(tonga_hwmgr));
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &tonga_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_tonga_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 573cd39fe78d..3961884bfa9b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -352,9 +352,6 @@ struct tonga_hwmgr {
 	bool                           samu_power_gated; /* 1: gated, 0:not gated */
 	bool                           acp_power_gated;  /* 1: gated, 0:not gated */
 	bool                           pg_acp_init;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 typedef struct tonga_hwmgr tonga_hwmgr;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
index 1b44f4e9b8f5..f127198aafc4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
@@ -197,6 +197,22 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Table {
 	ATOM_Tonga_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
 } ATOM_Tonga_SCLK_Dependency_Table;
 
+typedef struct _ATOM_Polaris_SCLK_Dependency_Record {
+	UCHAR  ucVddInd;											/* Base voltage */
+	USHORT usVddcOffset;										/* Offset relative to base voltage */
+	ULONG ulSclk;
+	USHORT usEdcCurrent;
+	UCHAR  ucReliabilityTemperature;
+	UCHAR  ucCKSVOffsetandDisable;			/* Bits 0~6: Voltage offset for CKS, Bit 7: Disable/enable for the SCLK level. */
+	ULONG  ulSclkOffset;
+} ATOM_Polaris_SCLK_Dependency_Record;
+
+typedef struct _ATOM_Polaris_SCLK_Dependency_Table {
+	UCHAR ucRevId;
+	UCHAR ucNumEntries;							/* Number of entries. */
+	ATOM_Polaris_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
+} ATOM_Polaris_SCLK_Dependency_Table;
+
 typedef struct _ATOM_Tonga_PCIE_Record {
 	UCHAR ucPCIEGenSpeed;
 	UCHAR usPCIELaneWidth;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 10e3630ee39d..cfb647f76cbe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -167,8 +167,7 @@ static int get_vddc_lookup_table(
 	table_size = sizeof(uint32_t) +
 		sizeof(phm_ppt_v1_voltage_lookup_record) * max_levels;
 
-	table = (phm_ppt_v1_voltage_lookup_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table)
 		return -ENOMEM;
@@ -302,7 +301,7 @@ static int init_dpm_2_parameters(
 			(((unsigned long)powerplay_table) + le16_to_cpu(powerplay_table->usPPMTableOffset));
 
 		if (0 != powerplay_table->usPPMTableOffset) {
-			if (1 == get_platform_power_management_table(hwmgr, atom_ppm_table)) {
+			if (get_platform_power_management_table(hwmgr, atom_ppm_table) == 0) {
 				phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_EnablePlatformPowerManagement);
 			}
@@ -327,7 +326,7 @@ static int get_valid_clk(
 	table_size = sizeof(uint32_t) +
 		sizeof(uint32_t) * clk_volt_pp_table->count;
 
-	table = (struct phm_clock_array *)kzalloc(table_size, GFP_KERNEL);
+	table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == table)
 		return -ENOMEM;
@@ -377,8 +376,7 @@ static int get_mclk_voltage_dependency_table(
 	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
 		* mclk_dep_table->ucNumEntries;
 
-	mclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	mclk_table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == mclk_table)
 		return -ENOMEM;
@@ -408,41 +406,76 @@ static int get_mclk_voltage_dependency_table(
 static int get_sclk_voltage_dependency_table(
 		struct pp_hwmgr *hwmgr,
 		phm_ppt_v1_clock_voltage_dependency_table **pp_tonga_sclk_dep_table,
-		const ATOM_Tonga_SCLK_Dependency_Table * sclk_dep_table
+		const PPTable_Generic_SubTable_Header *sclk_dep_table
 		)
 {
 	uint32_t table_size, i;
 	phm_ppt_v1_clock_voltage_dependency_table *sclk_table;
 
-	PP_ASSERT_WITH_CODE((0 != sclk_dep_table->ucNumEntries),
-		"Invalid PowerPlay Table!", return -1);
+	if (sclk_dep_table->ucRevId < 1) {
+		const ATOM_Tonga_SCLK_Dependency_Table *tonga_table =
+			    (ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table;
 
-	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
-		* sclk_dep_table->ucNumEntries;
+		PP_ASSERT_WITH_CODE((0 != tonga_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
 
-	sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* tonga_table->ucNumEntries;
 
-	if (NULL == sclk_table)
-		return -ENOMEM;
+		sclk_table = kzalloc(table_size, GFP_KERNEL);
 
-	memset(sclk_table, 0x00, table_size);
-
-	sclk_table->count = (uint32_t)sclk_dep_table->ucNumEntries;
-
-	for (i = 0; i < sclk_dep_table->ucNumEntries; i++) {
-		sclk_table->entries[i].vddInd =
-			sclk_dep_table->entries[i].ucVddInd;
-		sclk_table->entries[i].vdd_offset =
-			sclk_dep_table->entries[i].usVddcOffset;
-		sclk_table->entries[i].clk =
-			sclk_dep_table->entries[i].ulSclk;
-		sclk_table->entries[i].cks_enable =
-			(((sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
-		sclk_table->entries[i].cks_voffset =
-			(sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
-	}
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)tonga_table->ucNumEntries;
+
+		for (i = 0; i < tonga_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				tonga_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				tonga_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				tonga_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((tonga_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(tonga_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+		}
+	} else {
+		const ATOM_Polaris_SCLK_Dependency_Table *polaris_table =
+			    (ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table;
+
+		PP_ASSERT_WITH_CODE((0 != polaris_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
+
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* polaris_table->ucNumEntries;
 
+		sclk_table = kzalloc(table_size, GFP_KERNEL);
+
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)polaris_table->ucNumEntries;
+
+		for (i = 0; i < polaris_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				polaris_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				polaris_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				polaris_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((polaris_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(polaris_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+			sclk_table->entries[i].sclk_offset = polaris_table->entries[i].ulSclkOffset;
+		}
+	}
 	*pp_tonga_sclk_dep_table = sclk_table;
 
 	return 0;
@@ -467,7 +500,7 @@ static int get_pcie_table(
 		table_size = sizeof(uint32_t) +
 			sizeof(phm_ppt_v1_pcie_record) * atom_pcie_table->ucNumEntries;
 
-		pcie_table = (phm_ppt_v1_pcie_table *)kzalloc(table_size, GFP_KERNEL);
+		pcie_table = kzalloc(table_size, GFP_KERNEL);
 
 		if (pcie_table == NULL)
 			return -ENOMEM;
@@ -504,7 +537,7 @@ static int get_pcie_table(
 		table_size = sizeof(uint32_t) +
 			sizeof(phm_ppt_v1_pcie_record) * atom_pcie_table->ucNumEntries;
 
-		pcie_table = (phm_ppt_v1_pcie_table *)kzalloc(table_size, GFP_KERNEL);
+		pcie_table = kzalloc(table_size, GFP_KERNEL);
 
 		if (pcie_table == NULL)
 			return -ENOMEM;
@@ -658,8 +691,7 @@ static int get_mm_clock_voltage_table(
 	table_size = sizeof(uint32_t) +
 		sizeof(phm_ppt_v1_mm_clock_voltage_dependency_record)
 		* mm_dependency_table->ucNumEntries;
-	mm_table = (phm_ppt_v1_mm_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+	mm_table = kzalloc(table_size, GFP_KERNEL);
 
 	if (NULL == mm_table)
 		return -ENOMEM;
@@ -708,8 +740,8 @@ static int init_clock_voltage_dependency(
 	const ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 		(const ATOM_Tonga_MCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usMclkDependencyTableOffset));
-	const ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-		(const ATOM_Tonga_SCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
+	const PPTable_Generic_SubTable_Header *sclk_dep_table =
+		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
 	const ATOM_Tonga_Hard_Limit_Table *pHardLimits =
 		(const ATOM_Tonga_Hard_Limit_Table *)(((unsigned long) powerplay_table) +
@@ -1036,54 +1068,46 @@ int tonga_pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 int tonga_pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
-	int result = 0;
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
-	if (NULL != pp_table_information->vdd_dep_on_sclk)
-		pp_table_information->vdd_dep_on_sclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_sclk);
+	pp_table_information->vdd_dep_on_sclk = NULL;
 
-	if (NULL != pp_table_information->vdd_dep_on_mclk)
-		pp_table_information->vdd_dep_on_mclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_mclk);
+	pp_table_information->vdd_dep_on_mclk = NULL;
 
-	if (NULL != pp_table_information->valid_mclk_values)
-		pp_table_information->valid_mclk_values = NULL;
+	kfree(pp_table_information->valid_mclk_values);
+	pp_table_information->valid_mclk_values = NULL;
 
-	if (NULL != pp_table_information->valid_sclk_values)
-		pp_table_information->valid_sclk_values = NULL;
+	kfree(pp_table_information->valid_sclk_values);
+	pp_table_information->valid_sclk_values = NULL;
 
-	if (NULL != pp_table_information->vddc_lookup_table)
-		pp_table_information->vddc_lookup_table = NULL;
+	kfree(pp_table_information->vddc_lookup_table);
+	pp_table_information->vddc_lookup_table = NULL;
 
-	if (NULL != pp_table_information->vddgfx_lookup_table)
-		pp_table_information->vddgfx_lookup_table = NULL;
+	kfree(pp_table_information->vddgfx_lookup_table);
+	pp_table_information->vddgfx_lookup_table = NULL;
 
-	if (NULL != pp_table_information->mm_dep_table)
-		pp_table_information->mm_dep_table = NULL;
+	kfree(pp_table_information->mm_dep_table);
+	pp_table_information->mm_dep_table = NULL;
 
-	if (NULL != pp_table_information->cac_dtp_table)
-		pp_table_information->cac_dtp_table = NULL;
+	kfree(pp_table_information->cac_dtp_table);
+	pp_table_information->cac_dtp_table = NULL;
 
-	if (NULL != hwmgr->dyn_state.cac_dtp_table)
-		hwmgr->dyn_state.cac_dtp_table = NULL;
+	kfree(hwmgr->dyn_state.cac_dtp_table);
+	hwmgr->dyn_state.cac_dtp_table = NULL;
 
-	if (NULL != pp_table_information->ppm_parameter_table)
-		pp_table_information->ppm_parameter_table = NULL;
+	kfree(pp_table_information->ppm_parameter_table);
+	pp_table_information->ppm_parameter_table = NULL;
 
-	if (NULL != pp_table_information->pcie_table)
-		pp_table_information->pcie_table = NULL;
+	kfree(pp_table_information->pcie_table);
+	pp_table_information->pcie_table = NULL;
 
-	if (NULL != hwmgr->pptable) {
-		kfree(hwmgr->pptable);
-		hwmgr->pptable = NULL;
-	}
+	kfree(hwmgr->pptable);
+	hwmgr->pptable = NULL;
 
-	return result;
+	return 0;
 }
 
 const struct pp_table_func tonga_pptable_funcs = {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index 50b367d44307..b764c8c05ec8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -132,6 +132,7 @@ struct amd_pp_init {
 	uint32_t chip_family;
 	uint32_t chip_id;
 	uint32_t rev_id;
+	bool powercontainment_enabled;
 };
 enum amd_pp_display_config_type{
 	AMD_PP_DisplayConfigType_None = 0,
@@ -342,6 +343,10 @@ struct amd_powerplay_funcs {
 	int (*set_pp_table)(void *handle, const char *buf, size_t size);
 	int (*force_clock_level)(void *handle, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(void *handle);
+	int (*set_sclk_od)(void *handle, uint32_t value);
+	int (*get_mclk_od)(void *handle);
+	int (*set_mclk_od)(void *handle, uint32_t value);
 };
 
 struct amd_powerplay {
@@ -355,6 +360,8 @@ int amd_powerplay_init(struct amd_pp_init *pp_init,
 
 int amd_powerplay_fini(void *handle);
 
+int amd_powerplay_reset(void *handle);
+
 int amd_powerplay_display_configuration_change(void *handle,
 		const struct amd_pp_display_configuration *input);
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 56f712c7d07a..962cb5385951 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -340,6 +340,7 @@ extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
 extern int phm_powerdown_uvd(struct pp_hwmgr *hwmgr);
 extern int phm_setup_asic(struct pp_hwmgr *hwmgr);
 extern int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr);
+extern int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr);
 extern void phm_init_dynamic_caps(struct pp_hwmgr *hwmgr);
 extern bool phm_is_hw_access_blocked(struct pp_hwmgr *hwmgr);
 extern int phm_block_hw_access(struct pp_hwmgr *hwmgr, bool block);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 28f571449495..bf0d2accf7bf 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -278,6 +278,8 @@ struct pp_hwmgr_func {
 
 	int (*dynamic_state_management_enable)(
 						struct pp_hwmgr *hw_mgr);
+	int (*dynamic_state_management_disable)(
+						struct pp_hwmgr *hw_mgr);
 
 	int (*patch_boot_state)(struct pp_hwmgr *hwmgr,
 				     struct pp_hw_power_state *hw_ps);
@@ -333,11 +335,13 @@ struct pp_hwmgr_func {
 	int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
 	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
-	int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
-	int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
 	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
 	int (*enable_per_cu_power_gating)(struct pp_hwmgr *hwmgr, bool enable);
+	int (*get_sclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_sclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
+	int (*get_mclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
 };
 
 struct pp_table_func {
@@ -411,6 +415,8 @@ struct phm_cac_tdp_table {
 	uint8_t  ucVr_I2C_Line;
 	uint8_t  ucPlx_I2C_address;
 	uint8_t  ucPlx_I2C_Line;
+	uint32_t usBoostPowerLimit;
+	uint8_t  ucCKS_LDO_REFSEL;
 };
 
 struct phm_ppm_table {
@@ -578,6 +584,7 @@ struct pp_hwmgr {
 	struct pp_smumgr *smumgr;
 	const void *soft_pp_table;
 	uint32_t soft_pp_table_size;
+	void *hardcode_pp_table;
 	bool need_pp_table_upload;
 	enum amd_dpm_forced_level dpm_level;
 	bool block_hw_access;
@@ -607,6 +614,7 @@ struct pp_hwmgr {
 	uint32_t num_ps;
 	struct pp_thermal_controller_info thermal_controller;
 	bool fan_ctrl_is_in_default_mode;
+	bool powercontainment_enabled;
 	uint32_t fan_ctrl_default_mode;
 	uint32_t tmin;
 	struct phm_microcode_version_info microcode_version_info;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index 0c6a413eaa5b..b8f4b73c322e 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -27,6 +27,7 @@
 
 #pragma pack(push, 1)
 
+#define PPSMC_MSG_SetGBDroopSettings          ((uint16_t) 0x305)
 
 #define PPSMC_SWSTATE_FLAG_DC                           0x01
 #define PPSMC_SWSTATE_FLAG_UVD                          0x02
@@ -391,6 +392,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_SetGpuPllDfsForSclk         ((uint16_t) 0x300)
 #define PPSMC_MSG_Didt_Block_Function		  ((uint16_t) 0x301)
 
+#define PPSMC_MSG_SetVBITimeout               ((uint16_t) 0x306)
+
 #define PPSMC_MSG_SecureSRBMWrite             ((uint16_t) 0x600)
 #define PPSMC_MSG_SecureSRBMRead              ((uint16_t) 0x601)
 #define PPSMC_MSG_SetAddress                  ((uint16_t) 0x800)
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
index 3bd5e69b9045..3df5de2cdab0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
 extern int acpi_pcie_perf_request(void *device,
 						uint8_t perf_req,
 						bool advertise);
+extern bool acpi_atcs_notify_pcie_device_ready(void *device);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
index 1a12d85b8e97..fd10a9fa843d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
@@ -34,6 +34,30 @@
 #define SMU__NUM_LCLK_DPM_LEVELS 8
 #define SMU__NUM_PCIE_DPM_LEVELS 8
 
+#define EXP_M1  35
+#define EXP_M2  92821
+#define EXP_B   66629747
+
+#define EXP_M1_1  365
+#define EXP_M2_1  658700
+#define EXP_B_1   305506134
+
+#define EXP_M1_2  189
+#define EXP_M2_2  379692
+#define EXP_B_2   194609469
+
+#define EXP_M1_3  99
+#define EXP_M2_3  217915
+#define EXP_B_3   122255994
+
+#define EXP_M1_4  51
+#define EXP_M2_4  122643
+#define EXP_B_4   74893384
+
+#define EXP_M1_5  423
+#define EXP_M2_5  1103326
+#define EXP_B_5   728122621
+
 enum SID_OPTION {
 	SID_OPTION_HI,
 	SID_OPTION_LO,
@@ -548,20 +572,20 @@ struct SMU74_Firmware_Header {
 	uint32_t CacConfigTable;
 	uint32_t CacStatusTable;
 
-
 	uint32_t mcRegisterTable;
 
-
 	uint32_t mcArbDramTimingTable;
 
-
-
-
 	uint32_t PmFuseTable;
 	uint32_t Globals;
 	uint32_t ClockStretcherTable;
 	uint32_t VftTable;
-	uint32_t Reserved[21];
+	uint32_t Reserved1;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[16];
 	uint32_t Signature;
 };
 
@@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */
 struct SMU_ClockStretcherDataTableEntry {
 	uint8_t minVID;
 	uint8_t maxVID;
-
-
 	uint16_t setting;
 };
 typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
@@ -769,6 +791,43 @@ struct VFT_TABLE_t {
 typedef struct VFT_TABLE_t VFT_TABLE_t;
 
 
+/* Total margin, root mean square of Fmax + DC + Platform */
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index 0dfe82336dc7..899d6d8108c2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo {
 
 typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo;
 
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	uint32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
 struct SMU74_Discrete_DpmTable {
 
 	SMU74_PIDController                  GraphicsPIDController;
@@ -258,7 +268,15 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             ThermOutPolarity;
 	uint8_t                             ThermOutMode;
 	uint8_t                             BootPhases;
-	uint32_t                            Reserved[4];
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             LdoRefSel;
+	uint8_t                             Reserved1[2];
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved[1];
 
 	SMU74_Discrete_GraphicsLevel        GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS];
 	SMU74_Discrete_MemoryLevel          MemoryACPILevel;
@@ -347,6 +365,8 @@ struct SMU74_Discrete_DpmTable {
 
 	uint32_t                            CurrSclkPllRange;
 	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
 };
 
 typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable;
@@ -550,16 +570,6 @@ struct SMU7_AcpiScoreboard {
 
 typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
 
-struct SMU_QuadraticCoeffs {
-	int32_t m1;
-	uint32_t b;
-
-	int16_t m2;
-	uint8_t m1_shift;
-	uint8_t m2_shift;
-};
-typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
-
 struct SMU74_Discrete_PmFuses {
 	uint8_t BapmVddCVidHiSidd[8];
 	uint8_t BapmVddCVidLoSidd[8];
@@ -821,6 +831,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
 #define DB_PCC_SHIFT 26 
 #define DB_EDC_SHIFT 27
 
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+
 #pragma pack(pop)
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index fc9e3d1dd409..3c235f0177cd 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -131,6 +131,12 @@ extern int smu_free_memory(void *device, void *handle);
 	smum_wait_on_indirect_register(smumgr,				\
 				mm##port##_INDEX, index, value, mask)
 
+#define SMUM_WAIT_INDIRECT_REGISTER(smumgr, port, reg, value, mask)    \
+	    SMUM_WAIT_INDIRECT_REGISTER_GIVEN_INDEX(smumgr, port, ix##reg, value, mask)
+
+#define SMUM_WAIT_INDIRECT_FIELD(smumgr, port, reg, field, fieldval)                          \
+	    SMUM_WAIT_INDIRECT_REGISTER(smumgr, port, reg, (fieldval) << SMUM_FIELD_SHIFT(reg, field), \
+			            SMUM_FIELD_MASK(reg, field) )
 
 #define SMUM_WAIT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr,         \
 							index, value, mask) \
@@ -158,6 +164,10 @@ extern int smu_free_memory(void *device, void *handle);
 		(SMUM_FIELD_MASK(reg, field) & ((field_val) <<                 \
 			SMUM_FIELD_SHIFT(reg, field))))
 
+#define SMUM_READ_INDIRECT_FIELD(device, port, reg, field) \
+	    SMUM_GET_FIELD(cgs_read_ind_register(device, port, ix##reg), \
+			   reg, field)
+
 #define SMUM_WAIT_VFPF_INDIRECT_REGISTER_GIVEN_INDEX(smumgr,		\
 				port, index, value, mask)		\
 	smum_wait_on_indirect_register(smumgr,				\
@@ -191,6 +201,13 @@ extern int smu_free_memory(void *device, void *handle);
 			SMUM_SET_FIELD(cgs_read_ind_register(device, port, ix##reg), \
 			reg, field, fieldval))
 
+
+#define SMUM_WRITE_INDIRECT_FIELD(device, port, reg, field, fieldval)    		\
+		cgs_write_ind_register(device, port, ix##reg, 				\
+			SMUM_SET_FIELD(cgs_read_ind_register(device, port, ix##reg), 	\
+				       reg, field, fieldval))
+
+
 #define SMUM_WAIT_VFPF_INDIRECT_FIELD(smumgr, port, reg, field, fieldval) \
 	SMUM_WAIT_VFPF_INDIRECT_REGISTER(smumgr, port, reg,		\
 		(fieldval) << SMUM_FIELD_SHIFT(reg, field),		\
@@ -200,4 +217,16 @@ extern int smu_free_memory(void *device, void *handle);
 	SMUM_WAIT_VFPF_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg,	\
 		(fieldval) << SMUM_FIELD_SHIFT(reg, field),		\
 		SMUM_FIELD_MASK(reg, field))
+
+#define SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr, port, index, value, mask)    \
+	smum_wait_for_indirect_register_unequal(smumgr,			\
+		mm##port##_INDEX, index, value, mask)
+
+#define SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg, value, mask)    \
+	    SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL_GIVEN_INDEX(smumgr, port, ix##reg, value, mask)
+
+#define SMUM_WAIT_INDIRECT_FIELD_UNEQUAL(smumgr, port, reg, field, fieldval)                          \
+	    SMUM_WAIT_INDIRECT_REGISTER_UNEQUAL(smumgr, port, reg, (fieldval) << SMUM_FIELD_SHIFT(reg, field), \
+			            SMUM_FIELD_MASK(reg, field) )
+
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index 673a75c74e18..8e52a2e82db5 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -1006,10 +1006,16 @@ static int fiji_smu_init(struct pp_smumgr *smumgr)
 
 static int fiji_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index de618ead9db8..5dba7c509710 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,19 +52,18 @@
 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = {
 	/*  Min      pcie   DeepSleep Activity  CgSpll      CgSpll    CcPwr  CcPwr  Sclk         Enabled      Enabled                       Voltage    Power */
 	/* Voltage, DpmLevel, DivId,  Level,  FuncCntl3,  FuncCntl4,  DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */
-	{ 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } }
+	{ 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } },
+	{ 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } },
+	{ 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } },
+	{ 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }
 };
 
 static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 =
-	{0x50140000, 0x50140000, 0x00320000, 0x00, 0x00,
-	 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00};
+	{0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00};
 
 /**
 * Set the address for reading/writing the SMC SRAM space.
@@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	&& (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C)));
 }
 
+static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4));
+	efuse &= 0x00000001;
+	if (efuse)
+		return true;
+
+	return false;
+}
+
 /**
 * Send a message to the SMC, and wait for its response.
 *
@@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 */
 int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
+	int ret;
+
 	if (!polaris10_is_smc_ram_running(smumgr))
 		return -1;
 
+
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Previous Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
 
+	if (ret != 1)
+		printk("\n failed to send pre message %x ret is %d \n",  msg, ret);
 
 	cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg);
 
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
+
+	if (ret != 1)
+		printk("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
 }
@@ -469,6 +486,7 @@ int polaris10_smu_fini(struct pp_smumgr *smumgr)
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
@@ -952,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr)
 		(cgs_handle_t)smu_data->smu_buffer.handle);
 		return -1;);
 
+	if (polaris10_is_hw_avfs_present(smumgr))
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT;
+	else
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index c483baf6b4fb..7723473e51a0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "pp_instance.h"
 #include "smumgr.h"
 #include "cgs_common.h"
@@ -52,10 +53,10 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	handle->smu_mgr = smumgr;
 
 	switch (smumgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_smum_init(smumgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (smumgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_smum_init(smumgr);
@@ -81,6 +82,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 
 int smum_fini(struct pp_smumgr *smumgr)
 {
+	kfree(smumgr->device);
 	kfree(smumgr);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 32820b680d88..f42c536b3af1 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -328,10 +328,17 @@ int tonga_write_smc_sram_dword(struct pp_smumgr *smumgr,
 
 static int tonga_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct tonga_smumgr *priv = (struct tonga_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->smu_buffer.handle);
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend != NULL) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
@@ -472,7 +479,6 @@ static int tonga_request_smu_reload_fw(struct pp_smumgr *smumgr)
 	struct tonga_smumgr *tonga_smu =
 		(struct tonga_smumgr *)(smumgr->backend);
 	uint16_t fw_to_load;
-	int result = 0;
 	struct SMU_DRAMData_TOC *toc;
 	/**
 	 * First time this gets called during SmuMgr init,
@@ -556,7 +562,7 @@ static int tonga_request_smu_reload_fw(struct pp_smumgr *smumgr)
 		smumgr, PPSMC_MSG_LoadUcodes, fw_to_load),
 		"Fail to Request SMU Load uCode", return 0);
 
-	return result;
+	return 0;
 }
 
 static int tonga_request_smu_load_specific_fw(struct pp_smumgr *smumgr,
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index c89dc777768f..b961a1c6caf3 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -26,7 +26,7 @@ TRACE_EVENT(amd_sched_job,
 	    TP_fast_assign(
 			   __entry->entity = sched_job->s_entity;
 			   __entry->sched_job = sched_job;
-			   __entry->fence = &sched_job->s_fence->base;
+			   __entry->fence = &sched_job->s_fence->finished;
 			   __entry->name = sched_job->sched->name;
 			   __entry->job_count = kfifo_len(
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
@@ -46,7 +46,7 @@ TRACE_EVENT(amd_sched_process_job,
 		    ),
 
 	    TP_fast_assign(
-		    __entry->fence = &fence->base;
+		    __entry->fence = &fence->finished;
 		    ),
 	    TP_printk("fence=%p signaled", __entry->fence)
 );
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index c16248cee779..ef312bb75fda 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -32,6 +32,7 @@
 
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
 
 struct kmem_cache *sched_fence_slab;
 atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
@@ -140,7 +141,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 		return r;
 
 	atomic_set(&entity->fence_seq, 0);
-	entity->fence_context = fence_context_alloc(1);
+	entity->fence_context = fence_context_alloc(2);
 
 	return 0;
 }
@@ -251,17 +252,21 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 
 	s_fence = to_amd_sched_fence(fence);
 	if (s_fence && s_fence->sched == sched) {
-		/* Fence is from the same scheduler */
-		if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
-			/* Ignore it when it is already scheduled */
-			fence_put(entity->dependency);
-			return false;
-		}
 
-		/* Wait for fence to be scheduled */
-		entity->cb.func = amd_sched_entity_clear_dep;
-		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
-		return true;
+		/*
+		 * Fence is from the same scheduler, only need to wait for
+		 * it to be scheduled
+		 */
+		fence = fence_get(&s_fence->scheduled);
+		fence_put(entity->dependency);
+		entity->dependency = fence;
+		if (!fence_add_callback(fence, &entity->cb,
+					amd_sched_entity_clear_dep))
+			return true;
+
+		/* Ignore it when it is already scheduled */
+		fence_put(fence);
+		return false;
 	}
 
 	if (!fence_add_callback(entity->dependency, &entity->cb,
@@ -319,46 +324,114 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 	return added;
 }
 
-static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) {
-	struct amd_sched_job *job = container_of(cb, struct amd_sched_job, cb_free_job);
-	schedule_work(&job->work_free_job);
-}
-
 /* job_finish is called after hw fence signaled, and
  * the job had already been deleted from ring_mirror_list
  */
-void amd_sched_job_finish(struct amd_sched_job *s_job)
+static void amd_sched_job_finish(struct work_struct *work)
 {
-	struct amd_sched_job *next;
+	struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
+						   finish_work);
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	/* remove job from ring_mirror_list */
+	spin_lock(&sched->job_list_lock);
+	list_del_init(&s_job->node);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
-		if (cancel_delayed_work(&s_job->work_tdr))
-			amd_sched_job_put(s_job);
+		struct amd_sched_job *next;
+
+		spin_unlock(&sched->job_list_lock);
+		cancel_delayed_work_sync(&s_job->work_tdr);
+		spin_lock(&sched->job_list_lock);
 
 		/* queue TDR for next job */
 		next = list_first_entry_or_null(&sched->ring_mirror_list,
 						struct amd_sched_job, node);
 
-		if (next) {
-			INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback);
-			amd_sched_job_get(next);
+		if (next)
 			schedule_delayed_work(&next->work_tdr, sched->timeout);
-		}
 	}
+	spin_unlock(&sched->job_list_lock);
+	sched->ops->free_job(s_job);
 }
 
-void amd_sched_job_begin(struct amd_sched_job *s_job)
+static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
+						 finish_cb);
+	schedule_work(&job->finish_work);
+}
+
+static void amd_sched_job_begin(struct amd_sched_job *s_job)
 {
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	spin_lock(&sched->job_list_lock);
+	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-		list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job)
-	{
-		INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback);
-		amd_sched_job_get(s_job);
+	    list_first_entry_or_null(&sched->ring_mirror_list,
+				     struct amd_sched_job, node) == s_job)
+		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+	spin_unlock(&sched->job_list_lock);
+}
+
+static void amd_sched_job_timedout(struct work_struct *work)
+{
+	struct amd_sched_job *job = container_of(work, struct amd_sched_job,
+						 work_tdr.work);
+
+	job->sched->ops->timedout_job(job);
+}
+
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job;
+
+	spin_lock(&sched->job_list_lock);
+	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+		if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
+			fence_put(s_job->s_fence->parent);
+			s_job->s_fence->parent = NULL;
+		}
+	}
+	atomic_set(&sched->hw_rq_count, 0);
+	spin_unlock(&sched->job_list_lock);
+}
+
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job, *tmp;
+	int r;
+
+	spin_lock(&sched->job_list_lock);
+	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
+					 struct amd_sched_job, node);
+	if (s_job)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+
+	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
+		struct amd_sched_fence *s_fence = s_job->s_fence;
+		struct fence *fence;
+
+		spin_unlock(&sched->job_list_lock);
+		fence = sched->ops->run_job(s_job);
+		atomic_inc(&sched->hw_rq_count);
+		if (fence) {
+			s_fence->parent = fence_get(fence);
+			r = fence_add_callback(fence, &s_fence->cb,
+					       amd_sched_process_job);
+			if (r == -ENOENT)
+				amd_sched_process_job(fence, &s_fence->cb);
+			else if (r)
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
+			fence_put(fence);
+		} else {
+			DRM_ERROR("Failed to run job!\n");
+			amd_sched_process_job(NULL, &s_fence->cb);
+		}
+		spin_lock(&sched->job_list_lock);
 	}
+	spin_unlock(&sched->job_list_lock);
 }
 
 /**
@@ -372,36 +445,29 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
 	struct amd_sched_entity *entity = sched_job->s_entity;
 
-	sched_job->use_sched = 1;
-	fence_add_callback(&sched_job->s_fence->base,
-					&sched_job->cb_free_job, amd_sched_free_job);
 	trace_amd_sched_job(sched_job);
+	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
+			   amd_sched_job_finish_cb);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
 }
 
 /* init a sched_job with basic field */
 int amd_sched_job_init(struct amd_sched_job *job,
-						struct amd_gpu_scheduler *sched,
-						struct amd_sched_entity *entity,
-						void (*timeout_cb)(struct work_struct *work),
-						void (*free_cb)(struct kref *refcount),
-						void *owner, struct fence **fence)
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner)
 {
-	INIT_LIST_HEAD(&job->node);
-	kref_init(&job->refcount);
 	job->sched = sched;
 	job->s_entity = entity;
 	job->s_fence = amd_sched_fence_create(entity, owner);
 	if (!job->s_fence)
 		return -ENOMEM;
 
-	job->s_fence->s_job = job;
-	job->timeout_callback = timeout_cb;
-	job->free_callback = free_cb;
+	INIT_WORK(&job->finish_work, amd_sched_job_finish);
+	INIT_LIST_HEAD(&job->node);
+	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
 
-	if (fence)
-		*fence = &job->s_fence->base;
 	return 0;
 }
 
@@ -450,23 +516,25 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
-
-	/* remove job from ring_mirror_list */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_del_init(&s_fence->s_job->node);
-	sched->ops->finish_job(s_fence->s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	amd_sched_fence_signal(s_fence);
+	amd_sched_fence_finished(s_fence);
 
 	trace_amd_sched_process_job(s_fence);
-	fence_put(&s_fence->base);
+	fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
+{
+	if (kthread_should_park()) {
+		kthread_parkme();
+		return true;
+	}
+
+	return false;
+}
+
 static int amd_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
@@ -476,14 +544,15 @@ static int amd_sched_main(void *param)
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
-		struct amd_sched_entity *entity;
+		struct amd_sched_entity *entity = NULL;
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
-			(entity = amd_sched_select_entity(sched)) ||
-			kthread_should_stop());
+					 (!amd_sched_blocked(sched) &&
+					  (entity = amd_sched_select_entity(sched))) ||
+					 kthread_should_stop());
 
 		if (!entity)
 			continue;
@@ -495,16 +564,19 @@ static int amd_sched_main(void *param)
 		s_fence = sched_job->s_fence;
 
 		atomic_inc(&sched->hw_rq_count);
-		amd_sched_job_pre_schedule(sched, sched_job);
+		amd_sched_job_begin(sched_job);
+
 		fence = sched->ops->run_job(sched_job);
 		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
+			s_fence->parent = fence_get(fence);
 			r = fence_add_callback(fence, &s_fence->cb,
 					       amd_sched_process_job);
 			if (r == -ENOENT)
 				amd_sched_process_job(fence, &s_fence->cb);
 			else if (r)
-				DRM_ERROR("fence add callback failed (%d)\n", r);
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
 			fence_put(fence);
 		} else {
 			DRM_ERROR("Failed to run job!\n");
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 070095a9433c..7cbbbfb502ef 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
 #include <linux/kfifo.h>
 #include <linux/fence.h>
 
-#define AMD_SCHED_FENCE_SCHEDULED_BIT	FENCE_FLAG_USER_BITS
-
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
@@ -68,36 +66,34 @@ struct amd_sched_rq {
 };
 
 struct amd_sched_fence {
-	struct fence                    base;
+	struct fence                    scheduled;
+	struct fence                    finished;
 	struct fence_cb                 cb;
-	struct list_head		scheduled_cb;
+	struct fence                    *parent;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
-	struct amd_sched_job	*s_job;
 };
 
 struct amd_sched_job {
-	struct kref refcount;
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
 	struct amd_sched_fence          *s_fence;
-	bool	use_sched;	/* true if the job goes to scheduler */
-	struct fence_cb                cb_free_job;
-	struct work_struct             work_free_job;
-	struct list_head			   node;
-	struct delayed_work work_tdr;
-	void (*timeout_callback) (struct work_struct *work);
-	void (*free_callback)(struct kref *refcount);
+	struct fence_cb			finish_cb;
+	struct work_struct		finish_work;
+	struct list_head		node;
+	struct delayed_work		work_tdr;
 };
 
-extern const struct fence_ops amd_sched_fence_ops;
+extern const struct fence_ops amd_sched_fence_ops_scheduled;
+extern const struct fence_ops amd_sched_fence_ops_finished;
 static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 {
-	struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base);
+	if (f->ops == &amd_sched_fence_ops_scheduled)
+		return container_of(f, struct amd_sched_fence, scheduled);
 
-	if (__f->base.ops == &amd_sched_fence_ops)
-		return __f;
+	if (f->ops == &amd_sched_fence_ops_finished)
+		return container_of(f, struct amd_sched_fence, finished);
 
 	return NULL;
 }
@@ -109,8 +105,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 struct amd_sched_backend_ops {
 	struct fence *(*dependency)(struct amd_sched_job *sched_job);
 	struct fence *(*run_job)(struct amd_sched_job *sched_job);
-	void (*begin_job)(struct amd_sched_job *sched_job);
-	void (*finish_job)(struct amd_sched_job *sched_job);
+	void (*timedout_job)(struct amd_sched_job *sched_job);
+	void (*free_job)(struct amd_sched_job *sched_job);
 };
 
 enum amd_sched_priority {
@@ -152,25 +148,11 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 struct amd_sched_fence *amd_sched_fence_create(
 	struct amd_sched_entity *s_entity, void *owner);
 void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
-void amd_sched_fence_signal(struct amd_sched_fence *fence);
+void amd_sched_fence_finished(struct amd_sched_fence *fence);
 int amd_sched_job_init(struct amd_sched_job *job,
-					struct amd_gpu_scheduler *sched,
-					struct amd_sched_entity *entity,
-					void (*timeout_cb)(struct work_struct *work),
-					void (*free_cb)(struct kref* refcount),
-					void *owner, struct fence **fence);
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-								struct amd_sched_job *s_job);
-void amd_sched_job_finish(struct amd_sched_job *s_job);
-void amd_sched_job_begin(struct amd_sched_job *s_job);
-static inline void amd_sched_job_get(struct amd_sched_job *job) {
-	if (job)
-		kref_get(&job->refcount);
-}
-
-static inline void amd_sched_job_put(struct amd_sched_job *job) {
-	if (job)
-		kref_put(&job->refcount, job->free_callback);
-}
-
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 #endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 2a732c490375..6b63beaf7574 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,7 +27,8 @@
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
 
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
+struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+					       void *owner)
 {
 	struct amd_sched_fence *fence = NULL;
 	unsigned seq;
@@ -36,46 +37,37 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
 	if (fence == NULL)
 		return NULL;
 
-	INIT_LIST_HEAD(&fence->scheduled_cb);
 	fence->owner = owner;
-	fence->sched = s_entity->sched;
+	fence->sched = entity->sched;
 	spin_lock_init(&fence->lock);
 
-	seq = atomic_inc_return(&s_entity->fence_seq);
-	fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
-		   s_entity->fence_context, seq);
+	seq = atomic_inc_return(&entity->fence_seq);
+	fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
+		   &fence->lock, entity->fence_context, seq);
+	fence_init(&fence->finished, &amd_sched_fence_ops_finished,
+		   &fence->lock, entity->fence_context + 1, seq);
 
 	return fence;
 }
 
-void amd_sched_fence_signal(struct amd_sched_fence *fence)
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
 {
-	int ret = fence_signal(&fence->base);
+	int ret = fence_signal(&fence->scheduled);
+
 	if (!ret)
-		FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		FENCE_TRACE(&fence->scheduled, "signaled from irq context\n");
 	else
-		FENCE_TRACE(&fence->base, "was already signaled\n");
-}
-
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-				struct amd_sched_job *s_job)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_add_tail(&s_job->node, &sched->ring_mirror_list);
-	sched->ops->begin_job(s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		FENCE_TRACE(&fence->scheduled, "was already signaled\n");
 }
 
-void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+void amd_sched_fence_finished(struct amd_sched_fence *fence)
 {
-	struct fence_cb *cur, *tmp;
+	int ret = fence_signal(&fence->finished);
 
-	set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
-	list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
-		list_del_init(&cur->node);
-		cur->func(&s_fence->base, cur);
-	}
+	if (!ret)
+		FENCE_TRACE(&fence->finished, "signaled from irq context\n");
+	else
+		FENCE_TRACE(&fence->finished, "was already signaled\n");
 }
 
 static const char *amd_sched_fence_get_driver_name(struct fence *fence)
@@ -105,6 +97,8 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
 {
 	struct fence *f = container_of(rcu, struct fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
@@ -116,16 +110,41 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amd_sched_fence_release(struct fence *f)
+static void amd_sched_fence_release_scheduled(struct fence *f)
 {
-	call_rcu(&f->rcu, amd_sched_fence_free);
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	call_rcu(&fence->finished.rcu, amd_sched_fence_free);
 }
 
-const struct fence_ops amd_sched_fence_ops = {
+/**
+ * amd_sched_fence_release_scheduled - drop extra reference
+ *
+ * @f: fence
+ *
+ * Drop the extra reference from the scheduled fence to the base fence.
+ */
+static void amd_sched_fence_release_finished(struct fence *f)
+{
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(&fence->scheduled);
+}
+
+const struct fence_ops amd_sched_fence_ops_scheduled = {
+	.get_driver_name = amd_sched_fence_get_driver_name,
+	.get_timeline_name = amd_sched_fence_get_timeline_name,
+	.enable_signaling = amd_sched_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = amd_sched_fence_release_scheduled,
+};
+
+const struct fence_ops amd_sched_fence_ops_finished = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = fence_default_wait,
-	.release = amd_sched_fence_release,
+	.release = amd_sched_fence_release_finished,
 };
diff --git a/drivers/gpu/drm/arc/Kconfig b/drivers/gpu/drm/arc/Kconfig
index f9a13b658fea..f47d88ba4fa5 100644
--- a/drivers/gpu/drm/arc/Kconfig
+++ b/drivers/gpu/drm/arc/Kconfig
@@ -2,7 +2,6 @@ config DRM_ARCPGU
 	tristate "ARC PGU"
 	depends on DRM && OF
 	select DRM_KMS_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_HELPER
 	help
 	  Choose this option if you have an ARC PGU controller.
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index ccbdadb108dc..6d4ff34737cb 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -28,8 +28,7 @@ static void arcpgu_fb_output_poll_changed(struct drm_device *dev)
 {
 	struct arcpgu_drm_private *arcpgu = dev->dev_private;
 
-	if (arcpgu->fbdev)
-		drm_fbdev_cma_hotplug_event(arcpgu->fbdev);
+	drm_fbdev_cma_hotplug_event(arcpgu->fbdev);
 }
 
 static struct drm_mode_config_funcs arcpgu_drm_modecfg_funcs = {
@@ -49,7 +48,7 @@ static void arcpgu_setup_mode_config(struct drm_device *drm)
 	drm->mode_config.funcs = &arcpgu_drm_modecfg_funcs;
 }
 
-int arcpgu_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+static int arcpgu_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	int ret;
 
@@ -104,10 +103,8 @@ static int arcpgu_load(struct drm_device *drm)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	arcpgu->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(arcpgu->regs)) {
-		dev_err(drm->dev, "Could not remap IO mem\n");
+	if (IS_ERR(arcpgu->regs))
 		return PTR_ERR(arcpgu->regs);
-	}
 
 	dev_info(drm->dev, "arc_pgu ID: 0x%x\n",
 		 arc_pgu_read(arcpgu, ARCPGU_REG_ID));
@@ -127,10 +124,11 @@ static int arcpgu_load(struct drm_device *drm)
 	encoder_node = of_parse_phandle(drm->dev->of_node, "encoder-slave", 0);
 	if (encoder_node) {
 		ret = arcpgu_drm_hdmi_init(drm, encoder_node);
+		of_node_put(encoder_node);
 		if (ret < 0)
 			return ret;
 	} else {
-		ret = arcpgu_drm_sim_init(drm, 0);
+		ret = arcpgu_drm_sim_init(drm, NULL);
 		if (ret < 0)
 			return ret;
 	}
@@ -151,7 +149,7 @@ static int arcpgu_load(struct drm_device *drm)
 	return 0;
 }
 
-int arcpgu_unload(struct drm_device *drm)
+static int arcpgu_unload(struct drm_device *drm)
 {
 	struct arcpgu_drm_private *arcpgu = drm->dev_private;
 
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
index 1b2906568a48..9a18e1bd57b4 100644
--- a/drivers/gpu/drm/arm/Kconfig
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -9,7 +9,6 @@ config DRM_HDLCD
 	depends on COMMON_CLK
 	select DRM_ARM
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	help
 	  Choose this option if you have an ARM High Definition Colour LCD
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 74279be20b75..d83b46a30327 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -102,8 +102,7 @@ static void hdlcd_fb_output_poll_changed(struct drm_device *drm)
 {
 	struct hdlcd_drm_private *hdlcd = drm->dev_private;
 
-	if (hdlcd->fbdev)
-		drm_fbdev_cma_hotplug_event(hdlcd->fbdev);
+	drm_fbdev_cma_hotplug_event(hdlcd->fbdev);
 }
 
 static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index e5b44e92f8cf..82171d223f2d 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -257,6 +257,7 @@ static int malidp_bind(struct device *dev)
 {
 	struct resource *res;
 	struct drm_device *drm;
+	struct device_node *ep;
 	struct malidp_drm *malidp;
 	struct malidp_hw_device *hwdev;
 	struct platform_device *pdev = to_platform_device(dev);
@@ -284,10 +285,8 @@ static int malidp_bind(struct device *dev)
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	hwdev->regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(hwdev->regs)) {
-		DRM_ERROR("Failed to map control registers area\n");
+	if (IS_ERR(hwdev->regs))
 		return PTR_ERR(hwdev->regs);
-	}
 
 	hwdev->pclk = devm_clk_get(dev, "pclk");
 	if (IS_ERR(hwdev->pclk))
@@ -360,11 +359,14 @@ static int malidp_bind(struct device *dev)
 		goto register_fail;
 
 	/* Set the CRTC's port so that the encoder component can find it */
-	malidp->crtc.port = of_graph_get_next_endpoint(dev->of_node, NULL);
+	ep = of_graph_get_next_endpoint(dev->of_node, NULL);
+	if (!ep) {
+		ret = -EINVAL;
+		goto port_fail;
+	}
+	malidp->crtc.port = of_get_next_parent(ep);
 
 	ret = component_bind_all(dev, drm);
-	of_node_put(malidp->crtc.port);
-
 	if (ret) {
 		DRM_ERROR("Failed to bind all components\n");
 		goto bind_fail;
@@ -402,6 +404,9 @@ vblank_fail:
 irq_init_fail:
 	component_unbind_all(dev, drm);
 bind_fail:
+	of_node_put(malidp->crtc.port);
+	malidp->crtc.port = NULL;
+port_fail:
 	drm_dev_unregister(drm);
 register_fail:
 	malidp_de_planes_destroy(drm);
@@ -435,6 +440,8 @@ static void malidp_unbind(struct device *dev)
 	malidp_de_irq_fini(drm);
 	drm_vblank_cleanup(drm);
 	component_unbind_all(dev, drm);
+	of_node_put(malidp->crtc.port);
+	malidp->crtc.port = NULL;
 	drm_dev_unregister(drm);
 	malidp_de_planes_destroy(drm);
 	drm_mode_config_cleanup(drm);
diff --git a/drivers/gpu/drm/armada/Kconfig b/drivers/gpu/drm/armada/Kconfig
index eb773e9af313..15f3ecfb16f1 100644
--- a/drivers/gpu/drm/armada/Kconfig
+++ b/drivers/gpu/drm/armada/Kconfig
@@ -1,11 +1,7 @@
 config DRM_ARMADA
 	tristate "DRM support for Marvell Armada SoCs"
 	depends on DRM && HAVE_CLK && ARM
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	help
 	  Support the "LCD" controllers found on the Marvell Armada 510
 	  devices.  There are two controllers on the device, each controller
diff --git a/drivers/gpu/drm/armada/armada_crtc.c b/drivers/gpu/drm/armada/armada_crtc.c
index 34405e4a5d36..2f58e9e2a59c 100644
--- a/drivers/gpu/drm/armada/armada_crtc.c
+++ b/drivers/gpu/drm/armada/armada_crtc.c
@@ -410,7 +410,7 @@ static void armada_drm_crtc_irq(struct armada_crtc *dcrtc, u32 stat)
 		DRM_ERROR("graphics underflow on crtc %u\n", dcrtc->num);
 
 	if (stat & VSYNC_IRQ)
-		drm_handle_vblank(dcrtc->crtc.dev, dcrtc->num);
+		drm_crtc_handle_vblank(&dcrtc->crtc);
 
 	spin_lock(&dcrtc->irq_lock);
 	ovl_plane = dcrtc->plane;
diff --git a/drivers/gpu/drm/ast/Kconfig b/drivers/gpu/drm/ast/Kconfig
index 8a784c460c89..15f6ce7acb2a 100644
--- a/drivers/gpu/drm/ast/Kconfig
+++ b/drivers/gpu/drm/ast/Kconfig
@@ -2,11 +2,7 @@ config DRM_AST
 	tristate "AST server chips"
 	depends on DRM && PCI
 	select DRM_TTM
-	select FB_SYS_COPYAREA
-	select FB_SYS_FILLRECT
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 Say yes for experimental AST GPU driver. Do not enable
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 7bc3aa6dda8c..904beaa932d0 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -295,9 +295,8 @@ static int ast_get_dram_info(struct drm_device *dev)
 static void ast_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct ast_framebuffer *ast_fb = to_ast_framebuffer(fb);
-	if (ast_fb->obj)
-		drm_gem_object_unreference_unlocked(ast_fb->obj);
 
+	drm_gem_object_unreference_unlocked(ast_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 59f2f93b6f84..b29a41218fc9 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -186,17 +186,6 @@ static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *
 {
 }
 
-static int ast_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void ast_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver ast_bo_driver = {
 	.ttm_tt_unpopulate = ast_ttm_tt_unpopulate,
 	.init_mem_type = ast_bo_init_mem_type,
 	.evict_flags = ast_bo_evict_flags,
-	.move = ast_bo_move,
+	.move = NULL,
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/atmel-hlcdc/Kconfig b/drivers/gpu/drm/atmel-hlcdc/Kconfig
index 99b4f0698a30..32bcc4bad06a 100644
--- a/drivers/gpu/drm/atmel-hlcdc/Kconfig
+++ b/drivers/gpu/drm/atmel-hlcdc/Kconfig
@@ -3,7 +3,6 @@ config DRM_ATMEL_HLCDC
 	depends on DRM && OF && COMMON_CLK && MFD_ATMEL_HLCDC && ARM
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_PANEL
 	help
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
index 613f6c99b76a..a978381ef95b 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_crtc.c
@@ -383,7 +383,7 @@ static void atmel_hlcdc_crtc_finish_page_flip(struct atmel_hlcdc_crtc *crtc)
 
 void atmel_hlcdc_crtc_irq(struct drm_crtc *c)
 {
-	drm_handle_vblank(c->dev, 0);
+	drm_crtc_handle_vblank(c);
 	atmel_hlcdc_crtc_finish_page_flip(drm_crtc_to_atmel_hlcdc_crtc(c));
 }
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 473a475f27b1..6119b5085501 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -254,9 +254,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_check_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
@@ -264,9 +265,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index aef3ca8a81fa..016c191221f3 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
 
 		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
 					     factor_reg);
+	} else {
+		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/bochs/Kconfig b/drivers/gpu/drm/bochs/Kconfig
index 5f8b0c2b9a44..f739763f47ce 100644
--- a/drivers/gpu/drm/bochs/Kconfig
+++ b/drivers/gpu/drm/bochs/Kconfig
@@ -2,10 +2,6 @@ config DRM_BOCHS
 	tristate "DRM Support for bochs dispi vga interface (qemu stdvga)"
 	depends on DRM && PCI
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_TTM
 	help
 	  Choose this option for qemu.
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 6cf912c45e48..5c5638a777a1 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -165,15 +165,6 @@ static void bochs_ttm_io_mem_free(struct ttm_bo_device *bdev,
 {
 }
 
-static int bochs_bo_move(struct ttm_buffer_object *bo,
-			 bool evict, bool interruptible,
-			 bool no_wait_gpu,
-			 struct ttm_mem_reg *new_mem)
-{
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-}
-
-
 static void bochs_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -208,7 +199,7 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.ttm_tt_unpopulate = ttm_pool_unpopulate,
 	.init_mem_type = bochs_bo_init_mem_type,
 	.evict_flags = bochs_bo_evict_flags,
-	.move = bochs_bo_move,
+	.move = NULL,
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
@@ -474,8 +465,8 @@ int bochs_dumb_mmap_offset(struct drm_file *file, struct drm_device *dev,
 static void bochs_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct bochs_framebuffer *bochs_fb = to_bochs_framebuffer(fb);
-	if (bochs_fb->obj)
-		drm_gem_object_unreference_unlocked(bochs_fb->obj);
+
+	drm_gem_object_unreference_unlocked(bochs_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig
index a141921445f4..b590e678052d 100644
--- a/drivers/gpu/drm/bridge/Kconfig
+++ b/drivers/gpu/drm/bridge/Kconfig
@@ -58,6 +58,17 @@ config DRM_SII902X
 	---help---
 	  Silicon Image sii902x bridge chip driver.
 
+config DRM_TOSHIBA_TC358767
+	tristate "Toshiba TC358767 eDP bridge"
+	depends on OF
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	select DRM_PANEL
+	---help---
+	  Toshiba TC358767 eDP bridge chip driver.
+
 source "drivers/gpu/drm/bridge/analogix/Kconfig"
 
+source "drivers/gpu/drm/bridge/adv7511/Kconfig"
+
 endmenu
diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile
index bfec9f8cb9d2..efdb07e878f5 100644
--- a/drivers/gpu/drm/bridge/Makefile
+++ b/drivers/gpu/drm/bridge/Makefile
@@ -6,4 +6,6 @@ obj-$(CONFIG_DRM_DW_HDMI_AHB_AUDIO) += dw-hdmi-ahb-audio.o
 obj-$(CONFIG_DRM_NXP_PTN3460) += nxp-ptn3460.o
 obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o
 obj-$(CONFIG_DRM_SII902X) += sii902x.o
+obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o
 obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/
+obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/
diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig
new file mode 100644
index 000000000000..d2b0499ab7d7
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/Kconfig
@@ -0,0 +1,15 @@
+config DRM_I2C_ADV7511
+	tristate "AV7511 encoder"
+	depends on OF
+	select DRM_KMS_HELPER
+	select REGMAP_I2C
+	help
+	  Support for the Analog Device ADV7511(W) and ADV7513 HDMI encoders.
+
+config DRM_I2C_ADV7533
+	bool "ADV7533 encoder"
+	depends on DRM_I2C_ADV7511
+	select DRM_MIPI_DSI
+	default y
+	help
+	  Support for the Analog Devices ADV7533 DSI to HDMI encoder.
diff --git a/drivers/gpu/drm/bridge/adv7511/Makefile b/drivers/gpu/drm/bridge/adv7511/Makefile
new file mode 100644
index 000000000000..9019327fff4c
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/Makefile
@@ -0,0 +1,3 @@
+adv7511-y := adv7511_drv.o
+adv7511-$(CONFIG_DRM_I2C_ADV7533) += adv7533.o
+obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o
diff --git a/drivers/gpu/drm/i2c/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h
index 38515b30cedf..161c923d6162 100644
--- a/drivers/gpu/drm/i2c/adv7511.h
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h
@@ -10,6 +10,11 @@
 #define __DRM_I2C_ADV7511_H__
 
 #include <linux/hdmi.h>
+#include <linux/i2c.h>
+#include <linux/regmap.h>
+
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_mipi_dsi.h>
 
 #define ADV7511_REG_CHIP_REVISION		0x00
 #define ADV7511_REG_N0				0x01
@@ -286,4 +291,102 @@ struct adv7511_video_config {
 	struct hdmi_avi_infoframe avi_infoframe;
 };
 
+enum adv7511_type {
+	ADV7511,
+	ADV7533,
+};
+
+struct adv7511 {
+	struct i2c_client *i2c_main;
+	struct i2c_client *i2c_edid;
+	struct i2c_client *i2c_cec;
+
+	struct regmap *regmap;
+	struct regmap *regmap_cec;
+	enum drm_connector_status status;
+	bool powered;
+
+	struct drm_display_mode curr_mode;
+
+	unsigned int f_tmds;
+
+	unsigned int current_edid_segment;
+	uint8_t edid_buf[256];
+	bool edid_read;
+
+	wait_queue_head_t wq;
+	struct drm_bridge bridge;
+	struct drm_connector connector;
+
+	bool embedded_sync;
+	enum adv7511_sync_polarity vsync_polarity;
+	enum adv7511_sync_polarity hsync_polarity;
+	bool rgb;
+
+	struct edid *edid;
+
+	struct gpio_desc *gpio_pd;
+
+	/* ADV7533 DSI RX related params */
+	struct device_node *host_node;
+	struct mipi_dsi_device *dsi;
+	u8 num_dsi_lanes;
+	bool use_timing_gen;
+
+	enum adv7511_type type;
+};
+
+#ifdef CONFIG_DRM_I2C_ADV7533
+void adv7533_dsi_power_on(struct adv7511 *adv);
+void adv7533_dsi_power_off(struct adv7511 *adv);
+void adv7533_mode_set(struct adv7511 *adv, struct drm_display_mode *mode);
+int adv7533_patch_registers(struct adv7511 *adv);
+void adv7533_uninit_cec(struct adv7511 *adv);
+int adv7533_init_cec(struct adv7511 *adv);
+int adv7533_attach_dsi(struct adv7511 *adv);
+void adv7533_detach_dsi(struct adv7511 *adv);
+int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv);
+#else
+static inline void adv7533_dsi_power_on(struct adv7511 *adv)
+{
+}
+
+static inline void adv7533_dsi_power_off(struct adv7511 *adv)
+{
+}
+
+static inline void adv7533_mode_set(struct adv7511 *adv,
+				    struct drm_display_mode *mode)
+{
+}
+
+static inline int adv7533_patch_registers(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline void adv7533_uninit_cec(struct adv7511 *adv)
+{
+}
+
+static inline int adv7533_init_cec(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline int adv7533_attach_dsi(struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+
+static inline void adv7533_detach_dsi(struct adv7511 *adv)
+{
+}
+
+static inline int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
+{
+	return -ENODEV;
+}
+#endif
+
 #endif /* __DRM_I2C_ADV7511_H__ */
diff --git a/drivers/gpu/drm/i2c/adv7511.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index a02112ba1c3d..ec8fb2ed3275 100644
--- a/drivers/gpu/drm/i2c/adv7511.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -8,51 +8,17 @@
 
 #include <linux/device.h>
 #include <linux/gpio/consumer.h>
-#include <linux/i2c.h>
 #include <linux/module.h>
-#include <linux/regmap.h>
+#include <linux/of_device.h>
 #include <linux/slab.h>
 
 #include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_encoder_slave.h>
 
 #include "adv7511.h"
 
-struct adv7511 {
-	struct i2c_client *i2c_main;
-	struct i2c_client *i2c_edid;
-
-	struct regmap *regmap;
-	struct regmap *packet_memory_regmap;
-	enum drm_connector_status status;
-	bool powered;
-
-	unsigned int f_tmds;
-
-	unsigned int current_edid_segment;
-	uint8_t edid_buf[256];
-	bool edid_read;
-
-	wait_queue_head_t wq;
-	struct drm_encoder *encoder;
-
-	bool embedded_sync;
-	enum adv7511_sync_polarity vsync_polarity;
-	enum adv7511_sync_polarity hsync_polarity;
-	bool rgb;
-
-	struct edid *edid;
-
-	struct gpio_desc *gpio_pd;
-};
-
-static struct adv7511 *encoder_to_adv7511(struct drm_encoder *encoder)
-{
-	return to_encoder_slave(encoder)->slave_priv;
-}
-
 /* ADI recommended values for proper operation. */
 static const struct reg_sequence adv7511_fixed_registers[] = {
 	{ 0x98, 0x03 },
@@ -394,6 +360,9 @@ static void adv7511_power_on(struct adv7511 *adv7511)
 	 */
 	regcache_sync(adv7511->regmap);
 
+	if (adv7511->type == ADV7533)
+		adv7533_dsi_power_on(adv7511);
+
 	adv7511->powered = true;
 }
 
@@ -405,6 +374,9 @@ static void adv7511_power_off(struct adv7511 *adv7511)
 			   ADV7511_POWER_POWER_DOWN);
 	regcache_mark_dirty(adv7511->regmap);
 
+	if (adv7511->type == ADV7533)
+		adv7533_dsi_power_off(adv7511);
+
 	adv7511->powered = false;
 }
 
@@ -430,7 +402,7 @@ static bool adv7511_hpd(struct adv7511 *adv7511)
 	return false;
 }
 
-static int adv7511_irq_process(struct adv7511 *adv7511)
+static int adv7511_irq_process(struct adv7511 *adv7511, bool process_hpd)
 {
 	unsigned int irq0, irq1;
 	int ret;
@@ -446,8 +418,8 @@ static int adv7511_irq_process(struct adv7511 *adv7511)
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(0), irq0);
 	regmap_write(adv7511->regmap, ADV7511_REG_INT(1), irq1);
 
-	if (irq0 & ADV7511_INT0_HPD && adv7511->encoder)
-		drm_helper_hpd_irq_event(adv7511->encoder->dev);
+	if (process_hpd && irq0 & ADV7511_INT0_HPD && adv7511->bridge.encoder)
+		drm_helper_hpd_irq_event(adv7511->connector.dev);
 
 	if (irq0 & ADV7511_INT0_EDID_READY || irq1 & ADV7511_INT1_DDC_ERROR) {
 		adv7511->edid_read = true;
@@ -464,7 +436,7 @@ static irqreturn_t adv7511_irq_handler(int irq, void *devid)
 	struct adv7511 *adv7511 = devid;
 	int ret;
 
-	ret = adv7511_irq_process(adv7511);
+	ret = adv7511_irq_process(adv7511, true);
 	return ret < 0 ? IRQ_NONE : IRQ_HANDLED;
 }
 
@@ -481,7 +453,7 @@ static int adv7511_wait_for_edid(struct adv7511 *adv7511, int timeout)
 				adv7511->edid_read, msecs_to_jiffies(timeout));
 	} else {
 		for (; timeout > 0; timeout -= 25) {
-			ret = adv7511_irq_process(adv7511);
+			ret = adv7511_irq_process(adv7511, false);
 			if (ret < 0)
 				break;
 
@@ -563,13 +535,12 @@ static int adv7511_get_edid_block(void *data, u8 *buf, unsigned int block,
 }
 
 /* -----------------------------------------------------------------------------
- * Encoder operations
+ * ADV75xx helpers
  */
 
-static int adv7511_get_modes(struct drm_encoder *encoder,
+static int adv7511_get_modes(struct adv7511 *adv7511,
 			     struct drm_connector *connector)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	struct edid *edid;
 	unsigned int count;
 
@@ -606,21 +577,9 @@ static int adv7511_get_modes(struct drm_encoder *encoder,
 	return count;
 }
 
-static void adv7511_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
-
-	if (mode == DRM_MODE_DPMS_ON)
-		adv7511_power_on(adv7511);
-	else
-		adv7511_power_off(adv7511);
-}
-
 static enum drm_connector_status
-adv7511_encoder_detect(struct drm_encoder *encoder,
-		       struct drm_connector *connector)
+adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	enum drm_connector_status status;
 	unsigned int val;
 	bool hpd;
@@ -644,7 +603,7 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
 	if (status == connector_status_connected && hpd && adv7511->powered) {
 		regcache_mark_dirty(adv7511->regmap);
 		adv7511_power_on(adv7511);
-		adv7511_get_modes(encoder, connector);
+		adv7511_get_modes(adv7511, connector);
 		if (adv7511->status == connector_status_connected)
 			status = connector_status_disconnected;
 	} else {
@@ -658,8 +617,8 @@ adv7511_encoder_detect(struct drm_encoder *encoder,
 	return status;
 }
 
-static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
-				      struct drm_display_mode *mode)
+static int adv7511_mode_valid(struct adv7511 *adv7511,
+			      struct drm_display_mode *mode)
 {
 	if (mode->clock > 165000)
 		return MODE_CLOCK_HIGH;
@@ -667,11 +626,10 @@ static int adv7511_encoder_mode_valid(struct drm_encoder *encoder,
 	return MODE_OK;
 }
 
-static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
-				     struct drm_display_mode *mode,
-				     struct drm_display_mode *adj_mode)
+static void adv7511_mode_set(struct adv7511 *adv7511,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adj_mode)
 {
-	struct adv7511 *adv7511 = encoder_to_adv7511(encoder);
 	unsigned int low_refresh_rate;
 	unsigned int hsync_polarity = 0;
 	unsigned int vsync_polarity = 0;
@@ -754,6 +712,11 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
 	regmap_update_bits(adv7511->regmap, 0x17,
 		0x60, (vsync_polarity << 6) | (hsync_polarity << 5));
 
+	if (adv7511->type == ADV7533)
+		adv7533_mode_set(adv7511, adj_mode);
+
+	drm_mode_copy(&adv7511->curr_mode, adj_mode);
+
 	/*
 	 * TODO Test first order 4:2:2 to 4:4:4 up conversion method, which is
 	 * supposed to give better results.
@@ -762,12 +725,114 @@ static void adv7511_encoder_mode_set(struct drm_encoder *encoder,
 	adv7511->f_tmds = mode->clock;
 }
 
-static const struct drm_encoder_slave_funcs adv7511_encoder_funcs = {
-	.dpms = adv7511_encoder_dpms,
-	.mode_valid = adv7511_encoder_mode_valid,
-	.mode_set = adv7511_encoder_mode_set,
-	.detect = adv7511_encoder_detect,
-	.get_modes = adv7511_get_modes,
+/* Connector funcs */
+static struct adv7511 *connector_to_adv7511(struct drm_connector *connector)
+{
+	return container_of(connector, struct adv7511, connector);
+}
+
+static int adv7511_connector_get_modes(struct drm_connector *connector)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_get_modes(adv, connector);
+}
+
+static enum drm_mode_status
+adv7511_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_mode_valid(adv, mode);
+}
+
+static struct drm_connector_helper_funcs adv7511_connector_helper_funcs = {
+	.get_modes = adv7511_connector_get_modes,
+	.mode_valid = adv7511_connector_mode_valid,
+};
+
+static enum drm_connector_status
+adv7511_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct adv7511 *adv = connector_to_adv7511(connector);
+
+	return adv7511_detect(adv, connector);
+}
+
+static struct drm_connector_funcs adv7511_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.detect = adv7511_connector_detect,
+	.destroy = drm_connector_cleanup,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+/* Bridge funcs */
+static struct adv7511 *bridge_to_adv7511(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct adv7511, bridge);
+}
+
+static void adv7511_bridge_enable(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_power_on(adv);
+}
+
+static void adv7511_bridge_disable(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_power_off(adv);
+}
+
+static void adv7511_bridge_mode_set(struct drm_bridge *bridge,
+				    struct drm_display_mode *mode,
+				    struct drm_display_mode *adj_mode)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+
+	adv7511_mode_set(adv, mode, adj_mode);
+}
+
+static int adv7511_bridge_attach(struct drm_bridge *bridge)
+{
+	struct adv7511 *adv = bridge_to_adv7511(bridge);
+	int ret;
+
+	if (!bridge->encoder) {
+		DRM_ERROR("Parent encoder object not found");
+		return -ENODEV;
+	}
+
+	adv->connector.polled = DRM_CONNECTOR_POLL_HPD;
+
+	ret = drm_connector_init(bridge->dev, &adv->connector,
+				 &adv7511_connector_funcs,
+				 DRM_MODE_CONNECTOR_HDMIA);
+	if (ret) {
+		DRM_ERROR("Failed to initialize connector with drm\n");
+		return ret;
+	}
+	drm_connector_helper_add(&adv->connector,
+				 &adv7511_connector_helper_funcs);
+	drm_mode_connector_attach_encoder(&adv->connector, bridge->encoder);
+
+	if (adv->type == ADV7533)
+		ret = adv7533_attach_dsi(adv);
+
+	return ret;
+}
+
+static struct drm_bridge_funcs adv7511_bridge_funcs = {
+	.enable = adv7511_bridge_enable,
+	.disable = adv7511_bridge_disable,
+	.mode_set = adv7511_bridge_mode_set,
+	.attach = adv7511_bridge_attach,
 };
 
 /* -----------------------------------------------------------------------------
@@ -780,8 +845,6 @@ static int adv7511_parse_dt(struct device_node *np,
 	const char *str;
 	int ret;
 
-	memset(config, 0, sizeof(*config));
-
 	of_property_read_u32(np, "adi,input-depth", &config->input_color_depth);
 	if (config->input_color_depth != 8 && config->input_color_depth != 10 &&
 	    config->input_color_depth != 12)
@@ -881,7 +944,17 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 	adv7511->powered = false;
 	adv7511->status = connector_status_disconnected;
 
-	ret = adv7511_parse_dt(dev->of_node, &link_config);
+	if (dev->of_node)
+		adv7511->type = (enum adv7511_type)of_device_get_match_data(dev);
+	else
+		adv7511->type = id->driver_data;
+
+	memset(&link_config, 0, sizeof(link_config));
+
+	if (adv7511->type == ADV7511)
+		ret = adv7511_parse_dt(dev->of_node, &link_config);
+	else
+		ret = adv7533_parse_dt(dev->of_node, adv7511);
 	if (ret)
 		return ret;
 
@@ -907,8 +980,12 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 		return ret;
 	dev_dbg(dev, "Rev. %d\n", val);
 
-	ret = regmap_register_patch(adv7511->regmap, adv7511_fixed_registers,
-				    ARRAY_SIZE(adv7511_fixed_registers));
+	if (adv7511->type == ADV7511)
+		ret = regmap_register_patch(adv7511->regmap,
+					    adv7511_fixed_registers,
+					    ARRAY_SIZE(adv7511_fixed_registers));
+	else
+		ret = adv7533_patch_registers(adv7511);
 	if (ret)
 		return ret;
 
@@ -923,6 +1000,12 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 	if (!adv7511->i2c_edid)
 		return -ENOMEM;
 
+	if (adv7511->type == ADV7533) {
+		ret = adv7533_init_cec(adv7511);
+		if (ret)
+			goto err_i2c_unregister_edid;
+	}
+
 	if (i2c->irq) {
 		init_waitqueue_head(&adv7511->wq);
 
@@ -931,7 +1014,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 						IRQF_ONESHOT, dev_name(dev),
 						adv7511);
 		if (ret)
-			goto err_i2c_unregister_device;
+			goto err_unregister_cec;
 	}
 
 	/* CEC is unused for now */
@@ -942,11 +1025,23 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
 
 	i2c_set_clientdata(i2c, adv7511);
 
-	adv7511_set_link_config(adv7511, &link_config);
+	if (adv7511->type == ADV7511)
+		adv7511_set_link_config(adv7511, &link_config);
+
+	adv7511->bridge.funcs = &adv7511_bridge_funcs;
+	adv7511->bridge.of_node = dev->of_node;
+
+	ret = drm_bridge_add(&adv7511->bridge);
+	if (ret) {
+		dev_err(dev, "failed to add adv7511 bridge\n");
+		goto err_unregister_cec;
+	}
 
 	return 0;
 
-err_i2c_unregister_device:
+err_unregister_cec:
+	adv7533_uninit_cec(adv7511);
+err_i2c_unregister_edid:
 	i2c_unregister_device(adv7511->i2c_edid);
 
 	return ret;
@@ -956,66 +1051,71 @@ static int adv7511_remove(struct i2c_client *i2c)
 {
 	struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
 
-	i2c_unregister_device(adv7511->i2c_edid);
-
-	kfree(adv7511->edid);
-
-	return 0;
-}
-
-static int adv7511_encoder_init(struct i2c_client *i2c, struct drm_device *dev,
-				struct drm_encoder_slave *encoder)
-{
+	if (adv7511->type == ADV7533) {
+		adv7533_detach_dsi(adv7511);
+		adv7533_uninit_cec(adv7511);
+	}
 
-	struct adv7511 *adv7511 = i2c_get_clientdata(i2c);
+	drm_bridge_remove(&adv7511->bridge);
 
-	encoder->slave_priv = adv7511;
-	encoder->slave_funcs = &adv7511_encoder_funcs;
+	i2c_unregister_device(adv7511->i2c_edid);
 
-	adv7511->encoder = &encoder->base;
+	kfree(adv7511->edid);
 
 	return 0;
 }
 
 static const struct i2c_device_id adv7511_i2c_ids[] = {
-	{ "adv7511", 0 },
-	{ "adv7511w", 0 },
-	{ "adv7513", 0 },
+	{ "adv7511", ADV7511 },
+	{ "adv7511w", ADV7511 },
+	{ "adv7513", ADV7511 },
+#ifdef CONFIG_DRM_I2C_ADV7533
+	{ "adv7533", ADV7533 },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adv7511_i2c_ids);
 
 static const struct of_device_id adv7511_of_ids[] = {
-	{ .compatible = "adi,adv7511", },
-	{ .compatible = "adi,adv7511w", },
-	{ .compatible = "adi,adv7513", },
+	{ .compatible = "adi,adv7511", .data = (void *)ADV7511 },
+	{ .compatible = "adi,adv7511w", .data = (void *)ADV7511 },
+	{ .compatible = "adi,adv7513", .data = (void *)ADV7511 },
+#ifdef CONFIG_DRM_I2C_ADV7533
+	{ .compatible = "adi,adv7533", .data = (void *)ADV7533 },
+#endif
 	{ }
 };
 MODULE_DEVICE_TABLE(of, adv7511_of_ids);
 
-static struct drm_i2c_encoder_driver adv7511_driver = {
-	.i2c_driver = {
-		.driver = {
-			.name = "adv7511",
-			.of_match_table = adv7511_of_ids,
-		},
-		.id_table = adv7511_i2c_ids,
-		.probe = adv7511_probe,
-		.remove = adv7511_remove,
-	},
+static struct mipi_dsi_driver adv7533_dsi_driver = {
+	.driver.name = "adv7533",
+};
 
-	.encoder_init = adv7511_encoder_init,
+static struct i2c_driver adv7511_driver = {
+	.driver = {
+		.name = "adv7511",
+		.of_match_table = adv7511_of_ids,
+	},
+	.id_table = adv7511_i2c_ids,
+	.probe = adv7511_probe,
+	.remove = adv7511_remove,
 };
 
 static int __init adv7511_init(void)
 {
-	return drm_i2c_encoder_register(THIS_MODULE, &adv7511_driver);
+	if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
+		mipi_dsi_driver_register(&adv7533_dsi_driver);
+
+	return i2c_add_driver(&adv7511_driver);
 }
 module_init(adv7511_init);
 
 static void __exit adv7511_exit(void)
 {
-	drm_i2c_encoder_unregister(&adv7511_driver);
+	i2c_del_driver(&adv7511_driver);
+
+	if (IS_ENABLED(CONFIG_DRM_MIPI_DSI))
+		mipi_dsi_driver_unregister(&adv7533_dsi_driver);
 }
 module_exit(adv7511_exit);
 
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c
new file mode 100644
index 000000000000..5eebd15899b1
--- /dev/null
+++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/of_graph.h>
+
+#include "adv7511.h"
+
+static const struct reg_sequence adv7533_fixed_registers[] = {
+	{ 0x16, 0x20 },
+	{ 0x9a, 0xe0 },
+	{ 0xba, 0x70 },
+	{ 0xde, 0x82 },
+	{ 0xe4, 0x40 },
+	{ 0xe5, 0x80 },
+};
+
+static const struct reg_sequence adv7533_cec_fixed_registers[] = {
+	{ 0x15, 0xd0 },
+	{ 0x17, 0xd0 },
+	{ 0x24, 0x20 },
+	{ 0x57, 0x11 },
+};
+
+static const struct regmap_config adv7533_cec_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+
+	.max_register = 0xff,
+	.cache_type = REGCACHE_RBTREE,
+};
+
+static void adv7511_dsi_config_timing_gen(struct adv7511 *adv)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+	struct drm_display_mode *mode = &adv->curr_mode;
+	unsigned int hsw, hfp, hbp, vsw, vfp, vbp;
+	u8 clock_div_by_lanes[] = { 6, 4, 3 };	/* 2, 3, 4 lanes */
+
+	hsw = mode->hsync_end - mode->hsync_start;
+	hfp = mode->hsync_start - mode->hdisplay;
+	hbp = mode->htotal - mode->hsync_end;
+	vsw = mode->vsync_end - mode->vsync_start;
+	vfp = mode->vsync_start - mode->vdisplay;
+	vbp = mode->vtotal - mode->vsync_end;
+
+	/* set pixel clock divider mode */
+	regmap_write(adv->regmap_cec, 0x16,
+		     clock_div_by_lanes[dsi->lanes - 2] << 3);
+
+	/* horizontal porch params */
+	regmap_write(adv->regmap_cec, 0x28, mode->htotal >> 4);
+	regmap_write(adv->regmap_cec, 0x29, (mode->htotal << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2a, hsw >> 4);
+	regmap_write(adv->regmap_cec, 0x2b, (hsw << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2c, hfp >> 4);
+	regmap_write(adv->regmap_cec, 0x2d, (hfp << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x2e, hbp >> 4);
+	regmap_write(adv->regmap_cec, 0x2f, (hbp << 4) & 0xff);
+
+	/* vertical porch params */
+	regmap_write(adv->regmap_cec, 0x30, mode->vtotal >> 4);
+	regmap_write(adv->regmap_cec, 0x31, (mode->vtotal << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x32, vsw >> 4);
+	regmap_write(adv->regmap_cec, 0x33, (vsw << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x34, vfp >> 4);
+	regmap_write(adv->regmap_cec, 0x35, (vfp << 4) & 0xff);
+	regmap_write(adv->regmap_cec, 0x36, vbp >> 4);
+	regmap_write(adv->regmap_cec, 0x37, (vbp << 4) & 0xff);
+}
+
+void adv7533_dsi_power_on(struct adv7511 *adv)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+
+	if (adv->use_timing_gen)
+		adv7511_dsi_config_timing_gen(adv);
+
+	/* set number of dsi lanes */
+	regmap_write(adv->regmap_cec, 0x1c, dsi->lanes << 4);
+
+	if (adv->use_timing_gen) {
+		/* reset internal timing generator */
+		regmap_write(adv->regmap_cec, 0x27, 0xcb);
+		regmap_write(adv->regmap_cec, 0x27, 0x8b);
+		regmap_write(adv->regmap_cec, 0x27, 0xcb);
+	} else {
+		/* disable internal timing generator */
+		regmap_write(adv->regmap_cec, 0x27, 0x0b);
+	}
+
+	/* enable hdmi */
+	regmap_write(adv->regmap_cec, 0x03, 0x89);
+	/* disable test mode */
+	regmap_write(adv->regmap_cec, 0x55, 0x00);
+
+	regmap_register_patch(adv->regmap_cec, adv7533_cec_fixed_registers,
+			      ARRAY_SIZE(adv7533_cec_fixed_registers));
+}
+
+void adv7533_dsi_power_off(struct adv7511 *adv)
+{
+	/* disable hdmi */
+	regmap_write(adv->regmap_cec, 0x03, 0x0b);
+	/* disable internal timing generator */
+	regmap_write(adv->regmap_cec, 0x27, 0x0b);
+}
+
+void adv7533_mode_set(struct adv7511 *adv, struct drm_display_mode *mode)
+{
+	struct mipi_dsi_device *dsi = adv->dsi;
+	int lanes, ret;
+
+	if (adv->num_dsi_lanes != 4)
+		return;
+
+	if (mode->clock > 80000)
+		lanes = 4;
+	else
+		lanes = 3;
+
+	if (lanes != dsi->lanes) {
+		mipi_dsi_detach(dsi);
+		dsi->lanes = lanes;
+		ret = mipi_dsi_attach(dsi);
+		if (ret)
+			dev_err(&dsi->dev, "failed to change host lanes\n");
+	}
+}
+
+int adv7533_patch_registers(struct adv7511 *adv)
+{
+	return regmap_register_patch(adv->regmap,
+				     adv7533_fixed_registers,
+				     ARRAY_SIZE(adv7533_fixed_registers));
+}
+
+void adv7533_uninit_cec(struct adv7511 *adv)
+{
+	i2c_unregister_device(adv->i2c_cec);
+}
+
+static const int cec_i2c_addr = 0x78;
+
+int adv7533_init_cec(struct adv7511 *adv)
+{
+	int ret;
+
+	adv->i2c_cec = i2c_new_dummy(adv->i2c_main->adapter, cec_i2c_addr >> 1);
+	if (!adv->i2c_cec)
+		return -ENOMEM;
+
+	adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec,
+					&adv7533_cec_regmap_config);
+	if (IS_ERR(adv->regmap_cec)) {
+		ret = PTR_ERR(adv->regmap_cec);
+		goto err;
+	}
+
+	ret = regmap_register_patch(adv->regmap_cec,
+				    adv7533_cec_fixed_registers,
+				    ARRAY_SIZE(adv7533_cec_fixed_registers));
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	adv7533_uninit_cec(adv);
+	return ret;
+}
+
+int adv7533_attach_dsi(struct adv7511 *adv)
+{
+	struct device *dev = &adv->i2c_main->dev;
+	struct mipi_dsi_host *host;
+	struct mipi_dsi_device *dsi;
+	int ret = 0;
+	const struct mipi_dsi_device_info info = { .type = "adv7533",
+						   .channel = 0,
+						   .node = NULL,
+						 };
+
+	host = of_find_mipi_dsi_host_by_node(adv->host_node);
+	if (!host) {
+		dev_err(dev, "failed to find dsi host\n");
+		return -EPROBE_DEFER;
+	}
+
+	dsi = mipi_dsi_device_register_full(host, &info);
+	if (IS_ERR(dsi)) {
+		dev_err(dev, "failed to create dsi device\n");
+		ret = PTR_ERR(dsi);
+		goto err_dsi_device;
+	}
+
+	adv->dsi = dsi;
+
+	dsi->lanes = adv->num_dsi_lanes;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+			  MIPI_DSI_MODE_EOT_PACKET | MIPI_DSI_MODE_VIDEO_HSE;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "failed to attach dsi to host\n");
+		goto err_dsi_attach;
+	}
+
+	return 0;
+
+err_dsi_attach:
+	mipi_dsi_device_unregister(dsi);
+err_dsi_device:
+	return ret;
+}
+
+void adv7533_detach_dsi(struct adv7511 *adv)
+{
+	mipi_dsi_detach(adv->dsi);
+	mipi_dsi_device_unregister(adv->dsi);
+}
+
+int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
+{
+	u32 num_lanes;
+	struct device_node *endpoint;
+
+	of_property_read_u32(np, "adi,dsi-lanes", &num_lanes);
+
+	if (num_lanes < 1 || num_lanes > 4)
+		return -EINVAL;
+
+	adv->num_dsi_lanes = num_lanes;
+
+	endpoint = of_graph_get_next_endpoint(np, NULL);
+	if (!endpoint)
+		return -ENODEV;
+
+	adv->host_node = of_graph_get_remote_port_parent(endpoint);
+	if (!adv->host_node) {
+		of_node_put(endpoint);
+		return -ENODEV;
+	}
+
+	of_node_put(endpoint);
+	of_node_put(adv->host_node);
+
+	adv->use_timing_gen = !of_property_read_bool(np,
+						"adi,disable-timing-generator");
+
+	/* TODO: Check if these need to be parsed by DT or not */
+	adv->rgb = true;
+	adv->embedded_sync = false;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index 7699597070a1..32715daf73cb 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -938,7 +938,7 @@ int analogix_dp_get_modes(struct drm_connector *connector)
 		num_modes += drm_panel_get_modes(dp->plat_data->panel);
 
 	if (dp->plat_data->get_modes)
-		num_modes += dp->plat_data->get_modes(dp->plat_data);
+		num_modes += dp->plat_data->get_modes(dp->plat_data, connector);
 
 	return num_modes;
 }
@@ -1208,6 +1208,7 @@ static int analogix_dp_dt_parse_pdata(struct analogix_dp_device *dp)
 
 	switch (dp->plat_data->dev_type) {
 	case RK3288_DP:
+	case RK3399_EDP:
 		/*
 		 * Like Rk3288 DisplayPort TRM indicate that "Main link
 		 * containing 4 physical lanes of 2.7/1.62 Gbps/lane".
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
index f09275d40f70..b45638043ec4 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h
@@ -127,10 +127,10 @@ enum analog_power_block {
 };
 
 enum dp_irq_type {
-	DP_IRQ_TYPE_HP_CABLE_IN,
-	DP_IRQ_TYPE_HP_CABLE_OUT,
-	DP_IRQ_TYPE_HP_CHANGE,
-	DP_IRQ_TYPE_UNKNOWN,
+	DP_IRQ_TYPE_HP_CABLE_IN  = BIT(0),
+	DP_IRQ_TYPE_HP_CABLE_OUT = BIT(1),
+	DP_IRQ_TYPE_HP_CHANGE    = BIT(2),
+	DP_IRQ_TYPE_UNKNOWN      = BIT(3),
 };
 
 struct video_info {
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
index 49205ef02be3..48030f0cf497 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c
@@ -74,8 +74,12 @@ void analogix_dp_init_analog_param(struct analogix_dp_device *dp)
 	reg = SEL_24M | TX_DVDD_BIT_1_0625V;
 	writel(reg, dp->reg_base + ANALOGIX_DP_ANALOG_CTL_2);
 
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP)) {
-		writel(REF_CLK_24M, dp->reg_base + ANALOGIX_DP_PLL_REG_1);
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) {
+		reg = REF_CLK_24M;
+		if (dp->plat_data->dev_type == RK3288_DP)
+			reg ^= REF_CLK_MASK;
+
+		writel(reg, dp->reg_base + ANALOGIX_DP_PLL_REG_1);
 		writel(0x95, dp->reg_base + ANALOGIX_DP_PLL_REG_2);
 		writel(0x40, dp->reg_base + ANALOGIX_DP_PLL_REG_3);
 		writel(0x58, dp->reg_base + ANALOGIX_DP_PLL_REG_4);
@@ -244,7 +248,7 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp,
 	u32 reg;
 	u32 phy_pd_addr = ANALOGIX_DP_PHY_PD;
 
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP))
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
 		phy_pd_addr = ANALOGIX_DP_PD;
 
 	switch (block) {
@@ -448,7 +452,7 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp)
 	analogix_dp_reset_aux(dp);
 
 	/* Disable AUX transaction H/W retry */
-	if (dp->plat_data && (dp->plat_data->dev_type == RK3288_DP))
+	if (dp->plat_data && is_rockchip(dp->plat_data->dev_type))
 		reg = AUX_BIT_PERIOD_EXPECTED_DELAY(0) |
 		      AUX_HW_RETRY_COUNT_SEL(3) |
 		      AUX_HW_RETRY_INTERVAL_600_MICROSECONDS;
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
index 337912b0aeab..cdcc6c5add5e 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h
@@ -163,8 +163,9 @@
 #define HSYNC_POLARITY_CFG			(0x1 << 0)
 
 /* ANALOGIX_DP_PLL_REG_1 */
-#define REF_CLK_24M				(0x1 << 1)
-#define REF_CLK_27M				(0x0 << 1)
+#define REF_CLK_24M				(0x1 << 0)
+#define REF_CLK_27M				(0x0 << 0)
+#define REF_CLK_MASK				(0x1 << 0)
 
 /* ANALOGIX_DP_LANE_MAP */
 #define LANE3_MAP_LOGIC_LANE_0			(0x0 << 6)
diff --git a/drivers/gpu/drm/bridge/dw-hdmi.c b/drivers/gpu/drm/bridge/dw-hdmi.c
index 70b1f7d4270b..77ab47341658 100644
--- a/drivers/gpu/drm/bridge/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/dw-hdmi.c
@@ -1495,14 +1495,6 @@ static void dw_hdmi_connector_force(struct drm_connector *connector)
 }
 
 static const struct drm_connector_funcs dw_hdmi_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.detect = dw_hdmi_connector_detect,
-	.destroy = dw_hdmi_connector_destroy,
-	.force = dw_hdmi_connector_force,
-};
-
-static const struct drm_connector_funcs dw_hdmi_atomic_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = dw_hdmi_connector_detect,
@@ -1634,14 +1626,9 @@ static int dw_hdmi_register(struct drm_device *drm, struct dw_hdmi *hdmi)
 	drm_connector_helper_add(&hdmi->connector,
 				 &dw_hdmi_connector_helper_funcs);
 
-	if (drm_core_check_feature(drm, DRIVER_ATOMIC))
-		drm_connector_init(drm, &hdmi->connector,
-				   &dw_hdmi_atomic_connector_funcs,
-				   DRM_MODE_CONNECTOR_HDMIA);
-	else
-		drm_connector_init(drm, &hdmi->connector,
-				   &dw_hdmi_connector_funcs,
-				   DRM_MODE_CONNECTOR_HDMIA);
+	drm_connector_init(drm, &hdmi->connector,
+			   &dw_hdmi_connector_funcs,
+			   DRM_MODE_CONNECTOR_HDMIA);
 
 	drm_mode_connector_attach_encoder(&hdmi->connector, encoder);
 
diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c
index 5cd8dd7e5904..583b8ce614e3 100644
--- a/drivers/gpu/drm/bridge/parade-ps8622.c
+++ b/drivers/gpu/drm/bridge/parade-ps8622.c
@@ -636,9 +636,7 @@ static int ps8622_remove(struct i2c_client *client)
 {
 	struct ps8622_bridge *ps8622 = i2c_get_clientdata(client);
 
-	if (ps8622->bl)
-		backlight_device_unregister(ps8622->bl);
-
+	backlight_device_unregister(ps8622->bl);
 	drm_bridge_remove(&ps8622->bridge);
 
 	return 0;
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
new file mode 100644
index 000000000000..a09825d8c94a
--- /dev/null
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -0,0 +1,1413 @@
+/*
+ * tc358767 eDP bridge driver
+ *
+ * Copyright (C) 2016 CogentEmbedded Inc
+ * Author: Andrey Gusakov <andrey.gusakov@cogentembedded.com>
+ *
+ * Copyright (C) 2016 Pengutronix, Philipp Zabel <p.zabel@pengutronix.de>
+ *
+ * Initially based on: drivers/gpu/drm/i2c/tda998x_drv.c
+ *
+ * Copyright (C) 2012 Texas Instruments
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+
+/* Registers */
+
+/* Display Parallel Interface */
+#define DPIPXLFMT		0x0440
+#define VS_POL_ACTIVE_LOW		(1 << 10)
+#define HS_POL_ACTIVE_LOW		(1 << 9)
+#define DE_POL_ACTIVE_HIGH		(0 << 8)
+#define SUB_CFG_TYPE_CONFIG1		(0 << 2) /* LSB aligned */
+#define SUB_CFG_TYPE_CONFIG2		(1 << 2) /* Loosely Packed */
+#define SUB_CFG_TYPE_CONFIG3		(2 << 2) /* LSB aligned 8-bit */
+#define DPI_BPP_RGB888			(0 << 0)
+#define DPI_BPP_RGB666			(1 << 0)
+#define DPI_BPP_RGB565			(2 << 0)
+
+/* Video Path */
+#define VPCTRL0			0x0450
+#define OPXLFMT_RGB666			(0 << 8)
+#define OPXLFMT_RGB888			(1 << 8)
+#define FRMSYNC_DISABLED		(0 << 4) /* Video Timing Gen Disabled */
+#define FRMSYNC_ENABLED			(1 << 4) /* Video Timing Gen Enabled */
+#define MSF_DISABLED			(0 << 0) /* Magic Square FRC disabled */
+#define MSF_ENABLED			(1 << 0) /* Magic Square FRC enabled */
+#define HTIM01			0x0454
+#define HTIM02			0x0458
+#define VTIM01			0x045c
+#define VTIM02			0x0460
+#define VFUEN0			0x0464
+#define VFUEN				BIT(0)   /* Video Frame Timing Upload */
+
+/* System */
+#define TC_IDREG		0x0500
+#define SYSCTRL			0x0510
+#define DP0_AUDSRC_NO_INPUT		(0 << 3)
+#define DP0_AUDSRC_I2S_RX		(1 << 3)
+#define DP0_VIDSRC_NO_INPUT		(0 << 0)
+#define DP0_VIDSRC_DSI_RX		(1 << 0)
+#define DP0_VIDSRC_DPI_RX		(2 << 0)
+#define DP0_VIDSRC_COLOR_BAR		(3 << 0)
+
+/* Control */
+#define DP0CTL			0x0600
+#define VID_MN_GEN			BIT(6)   /* Auto-generate M/N values */
+#define EF_EN				BIT(5)   /* Enable Enhanced Framing */
+#define VID_EN				BIT(1)   /* Video transmission enable */
+#define DP_EN				BIT(0)   /* Enable DPTX function */
+
+/* Clocks */
+#define DP0_VIDMNGEN0		0x0610
+#define DP0_VIDMNGEN1		0x0614
+#define DP0_VMNGENSTATUS	0x0618
+
+/* Main Channel */
+#define DP0_SECSAMPLE		0x0640
+#define DP0_VIDSYNCDELAY	0x0644
+#define DP0_TOTALVAL		0x0648
+#define DP0_STARTVAL		0x064c
+#define DP0_ACTIVEVAL		0x0650
+#define DP0_SYNCVAL		0x0654
+#define DP0_MISC		0x0658
+#define TU_SIZE_RECOMMENDED		(0x3f << 16) /* LSCLK cycles per TU */
+#define BPC_6				(0 << 5)
+#define BPC_8				(1 << 5)
+
+/* AUX channel */
+#define DP0_AUXCFG0		0x0660
+#define DP0_AUXCFG1		0x0664
+#define AUX_RX_FILTER_EN		BIT(16)
+
+#define DP0_AUXADDR		0x0668
+#define DP0_AUXWDATA(i)		(0x066c + (i) * 4)
+#define DP0_AUXRDATA(i)		(0x067c + (i) * 4)
+#define DP0_AUXSTATUS		0x068c
+#define AUX_STATUS_MASK			0xf0
+#define AUX_STATUS_SHIFT		4
+#define AUX_TIMEOUT			BIT(1)
+#define AUX_BUSY			BIT(0)
+#define DP0_AUXI2CADR		0x0698
+
+/* Link Training */
+#define DP0_SRCCTRL		0x06a0
+#define DP0_SRCCTRL_SCRMBLDIS		BIT(13)
+#define DP0_SRCCTRL_EN810B		BIT(12)
+#define DP0_SRCCTRL_NOTP		(0 << 8)
+#define DP0_SRCCTRL_TP1			(1 << 8)
+#define DP0_SRCCTRL_TP2			(2 << 8)
+#define DP0_SRCCTRL_LANESKEW		BIT(7)
+#define DP0_SRCCTRL_SSCG		BIT(3)
+#define DP0_SRCCTRL_LANES_1		(0 << 2)
+#define DP0_SRCCTRL_LANES_2		(1 << 2)
+#define DP0_SRCCTRL_BW27		(1 << 1)
+#define DP0_SRCCTRL_BW162		(0 << 1)
+#define DP0_SRCCTRL_AUTOCORRECT		BIT(0)
+#define DP0_LTSTAT		0x06d0
+#define LT_LOOPDONE			BIT(13)
+#define LT_STATUS_MASK			(0x1f << 8)
+#define LT_CHANNEL1_EQ_BITS		(DP_CHANNEL_EQ_BITS << 4)
+#define LT_INTERLANE_ALIGN_DONE		BIT(3)
+#define LT_CHANNEL0_EQ_BITS		(DP_CHANNEL_EQ_BITS)
+#define DP0_SNKLTCHGREQ		0x06d4
+#define DP0_LTLOOPCTRL		0x06d8
+#define DP0_SNKLTCTRL		0x06e4
+
+/* PHY */
+#define DP_PHY_CTRL		0x0800
+#define DP_PHY_RST			BIT(28)  /* DP PHY Global Soft Reset */
+#define BGREN				BIT(25)  /* AUX PHY BGR Enable */
+#define PWR_SW_EN			BIT(24)  /* PHY Power Switch Enable */
+#define PHY_M1_RST			BIT(12)  /* Reset PHY1 Main Channel */
+#define PHY_RDY				BIT(16)  /* PHY Main Channels Ready */
+#define PHY_M0_RST			BIT(8)   /* Reset PHY0 Main Channel */
+#define PHY_A0_EN			BIT(1)   /* PHY Aux Channel0 Enable */
+#define PHY_M0_EN			BIT(0)   /* PHY Main Channel0 Enable */
+
+/* PLL */
+#define DP0_PLLCTRL		0x0900
+#define DP1_PLLCTRL		0x0904	/* not defined in DS */
+#define PXL_PLLCTRL		0x0908
+#define PLLUPDATE			BIT(2)
+#define PLLBYP				BIT(1)
+#define PLLEN				BIT(0)
+#define PXL_PLLPARAM		0x0914
+#define IN_SEL_REFCLK			(0 << 14)
+#define SYS_PLLPARAM		0x0918
+#define REF_FREQ_38M4			(0 << 8) /* 38.4 MHz */
+#define REF_FREQ_19M2			(1 << 8) /* 19.2 MHz */
+#define REF_FREQ_26M			(2 << 8) /* 26 MHz */
+#define REF_FREQ_13M			(3 << 8) /* 13 MHz */
+#define SYSCLK_SEL_LSCLK		(0 << 4)
+#define LSCLK_DIV_1			(0 << 0)
+#define LSCLK_DIV_2			(1 << 0)
+
+/* Test & Debug */
+#define TSTCTL			0x0a00
+#define PLL_DBG			0x0a04
+
+static bool tc_test_pattern;
+module_param_named(test, tc_test_pattern, bool, 0644);
+
+struct tc_edp_link {
+	struct drm_dp_link	base;
+	u8			assr;
+	int			scrambler_dis;
+	int			spread;
+	int			coding8b10b;
+	u8			swing;
+	u8			preemp;
+};
+
+struct tc_data {
+	struct device		*dev;
+	struct regmap		*regmap;
+	struct drm_dp_aux	aux;
+
+	struct drm_bridge	bridge;
+	struct drm_connector	connector;
+	struct drm_panel	*panel;
+
+	/* link settings */
+	struct tc_edp_link	link;
+
+	/* display edid */
+	struct edid		*edid;
+	/* current mode */
+	struct drm_display_mode	*mode;
+
+	u32			rev;
+	u8			assr;
+
+	struct gpio_desc	*sd_gpio;
+	struct gpio_desc	*reset_gpio;
+	struct clk		*refclk;
+};
+
+static inline struct tc_data *aux_to_tc(struct drm_dp_aux *a)
+{
+	return container_of(a, struct tc_data, aux);
+}
+
+static inline struct tc_data *bridge_to_tc(struct drm_bridge *b)
+{
+	return container_of(b, struct tc_data, bridge);
+}
+
+static inline struct tc_data *connector_to_tc(struct drm_connector *c)
+{
+	return container_of(c, struct tc_data, connector);
+}
+
+/* Simple macros to avoid repeated error checks */
+#define tc_write(reg, var)					\
+	do {							\
+		ret = regmap_write(tc->regmap, reg, var);	\
+		if (ret)					\
+			goto err;				\
+	} while (0)
+#define tc_read(reg, var)					\
+	do {							\
+		ret = regmap_read(tc->regmap, reg, var);	\
+		if (ret)					\
+			goto err;				\
+	} while (0)
+
+static inline int tc_poll_timeout(struct regmap *map, unsigned int addr,
+				  unsigned int cond_mask,
+				  unsigned int cond_value,
+				  unsigned long sleep_us, u64 timeout_us)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
+	unsigned int val;
+	int ret;
+
+	for (;;) {
+		ret = regmap_read(map, addr, &val);
+		if (ret)
+			break;
+		if ((val & cond_mask) == cond_value)
+			break;
+		if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) {
+			ret = regmap_read(map, addr, &val);
+			break;
+		}
+		if (sleep_us)
+			usleep_range((sleep_us >> 2) + 1, sleep_us);
+	}
+	return ret ?: (((val & cond_mask) == cond_value) ? 0 : -ETIMEDOUT);
+}
+
+static int tc_aux_wait_busy(struct tc_data *tc, unsigned int timeout_ms)
+{
+	return tc_poll_timeout(tc->regmap, DP0_AUXSTATUS, AUX_BUSY, 0,
+			       1000, 1000 * timeout_ms);
+}
+
+static int tc_aux_get_status(struct tc_data *tc, u8 *reply)
+{
+	int ret;
+	u32 value;
+
+	ret = regmap_read(tc->regmap, DP0_AUXSTATUS, &value);
+	if (ret < 0)
+		return ret;
+	if (value & AUX_BUSY) {
+		if (value & AUX_TIMEOUT) {
+			dev_err(tc->dev, "i2c access timeout!\n");
+			return -ETIMEDOUT;
+		}
+		return -EBUSY;
+	}
+
+	*reply = (value & AUX_STATUS_MASK) >> AUX_STATUS_SHIFT;
+	return 0;
+}
+
+static ssize_t tc_aux_transfer(struct drm_dp_aux *aux,
+			       struct drm_dp_aux_msg *msg)
+{
+	struct tc_data *tc = aux_to_tc(aux);
+	size_t size = min_t(size_t, 8, msg->size);
+	u8 request = msg->request & ~DP_AUX_I2C_MOT;
+	u8 *buf = msg->buffer;
+	u32 tmp = 0;
+	int i = 0;
+	int ret;
+
+	if (size == 0)
+		return 0;
+
+	ret = tc_aux_wait_busy(tc, 100);
+	if (ret)
+		goto err;
+
+	if (request == DP_AUX_I2C_WRITE || request == DP_AUX_NATIVE_WRITE) {
+		/* Store data */
+		while (i < size) {
+			if (request == DP_AUX_NATIVE_WRITE)
+				tmp = tmp | (buf[i] << (8 * (i & 0x3)));
+			else
+				tmp = (tmp << 8) | buf[i];
+			i++;
+			if (((i % 4) == 0) || (i == size)) {
+				tc_write(DP0_AUXWDATA(i >> 2), tmp);
+				tmp = 0;
+			}
+		}
+	} else if (request != DP_AUX_I2C_READ &&
+		   request != DP_AUX_NATIVE_READ) {
+		return -EINVAL;
+	}
+
+	/* Store address */
+	tc_write(DP0_AUXADDR, msg->address);
+	/* Start transfer */
+	tc_write(DP0_AUXCFG0, ((size - 1) << 8) | request);
+
+	ret = tc_aux_wait_busy(tc, 100);
+	if (ret)
+		goto err;
+
+	ret = tc_aux_get_status(tc, &msg->reply);
+	if (ret)
+		goto err;
+
+	if (request == DP_AUX_I2C_READ || request == DP_AUX_NATIVE_READ) {
+		/* Read data */
+		while (i < size) {
+			if ((i % 4) == 0)
+				tc_read(DP0_AUXRDATA(i >> 2), &tmp);
+			buf[i] = tmp & 0xff;
+			tmp = tmp >> 8;
+			i++;
+		}
+	}
+
+	return size;
+err:
+	return ret;
+}
+
+static const char * const training_pattern1_errors[] = {
+	"No errors",
+	"Aux write error",
+	"Aux read error",
+	"Max voltage reached error",
+	"Loop counter expired error",
+	"res", "res", "res"
+};
+
+static const char * const training_pattern2_errors[] = {
+	"No errors",
+	"Aux write error",
+	"Aux read error",
+	"Clock recovery failed error",
+	"Loop counter expired error",
+	"res", "res", "res"
+};
+
+static u32 tc_srcctrl(struct tc_data *tc)
+{
+	/*
+	 * No training pattern, skew lane 1 data by two LSCLK cycles with
+	 * respect to lane 0 data, AutoCorrect Mode = 0
+	 */
+	u32 reg = DP0_SRCCTRL_NOTP | DP0_SRCCTRL_LANESKEW;
+
+	if (tc->link.scrambler_dis)
+		reg |= DP0_SRCCTRL_SCRMBLDIS;	/* Scrambler Disabled */
+	if (tc->link.coding8b10b)
+		/* Enable 8/10B Encoder (TxData[19:16] not used) */
+		reg |= DP0_SRCCTRL_EN810B;
+	if (tc->link.spread)
+		reg |= DP0_SRCCTRL_SSCG;	/* Spread Spectrum Enable */
+	if (tc->link.base.num_lanes == 2)
+		reg |= DP0_SRCCTRL_LANES_2;	/* Two Main Channel Lanes */
+	if (tc->link.base.rate != 162000)
+		reg |= DP0_SRCCTRL_BW27;	/* 2.7 Gbps link */
+	return reg;
+}
+
+static void tc_wait_pll_lock(struct tc_data *tc)
+{
+	/* Wait for PLL to lock: up to 2.09 ms, depending on refclk */
+	usleep_range(3000, 6000);
+}
+
+static int tc_pxl_pll_en(struct tc_data *tc, u32 refclk, u32 pixelclock)
+{
+	int ret;
+	int i_pre, best_pre = 1;
+	int i_post, best_post = 1;
+	int div, best_div = 1;
+	int mul, best_mul = 1;
+	int delta, best_delta;
+	int ext_div[] = {1, 2, 3, 5, 7};
+	int best_pixelclock = 0;
+	int vco_hi = 0;
+
+	dev_dbg(tc->dev, "PLL: requested %d pixelclock, ref %d\n", pixelclock,
+		refclk);
+	best_delta = pixelclock;
+	/* Loop over all possible ext_divs, skipping invalid configurations */
+	for (i_pre = 0; i_pre < ARRAY_SIZE(ext_div); i_pre++) {
+		/*
+		 * refclk / ext_pre_div should be in the 1 to 200 MHz range.
+		 * We don't allow any refclk > 200 MHz, only check lower bounds.
+		 */
+		if (refclk / ext_div[i_pre] < 1000000)
+			continue;
+		for (i_post = 0; i_post < ARRAY_SIZE(ext_div); i_post++) {
+			for (div = 1; div <= 16; div++) {
+				u32 clk;
+				u64 tmp;
+
+				tmp = pixelclock * ext_div[i_pre] *
+				      ext_div[i_post] * div;
+				do_div(tmp, refclk);
+				mul = tmp;
+
+				/* Check limits */
+				if ((mul < 1) || (mul > 128))
+					continue;
+
+				clk = (refclk / ext_div[i_pre] / div) * mul;
+				/*
+				 * refclk * mul / (ext_pre_div * pre_div)
+				 * should be in the 150 to 650 MHz range
+				 */
+				if ((clk > 650000000) || (clk < 150000000))
+					continue;
+
+				clk = clk / ext_div[i_post];
+				delta = clk - pixelclock;
+
+				if (abs(delta) < abs(best_delta)) {
+					best_pre = i_pre;
+					best_post = i_post;
+					best_div = div;
+					best_mul = mul;
+					best_delta = delta;
+					best_pixelclock = clk;
+				}
+			}
+		}
+	}
+	if (best_pixelclock == 0) {
+		dev_err(tc->dev, "Failed to calc clock for %d pixelclock\n",
+			pixelclock);
+		return -EINVAL;
+	}
+
+	dev_dbg(tc->dev, "PLL: got %d, delta %d\n", best_pixelclock,
+		best_delta);
+	dev_dbg(tc->dev, "PLL: %d / %d / %d * %d / %d\n", refclk,
+		ext_div[best_pre], best_div, best_mul, ext_div[best_post]);
+
+	/* if VCO >= 300 MHz */
+	if (refclk / ext_div[best_pre] / best_div * best_mul >= 300000000)
+		vco_hi = 1;
+	/* see DS */
+	if (best_div == 16)
+		best_div = 0;
+	if (best_mul == 128)
+		best_mul = 0;
+
+	/* Power up PLL and switch to bypass */
+	tc_write(PXL_PLLCTRL, PLLBYP | PLLEN);
+
+	tc_write(PXL_PLLPARAM,
+		 (vco_hi << 24) |		/* For PLL VCO >= 300 MHz = 1 */
+		 (ext_div[best_pre] << 20) |	/* External Pre-divider */
+		 (ext_div[best_post] << 16) |	/* External Post-divider */
+		 IN_SEL_REFCLK |		/* Use RefClk as PLL input */
+		 (best_div << 8) |		/* Divider for PLL RefClk */
+		 (best_mul << 0));		/* Multiplier for PLL */
+
+	/* Force PLL parameter update and disable bypass */
+	tc_write(PXL_PLLCTRL, PLLUPDATE | PLLEN);
+
+	tc_wait_pll_lock(tc);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_pxl_pll_dis(struct tc_data *tc)
+{
+	/* Enable PLL bypass, power down PLL */
+	return regmap_write(tc->regmap, PXL_PLLCTRL, PLLBYP);
+}
+
+static int tc_stream_clock_calc(struct tc_data *tc)
+{
+	int ret;
+	/*
+	 * If the Stream clock and Link Symbol clock are
+	 * asynchronous with each other, the value of M changes over
+	 * time. This way of generating link clock and stream
+	 * clock is called Asynchronous Clock mode. The value M
+	 * must change while the value N stays constant. The
+	 * value of N in this Asynchronous Clock mode must be set
+	 * to 2^15 or 32,768.
+	 *
+	 * LSCLK = 1/10 of high speed link clock
+	 *
+	 * f_STRMCLK = M/N * f_LSCLK
+	 * M/N = f_STRMCLK / f_LSCLK
+	 *
+	 */
+	tc_write(DP0_VIDMNGEN1, 32768);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_aux_link_setup(struct tc_data *tc)
+{
+	unsigned long rate;
+	u32 value;
+	int ret;
+
+	rate = clk_get_rate(tc->refclk);
+	switch (rate) {
+	case 38400000:
+		value = REF_FREQ_38M4;
+		break;
+	case 26000000:
+		value = REF_FREQ_26M;
+		break;
+	case 19200000:
+		value = REF_FREQ_19M2;
+		break;
+	case 13000000:
+		value = REF_FREQ_13M;
+		break;
+	default:
+		dev_err(tc->dev, "Invalid refclk rate: %lu Hz\n", rate);
+		return -EINVAL;
+	}
+
+	/* Setup DP-PHY / PLL */
+	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
+	tc_write(SYS_PLLPARAM, value);
+
+	tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN);
+
+	/*
+	 * Initially PLLs are in bypass. Force PLL parameter update,
+	 * disable PLL bypass, enable PLL
+	 */
+	tc_write(DP0_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	tc_write(DP1_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	ret = tc_poll_timeout(tc->regmap, DP_PHY_CTRL, PHY_RDY, PHY_RDY, 1,
+			      1000);
+	if (ret == -ETIMEDOUT) {
+		dev_err(tc->dev, "Timeout waiting for PHY to become ready");
+		return ret;
+	} else if (ret)
+		goto err;
+
+	/* Setup AUX link */
+	tc_write(DP0_AUXCFG1, AUX_RX_FILTER_EN |
+		 (0x06 << 8) |	/* Aux Bit Period Calculator Threshold */
+		 (0x3f << 0));	/* Aux Response Timeout Timer */
+
+	return 0;
+err:
+	dev_err(tc->dev, "tc_aux_link_setup failed: %d\n", ret);
+	return ret;
+}
+
+static int tc_get_display_props(struct tc_data *tc)
+{
+	int ret;
+	/* temp buffer */
+	u8 tmp[8];
+
+	/* Read DP Rx Link Capability */
+	ret = drm_dp_link_probe(&tc->aux, &tc->link.base);
+	if (ret < 0)
+		goto err_dpcd_read;
+	if ((tc->link.base.rate != 162000) && (tc->link.base.rate != 270000))
+		goto err_dpcd_inval;
+
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAX_DOWNSPREAD, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.spread = tmp[0] & BIT(0); /* 0.5% down spread */
+
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_MAIN_LINK_CHANNEL_CODING, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.coding8b10b = tmp[0] & BIT(0);
+	tc->link.scrambler_dis = 0;
+	/* read assr */
+	ret = drm_dp_dpcd_readb(&tc->aux, DP_EDP_CONFIGURATION_SET, tmp);
+	if (ret < 0)
+		goto err_dpcd_read;
+	tc->link.assr = tmp[0] & DP_ALTERNATE_SCRAMBLER_RESET_ENABLE;
+
+	dev_dbg(tc->dev, "DPCD rev: %d.%d, rate: %s, lanes: %d, framing: %s\n",
+		tc->link.base.revision >> 4, tc->link.base.revision & 0x0f,
+		(tc->link.base.rate == 162000) ? "1.62Gbps" : "2.7Gbps",
+		tc->link.base.num_lanes,
+		(tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING) ?
+		"enhanced" : "non-enhanced");
+	dev_dbg(tc->dev, "ANSI 8B/10B: %d\n", tc->link.coding8b10b);
+	dev_dbg(tc->dev, "Display ASSR: %d, TC358767 ASSR: %d\n",
+		tc->link.assr, tc->assr);
+
+	return 0;
+
+err_dpcd_read:
+	dev_err(tc->dev, "failed to read DPCD: %d\n", ret);
+	return ret;
+err_dpcd_inval:
+	dev_err(tc->dev, "invalid DPCD\n");
+	return -EINVAL;
+}
+
+static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode)
+{
+	int ret;
+	int vid_sync_dly;
+	int max_tu_symbol;
+
+	int left_margin = mode->htotal - mode->hsync_end;
+	int right_margin = mode->hsync_start - mode->hdisplay;
+	int hsync_len = mode->hsync_end - mode->hsync_start;
+	int upper_margin = mode->vtotal - mode->vsync_end;
+	int lower_margin = mode->vsync_start - mode->vdisplay;
+	int vsync_len = mode->vsync_end - mode->vsync_start;
+
+	dev_dbg(tc->dev, "set mode %dx%d\n",
+		mode->hdisplay, mode->vdisplay);
+	dev_dbg(tc->dev, "H margin %d,%d sync %d\n",
+		left_margin, right_margin, hsync_len);
+	dev_dbg(tc->dev, "V margin %d,%d sync %d\n",
+		upper_margin, lower_margin, vsync_len);
+	dev_dbg(tc->dev, "total: %dx%d\n", mode->htotal, mode->vtotal);
+
+
+	/* LCD Ctl Frame Size */
+	tc_write(VPCTRL0, (0x40 << 20) /* VSDELAY */ |
+		 OPXLFMT_RGB888 | FRMSYNC_DISABLED | MSF_DISABLED);
+	tc_write(HTIM01, (left_margin << 16) |		/* H back porch */
+			 (hsync_len << 0));		/* Hsync */
+	tc_write(HTIM02, (right_margin << 16) |		/* H front porch */
+			 (mode->hdisplay << 0));	/* width */
+	tc_write(VTIM01, (upper_margin << 16) |		/* V back porch */
+			 (vsync_len << 0));		/* Vsync */
+	tc_write(VTIM02, (lower_margin << 16) |		/* V front porch */
+			 (mode->vdisplay << 0));	/* height */
+	tc_write(VFUEN0, VFUEN);		/* update settings */
+
+	/* Test pattern settings */
+	tc_write(TSTCTL,
+		 (120 << 24) |	/* Red Color component value */
+		 (20 << 16) |	/* Green Color component value */
+		 (99 << 8) |	/* Blue Color component value */
+		 (1 << 4) |	/* Enable I2C Filter */
+		 (2 << 0) |	/* Color bar Mode */
+		 0);
+
+	/* DP Main Stream Attributes */
+	vid_sync_dly = hsync_len + left_margin + mode->hdisplay;
+	tc_write(DP0_VIDSYNCDELAY,
+		 (0x003e << 16) |	/* thresh_dly */
+		 (vid_sync_dly << 0));
+
+	tc_write(DP0_TOTALVAL, (mode->vtotal << 16) | (mode->htotal));
+
+	tc_write(DP0_STARTVAL,
+		 ((upper_margin + vsync_len) << 16) |
+		 ((left_margin + hsync_len) << 0));
+
+	tc_write(DP0_ACTIVEVAL, (mode->vdisplay << 16) | (mode->hdisplay));
+
+	tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0));
+
+	tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW |
+		 DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888);
+
+	/*
+	 * Recommended maximum number of symbols transferred in a transfer unit:
+	 * DIV_ROUND_UP((input active video bandwidth in bytes) * tu_size,
+	 *              (output active video bandwidth in bytes))
+	 * Must be less than tu_size.
+	 */
+	max_tu_symbol = TU_SIZE_RECOMMENDED - 1;
+	tc_write(DP0_MISC, (max_tu_symbol << 23) | TU_SIZE_RECOMMENDED | BPC_8);
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_link_training(struct tc_data *tc, int pattern)
+{
+	const char * const *errors;
+	u32 srcctrl = tc_srcctrl(tc) | DP0_SRCCTRL_SCRMBLDIS |
+		      DP0_SRCCTRL_AUTOCORRECT;
+	int timeout;
+	int retry;
+	u32 value;
+	int ret;
+
+	if (pattern == DP_TRAINING_PATTERN_1) {
+		srcctrl |= DP0_SRCCTRL_TP1;
+		errors = training_pattern1_errors;
+	} else {
+		srcctrl |= DP0_SRCCTRL_TP2;
+		errors = training_pattern2_errors;
+	}
+
+	/* Set DPCD 0x102 for Training Part 1 or 2 */
+	tc_write(DP0_SNKLTCTRL, DP_LINK_SCRAMBLING_DISABLE | pattern);
+
+	tc_write(DP0_LTLOOPCTRL,
+		 (0x0f << 28) |	/* Defer Iteration Count */
+		 (0x0f << 24) |	/* Loop Iteration Count */
+		 (0x0d << 0));	/* Loop Timer Delay */
+
+	retry = 5;
+	do {
+		/* Set DP0 Training Pattern */
+		tc_write(DP0_SRCCTRL, srcctrl);
+
+		/* Enable DP0 to start Link Training */
+		tc_write(DP0CTL, DP_EN);
+
+		/* wait */
+		timeout = 1000;
+		do {
+			tc_read(DP0_LTSTAT, &value);
+			udelay(1);
+		} while ((!(value & LT_LOOPDONE)) && (--timeout));
+		if (timeout == 0) {
+			dev_err(tc->dev, "Link training timeout!\n");
+		} else {
+			int pattern = (value >> 11) & 0x3;
+			int error = (value >> 8) & 0x7;
+
+			dev_dbg(tc->dev,
+				"Link training phase %d done after %d uS: %s\n",
+				pattern, 1000 - timeout, errors[error]);
+			if (pattern == DP_TRAINING_PATTERN_1 && error == 0)
+				break;
+			if (pattern == DP_TRAINING_PATTERN_2) {
+				value &= LT_CHANNEL1_EQ_BITS |
+					 LT_INTERLANE_ALIGN_DONE |
+					 LT_CHANNEL0_EQ_BITS;
+				/* in case of two lanes */
+				if ((tc->link.base.num_lanes == 2) &&
+				    (value == (LT_CHANNEL1_EQ_BITS |
+					       LT_INTERLANE_ALIGN_DONE |
+					       LT_CHANNEL0_EQ_BITS)))
+					break;
+				/* in case of one line */
+				if ((tc->link.base.num_lanes == 1) &&
+				    (value == (LT_INTERLANE_ALIGN_DONE |
+					       LT_CHANNEL0_EQ_BITS)))
+					break;
+			}
+		}
+		/* restart */
+		tc_write(DP0CTL, 0);
+		usleep_range(10, 20);
+	} while (--retry);
+	if (retry == 0) {
+		dev_err(tc->dev, "Failed to finish training phase %d\n",
+			pattern);
+	}
+
+	return 0;
+err:
+	return ret;
+}
+
+static int tc_main_link_setup(struct tc_data *tc)
+{
+	struct drm_dp_aux *aux = &tc->aux;
+	struct device *dev = tc->dev;
+	unsigned int rate;
+	u32 dp_phy_ctrl;
+	int timeout;
+	bool aligned;
+	bool ready;
+	u32 value;
+	int ret;
+	u8 tmp[8];
+
+	/* display mode should be set at this point */
+	if (!tc->mode)
+		return -EINVAL;
+
+	/* from excel file - DP0_SrcCtrl */
+	tc_write(DP0_SRCCTRL, DP0_SRCCTRL_SCRMBLDIS | DP0_SRCCTRL_EN810B |
+		 DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 |
+		 DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT);
+	/* from excel file - DP1_SrcCtrl */
+	tc_write(0x07a0, 0x00003083);
+
+	rate = clk_get_rate(tc->refclk);
+	switch (rate) {
+	case 38400000:
+		value = REF_FREQ_38M4;
+		break;
+	case 26000000:
+		value = REF_FREQ_26M;
+		break;
+	case 19200000:
+		value = REF_FREQ_19M2;
+		break;
+	case 13000000:
+		value = REF_FREQ_13M;
+		break;
+	default:
+		return -EINVAL;
+	}
+	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
+	tc_write(SYS_PLLPARAM, value);
+	/* Setup Main Link */
+	dp_phy_ctrl = BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN |  PHY_M0_EN;
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+	msleep(100);
+
+	/* PLL setup */
+	tc_write(DP0_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	tc_write(DP1_PLLCTRL, PLLUPDATE | PLLEN);
+	tc_wait_pll_lock(tc);
+
+	/* PXL PLL setup */
+	if (tc_test_pattern) {
+		ret = tc_pxl_pll_en(tc, clk_get_rate(tc->refclk),
+				    1000 * tc->mode->clock);
+		if (ret)
+			goto err;
+	}
+
+	/* Reset/Enable Main Links */
+	dp_phy_ctrl |= DP_PHY_RST | PHY_M1_RST | PHY_M0_RST;
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+	usleep_range(100, 200);
+	dp_phy_ctrl &= ~(DP_PHY_RST | PHY_M1_RST | PHY_M0_RST);
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
+
+	timeout = 1000;
+	do {
+		tc_read(DP_PHY_CTRL, &value);
+		udelay(1);
+	} while ((!(value & PHY_RDY)) && (--timeout));
+
+	if (timeout == 0) {
+		dev_err(dev, "timeout waiting for phy become ready");
+		return -ETIMEDOUT;
+	}
+
+	/* Set misc: 8 bits per color */
+	ret = regmap_update_bits(tc->regmap, DP0_MISC, BPC_8, BPC_8);
+	if (ret)
+		goto err;
+
+	/*
+	 * ASSR mode
+	 * on TC358767 side ASSR configured through strap pin
+	 * seems there is no way to change this setting from SW
+	 *
+	 * check is tc configured for same mode
+	 */
+	if (tc->assr != tc->link.assr) {
+		dev_dbg(dev, "Trying to set display to ASSR: %d\n",
+			tc->assr);
+		/* try to set ASSR on display side */
+		tmp[0] = tc->assr;
+		ret = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, tmp[0]);
+		if (ret < 0)
+			goto err_dpcd_read;
+		/* read back */
+		ret = drm_dp_dpcd_readb(aux, DP_EDP_CONFIGURATION_SET, tmp);
+		if (ret < 0)
+			goto err_dpcd_read;
+
+		if (tmp[0] != tc->assr) {
+			dev_warn(dev, "Failed to switch display ASSR to %d, falling back to unscrambled mode\n",
+				 tc->assr);
+			/* trying with disabled scrambler */
+			tc->link.scrambler_dis = 1;
+		}
+	}
+
+	/* Setup Link & DPRx Config for Training */
+	ret = drm_dp_link_configure(aux, &tc->link.base);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	/* DOWNSPREAD_CTRL */
+	tmp[0] = tc->link.spread ? DP_SPREAD_AMP_0_5 : 0x00;
+	/* MAIN_LINK_CHANNEL_CODING_SET */
+	tmp[1] =  tc->link.coding8b10b ? DP_SET_ANSI_8B10B : 0x00;
+	ret = drm_dp_dpcd_write(aux, DP_DOWNSPREAD_CTRL, tmp, 2);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	ret = tc_link_training(tc, DP_TRAINING_PATTERN_1);
+	if (ret)
+		goto err;
+
+	ret = tc_link_training(tc, DP_TRAINING_PATTERN_2);
+	if (ret)
+		goto err;
+
+	/* Clear DPCD 0x102 */
+	/* Note: Can Not use DP0_SNKLTCTRL (0x06E4) short cut */
+	tmp[0] = tc->link.scrambler_dis ? DP_LINK_SCRAMBLING_DISABLE : 0x00;
+	ret = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, tmp[0]);
+	if (ret < 0)
+		goto err_dpcd_write;
+
+	/* Clear Training Pattern, set AutoCorrect Mode = 1 */
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc) | DP0_SRCCTRL_AUTOCORRECT);
+
+	/* Wait */
+	timeout = 100;
+	do {
+		udelay(1);
+		/* Read DPCD 0x202-0x207 */
+		ret = drm_dp_dpcd_read_link_status(aux, tmp + 2);
+		if (ret < 0)
+			goto err_dpcd_read;
+		ready = (tmp[2] == ((DP_CHANNEL_EQ_BITS << 4) | /* Lane1 */
+				     DP_CHANNEL_EQ_BITS));      /* Lane0 */
+		aligned = tmp[4] & DP_INTERLANE_ALIGN_DONE;
+	} while ((--timeout) && !(ready && aligned));
+
+	if (timeout == 0) {
+		/* Read DPCD 0x200-0x201 */
+		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT, tmp, 2);
+		if (ret < 0)
+			goto err_dpcd_read;
+		dev_info(dev, "0x0200 SINK_COUNT: 0x%02x\n", tmp[0]);
+		dev_info(dev, "0x0201 DEVICE_SERVICE_IRQ_VECTOR: 0x%02x\n",
+			 tmp[1]);
+		dev_info(dev, "0x0202 LANE0_1_STATUS: 0x%02x\n", tmp[2]);
+		dev_info(dev, "0x0204 LANE_ALIGN_STATUS_UPDATED: 0x%02x\n",
+			 tmp[4]);
+		dev_info(dev, "0x0205 SINK_STATUS: 0x%02x\n", tmp[5]);
+		dev_info(dev, "0x0206 ADJUST_REQUEST_LANE0_1: 0x%02x\n",
+			 tmp[6]);
+
+		if (!ready)
+			dev_err(dev, "Lane0/1 not ready\n");
+		if (!aligned)
+			dev_err(dev, "Lane0/1 not aligned\n");
+		return -EAGAIN;
+	}
+
+	ret = tc_set_video_mode(tc, tc->mode);
+	if (ret)
+		goto err;
+
+	/* Set M/N */
+	ret = tc_stream_clock_calc(tc);
+	if (ret)
+		goto err;
+
+	return 0;
+err_dpcd_read:
+	dev_err(tc->dev, "Failed to read DPCD: %d\n", ret);
+	return ret;
+err_dpcd_write:
+	dev_err(tc->dev, "Failed to write DPCD: %d\n", ret);
+err:
+	return ret;
+}
+
+static int tc_main_link_stream(struct tc_data *tc, int state)
+{
+	int ret;
+	u32 value;
+
+	dev_dbg(tc->dev, "stream: %d\n", state);
+
+	if (state) {
+		value = VID_MN_GEN | DP_EN;
+		if (tc->link.base.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+			value |= EF_EN;
+		tc_write(DP0CTL, value);
+		/*
+		 * VID_EN assertion should be delayed by at least N * LSCLK
+		 * cycles from the time VID_MN_GEN is enabled in order to
+		 * generate stable values for VID_M. LSCLK is 270 MHz or
+		 * 162 MHz, VID_N is set to 32768 in  tc_stream_clock_calc(),
+		 * so a delay of at least 203 us should suffice.
+		 */
+		usleep_range(500, 1000);
+		value |= VID_EN;
+		tc_write(DP0CTL, value);
+		/* Set input interface */
+		value = DP0_AUDSRC_NO_INPUT;
+		if (tc_test_pattern)
+			value |= DP0_VIDSRC_COLOR_BAR;
+		else
+			value |= DP0_VIDSRC_DPI_RX;
+		tc_write(SYSCTRL, value);
+	} else {
+		tc_write(DP0CTL, 0);
+	}
+
+	return 0;
+err:
+	return ret;
+}
+
+static enum drm_connector_status
+tc_connector_detect(struct drm_connector *connector, bool force)
+{
+	return connector_status_connected;
+}
+
+static void tc_bridge_pre_enable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	drm_panel_prepare(tc->panel);
+}
+
+static void tc_bridge_enable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+	int ret;
+
+	ret = tc_main_link_setup(tc);
+	if (ret < 0) {
+		dev_err(tc->dev, "main link setup error: %d\n", ret);
+		return;
+	}
+
+	ret = tc_main_link_stream(tc, 1);
+	if (ret < 0) {
+		dev_err(tc->dev, "main link stream start error: %d\n", ret);
+		return;
+	}
+
+	drm_panel_enable(tc->panel);
+}
+
+static void tc_bridge_disable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+	int ret;
+
+	drm_panel_disable(tc->panel);
+
+	ret = tc_main_link_stream(tc, 0);
+	if (ret < 0)
+		dev_err(tc->dev, "main link stream stop error: %d\n", ret);
+}
+
+static void tc_bridge_post_disable(struct drm_bridge *bridge)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	drm_panel_unprepare(tc->panel);
+}
+
+static bool tc_bridge_mode_fixup(struct drm_bridge *bridge,
+				 const struct drm_display_mode *mode,
+				 struct drm_display_mode *adj)
+{
+	/* Fixup sync polarities, both hsync and vsync are active low */
+	adj->flags = mode->flags;
+	adj->flags |= (DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC);
+	adj->flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC);
+
+	return true;
+}
+
+static int tc_connector_mode_valid(struct drm_connector *connector,
+				   struct drm_display_mode *mode)
+{
+	/* Accept any mode */
+	return MODE_OK;
+}
+
+static void tc_bridge_mode_set(struct drm_bridge *bridge,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adj)
+{
+	struct tc_data *tc = bridge_to_tc(bridge);
+
+	tc->mode = mode;
+}
+
+static int tc_connector_get_modes(struct drm_connector *connector)
+{
+	struct tc_data *tc = connector_to_tc(connector);
+	struct edid *edid;
+	unsigned int count;
+
+	if (tc->panel && tc->panel->funcs && tc->panel->funcs->get_modes) {
+		count = tc->panel->funcs->get_modes(tc->panel);
+		if (count > 0)
+			return count;
+	}
+
+	edid = drm_get_edid(connector, &tc->aux.ddc);
+
+	kfree(tc->edid);
+	tc->edid = edid;
+	if (!edid)
+		return 0;
+
+	drm_mode_connector_update_edid_property(connector, edid);
+	count = drm_add_edid_modes(connector, edid);
+
+	return count;
+}
+
+static void tc_connector_set_polling(struct tc_data *tc,
+				     struct drm_connector *connector)
+{
+	/* TODO: add support for HPD */
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			    DRM_CONNECTOR_POLL_DISCONNECT;
+}
+
+static struct drm_encoder *
+tc_connector_best_encoder(struct drm_connector *connector)
+{
+	struct tc_data *tc = connector_to_tc(connector);
+
+	return tc->bridge.encoder;
+}
+
+static const struct drm_connector_helper_funcs tc_connector_helper_funcs = {
+	.get_modes = tc_connector_get_modes,
+	.mode_valid = tc_connector_mode_valid,
+	.best_encoder = tc_connector_best_encoder,
+};
+
+static void tc_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs tc_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.detect = tc_connector_detect,
+	.destroy = tc_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int tc_bridge_attach(struct drm_bridge *bridge)
+{
+	u32 bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	struct tc_data *tc = bridge_to_tc(bridge);
+	struct drm_device *drm = bridge->dev;
+	int ret;
+
+	/* Create eDP connector */
+	drm_connector_helper_add(&tc->connector, &tc_connector_helper_funcs);
+	ret = drm_connector_init(drm, &tc->connector, &tc_connector_funcs,
+				 DRM_MODE_CONNECTOR_eDP);
+	if (ret)
+		return ret;
+
+	if (tc->panel)
+		drm_panel_attach(tc->panel, &tc->connector);
+
+	drm_display_info_set_bus_formats(&tc->connector.display_info,
+					 &bus_format, 1);
+	drm_mode_connector_attach_encoder(&tc->connector, tc->bridge.encoder);
+
+	return 0;
+}
+
+static const struct drm_bridge_funcs tc_bridge_funcs = {
+	.attach = tc_bridge_attach,
+	.mode_set = tc_bridge_mode_set,
+	.pre_enable = tc_bridge_pre_enable,
+	.enable = tc_bridge_enable,
+	.disable = tc_bridge_disable,
+	.post_disable = tc_bridge_post_disable,
+	.mode_fixup = tc_bridge_mode_fixup,
+};
+
+static bool tc_readable_reg(struct device *dev, unsigned int reg)
+{
+	return reg != SYSCTRL;
+}
+
+static const struct regmap_range tc_volatile_ranges[] = {
+	regmap_reg_range(DP0_AUXWDATA(0), DP0_AUXSTATUS),
+	regmap_reg_range(DP0_LTSTAT, DP0_SNKLTCHGREQ),
+	regmap_reg_range(DP_PHY_CTRL, DP_PHY_CTRL),
+	regmap_reg_range(DP0_PLLCTRL, PXL_PLLCTRL),
+	regmap_reg_range(VFUEN0, VFUEN0),
+};
+
+static const struct regmap_access_table tc_volatile_table = {
+	.yes_ranges = tc_volatile_ranges,
+	.n_yes_ranges = ARRAY_SIZE(tc_volatile_ranges),
+};
+
+static bool tc_writeable_reg(struct device *dev, unsigned int reg)
+{
+	return (reg != TC_IDREG) &&
+	       (reg != DP0_LTSTAT) &&
+	       (reg != DP0_SNKLTCHGREQ);
+}
+
+static const struct regmap_config tc_regmap_config = {
+	.name = "tc358767",
+	.reg_bits = 16,
+	.val_bits = 32,
+	.reg_stride = 4,
+	.max_register = PLL_DBG,
+	.cache_type = REGCACHE_RBTREE,
+	.readable_reg = tc_readable_reg,
+	.volatile_table = &tc_volatile_table,
+	.writeable_reg = tc_writeable_reg,
+	.reg_format_endian = REGMAP_ENDIAN_BIG,
+	.val_format_endian = REGMAP_ENDIAN_LITTLE,
+};
+
+static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
+{
+	struct device *dev = &client->dev;
+	struct device_node *ep;
+	struct tc_data *tc;
+	int ret;
+
+	tc = devm_kzalloc(dev, sizeof(*tc), GFP_KERNEL);
+	if (!tc)
+		return -ENOMEM;
+
+	tc->dev = dev;
+
+	/* port@2 is the output port */
+	ep = of_graph_get_endpoint_by_regs(dev->of_node, 2, -1);
+	if (ep) {
+		struct device_node *remote;
+
+		remote = of_graph_get_remote_port_parent(ep);
+		if (!remote) {
+			dev_warn(dev, "endpoint %s not connected\n",
+				 ep->full_name);
+			of_node_put(ep);
+			return -ENODEV;
+		}
+		of_node_put(ep);
+		tc->panel = of_drm_find_panel(remote);
+		if (tc->panel) {
+			dev_dbg(dev, "found panel %s\n", remote->full_name);
+		} else {
+			dev_dbg(dev, "waiting for panel %s\n",
+				remote->full_name);
+			of_node_put(remote);
+			return -EPROBE_DEFER;
+		}
+		of_node_put(remote);
+	}
+
+	/* Shut down GPIO is optional */
+	tc->sd_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH);
+	if (IS_ERR(tc->sd_gpio))
+		return PTR_ERR(tc->sd_gpio);
+
+	if (tc->sd_gpio) {
+		gpiod_set_value_cansleep(tc->sd_gpio, 0);
+		usleep_range(5000, 10000);
+	}
+
+	/* Reset GPIO is optional */
+	tc->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(tc->reset_gpio))
+		return PTR_ERR(tc->reset_gpio);
+
+	if (tc->reset_gpio) {
+		gpiod_set_value_cansleep(tc->reset_gpio, 1);
+		usleep_range(5000, 10000);
+	}
+
+	tc->refclk = devm_clk_get(dev, "ref");
+	if (IS_ERR(tc->refclk)) {
+		ret = PTR_ERR(tc->refclk);
+		dev_err(dev, "Failed to get refclk: %d\n", ret);
+		return ret;
+	}
+
+	tc->regmap = devm_regmap_init_i2c(client, &tc_regmap_config);
+	if (IS_ERR(tc->regmap)) {
+		ret = PTR_ERR(tc->regmap);
+		dev_err(dev, "Failed to initialize regmap: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_read(tc->regmap, TC_IDREG, &tc->rev);
+	if (ret) {
+		dev_err(tc->dev, "can not read device ID: %d\n", ret);
+		return ret;
+	}
+
+	if ((tc->rev != 0x6601) && (tc->rev != 0x6603)) {
+		dev_err(tc->dev, "invalid device ID: 0x%08x\n", tc->rev);
+		return -EINVAL;
+	}
+
+	tc->assr = (tc->rev == 0x6601); /* Enable ASSR for eDP panels */
+
+	ret = tc_aux_link_setup(tc);
+	if (ret)
+		return ret;
+
+	/* Register DP AUX channel */
+	tc->aux.name = "TC358767 AUX i2c adapter";
+	tc->aux.dev = tc->dev;
+	tc->aux.transfer = tc_aux_transfer;
+	ret = drm_dp_aux_register(&tc->aux);
+	if (ret)
+		return ret;
+
+	ret = tc_get_display_props(tc);
+	if (ret)
+		goto err_unregister_aux;
+
+	tc_connector_set_polling(tc, &tc->connector);
+
+	tc->bridge.funcs = &tc_bridge_funcs;
+	tc->bridge.of_node = dev->of_node;
+	ret = drm_bridge_add(&tc->bridge);
+	if (ret) {
+		dev_err(dev, "Failed to add drm_bridge: %d\n", ret);
+		goto err_unregister_aux;
+	}
+
+	i2c_set_clientdata(client, tc);
+
+	return 0;
+err_unregister_aux:
+	drm_dp_aux_unregister(&tc->aux);
+	return ret;
+}
+
+static int tc_remove(struct i2c_client *client)
+{
+	struct tc_data *tc = i2c_get_clientdata(client);
+
+	drm_bridge_remove(&tc->bridge);
+	drm_dp_aux_unregister(&tc->aux);
+
+	tc_pxl_pll_dis(tc);
+
+	return 0;
+}
+
+static const struct i2c_device_id tc358767_i2c_ids[] = {
+	{ "tc358767", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tc358767_i2c_ids);
+
+static const struct of_device_id tc358767_of_ids[] = {
+	{ .compatible = "toshiba,tc358767", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, tc358767_of_ids);
+
+static struct i2c_driver tc358767_driver = {
+	.driver = {
+		.name = "tc358767",
+		.of_match_table = tc358767_of_ids,
+	},
+	.id_table = tc358767_i2c_ids,
+	.probe = tc_probe,
+	.remove	= tc_remove,
+};
+module_i2c_driver(tc358767_driver);
+
+MODULE_AUTHOR("Andrey Gusakov <andrey.gusakov@cogentembedded.com>");
+MODULE_DESCRIPTION("tc358767 eDP encoder driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/cirrus/Kconfig b/drivers/gpu/drm/cirrus/Kconfig
index 9864559e5fb9..04b3c161dfae 100644
--- a/drivers/gpu/drm/cirrus/Kconfig
+++ b/drivers/gpu/drm/cirrus/Kconfig
@@ -1,11 +1,7 @@
 config DRM_CIRRUS_QEMU
 	tristate "Cirrus driver for QEMU emulated device"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 This is a KMS driver for emulated cirrus device in qemu.
diff --git a/drivers/gpu/drm/cirrus/cirrus_main.c b/drivers/gpu/drm/cirrus/cirrus_main.c
index 32d32c5b7b17..80446e2d3ab6 100644
--- a/drivers/gpu/drm/cirrus/cirrus_main.c
+++ b/drivers/gpu/drm/cirrus/cirrus_main.c
@@ -17,8 +17,8 @@
 static void cirrus_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct cirrus_framebuffer *cirrus_fb = to_cirrus_framebuffer(fb);
-	if (cirrus_fb->obj)
-		drm_gem_object_unreference_unlocked(cirrus_fb->obj);
+
+	drm_gem_object_unreference_unlocked(cirrus_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 6768b7b1af32..1cc9ee607128 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -186,17 +186,6 @@ static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
-static int cirrus_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void cirrus_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.ttm_tt_unpopulate = cirrus_ttm_tt_unpopulate,
 	.init_mem_type = cirrus_bo_init_mem_type,
 	.evict_flags = cirrus_bo_evict_flags,
-	.move = cirrus_bo_move,
+	.move = NULL,
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index d99ab2f6663f..8d2f111fa113 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -404,8 +404,7 @@ drm_atomic_replace_property_blob(struct drm_property_blob **blob,
 	if (old_blob == new_blob)
 		return;
 
-	if (old_blob)
-		drm_property_unreference_blob(old_blob);
+	drm_property_unreference_blob(old_blob);
 	if (new_blob)
 		drm_property_reference_blob(new_blob);
 	*blob = new_blob;
@@ -1299,14 +1298,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
  */
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	unsigned crtc_mask = 0;
+	struct drm_crtc *crtc;
 	int ret;
+	bool global = false;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->acquire_ctx != state->acquire_ctx)
+			continue;
+
+		crtc_mask |= drm_crtc_mask(crtc);
+		crtc->acquire_ctx = NULL;
+	}
+
+	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
+		global = true;
+
+		dev->mode_config.acquire_ctx = NULL;
+	}
 
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
+
+	drm_for_each_crtc(crtc, dev)
+		if (drm_crtc_mask(crtc) & crtc_mask)
+			crtc->acquire_ctx = state->acquire_ctx;
+
+	if (global)
+		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
 EXPORT_SYMBOL(drm_atomic_legacy_backoff);
 
diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
index 059f7c39c582..a7916e5f8864 100644
--- a/drivers/gpu/drm/drm_cache.c
+++ b/drivers/gpu/drm/drm_cache.c
@@ -136,6 +136,7 @@ drm_clflush_virt_range(void *addr, unsigned long length)
 		mb();
 		for (; addr < end; addr += size)
 			clflushopt(addr);
+		clflushopt(end - 1); /* force serialisation */
 		mb();
 		return;
 	}
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index fd93e9c79d28..f1d9f0569d7f 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -396,6 +396,51 @@ void drm_mode_object_reference(struct drm_mode_object *obj)
 }
 EXPORT_SYMBOL(drm_mode_object_reference);
 
+/**
+ * drm_crtc_force_disable - Forcibly turn off a CRTC
+ * @crtc: CRTC to turn off
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_crtc_force_disable(struct drm_crtc *crtc)
+{
+	struct drm_mode_set set = {
+		.crtc = crtc,
+	};
+
+	return drm_mode_set_config_internal(&set);
+}
+EXPORT_SYMBOL(drm_crtc_force_disable);
+
+/**
+ * drm_crtc_force_disable_all - Forcibly turn off all enabled CRTCs
+ * @dev: DRM device whose CRTCs to turn off
+ *
+ * Drivers may want to call this on unload to ensure that all displays are
+ * unlit and the GPU is in a consistent, low power state. Takes modeset locks.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int drm_crtc_force_disable_all(struct drm_device *dev)
+{
+	struct drm_crtc *crtc;
+	int ret = 0;
+
+	drm_modeset_lock_all(dev);
+	drm_for_each_crtc(crtc, dev)
+		if (crtc->enabled) {
+			ret = drm_crtc_force_disable(crtc);
+			if (ret)
+				goto out;
+		}
+out:
+	drm_modeset_unlock_all(dev);
+	return ret;
+}
+EXPORT_SYMBOL(drm_crtc_force_disable_all);
+
 static void drm_framebuffer_free(struct kref *kref)
 {
 	struct drm_framebuffer *fb =
@@ -544,8 +589,6 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 	struct drm_device *dev;
 	struct drm_crtc *crtc;
 	struct drm_plane *plane;
-	struct drm_mode_set set;
-	int ret;
 
 	if (!fb)
 		return;
@@ -575,11 +618,7 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 		drm_for_each_crtc(crtc, dev) {
 			if (crtc->primary->fb == fb) {
 				/* should turn off the crtc */
-				memset(&set, 0, sizeof(struct drm_mode_set));
-				set.crtc = crtc;
-				set.fb = NULL;
-				ret = drm_mode_set_config_internal(&set);
-				if (ret)
+				if (drm_crtc_force_disable(crtc))
 					DRM_ERROR("failed to reset crtc %p when fb was deleted\n", crtc);
 			}
 		}
@@ -889,11 +928,11 @@ int drm_connector_init(struct drm_device *dev,
 	connector->dev = dev;
 	connector->funcs = funcs;
 
-	connector->connector_id = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
-	if (connector->connector_id < 0) {
-		ret = connector->connector_id;
+	ret = ida_simple_get(&config->connector_ida, 0, 0, GFP_KERNEL);
+	if (ret < 0)
 		goto out_put;
-	}
+	connector->index = ret;
+	ret = 0;
 
 	connector->connector_type = connector_type;
 	connector->connector_type_id =
@@ -941,7 +980,7 @@ out_put_type_id:
 		ida_remove(connector_ida, connector->connector_type_id);
 out_put_id:
 	if (ret)
-		ida_remove(&config->connector_ida, connector->connector_id);
+		ida_remove(&config->connector_ida, connector->index);
 out_put:
 	if (ret)
 		drm_mode_object_unregister(dev, &connector->base);
@@ -985,7 +1024,7 @@ void drm_connector_cleanup(struct drm_connector *connector)
 		   connector->connector_type_id);
 
 	ida_remove(&dev->mode_config.connector_ida,
-		   connector->connector_id);
+		   connector->index);
 
 	kfree(connector->display_info.bus_formats);
 	drm_mode_object_unregister(dev, &connector->base);
@@ -1068,23 +1107,16 @@ void drm_connector_unregister(struct drm_connector *connector)
 }
 EXPORT_SYMBOL(drm_connector_unregister);
 
-/**
- * drm_connector_register_all - register all connectors
- * @dev: drm device
- *
- * This function registers all connectors in sysfs and other places so that
- * userspace can start to access them. drm_connector_register_all() is called
- * automatically from drm_dev_register() to complete the device registration,
- * if they don't call drm_connector_register() on each connector individually.
- *
- * When a device is unplugged and should be removed from userspace access,
- * call drm_connector_unregister_all(), which is the inverse of this
- * function.
- *
- * Returns:
- * Zero on success, error code on failure.
- */
-int drm_connector_register_all(struct drm_device *dev)
+static void drm_connector_unregister_all(struct drm_device *dev)
+{
+	struct drm_connector *connector;
+
+	/* FIXME: taking the mode config mutex ends up in a clash with sysfs */
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
+		drm_connector_unregister(connector);
+}
+
+static int drm_connector_register_all(struct drm_device *dev)
 {
 	struct drm_connector *connector;
 	int ret;
@@ -1106,27 +1138,6 @@ err:
 	drm_connector_unregister_all(dev);
 	return ret;
 }
-EXPORT_SYMBOL(drm_connector_register_all);
-
-/**
- * drm_connector_unregister_all - unregister connector userspace interfaces
- * @dev: drm device
- *
- * This functions unregisters all connectors from sysfs and other places so
- * that userspace can no longer access them. Drivers should call this as the
- * first step tearing down the device instace, or when the underlying
- * physical device disappeared (e.g. USB unplug), right before calling
- * drm_dev_unregister().
- */
-void drm_connector_unregister_all(struct drm_device *dev)
-{
-	struct drm_connector *connector;
-
-	/* FIXME: taking the mode config mutex ends up in a clash with sysfs */
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
-		drm_connector_unregister(connector);
-}
-EXPORT_SYMBOL(drm_connector_unregister_all);
 
 static int drm_encoder_register_all(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index d61591274ff6..604d3ef72ffa 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -531,11 +531,11 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
 	struct drm_device *dev;
-	struct drm_crtc *new_crtc;
-	struct drm_encoder *save_encoders, *new_encoder, *encoder;
+	struct drm_crtc **save_encoder_crtcs, *new_crtc;
+	struct drm_encoder **save_connector_encoders, *new_encoder, *encoder;
 	bool mode_changed = false; /* if true do a full mode set */
 	bool fb_changed = false; /* if true and !mode_changed just do a flip */
-	struct drm_connector *save_connectors, *connector;
+	struct drm_connector *connector;
 	int count = 0, ro, fail = 0;
 	const struct drm_crtc_helper_funcs *crtc_funcs;
 	struct drm_mode_set save_set;
@@ -577,15 +577,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 * Allocate space for the backup of all (non-pointer) encoder and
 	 * connector data.
 	 */
-	save_encoders = kzalloc(dev->mode_config.num_encoder *
-				sizeof(struct drm_encoder), GFP_KERNEL);
-	if (!save_encoders)
+	save_encoder_crtcs = kzalloc(dev->mode_config.num_encoder *
+				sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_encoder_crtcs)
 		return -ENOMEM;
 
-	save_connectors = kzalloc(dev->mode_config.num_connector *
-				sizeof(struct drm_connector), GFP_KERNEL);
-	if (!save_connectors) {
-		kfree(save_encoders);
+	save_connector_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoder *), GFP_KERNEL);
+	if (!save_connector_encoders) {
+		kfree(save_encoder_crtcs);
 		return -ENOMEM;
 	}
 
@@ -596,12 +596,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		save_encoders[count++] = *encoder;
+		save_encoder_crtcs[count++] = encoder->crtc;
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		save_connectors[count++] = *connector;
+		save_connector_encoders[count++] = connector->encoder;
 	}
 
 	save_set.crtc = set->crtc;
@@ -634,8 +634,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		mode_changed = true;
 	}
 
-	/* take a reference on all connectors in set */
+	/* take a reference on all unbound connectors in set, reuse the
+	 * already taken reference for bound connectors
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_reference(set->connectors[ro]);
 	}
 
@@ -757,30 +761,28 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/* after fail drop reference on all connectors in save set */
-	count = 0;
-	drm_for_each_connector(connector, dev) {
-		drm_connector_unreference(&save_connectors[count++]);
-	}
-
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return 0;
 
 fail:
 	/* Restore all previous data. */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		*encoder = save_encoders[count++];
+		encoder->crtc = save_encoder_crtcs[count++];
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		*connector = save_connectors[count++];
+		connector->encoder = save_connector_encoders[count++];
 	}
 
-	/* after fail drop reference on all connectors in set */
+	/* after fail drop reference on all unbound connectors in set, let
+	 * bound connectors keep their reference
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_unreference(set->connectors[ro]);
 	}
 
@@ -790,8 +792,8 @@ fail:
 				      save_set.y, save_set.fb))
 		DRM_ERROR("failed to restore config after modeset failure\n");
 
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return ret;
 }
 EXPORT_SYMBOL(drm_crtc_helper_set_config);
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 3334baacf43d..734f86a345f6 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -355,8 +355,7 @@ int drm_dp_aux_dev_init(void)
 
 	drm_dp_aux_dev_class = class_create(THIS_MODULE, "drm_dp_aux_dev");
 	if (IS_ERR(drm_dp_aux_dev_class)) {
-		res = PTR_ERR(drm_dp_aux_dev_class);
-		goto out;
+		return PTR_ERR(drm_dp_aux_dev_class);
 	}
 	drm_dp_aux_dev_class->dev_groups = drm_dp_aux_groups;
 
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 091053e995e5..8f11b8741e42 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -203,7 +203,7 @@ static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
 
 		ret = aux->transfer(aux, &msg);
 
-		if (ret > 0) {
+		if (ret >= 0) {
 			native_reply = msg.reply & DP_AUX_NATIVE_REPLY_MASK;
 			if (native_reply == DP_AUX_NATIVE_REPLY_ACK) {
 				if (ret == size)
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a13edf5de2d6..04e457117980 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1493,11 +1493,8 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 	WARN_ON(!mutex_is_locked(&mgr->qlock));
 
 	/* construct a chunk from the first msg in the tx_msg queue */
-	if (list_empty(&mgr->tx_msg_downq)) {
-		mgr->tx_down_in_progress = false;
+	if (list_empty(&mgr->tx_msg_downq))
 		return;
-	}
-	mgr->tx_down_in_progress = true;
 
 	txmsg = list_first_entry(&mgr->tx_msg_downq, struct drm_dp_sideband_msg_tx, next);
 	ret = process_single_tx_qlock(mgr, txmsg, false);
@@ -1512,10 +1509,6 @@ static void process_single_down_tx_qlock(struct drm_dp_mst_topology_mgr *mgr)
 		txmsg->state = DRM_DP_SIDEBAND_TX_TIMEOUT;
 		wake_up(&mgr->tx_waitq);
 	}
-	if (list_empty(&mgr->tx_msg_downq)) {
-		mgr->tx_down_in_progress = false;
-		return;
-	}
 }
 
 /* called holding qlock */
@@ -1538,7 +1531,7 @@ static void drm_dp_queue_down_tx(struct drm_dp_mst_topology_mgr *mgr,
 {
 	mutex_lock(&mgr->qlock);
 	list_add_tail(&txmsg->next, &mgr->tx_msg_downq);
-	if (!mgr->tx_down_in_progress)
+	if (list_is_singular(&mgr->tx_msg_downq))
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -2372,6 +2365,7 @@ EXPORT_SYMBOL(drm_dp_mst_hpd_irq);
 
 /**
  * drm_dp_mst_detect_port() - get connection status for an MST port
+ * @connector: DRM connector for this port
  * @mgr: manager for this port
  * @port: unverified pointer to a port
  *
@@ -2887,7 +2881,7 @@ static void drm_dp_tx_work(struct work_struct *work)
 	struct drm_dp_mst_topology_mgr *mgr = container_of(work, struct drm_dp_mst_topology_mgr, tx_work);
 
 	mutex_lock(&mgr->qlock);
-	if (mgr->tx_down_in_progress)
+	if (!list_empty(&mgr->tx_msg_downq))
 		process_single_down_tx_qlock(mgr);
 	mutex_unlock(&mgr->qlock);
 }
@@ -2927,11 +2921,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
 		drm_dp_port_teardown_pdt(port, port->pdt);
 
 		if (!port->input && port->vcpi.vcpi > 0) {
-			if (mgr->mst_state) {
-				drm_dp_mst_reset_vcpi_slots(mgr, port);
-				drm_dp_update_payload_part1(mgr);
-				drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-			}
+			drm_dp_mst_reset_vcpi_slots(mgr, port);
+			drm_dp_update_payload_part1(mgr);
+			drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
 		}
 
 		kref_put(&port->kref, drm_dp_free_mst_port);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index aead9ffcbe29..be27ed36f56e 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -362,9 +362,7 @@ EXPORT_SYMBOL(drm_put_dev);
 void drm_unplug_dev(struct drm_device *dev)
 {
 	/* for a USB device */
-	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
-	drm_minor_unregister(dev, DRM_MINOR_RENDER);
-	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
+	drm_dev_unregister(dev);
 
 	mutex_lock(&drm_global_mutex);
 
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c0b0c718994a..1fd6eac1400c 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -596,3 +596,18 @@ void drm_fbdev_cma_hotplug_event(struct drm_fbdev_cma *fbdev_cma)
 		drm_fb_helper_hotplug_event(&fbdev_cma->fb_helper);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_hotplug_event);
+
+/**
+ * drm_fbdev_cma_set_suspend - wrapper around drm_fb_helper_set_suspend
+ * @fbdev_cma: The drm_fbdev_cma struct, may be NULL
+ * @state: desired state, zero to resume, non-zero to suspend
+ *
+ * Calls drm_fb_helper_set_suspend, which is a wrapper around
+ * fb_set_suspend implemented by fbdev core.
+ */
+void drm_fbdev_cma_set_suspend(struct drm_fbdev_cma *fbdev_cma, int state)
+{
+	if (fbdev_cma)
+		drm_fb_helper_set_suspend(&fbdev_cma->fb_helper, state);
+}
+EXPORT_SYMBOL(drm_fbdev_cma_set_suspend);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 1f84ff5f1bf8..33af4a5ddca1 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -648,7 +648,7 @@ long drm_ioctl(struct file *filp,
 	int retcode = -EINVAL;
 	char stack_kdata[128];
 	char *kdata = NULL;
-	unsigned int usize, asize, drv_size;
+	unsigned int in_size, out_size, drv_size, ksize;
 	bool is_driver_ioctl;
 
 	dev = file_priv->minor->dev;
@@ -671,9 +671,12 @@ long drm_ioctl(struct file *filp,
 	}
 
 	drv_size = _IOC_SIZE(ioctl->cmd);
-	usize = _IOC_SIZE(cmd);
-	asize = max(usize, drv_size);
-	cmd = ioctl->cmd;
+	out_size = in_size = _IOC_SIZE(cmd);
+	if ((cmd & ioctl->cmd & IOC_IN) == 0)
+		in_size = 0;
+	if ((cmd & ioctl->cmd & IOC_OUT) == 0)
+		out_size = 0;
+	ksize = max(max(in_size, out_size), drv_size);
 
 	DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n",
 		  task_pid_nr(current),
@@ -693,30 +696,24 @@ long drm_ioctl(struct file *filp,
 	if (unlikely(retcode))
 		goto err_i1;
 
-	if (cmd & (IOC_IN | IOC_OUT)) {
-		if (asize <= sizeof(stack_kdata)) {
-			kdata = stack_kdata;
-		} else {
-			kdata = kmalloc(asize, GFP_KERNEL);
-			if (!kdata) {
-				retcode = -ENOMEM;
-				goto err_i1;
-			}
+	if (ksize <= sizeof(stack_kdata)) {
+		kdata = stack_kdata;
+	} else {
+		kdata = kmalloc(ksize, GFP_KERNEL);
+		if (!kdata) {
+			retcode = -ENOMEM;
+			goto err_i1;
 		}
-		if (asize > usize)
-			memset(kdata + usize, 0, asize - usize);
 	}
 
-	if (cmd & IOC_IN) {
-		if (copy_from_user(kdata, (void __user *)arg,
-				   usize) != 0) {
-			retcode = -EFAULT;
-			goto err_i1;
-		}
-	} else if (cmd & IOC_OUT) {
-		memset(kdata, 0, usize);
+	if (copy_from_user(kdata, (void __user *)arg, in_size) != 0) {
+		retcode = -EFAULT;
+		goto err_i1;
 	}
 
+	if (ksize > in_size)
+		memset(kdata + in_size, 0, ksize - in_size);
+
 	/* Enforce sane locking for kms driver ioctls. Core ioctls are
 	 * too messy still. */
 	if ((drm_core_check_feature(dev, DRIVER_MODESET) && is_driver_ioctl) ||
@@ -728,11 +725,8 @@ long drm_ioctl(struct file *filp,
 		mutex_unlock(&drm_global_mutex);
 	}
 
-	if (cmd & IOC_OUT) {
-		if (copy_to_user((void __user *)arg, kdata,
-				 usize) != 0)
-			retcode = -EFAULT;
-	}
+	if (copy_to_user((void __user *)arg, kdata, out_size) != 0)
+		retcode = -EFAULT;
 
       err_i1:
 	if (!ioctl)
@@ -759,7 +753,7 @@ EXPORT_SYMBOL(drm_ioctl);
  * shouldn't be used by any drivers.
  *
  * Returns:
- * True if the @nr corresponds to a DRM core ioctl numer, false otherwise.
+ * True if the @nr corresponds to a DRM core ioctl number, false otherwise.
  */
 bool drm_ioctl_flags(unsigned int nr, unsigned int *flags)
 {
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 8ca3d2bf2bda..77f357b2c386 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -532,7 +532,7 @@ int drm_irq_uninstall(struct drm_device *dev)
 
 	/*
 	 * Wake up any waiters so they don't hang. This is just to paper over
-	 * isssues for UMS drivers which aren't in full control of their
+	 * issues for UMS drivers which aren't in full control of their
 	 * vblank/irq handling. KMS drivers must ensure that vblanks are all
 	 * disabled when uninstalling the irq handler.
 	 */
@@ -594,7 +594,7 @@ int drm_control(struct drm_device *dev, void *data,
 		return 0;
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		return 0;
-	/* UMS was only ever support on pci devices. */
+	/* UMS was only ever supported on pci devices. */
 	if (WARN_ON(!dev->pdev))
 		return -EINVAL;
 
@@ -945,8 +945,8 @@ EXPORT_SYMBOL(drm_crtc_vblank_count);
  *
  * This is the legacy version of drm_crtc_vblank_count_and_time().
  */
-u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
-			      struct timeval *vblanktime)
+static u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
+				     struct timeval *vblanktime)
 {
 	struct drm_vblank_crtc *vblank = &dev->vblank[pipe];
 	u32 vblank_count;
@@ -963,7 +963,6 @@ u32 drm_vblank_count_and_time(struct drm_device *dev, unsigned int pipe,
 
 	return vblank_count;
 }
-EXPORT_SYMBOL(drm_vblank_count_and_time);
 
 /**
  * drm_crtc_vblank_count_and_time - retrieve "cooked" vblank counter value
@@ -975,8 +974,6 @@ EXPORT_SYMBOL(drm_vblank_count_and_time);
  * vblank events since the system was booted, including lost events due to
  * modesetting activity. Returns corresponding system timestamp of the time
  * of the vblank interval that corresponds to the current vblank counter value.
- *
- * This is the native KMS version of drm_vblank_count_and_time().
  */
 u32 drm_crtc_vblank_count_and_time(struct drm_crtc *crtc,
 				   struct timeval *vblanktime)
@@ -1588,12 +1585,6 @@ static int drm_queue_vblank_event(struct drm_device *dev, unsigned int pipe,
 
 	seq = drm_vblank_count_and_time(dev, pipe, &now);
 
-	if ((vblwait->request.type & _DRM_VBLANK_NEXTONMISS) &&
-	    (seq - vblwait->request.sequence) <= (1 << 23)) {
-		vblwait->request.sequence = seq + 1;
-		vblwait->reply.sequence = vblwait->request.sequence;
-	}
-
 	DRM_DEBUG("event on vblank count %d, current %d, crtc %u\n",
 		  vblwait->request.sequence, seq, pipe);
 
@@ -1690,6 +1681,11 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		goto done;
 	}
 
+	if ((flags & _DRM_VBLANK_NEXTONMISS) &&
+	    (seq - vblwait->request.sequence) <= (1 << 23)) {
+		vblwait->request.sequence = seq + 1;
+	}
+
 	if (flags & _DRM_VBLANK_EVENT) {
 		/* must hold on to the vblank ref until the event fires
 		 * drm_vblank_put will be called asynchronously
@@ -1697,14 +1693,8 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		return drm_queue_vblank_event(dev, pipe, vblwait, file_priv);
 	}
 
-	if ((flags & _DRM_VBLANK_NEXTONMISS) &&
-	    (seq - vblwait->request.sequence) <= (1<<23)) {
-		vblwait->request.sequence = seq + 1;
-	}
-
 	DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
 		  vblwait->request.sequence, pipe);
-	vblank->last_wait = vblwait->request.sequence;
 	DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
 		    (((drm_vblank_count(dev, pipe) -
 		       vblwait->request.sequence) <= (1 << 23)) ||
diff --git a/drivers/gpu/drm/drm_memory.c b/drivers/gpu/drm/drm_memory.c
index 87a8cb73366f..fc0ebd273ef8 100644
--- a/drivers/gpu/drm/drm_memory.c
+++ b/drivers/gpu/drm/drm_memory.c
@@ -44,7 +44,7 @@
 # include <asm/agp.h>
 #else
 # ifdef __powerpc__
-#  define PAGE_AGP	__pgprot(_PAGE_KERNEL | _PAGE_NO_CACHE)
+#  define PAGE_AGP	pgprot_noncached_wc(PAGE_KERNEL)
 # else
 #  define PAGE_AGP	PAGE_KERNEL
 # endif
diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c
index 49311fc61d5d..af0d471ee246 100644
--- a/drivers/gpu/drm/drm_mipi_dsi.c
+++ b/drivers/gpu/drm/drm_mipi_dsi.c
@@ -999,17 +999,17 @@ int mipi_dsi_dcs_set_tear_on(struct mipi_dsi_device *dsi,
 EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_on);
 
 /**
- * mipi_dsi_set_tear_scanline() - turn on the display module's Tearing Effect
- * output signal on the TE signal line when display module reaches line N
- * defined by STS[n:0].
+ * mipi_dsi_dcs_set_tear_scanline() - set the scanline to use as trigger for
+ *    the Tearing Effect output signal of the display module
  * @dsi: DSI peripheral device
- * @param: STS[10:0]
+ * @scanline: scanline to use as trigger
+ *
  * Return: 0 on success or a negative error code on failure
  */
-int mipi_dsi_set_tear_scanline(struct mipi_dsi_device *dsi, u16 param)
+int mipi_dsi_dcs_set_tear_scanline(struct mipi_dsi_device *dsi, u16 scanline)
 {
-	u8 payload[3] = { MIPI_DCS_SET_TEAR_SCANLINE, param >> 8,
-			  param & 0xff };
+	u8 payload[3] = { MIPI_DCS_SET_TEAR_SCANLINE, scanline >> 8,
+			  scanline & 0xff };
 	ssize_t err;
 
 	err = mipi_dsi_generic_write(dsi, payload, sizeof(payload));
@@ -1018,7 +1018,7 @@ int mipi_dsi_set_tear_scanline(struct mipi_dsi_device *dsi, u16 param)
 
 	return 0;
 }
-EXPORT_SYMBOL(mipi_dsi_set_tear_scanline);
+EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_scanline);
 
 /**
  * mipi_dsi_dcs_set_pixel_format() - sets the pixel format for the RGB image
diff --git a/drivers/gpu/drm/drm_scatter.c b/drivers/gpu/drm/drm_scatter.c
index 4f0f3b36d537..bf70431073f6 100644
--- a/drivers/gpu/drm/drm_scatter.c
+++ b/drivers/gpu/drm/drm_scatter.c
@@ -41,7 +41,7 @@
 static inline void *drm_vmalloc_dma(unsigned long size)
 {
 #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-	return __vmalloc(size, GFP_KERNEL, PAGE_KERNEL | _PAGE_NO_CACHE);
+	return __vmalloc(size, GFP_KERNEL, pgprot_noncached_wc(PAGE_KERNEL));
 #else
 	return vmalloc_32(size);
 #endif
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 43ff44a2b8e7..caa4e4ca616d 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -80,7 +80,7 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
 	pgprot_t tmp = vm_get_page_prot(vma->vm_flags);
 
 #if defined(__powerpc__) && defined(CONFIG_NOT_COHERENT_CACHE)
-	tmp |= _PAGE_NO_CACHE;
+	tmp = pgprot_noncached_wc(tmp);
 #endif
 	return tmp;
 }
@@ -593,7 +593,7 @@ static int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 			 * pages and mappings in fault()
 			 */
 #if defined(__powerpc__)
-			pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 #endif
 			vma->vm_ops = &drm_vm_ops;
 			break;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 340d390306d8..ffd1b32caa8d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -91,10 +91,8 @@ static void load_gpu(struct drm_device *dev)
 			int ret;
 
 			ret = etnaviv_gpu_init(g);
-			if (ret) {
-				dev_err(g->dev, "hw init failed: %d\n", ret);
+			if (ret)
 				priv->gpu[i] = NULL;
-			}
 		}
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index df9bcbab922f..56fb8637bb57 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -535,8 +535,7 @@ void etnaviv_gem_describe_objects(struct etnaviv_drm_private *priv,
 
 static void etnaviv_gem_shmem_release(struct etnaviv_gem_object *etnaviv_obj)
 {
-	if (etnaviv_obj->vaddr)
-		vunmap(etnaviv_obj->vaddr);
+	vunmap(etnaviv_obj->vaddr);
 	put_pages(etnaviv_obj);
 }
 
@@ -670,9 +669,7 @@ static struct drm_gem_object *__etnaviv_gem_new(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference_unlocked(obj);
-
+	drm_gem_object_unreference_unlocked(obj);
 	return ERR_PTR(ret);
 }
 
@@ -916,15 +913,12 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 	get_task_struct(current);
 
 	ret = etnaviv_gem_obj_add(dev, &etnaviv_obj->base);
-	if (ret) {
-		drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
-		return ret;
-	}
+	if (ret)
+		goto unreference;
 
 	ret = drm_gem_handle_create(file, &etnaviv_obj->base, handle);
-
+unreference:
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(&etnaviv_obj->base);
-
 	return ret;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ff6aa5dfb2d7..87ef34150d46 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -487,6 +487,47 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	return 0;
 }
 
+static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
+{
+	u32 pmc, ppc;
+
+	/* enable clock gating */
+	ppc = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	ppc |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
+
+	/* Disable stall module clock gating for 4.3.0.1 and 4.3.0.2 revs */
+	if (gpu->identity.revision == 0x4301 ||
+	    gpu->identity.revision == 0x4302)
+		ppc |= VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING;
+
+	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, ppc);
+
+	pmc = gpu_read(gpu, VIVS_PM_MODULE_CONTROLS);
+
+	/* Disable PA clock gating for GC400+ except for GC420 */
+	if (gpu->identity.model >= chipModel_GC400 &&
+	    gpu->identity.model != chipModel_GC420)
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA;
+
+	/*
+	 * Disable PE clock gating on revs < 5.0.0.0 when HZ is
+	 * present without a bug fix.
+	 */
+	if (gpu->identity.revision < 0x5000 &&
+	    gpu->identity.minor_features0 & chipMinorFeatures0_HZ &&
+	    !(gpu->identity.minor_features1 &
+	      chipMinorFeatures1_DISABLE_PE_GATING))
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE;
+
+	if (gpu->identity.revision < 0x5422)
+		pmc |= BIT(15); /* Unknown bit */
+
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ;
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ;
+
+	gpu_write(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
+}
+
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
@@ -506,6 +547,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug);
 	}
 
+	/* enable module-level clock gating */
+	etnaviv_gpu_enable_mlcg(gpu);
+
 	/*
 	 * Update GPU AXI cache atttribute to "cacheable, no allocate".
 	 * This is necessary to prevent the iMX6 SoC locking up.
@@ -553,8 +597,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	bool mmuv2;
 
 	ret = pm_runtime_get_sync(gpu->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
 		return ret;
+	}
 
 	etnaviv_hw_identify(gpu);
 
@@ -591,8 +637,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	ret = etnaviv_hw_reset(gpu);
-	if (ret)
+	if (ret) {
+		dev_err(gpu->dev, "GPU reset failed\n");
 		goto fail;
+	}
 
 	/* Setup IOMMU.. eventually we will (I think) do this once per context
 	 * and have separate page tables per context.  For now, to keep things
@@ -610,12 +658,14 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	if (!iommu) {
+		dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
 
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
 		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 522cfd447892..16353ee81651 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -225,6 +225,7 @@ struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu)
 
 	etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING;
 	etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops;
+	etnaviv_domain->domain.pgsize_bitmap = SZ_4K;
 	etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START;
 	etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1;
 
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 6a7de5f1454a..807a3d9e0dd5 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -218,6 +218,13 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_FE	0x00000001
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_DE	0x00000002
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE	0x00000004
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SH	0x00000008
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA	0x00000010
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE	0x00000020
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA	0x00000040
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX	0x00000080
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ	0x00010000
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ	0x00020000
 
 #define VIVS_PM_MODULE_STATUS					0x00000108
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index d814b3048ee5..83f61c513b7e 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -2,10 +2,6 @@ config DRM_EXYNOS
 	tristate "DRM Support for Samsung SoC EXYNOS Series"
 	depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have a Samsung SoC EXYNOS chipset.
@@ -15,7 +11,7 @@ if DRM_EXYNOS
 
 config DRM_EXYNOS_IOMMU
 	bool
-	depends on EXYNOS_IOMMU && ARM_DMA_USE_IOMMU
+	depends on EXYNOS_IOMMU
 	default y
 
 comment "CRTCs"
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f6223f907c15..7f9901b7777b 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -31,7 +31,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 468498e3fec1..4f0850585b8e 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -34,7 +34,7 @@
 
 struct exynos_dp_device {
 	struct drm_encoder         encoder;
-	struct drm_connector       connector;
+	struct drm_connector       *connector;
 	struct drm_bridge          *ptn_bridge;
 	struct drm_device          *drm_dev;
 	struct device              *dev;
@@ -67,10 +67,10 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data)
 	return exynos_dp_crtc_clock_enable(plat_data, false);
 }
 
-static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data)
+static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data,
+			       struct drm_connector *connector)
 {
 	struct exynos_dp_device *dp = to_dp(plat_data);
-	struct drm_connector *connector = &dp->connector;
 	struct drm_display_mode *mode;
 	int num_modes = 0;
 
@@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 	int ret;
 
 	drm_connector_register(connector);
+	dp->connector = connector;
 
 	/* Pre-empt DP connector creation if there's a bridge */
 	if (dp->ptn_bridge) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 011211e4167d..edbd98ff293e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
-#include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 13d28d4229e2..877d2efa28e2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -159,12 +159,7 @@ static int exynos_drm_load(struct drm_device *dev, unsigned long flags)
 	DRM_INFO("Exynos DRM: using %s device for DMA mapping operations\n",
 		 dev_name(private->dma_dev));
 
-	/*
-	 * create mapping to manage iommu table and set a pointer to iommu
-	 * mapping structure to iommu_mapping of private data.
-	 * also this iommu_mapping can be used to check if iommu is supported
-	 * or not.
-	 */
+	/* create common IOMMU mapping for all devices attached to Exynos DRM */
 	ret = drm_create_iommu_mapping(dev);
 	if (ret < 0) {
 		DRM_ERROR("failed to create iommu mapping.\n");
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index cc33ec9296e7..b39d521f093d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -224,8 +224,6 @@ struct exynos_drm_private {
 	struct drm_property *plane_zpos_property;
 
 	struct device *dma_dev;
-	unsigned long da_start;
-	unsigned long da_space_size;
 	void *mapping;
 
 	unsigned int pipe;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 67dcd6831291..fb49443bfd32 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -269,8 +269,7 @@ static void exynos_drm_fbdev_destroy(struct drm_device *dev,
 	struct exynos_drm_gem *exynos_gem = exynos_fbd->exynos_gem;
 	struct drm_framebuffer *fb;
 
-	if (exynos_gem->kvaddr)
-		vunmap(exynos_gem->kvaddr);
+	vunmap(exynos_gem->kvaddr);
 
 	/* release drm framebuffer and real buffer */
 	if (fb_helper->fb && fb_helper->fb->funcs) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3efe1aa89416..d47216488985 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -30,7 +30,6 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
@@ -120,7 +119,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
 	.timing_base = 0x0,
 	.has_clksel = 1,
 	.has_limited_fmt = 1,
-	.has_hw_trigger = 1,
 };
 
 static struct fimd_driver_data exynos3_fimd_driver_data = {
@@ -171,14 +169,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
 	.lcdblk_vt_shift = 24,
 	.lcdblk_bypass_shift = 15,
 	.lcdblk_mic_bypass_shift = 11,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
 	.has_vtsel = 1,
 	.has_mic_bypass = 1,
 	.has_dp_clk = 1,
-	.has_hw_trigger = 1,
-	.has_trigger_per_te = 1,
 };
 
 struct fimd_context {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 493552368295..8564c3da0d22 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -48,13 +48,13 @@
 
 /* registers for base address */
 #define G2D_SRC_BASE_ADDR		0x0304
-#define G2D_SRC_STRIDE_REG		0x0308
+#define G2D_SRC_STRIDE			0x0308
 #define G2D_SRC_COLOR_MODE		0x030C
 #define G2D_SRC_LEFT_TOP		0x0310
 #define G2D_SRC_RIGHT_BOTTOM		0x0314
 #define G2D_SRC_PLANE2_BASE_ADDR	0x0318
 #define G2D_DST_BASE_ADDR		0x0404
-#define G2D_DST_STRIDE_REG		0x0408
+#define G2D_DST_STRIDE			0x0408
 #define G2D_DST_COLOR_MODE		0x040C
 #define G2D_DST_LEFT_TOP		0x0410
 #define G2D_DST_RIGHT_BOTTOM		0x0414
@@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 
 	switch (reg_offset) {
 	case G2D_SRC_BASE_ADDR:
-	case G2D_SRC_STRIDE_REG:
+	case G2D_SRC_STRIDE:
 	case G2D_SRC_COLOR_MODE:
 	case G2D_SRC_LEFT_TOP:
 	case G2D_SRC_RIGHT_BOTTOM:
@@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_SRC_PLANE2;
 		break;
 	case G2D_DST_BASE_ADDR:
-	case G2D_DST_STRIDE_REG:
+	case G2D_DST_STRIDE:
 	case G2D_DST_COLOR_MODE:
 	case G2D_DST_LEFT_TOP:
 	case G2D_DST_RIGHT_BOTTOM:
@@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev,
 			} else
 				buf_info->types[reg_type] = BUF_TYPE_GEM;
 			break;
-		case G2D_SRC_STRIDE_REG:
-		case G2D_DST_STRIDE_REG:
+		case G2D_SRC_STRIDE:
+		case G2D_DST_STRIDE:
 			if (for_addr)
 				goto err;
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.c b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
index 7ca09ee19656..0f373702414e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.c
@@ -14,13 +14,27 @@
 
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
-#include <linux/kref.h>
-
-#include <asm/dma-iommu.h>
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_iommu.h"
 
+static inline int configure_dma_max_seg_size(struct device *dev)
+{
+	if (!dev->dma_parms)
+		dev->dma_parms = kzalloc(sizeof(*dev->dma_parms), GFP_KERNEL);
+	if (!dev->dma_parms)
+		return -ENOMEM;
+
+	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
+	return 0;
+}
+
+static inline void clear_dma_max_seg_size(struct device *dev)
+{
+	kfree(dev->dma_parms);
+	dev->dma_parms = NULL;
+}
+
 /*
  * drm_create_iommu_mapping - create a mapping structure
  *
@@ -28,38 +42,22 @@
  */
 int drm_create_iommu_mapping(struct drm_device *drm_dev)
 {
-	struct dma_iommu_mapping *mapping = NULL;
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	if (!priv->da_start)
-		priv->da_start = EXYNOS_DEV_ADDR_START;
-	if (!priv->da_space_size)
-		priv->da_space_size = EXYNOS_DEV_ADDR_SIZE;
-
-	mapping = arm_iommu_create_mapping(&platform_bus_type, priv->da_start,
-						priv->da_space_size);
-
-	if (IS_ERR(mapping))
-		return PTR_ERR(mapping);
-
-	priv->mapping = mapping;
-
-	return 0;
+	return __exynos_iommu_create_mapping(priv, EXYNOS_DEV_ADDR_START,
+					     EXYNOS_DEV_ADDR_SIZE);
 }
 
 /*
  * drm_release_iommu_mapping - release iommu mapping structure
  *
  * @drm_dev: DRM device
- *
- * if mapping->kref becomes 0 then all things related to iommu mapping
- * will be released
  */
 void drm_release_iommu_mapping(struct drm_device *drm_dev)
 {
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 
-	arm_iommu_release_mapping(priv->mapping);
+	__exynos_iommu_release_mapping(priv);
 }
 
 /*
@@ -77,25 +75,19 @@ int drm_iommu_attach_device(struct drm_device *drm_dev,
 	struct exynos_drm_private *priv = drm_dev->dev_private;
 	int ret;
 
-	if (!priv->mapping)
-		return 0;
-
-	subdrv_dev->dma_parms = devm_kzalloc(subdrv_dev,
-					sizeof(*subdrv_dev->dma_parms),
-					GFP_KERNEL);
-	if (!subdrv_dev->dma_parms)
-		return -ENOMEM;
-
-	dma_set_max_seg_size(subdrv_dev, 0xffffffffu);
-
-	if (subdrv_dev->archdata.mapping)
-		arm_iommu_detach_device(subdrv_dev);
+	if (get_dma_ops(priv->dma_dev) != get_dma_ops(subdrv_dev)) {
+		DRM_ERROR("Device %s lacks support for IOMMU\n",
+			  dev_name(subdrv_dev));
+		return -EINVAL;
+	}
 
-	ret = arm_iommu_attach_device(subdrv_dev, priv->mapping);
-	if (ret < 0) {
-		DRM_DEBUG_KMS("failed iommu attach.\n");
+	ret = configure_dma_max_seg_size(subdrv_dev);
+	if (ret)
 		return ret;
-	}
+
+	ret = __exynos_iommu_attach(priv, subdrv_dev);
+	if (ret)
+		clear_dma_max_seg_size(subdrv_dev);
 
 	return 0;
 }
@@ -113,10 +105,7 @@ void drm_iommu_detach_device(struct drm_device *drm_dev,
 				struct device *subdrv_dev)
 {
 	struct exynos_drm_private *priv = drm_dev->dev_private;
-	struct dma_iommu_mapping *mapping = priv->mapping;
-
-	if (!mapping || !mapping->domain)
-		return;
 
-	arm_iommu_detach_device(subdrv_dev);
+	__exynos_iommu_detach(priv, subdrv_dev);
+	clear_dma_max_seg_size(subdrv_dev);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_iommu.h b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
index 5ffebe02ee4d..c8de4913fdbe 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_iommu.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_iommu.h
@@ -17,6 +17,97 @@
 
 #ifdef CONFIG_DRM_EXYNOS_IOMMU
 
+#if defined(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+
+static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
+					unsigned long start, unsigned long size)
+{
+	priv->mapping = arm_iommu_create_mapping(&platform_bus_type, start,
+						 size);
+	return IS_ERR(priv->mapping);
+}
+
+static inline void
+__exynos_iommu_release_mapping(struct exynos_drm_private *priv)
+{
+	arm_iommu_release_mapping(priv->mapping);
+}
+
+static inline int __exynos_iommu_attach(struct exynos_drm_private *priv,
+					struct device *dev)
+{
+	if (dev->archdata.mapping)
+		arm_iommu_detach_device(dev);
+
+	return arm_iommu_attach_device(dev, priv->mapping);
+}
+
+static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
+					 struct device *dev)
+{
+	arm_iommu_detach_device(dev);
+}
+
+#elif defined(CONFIG_IOMMU_DMA)
+#include <linux/dma-iommu.h>
+
+static inline int __exynos_iommu_create_mapping(struct exynos_drm_private *priv,
+					unsigned long start, unsigned long size)
+{
+	struct iommu_domain *domain;
+	int ret;
+
+	domain = iommu_domain_alloc(priv->dma_dev->bus);
+	if (!domain)
+		return -ENOMEM;
+
+	ret = iommu_get_dma_cookie(domain);
+	if (ret)
+		goto free_domain;
+
+	ret = iommu_dma_init_domain(domain, start, size);
+	if (ret)
+		goto put_cookie;
+
+	priv->mapping = domain;
+	return 0;
+
+put_cookie:
+	iommu_put_dma_cookie(domain);
+free_domain:
+	iommu_domain_free(domain);
+	return ret;
+}
+
+static inline void __exynos_iommu_release_mapping(struct exynos_drm_private *priv)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	iommu_put_dma_cookie(domain);
+	iommu_domain_free(domain);
+	priv->mapping = NULL;
+}
+
+static inline int __exynos_iommu_attach(struct exynos_drm_private *priv,
+					struct device *dev)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	return iommu_attach_device(domain, dev);
+}
+
+static inline void __exynos_iommu_detach(struct exynos_drm_private *priv,
+					 struct device *dev)
+{
+	struct iommu_domain *domain = priv->mapping;
+
+	iommu_detach_device(domain, dev);
+}
+#else
+#error Unsupported architecture and IOMMU/DMA-mapping glue code
+#endif
+
 int drm_create_iommu_mapping(struct drm_device *drm_dev);
 
 void drm_release_iommu_mapping(struct drm_device *drm_dev);
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 1625d7c8a319..2275efe41acd 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -1820,6 +1820,7 @@ static int hdmi_probe(struct platform_device *pdev)
 		DRM_ERROR("Failed to find ddc node in device tree\n");
 		return -ENODEV;
 	}
+	of_node_put(dev->of_node);
 
 out_get_ddc_adpt:
 	hdata->ddc_adpt = of_find_i2c_adapter_by_node(ddc_node);
@@ -1838,6 +1839,7 @@ out_get_ddc_adpt:
 		ret = -ENODEV;
 		goto err_ddc;
 	}
+	of_node_put(dev->of_node);
 
 out_get_phy_port:
 	if (hdata->drv_data->is_apb_phy) {
diff --git a/drivers/gpu/drm/fsl-dcu/Kconfig b/drivers/gpu/drm/fsl-dcu/Kconfig
index b9c714de6e40..14a72c4c496d 100644
--- a/drivers/gpu/drm/fsl-dcu/Kconfig
+++ b/drivers/gpu/drm/fsl-dcu/Kconfig
@@ -5,12 +5,7 @@ config DRM_FSL_DCU
 	select BACKLIGHT_LCD_SUPPORT
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_PANEL
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
 	select REGMAP_MMIO
 	select VIDEOMODE_HELPERS
 	help
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index 706de3278f1c..3371635cd4d7 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -44,6 +44,8 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 
+	drm_crtc_vblank_off(crtc);
+
 	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
 			   DCU_MODE_DCU_MODE_MASK,
 			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
@@ -61,6 +63,8 @@ static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
 			   DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
 	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
 		     DCU_UPDATE_MODE_READREG);
+
+	drm_crtc_vblank_on(crtc);
 }
 
 static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
@@ -137,9 +141,10 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 {
 	struct drm_plane *primary;
 	struct drm_crtc *crtc = &fsl_dev->crtc;
-	unsigned int i, j, reg_num;
 	int ret;
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+
 	primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm);
 	if (!primary)
 		return -ENOMEM;
@@ -153,19 +158,5 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 
 	drm_crtc_helper_add(crtc, &fsl_dcu_drm_crtc_helper_funcs);
 
-	if (!strcmp(fsl_dev->soc->name, "ls1021a"))
-		reg_num = LS1021A_LAYER_REG_NUM;
-	else
-		reg_num = VF610_LAYER_REG_NUM;
-	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
-		for (j = 1; j <= reg_num; j++)
-			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
-	}
-	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-			   DCU_MODE_DCU_MODE_MASK,
-			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-		     DCU_UPDATE_MODE_READREG);
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 33727d5d826a..7882387f9bff 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -11,6 +11,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/console.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mm.h>
@@ -22,6 +23,7 @@
 #include <linux/regmap.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
@@ -42,7 +44,6 @@ static const struct regmap_config fsl_dcu_regmap_config = {
 	.reg_bits = 32,
 	.reg_stride = 4,
 	.val_bits = 32,
-	.cache_type = REGCACHE_RBTREE,
 
 	.volatile_reg = fsl_dcu_drm_is_volatile_reg,
 };
@@ -228,11 +229,26 @@ static int fsl_dcu_drm_pm_suspend(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
+	disable_irq(fsl_dev->irq);
 	drm_kms_helper_poll_disable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, true);
-	regcache_mark_dirty(fsl_dev->regmap);
-	clk_disable(fsl_dev->clk);
-	clk_unprepare(fsl_dev->clk);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 1);
+	console_unlock();
+
+	fsl_dev->state = drm_atomic_helper_suspend(fsl_dev->drm);
+	if (IS_ERR(fsl_dev->state)) {
+		console_lock();
+		drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+		console_unlock();
+
+		drm_kms_helper_poll_enable(fsl_dev->drm);
+		enable_irq(fsl_dev->irq);
+		return PTR_ERR(fsl_dev->state);
+	}
+
+	clk_disable_unprepare(fsl_dev->pix_clk);
+	clk_disable_unprepare(fsl_dev->clk);
 
 	return 0;
 }
@@ -245,21 +261,27 @@ static int fsl_dcu_drm_pm_resume(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
-	ret = clk_enable(fsl_dev->clk);
+	ret = clk_prepare_enable(fsl_dev->clk);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable dcu clk\n");
-		clk_unprepare(fsl_dev->clk);
 		return ret;
 	}
-	ret = clk_prepare(fsl_dev->clk);
+
+	ret = clk_prepare_enable(fsl_dev->pix_clk);
 	if (ret < 0) {
-		dev_err(dev, "failed to prepare dcu clk\n");
+		dev_err(dev, "failed to enable pix clk\n");
 		return ret;
 	}
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+	drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+	console_unlock();
+
 	drm_kms_helper_poll_enable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, false);
-	regcache_sync(fsl_dev->regmap);
+	enable_irq(fsl_dev->irq);
 
 	return 0;
 }
@@ -273,12 +295,14 @@ static const struct fsl_dcu_soc_data fsl_dcu_ls1021a_data = {
 	.name = "ls1021a",
 	.total_layer = 16,
 	.max_layer = 4,
+	.layer_regs = LS1021A_LAYER_REG_NUM,
 };
 
 static const struct fsl_dcu_soc_data fsl_dcu_vf610_data = {
 	.name = "vf610",
 	.total_layer = 64,
 	.max_layer = 6,
+	.layer_regs = VF610_LAYER_REG_NUM,
 };
 
 static const struct of_device_id fsl_dcu_of_match[] = {
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
index c275f900ff23..3b371fe7491e 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
@@ -175,6 +175,7 @@ struct fsl_dcu_soc_data {
 	unsigned int total_layer;
 	/*max layer number DCU supported*/
 	unsigned int max_layer;
+	unsigned int layer_regs;
 };
 
 struct fsl_dcu_drm_device {
@@ -193,6 +194,7 @@ struct fsl_dcu_drm_device {
 	struct drm_encoder encoder;
 	struct fsl_dcu_drm_connector connector;
 	const struct fsl_dcu_soc_data *soc;
+	struct drm_atomic_state *state;
 };
 
 void fsl_dcu_fbdev_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
index a6e4cd591960..d9d6cc1c8e39 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_kms.c
@@ -43,7 +43,7 @@ int fsl_dcu_drm_modeset_init(struct fsl_dcu_drm_device *fsl_dev)
 	if (ret)
 		goto err;
 
-	ret = fsl_dcu_drm_connector_create(fsl_dev, &fsl_dev->encoder);
+	ret = fsl_dcu_create_outputs(fsl_dev);
 	if (ret)
 		goto err;
 
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
index 7093109fbc21..5a7b88e19e44 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_output.h
@@ -25,9 +25,8 @@ to_fsl_dcu_connector(struct drm_connector *con)
 		     : NULL;
 }
 
-int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
-				 struct drm_encoder *encoder);
 int fsl_dcu_drm_encoder_create(struct fsl_dcu_drm_device *fsl_dev,
 			       struct drm_crtc *crtc);
+int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev);
 
 #endif /* __FSL_DCU_DRM_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
index 274558b3b32b..e50467a0deb0 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
@@ -217,6 +217,22 @@ static const u32 fsl_dcu_drm_plane_formats[] = {
 	DRM_FORMAT_YUV422,
 };
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev)
+{
+	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
+	int i, j;
+
+	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
+		for (j = 1; j <= fsl_dev->soc->layer_regs; j++)
+			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
+	}
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
+}
+
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev)
 {
 	struct drm_plane *primary;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
index d657f088d859..8ee45f813ee8 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
@@ -12,6 +12,7 @@
 #ifndef __FSL_DCU_DRM_PLANE_H__
 #define __FSL_DCU_DRM_PLANE_H__
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev);
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev);
 
 #endif /* __FSL_DCU_DRM_PLANE_H__ */
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index 0b0989e503ea..26edcc899712 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/backlight.h>
+#include <linux/of_graph.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
@@ -132,12 +133,12 @@ static const struct drm_connector_helper_funcs connector_helper_funcs = {
 	.mode_valid = fsl_dcu_drm_connector_mode_valid,
 };
 
-int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
-				 struct drm_encoder *encoder)
+static int fsl_dcu_attach_panel(struct fsl_dcu_drm_device *fsl_dev,
+				 struct drm_panel *panel)
 {
+	struct drm_encoder *encoder = &fsl_dev->encoder;
 	struct drm_connector *connector = &fsl_dev->connector.base;
 	struct drm_mode_config *mode_config = &fsl_dev->drm->mode_config;
-	struct device_node *panel_node;
 	int ret;
 
 	fsl_dev->connector.encoder = encoder;
@@ -161,21 +162,7 @@ int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
 				      mode_config->dpms_property,
 				      DRM_MODE_DPMS_OFF);
 
-	panel_node = of_parse_phandle(fsl_dev->np, "fsl,panel", 0);
-	if (!panel_node) {
-		dev_err(fsl_dev->dev, "fsl,panel property not found\n");
-		ret = -ENODEV;
-		goto err_sysfs;
-	}
-
-	fsl_dev->connector.panel = of_drm_find_panel(panel_node);
-	if (!fsl_dev->connector.panel) {
-		ret = -EPROBE_DEFER;
-		goto err_panel;
-	}
-	of_node_put(panel_node);
-
-	ret = drm_panel_attach(fsl_dev->connector.panel, connector);
+	ret = drm_panel_attach(panel, connector);
 	if (ret) {
 		dev_err(fsl_dev->dev, "failed to attach panel\n");
 		goto err_sysfs;
@@ -183,11 +170,62 @@ int fsl_dcu_drm_connector_create(struct fsl_dcu_drm_device *fsl_dev,
 
 	return 0;
 
-err_panel:
-	of_node_put(panel_node);
 err_sysfs:
 	drm_connector_unregister(connector);
 err_cleanup:
 	drm_connector_cleanup(connector);
 	return ret;
 }
+
+static int fsl_dcu_attach_endpoint(struct fsl_dcu_drm_device *fsl_dev,
+				   const struct of_endpoint *ep)
+{
+	struct drm_bridge *bridge;
+	struct device_node *np;
+
+	np = of_graph_get_remote_port_parent(ep->local_node);
+
+	fsl_dev->connector.panel = of_drm_find_panel(np);
+	if (fsl_dev->connector.panel) {
+		of_node_put(np);
+		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
+	}
+
+	bridge = of_drm_find_bridge(np);
+	of_node_put(np);
+	if (!bridge)
+		return -ENODEV;
+
+	fsl_dev->encoder.bridge = bridge;
+	bridge->encoder = &fsl_dev->encoder;
+
+	return drm_bridge_attach(fsl_dev->drm, bridge);
+}
+
+int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev)
+{
+	struct of_endpoint ep;
+	struct device_node *ep_node, *panel_node;
+	int ret;
+
+	/* This is for backward compatibility */
+	panel_node = of_parse_phandle(fsl_dev->np, "fsl,panel", 0);
+	if (panel_node) {
+		fsl_dev->connector.panel = of_drm_find_panel(panel_node);
+		of_node_put(panel_node);
+		if (!fsl_dev->connector.panel)
+			return -EPROBE_DEFER;
+		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
+	}
+
+	ep_node = of_graph_get_next_endpoint(fsl_dev->np, NULL);
+	if (!ep_node)
+		return -ENODEV;
+
+	ret = of_graph_parse_endpoint(ep_node, &ep);
+	of_node_put(ep_node);
+	if (ret)
+		return -ENODEV;
+
+	return fsl_dcu_attach_endpoint(fsl_dev, &ep);
+}
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
index bbe34f1c0505..bca09ea24632 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
@@ -92,6 +92,7 @@ struct fsl_tcon *fsl_tcon_init(struct device *dev)
 		goto err_node_put;
 	}
 
+	of_node_put(np);
 	clk_prepare_enable(tcon->ipg_clk);
 
 	dev_info(dev, "Using TCON in bypass mode\n");
diff --git a/drivers/gpu/drm/gma500/Kconfig b/drivers/gpu/drm/gma500/Kconfig
index 17f928ec84ea..8906d67494fc 100644
--- a/drivers/gpu/drm/gma500/Kconfig
+++ b/drivers/gpu/drm/gma500/Kconfig
@@ -1,11 +1,7 @@
 config DRM_GMA500
 	tristate "Intel GMA5/600 KMS Framebuffer"
 	depends on DRM && PCI && X86
-	select FB_CFB_COPYAREA
-	select FB_CFB_FILLRECT
-	select FB_CFB_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	# GMA500 depends on ACPI_VIDEO when ACPI is enabled, just like i915
 	select ACPI_VIDEO if ACPI
diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
index 28f9d90988ff..563f193fcfac 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c
@@ -246,8 +246,7 @@ static void cdv_hdmi_destroy(struct drm_connector *connector)
 {
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
index 813ef23a8054..38dc89083148 100644
--- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c
@@ -444,8 +444,7 @@ static void cdv_intel_lvds_destroy(struct drm_connector *connector)
 {
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -780,12 +779,10 @@ out:
 failed_find:
 	mutex_unlock(&dev->mode_config.mutex);
 	printk(KERN_ERR "Failed find\n");
-	if (gma_encoder->ddc_bus)
-		psb_intel_i2c_destroy(gma_encoder->ddc_bus);
+	psb_intel_i2c_destroy(gma_encoder->ddc_bus);
 failed_ddc:
 	printk(KERN_ERR "Failed DDC\n");
-	if (gma_encoder->i2c_bus)
-		psb_intel_i2c_destroy(gma_encoder->i2c_bus);
+	psb_intel_i2c_destroy(gma_encoder->i2c_bus);
 failed_blc_i2c:
 	printk(KERN_ERR "Failed BLC\n");
 	drm_encoder_cleanup(encoder);
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 7440bf90ac9c..0fcdce0817de 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -184,12 +184,6 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int psbfb_ioctl(struct fb_info *info, unsigned int cmd,
-						unsigned long arg)
-{
-	return -ENOTTY;
-}
-
 static struct fb_ops psbfb_ops = {
 	.owner = THIS_MODULE,
 	.fb_check_var = drm_fb_helper_check_var,
@@ -201,7 +195,6 @@ static struct fb_ops psbfb_ops = {
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_mmap = psbfb_mmap,
 	.fb_sync = psbfb_sync,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 static struct fb_ops psbfb_roll_ops = {
@@ -215,7 +208,6 @@ static struct fb_ops psbfb_roll_ops = {
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_pan_display = psbfb_pan,
 	.fb_mmap = psbfb_mmap,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 static struct fb_ops psbfb_unaccel_ops = {
@@ -228,7 +220,6 @@ static struct fb_ops psbfb_unaccel_ops = {
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
 	.fb_mmap = psbfb_mmap,
-	.fb_ioctl = psbfb_ioctl,
 };
 
 /**
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 82b8ce418b27..50eb944fb78a 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -210,10 +210,8 @@ static int psb_driver_unload(struct drm_device *dev)
 			iounmap(dev_priv->aux_reg);
 			dev_priv->aux_reg = NULL;
 		}
-		if (dev_priv->aux_pdev)
-			pci_dev_put(dev_priv->aux_pdev);
-		if (dev_priv->lpc_pdev)
-			pci_dev_put(dev_priv->lpc_pdev);
+		pci_dev_put(dev_priv->aux_pdev);
+		pci_dev_put(dev_priv->lpc_pdev);
 
 		/* Destroy VBT data */
 		psb_intel_destroy_bios(dev);
diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c
index b1b93317d054..e55733ca46d2 100644
--- a/drivers/gpu/drm/gma500/psb_intel_lvds.c
+++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c
@@ -561,8 +561,7 @@ void psb_intel_lvds_destroy(struct drm_connector *connector)
 	struct gma_encoder *gma_encoder = gma_attached_encoder(connector);
 	struct psb_intel_lvds_priv *lvds_priv = gma_encoder->dev_priv;
 
-	if (lvds_priv->ddc_bus)
-		psb_intel_i2c_destroy(lvds_priv->ddc_bus);
+	psb_intel_i2c_destroy(lvds_priv->ddc_bus);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 	kfree(connector);
@@ -835,11 +834,9 @@ out:
 
 failed_find:
 	mutex_unlock(&dev->mode_config.mutex);
-	if (lvds_priv->ddc_bus)
-		psb_intel_i2c_destroy(lvds_priv->ddc_bus);
+	psb_intel_i2c_destroy(lvds_priv->ddc_bus);
 failed_ddc:
-	if (lvds_priv->i2c_bus)
-		psb_intel_i2c_destroy(lvds_priv->i2c_bus);
+	psb_intel_i2c_destroy(lvds_priv->i2c_bus);
 failed_blc_i2c:
 	drm_encoder_cleanup(encoder);
 	drm_connector_cleanup(connector);
diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig
index ea0df6115f7e..499f64405dac 100644
--- a/drivers/gpu/drm/hisilicon/kirin/Kconfig
+++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig
@@ -4,6 +4,7 @@ config DRM_HISI_KIRIN
 	select DRM_KMS_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
+	select HISI_KIRIN_DW_DSI
 	help
 	  Choose this option if you have a hisilicon Kirin chipsets(hi6220).
 	  If M is selected the module will be called kirin-drm.
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index ed76baad525f..c3707d47cd89 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -487,6 +487,7 @@ static void ade_crtc_enable(struct drm_crtc *crtc)
 	ade_set_medianoc_qos(acrtc);
 	ade_display_enable(acrtc);
 	ade_dump_regs(ctx->base);
+	drm_crtc_vblank_on(crtc);
 	acrtc->enable = true;
 }
 
@@ -498,6 +499,7 @@ static void ade_crtc_disable(struct drm_crtc *crtc)
 	if (!acrtc->enable)
 		return;
 
+	drm_crtc_vblank_off(crtc);
 	ade_power_down(ctx);
 	acrtc->enable = false;
 }
@@ -965,21 +967,21 @@ static int ade_dts_parse(struct platform_device *pdev, struct ade_hw_ctx *ctx)
 	}
 
 	ctx->ade_core_clk = devm_clk_get(dev, "clk_ade_core");
-	if (!ctx->ade_core_clk) {
+	if (IS_ERR(ctx->ade_core_clk)) {
 		DRM_ERROR("failed to parse clk ADE_CORE\n");
-		return -ENODEV;
+		return PTR_ERR(ctx->ade_core_clk);
 	}
 
 	ctx->media_noc_clk = devm_clk_get(dev, "clk_codec_jpeg");
-	if (!ctx->media_noc_clk) {
+	if (IS_ERR(ctx->media_noc_clk)) {
 		DRM_ERROR("failed to parse clk CODEC_JPEG\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->media_noc_clk);
 	}
 
 	ctx->ade_pix_clk = devm_clk_get(dev, "clk_ade_pix");
-	if (!ctx->ade_pix_clk) {
+	if (IS_ERR(ctx->ade_pix_clk)) {
 		DRM_ERROR("failed to parse clk ADE_PIX\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->ade_pix_clk);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i2c/Kconfig b/drivers/gpu/drm/i2c/Kconfig
index 22c7ed63a001..4d341db462a2 100644
--- a/drivers/gpu/drm/i2c/Kconfig
+++ b/drivers/gpu/drm/i2c/Kconfig
@@ -1,12 +1,6 @@
 menu "I2C encoder or helper chips"
      depends on DRM && DRM_KMS_HELPER && I2C
 
-config DRM_I2C_ADV7511
-	tristate "AV7511 encoder"
-	select REGMAP_I2C
-	help
-	  Support for the Analog Device ADV7511(W) and ADV7513 HDMI encoders.
-
 config DRM_I2C_CH7006
 	tristate "Chrontel ch7006 TV encoder"
 	default m if DRM_NOUVEAU
diff --git a/drivers/gpu/drm/i2c/Makefile b/drivers/gpu/drm/i2c/Makefile
index 2c72eb584ab7..43aa33baebed 100644
--- a/drivers/gpu/drm/i2c/Makefile
+++ b/drivers/gpu/drm/i2c/Makefile
@@ -1,7 +1,5 @@
 ccflags-y := -Iinclude/drm
 
-obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511.o
-
 ch7006-y := ch7006_drv.o ch7006_mode.o
 obj-$(CONFIG_DRM_I2C_CH7006) += ch7006.o
 
diff --git a/drivers/gpu/drm/i2c/ch7006_drv.c b/drivers/gpu/drm/i2c/ch7006_drv.c
index 0594c45f7164..e9e8ae2ec06b 100644
--- a/drivers/gpu/drm/i2c/ch7006_drv.c
+++ b/drivers/gpu/drm/i2c/ch7006_drv.c
@@ -361,13 +361,8 @@ static int ch7006_encoder_set_property(struct drm_encoder *encoder,
 
 		/* Disable the crtc to ensure a full modeset is
 		 * performed whenever it's turned on again. */
-		if (crtc) {
-			struct drm_mode_set modeset = {
-				.crtc = crtc,
-			};
-
-			drm_mode_set_config_internal(&modeset);
-		}
+		if (crtc)
+			drm_crtc_force_disable(crtc);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 1ea0e1f43397..fb27d187876c 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -34,15 +34,15 @@
  * low-power state and comes back to normal.
  */
 
-#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin"
+#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_KBL);
 #define KBL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 1)
 
-#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin"
+#define I915_CSR_SKL "i915/skl_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_SKL);
-#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 26)
+#define SKL_CSR_VERSION_REQUIRED	CSR_VERSION(1, 23)
 
-#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin"
+#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin"
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 361977fd1170..a07da548ff49 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -928,6 +928,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 						uint32_t *batch,
 						uint32_t index)
 {
+	struct drm_i915_private *dev_priv = engine->i915;
 	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
 
 	/*
@@ -936,8 +937,8 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine,
 	 * this batch updates GEN8_L3SQCREG4 with default value we need to
 	 * set this bit here to retain the WA during flush.
 	 */
-	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_E0) ||
-	    IS_KBL_REVID(engine->i915, 0, KBL_REVID_E0))
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_E0) ||
+	    IS_KBL_REVID(dev_priv, 0, KBL_REVID_E0))
 		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
 
 	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
@@ -1077,11 +1078,12 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 				    uint32_t *offset)
 {
 	int ret;
+	struct drm_i915_private *dev_priv = engine->i915;
 	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
 
 	/* WaDisableCtxRestoreArbitration:skl,bxt */
-	if (IS_SKL_REVID(engine->i915, 0, SKL_REVID_D0) ||
-	    IS_BXT_REVID(engine->i915, 0, BXT_REVID_A1))
+	if (IS_SKL_REVID(dev_priv, 0, SKL_REVID_D0) ||
+	    IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1))
 		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
 
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
@@ -1099,7 +1101,7 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine,
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */
 	/* Actual scratch location is at 128 bytes offset */
-	if (IS_KBL_REVID(engine->i915, 0, KBL_REVID_A0)) {
+	if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) {
 		uint32_t scratch_addr
 			= engine->scratch.gtt_offset + 2*CACHELINE_BYTES;
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 948f24418eac..6bd352a8f30e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -57,7 +57,7 @@
 
 static void gen9_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */
 	I915_WRITE(CHICKEN_PAR1_1,
@@ -7044,7 +7044,7 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv,
 
 static void kabylake_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	gen9_init_clock_gating(dev);
 
@@ -7065,7 +7065,7 @@ static void kabylake_init_clock_gating(struct drm_device *dev)
 
 static void skylake_init_clock_gating(struct drm_device *dev)
 {
-	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	gen9_init_clock_gating(dev);
 
diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig
index a1844b50546c..f2c9ae822149 100644
--- a/drivers/gpu/drm/imx/Kconfig
+++ b/drivers/gpu/drm/imx/Kconfig
@@ -1,7 +1,6 @@
 config DRM_IMX
 	tristate "DRM Support for Freescale i.MX"
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select VIDEOMODE_HELPERS
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
diff --git a/drivers/gpu/drm/imx/dw_hdmi-imx.c b/drivers/gpu/drm/imx/dw_hdmi-imx.c
index a24631fdf4ad..359cd2765552 100644
--- a/drivers/gpu/drm/imx/dw_hdmi-imx.c
+++ b/drivers/gpu/drm/imx/dw_hdmi-imx.c
@@ -28,6 +28,11 @@ struct imx_hdmi {
 	struct regmap *regmap;
 };
 
+static inline struct imx_hdmi *enc_to_imx_hdmi(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_hdmi, encoder);
+}
+
 static const struct dw_hdmi_mpll_config imx_mpll_cfg[] = {
 	{
 		45250000, {
@@ -109,15 +114,9 @@ static void dw_hdmi_imx_encoder_disable(struct drm_encoder *encoder)
 {
 }
 
-static void dw_hdmi_imx_encoder_mode_set(struct drm_encoder *encoder,
-					 struct drm_display_mode *mode,
-					 struct drm_display_mode *adj_mode)
+static void dw_hdmi_imx_encoder_enable(struct drm_encoder *encoder)
 {
-}
-
-static void dw_hdmi_imx_encoder_commit(struct drm_encoder *encoder)
-{
-	struct imx_hdmi *hdmi = container_of(encoder, struct imx_hdmi, encoder);
+	struct imx_hdmi *hdmi = enc_to_imx_hdmi(encoder);
 	int mux = drm_of_encoder_active_port_id(hdmi->dev->of_node, encoder);
 
 	regmap_update_bits(hdmi->regmap, IOMUXC_GPR3,
@@ -125,16 +124,23 @@ static void dw_hdmi_imx_encoder_commit(struct drm_encoder *encoder)
 			   mux << IMX6Q_GPR3_HDMI_MUX_CTL_SHIFT);
 }
 
-static void dw_hdmi_imx_encoder_prepare(struct drm_encoder *encoder)
+static int dw_hdmi_imx_atomic_check(struct drm_encoder *encoder,
+				    struct drm_crtc_state *crtc_state,
+				    struct drm_connector_state *conn_state)
 {
-	imx_drm_set_bus_format(encoder, MEDIA_BUS_FMT_RGB888_1X24);
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+
+	imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
 }
 
 static const struct drm_encoder_helper_funcs dw_hdmi_imx_encoder_helper_funcs = {
-	.mode_set   = dw_hdmi_imx_encoder_mode_set,
-	.prepare    = dw_hdmi_imx_encoder_prepare,
-	.commit     = dw_hdmi_imx_encoder_commit,
+	.enable     = dw_hdmi_imx_encoder_enable,
 	.disable    = dw_hdmi_imx_encoder_disable,
+	.atomic_check = dw_hdmi_imx_atomic_check,
 };
 
 static const struct drm_encoder_funcs dw_hdmi_imx_encoder_funcs = {
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 7746418a4c08..9f7dafce3a4c 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -15,10 +15,14 @@
  */
 #include <linux/component.h>
 #include <linux/device.h>
+#include <linux/dma-buf.h>
 #include <linux/fb.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <linux/reservation.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_gem_cma_helper.h>
@@ -41,6 +45,7 @@ struct imx_drm_device {
 	struct imx_drm_crtc			*crtc[MAX_CRTC];
 	unsigned int				pipes;
 	struct drm_fbdev_cma			*fbhelper;
+	struct drm_atomic_state			*state;
 };
 
 struct imx_drm_crtc {
@@ -85,45 +90,6 @@ static int imx_drm_driver_unload(struct drm_device *drm)
 	return 0;
 }
 
-static struct imx_drm_crtc *imx_drm_find_crtc(struct drm_crtc *crtc)
-{
-	struct imx_drm_device *imxdrm = crtc->dev->dev_private;
-	unsigned i;
-
-	for (i = 0; i < MAX_CRTC; i++)
-		if (imxdrm->crtc[i] && imxdrm->crtc[i]->crtc == crtc)
-			return imxdrm->crtc[i];
-
-	return NULL;
-}
-
-int imx_drm_set_bus_config(struct drm_encoder *encoder, u32 bus_format,
-		int hsync_pin, int vsync_pin, u32 bus_flags)
-{
-	struct imx_drm_crtc_helper_funcs *helper;
-	struct imx_drm_crtc *imx_crtc;
-
-	imx_crtc = imx_drm_find_crtc(encoder->crtc);
-	if (!imx_crtc)
-		return -EINVAL;
-
-	helper = &imx_crtc->imx_drm_helper_funcs;
-	if (helper->set_interface_pix_fmt)
-		return helper->set_interface_pix_fmt(encoder->crtc,
-					bus_format, hsync_pin, vsync_pin,
-					bus_flags);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(imx_drm_set_bus_config);
-
-int imx_drm_set_bus_format(struct drm_encoder *encoder, u32 bus_format)
-{
-	return imx_drm_set_bus_config(encoder, bus_format, 2, 3,
-				      DRM_BUS_FLAG_DE_HIGH |
-				      DRM_BUS_FLAG_PIXDATA_NEGEDGE);
-}
-EXPORT_SYMBOL_GPL(imx_drm_set_bus_format);
-
 int imx_drm_crtc_vblank_get(struct imx_drm_crtc *imx_drm_crtc)
 {
 	return drm_crtc_vblank_get(imx_drm_crtc->crtc);
@@ -208,6 +174,63 @@ static void imx_drm_output_poll_changed(struct drm_device *drm)
 static const struct drm_mode_config_funcs imx_drm_mode_config_funcs = {
 	.fb_create = drm_fb_cma_create,
 	.output_poll_changed = imx_drm_output_poll_changed,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static void imx_drm_atomic_commit_tail(struct drm_atomic_state *state)
+{
+	struct drm_device *dev = state->dev;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane_state *plane_state;
+	struct drm_gem_cma_object *cma_obj;
+	struct fence *excl;
+	unsigned shared_count;
+	struct fence **shared;
+	unsigned int i, j;
+	int ret;
+
+	/* Wait for fences. */
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		plane_state = crtc->primary->state;
+		if (plane_state->fb) {
+			cma_obj = drm_fb_cma_get_gem_obj(plane_state->fb, 0);
+			if (cma_obj->base.dma_buf) {
+				ret = reservation_object_get_fences_rcu(
+					cma_obj->base.dma_buf->resv, &excl,
+					&shared_count, &shared);
+				if (unlikely(ret))
+					DRM_ERROR("failed to get fences "
+						  "for buffer\n");
+
+				if (excl) {
+					fence_wait(excl, false);
+					fence_put(excl);
+				}
+				for (j = 0; j < shared_count; i++) {
+					fence_wait(shared[j], false);
+					fence_put(shared[j]);
+				}
+			}
+		}
+	}
+
+	drm_atomic_helper_commit_modeset_disables(dev, state);
+
+	drm_atomic_helper_commit_planes(dev, state, true);
+
+	drm_atomic_helper_commit_modeset_enables(dev, state);
+
+	drm_atomic_helper_commit_hw_done(state);
+
+	drm_atomic_helper_wait_for_vblanks(dev, state);
+
+	drm_atomic_helper_cleanup_planes(dev, state);
+}
+
+static struct drm_mode_config_helper_funcs imx_drm_mode_config_helpers = {
+	.atomic_commit_tail = imx_drm_atomic_commit_tail,
 };
 
 /*
@@ -249,6 +272,7 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 	drm->mode_config.max_width = 4096;
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &imx_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &imx_drm_mode_config_helpers;
 
 	drm_mode_config_init(drm);
 
@@ -279,6 +303,8 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 		}
 	}
 
+	drm_mode_config_reset(drm);
+
 	/*
 	 * All components are now initialised, so setup the fb helper.
 	 * The fb helper takes copies of key hardware information, so the
@@ -289,7 +315,6 @@ static int imx_drm_driver_load(struct drm_device *drm, unsigned long flags)
 		dev_warn(drm->dev, "Invalid legacyfb_depth.  Defaulting to 16bpp\n");
 		legacyfb_depth = 16;
 	}
-	drm_helper_disable_unused_functions(drm);
 	imxdrm->fbhelper = drm_fbdev_cma_init(drm, legacyfb_depth,
 				drm->mode_config.num_crtc, MAX_CRTC);
 	if (IS_ERR(imxdrm->fbhelper)) {
@@ -403,7 +428,8 @@ static const struct drm_ioctl_desc imx_drm_ioctls[] = {
 };
 
 static struct drm_driver imx_drm_driver = {
-	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME,
+	.driver_features	= DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME |
+				  DRIVER_ATOMIC,
 	.load			= imx_drm_driver_load,
 	.unload			= imx_drm_driver_unload,
 	.lastclose		= imx_drm_driver_lastclose,
@@ -491,6 +517,7 @@ static int imx_drm_platform_remove(struct platform_device *pdev)
 static int imx_drm_suspend(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct imx_drm_device *imxdrm;
 
 	/* The drm_dev is NULL before .load hook is called */
 	if (drm_dev == NULL)
@@ -498,17 +525,26 @@ static int imx_drm_suspend(struct device *dev)
 
 	drm_kms_helper_poll_disable(drm_dev);
 
+	imxdrm = drm_dev->dev_private;
+	imxdrm->state = drm_atomic_helper_suspend(drm_dev);
+	if (IS_ERR(imxdrm->state)) {
+		drm_kms_helper_poll_enable(drm_dev);
+		return PTR_ERR(imxdrm->state);
+	}
+
 	return 0;
 }
 
 static int imx_drm_resume(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
+	struct imx_drm_device *imx_drm;
 
 	if (drm_dev == NULL)
 		return 0;
 
-	drm_helper_resume_force_mode(drm_dev);
+	imx_drm = drm_dev->dev_private;
+	drm_atomic_helper_resume(drm_dev, imx_drm->state);
 	drm_kms_helper_poll_enable(drm_dev);
 
 	return 0;
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 74320a1723b7..07d33e45f90f 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -15,12 +15,22 @@ struct platform_device;
 
 unsigned int imx_drm_crtc_id(struct imx_drm_crtc *crtc);
 
+struct imx_crtc_state {
+	struct drm_crtc_state			base;
+	u32					bus_format;
+	u32					bus_flags;
+	int					di_hsync_pin;
+	int					di_vsync_pin;
+};
+
+static inline struct imx_crtc_state *to_imx_crtc_state(struct drm_crtc_state *s)
+{
+	return container_of(s, struct imx_crtc_state, base);
+}
+
 struct imx_drm_crtc_helper_funcs {
 	int (*enable_vblank)(struct drm_crtc *crtc);
 	void (*disable_vblank)(struct drm_crtc *crtc);
-	int (*set_interface_pix_fmt)(struct drm_crtc *crtc,
-			u32 bus_format, int hsync_pin, int vsync_pin,
-			u32 bus_flags);
 	const struct drm_crtc_helper_funcs *crtc_helper_funcs;
 	const struct drm_crtc_funcs *crtc_funcs;
 };
@@ -42,11 +52,6 @@ void imx_drm_mode_config_init(struct drm_device *drm);
 
 struct drm_gem_cma_object *imx_drm_fb_get_obj(struct drm_framebuffer *fb);
 
-int imx_drm_set_bus_config(struct drm_encoder *encoder, u32 bus_format,
-		int hsync_pin, int vsync_pin, u32 bus_flags);
-int imx_drm_set_bus_format(struct drm_encoder *encoder,
-		u32 bus_format);
-
 int imx_drm_encoder_parse_of(struct drm_device *drm,
 	struct drm_encoder *encoder, struct device_node *np);
 
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index beff793bb717..b03919ed60ba 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -17,6 +17,8 @@
 #include <linux/clk.h>
 #include <linux/component.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_of.h>
@@ -49,9 +51,6 @@
 #define LDB_DI1_VS_POL_ACT_LOW		(1 << 10)
 #define LDB_BGREF_RMODE_INT		(1 << 15)
 
-#define con_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, connector)
-#define enc_to_imx_ldb_ch(x) container_of(x, struct imx_ldb_channel, encoder)
-
 struct imx_ldb;
 
 struct imx_ldb_channel {
@@ -66,9 +65,19 @@ struct imx_ldb_channel {
 	int edid_len;
 	struct drm_display_mode mode;
 	int mode_valid;
-	int bus_format;
+	u32 bus_format;
 };
 
+static inline struct imx_ldb_channel *con_to_imx_ldb_ch(struct drm_connector *c)
+{
+	return container_of(c, struct imx_ldb_channel, connector);
+}
+
+static inline struct imx_ldb_channel *enc_to_imx_ldb_ch(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_ldb_channel, encoder);
+}
+
 struct bus_mux {
 	int reg;
 	int shift;
@@ -93,6 +102,32 @@ static enum drm_connector_status imx_ldb_connector_detect(
 	return connector_status_connected;
 }
 
+static void imx_ldb_ch_set_bus_format(struct imx_ldb_channel *imx_ldb_ch,
+				      u32 bus_format)
+{
+	struct imx_ldb *ldb = imx_ldb_ch->ldb;
+	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
+
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
+		if (imx_ldb_ch->chno == 0 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24;
+		if (imx_ldb_ch->chno == 1 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24;
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
+		if (imx_ldb_ch->chno == 0 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24 |
+					 LDB_BIT_MAP_CH0_JEIDA;
+		if (imx_ldb_ch->chno == 1 || dual)
+			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24 |
+					 LDB_BIT_MAP_CH1_JEIDA;
+		break;
+	}
+}
+
 static int imx_ldb_connector_get_modes(struct drm_connector *connector)
 {
 	struct imx_ldb_channel *imx_ldb_ch = con_to_imx_ldb_ch(connector);
@@ -100,11 +135,7 @@ static int imx_ldb_connector_get_modes(struct drm_connector *connector)
 
 	if (imx_ldb_ch->panel && imx_ldb_ch->panel->funcs &&
 	    imx_ldb_ch->panel->funcs->get_modes) {
-		struct drm_display_info *di = &connector->display_info;
-
 		num_modes = imx_ldb_ch->panel->funcs->get_modes(imx_ldb_ch->panel);
-		if (!imx_ldb_ch->bus_format && di->num_bus_formats)
-			imx_ldb_ch->bus_format = di->bus_formats[0];
 		if (num_modes > 0)
 			return num_modes;
 	}
@@ -141,10 +172,6 @@ static struct drm_encoder *imx_ldb_connector_best_encoder(
 	return &imx_ldb_ch->encoder;
 }
 
-static void imx_ldb_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-}
-
 static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
 		unsigned long serial_clk, unsigned long di_clk)
 {
@@ -173,43 +200,7 @@ static void imx_ldb_set_clock(struct imx_ldb *ldb, int mux, int chno,
 			chno);
 }
 
-static void imx_ldb_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
-	struct imx_ldb *ldb = imx_ldb_ch->ldb;
-	int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
-	u32 bus_format;
-
-	switch (imx_ldb_ch->bus_format) {
-	default:
-		dev_warn(ldb->dev,
-			 "could not determine data mapping, default to 18-bit \"spwg\"\n");
-		/* fallthrough */
-	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
-		bus_format = MEDIA_BUS_FMT_RGB666_1X18;
-		break;
-	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
-		bus_format = MEDIA_BUS_FMT_RGB888_1X24;
-		if (imx_ldb_ch->chno == 0 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24;
-		if (imx_ldb_ch->chno == 1 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24;
-		break;
-	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
-		bus_format = MEDIA_BUS_FMT_RGB888_1X24;
-		if (imx_ldb_ch->chno == 0 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH0_24 |
-					 LDB_BIT_MAP_CH0_JEIDA;
-		if (imx_ldb_ch->chno == 1 || dual)
-			ldb->ldb_ctrl |= LDB_DATA_WIDTH_CH1_24 |
-					 LDB_BIT_MAP_CH1_JEIDA;
-		break;
-	}
-
-	imx_drm_set_bus_format(encoder, bus_format);
-}
-
-static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
+static void imx_ldb_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
 	struct imx_ldb *ldb = imx_ldb_ch->ldb;
@@ -219,8 +210,13 @@ static void imx_ldb_encoder_commit(struct drm_encoder *encoder)
 	drm_panel_prepare(imx_ldb_ch->panel);
 
 	if (dual) {
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[0]);
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[1]);
+
 		clk_prepare_enable(ldb->clk[0]);
 		clk_prepare_enable(ldb->clk[1]);
+	} else {
+		clk_set_parent(ldb->clk_sel[mux], ldb->clk[imx_ldb_ch->chno]);
 	}
 
 	if (imx_ldb_ch == &ldb->channel[0] || dual) {
@@ -265,6 +261,7 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
 	unsigned long serial_clk;
 	unsigned long di_clk = mode->clock * 1000;
 	int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
+	u32 bus_format = imx_ldb_ch->bus_format;
 
 	if (mode->clock > 170000) {
 		dev_warn(ldb->dev,
@@ -286,18 +283,33 @@ static void imx_ldb_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* FIXME - assumes straight connections DI0 --> CH0, DI1 --> CH1 */
-	if (imx_ldb_ch == &ldb->channel[0]) {
+	if (imx_ldb_ch == &ldb->channel[0] || dual) {
 		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 			ldb->ldb_ctrl |= LDB_DI0_VS_POL_ACT_LOW;
 		else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
 			ldb->ldb_ctrl &= ~LDB_DI0_VS_POL_ACT_LOW;
 	}
-	if (imx_ldb_ch == &ldb->channel[1]) {
+	if (imx_ldb_ch == &ldb->channel[1] || dual) {
 		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 			ldb->ldb_ctrl |= LDB_DI1_VS_POL_ACT_LOW;
 		else if (mode->flags & DRM_MODE_FLAG_PVSYNC)
 			ldb->ldb_ctrl &= ~LDB_DI1_VS_POL_ACT_LOW;
 	}
+
+	if (!bus_format) {
+		struct drm_connector *connector;
+
+		drm_for_each_connector(connector, encoder->dev) {
+			struct drm_display_info *di = &connector->display_info;
+
+			if (connector->encoder == encoder &&
+			    di->num_bus_formats) {
+				bus_format = di->bus_formats[0];
+				break;
+			}
+		}
+	}
+	imx_ldb_ch_set_bus_format(imx_ldb_ch, bus_format);
 }
 
 static void imx_ldb_encoder_disable(struct drm_encoder *encoder)
@@ -357,11 +369,45 @@ static void imx_ldb_encoder_disable(struct drm_encoder *encoder)
 	drm_panel_unprepare(imx_ldb_ch->panel);
 }
 
+static int imx_ldb_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct imx_ldb_channel *imx_ldb_ch = enc_to_imx_ldb_ch(encoder);
+	struct drm_display_info *di = &conn_state->connector->display_info;
+	u32 bus_format = imx_ldb_ch->bus_format;
+
+	/* Bus format description in DT overrides connector display info. */
+	if (!bus_format && di->num_bus_formats)
+		bus_format = di->bus_formats[0];
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB666_1X7X3_SPWG:
+		imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
+		break;
+	case MEDIA_BUS_FMT_RGB888_1X7X4_SPWG:
+	case MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA:
+		imx_crtc_state->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
+}
+
+
 static const struct drm_connector_funcs imx_ldb_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_ldb_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_ldb_connector_helper_funcs = {
@@ -374,11 +420,10 @@ static const struct drm_encoder_funcs imx_ldb_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_ldb_encoder_helper_funcs = {
-	.dpms = imx_ldb_encoder_dpms,
-	.prepare = imx_ldb_encoder_prepare,
-	.commit = imx_ldb_encoder_commit,
 	.mode_set = imx_ldb_encoder_mode_set,
+	.enable = imx_ldb_encoder_enable,
 	.disable = imx_ldb_encoder_disable,
+	.atomic_check = imx_ldb_encoder_atomic_check,
 };
 
 static int imx_ldb_get_clk(struct imx_ldb *ldb, int chno)
@@ -400,10 +445,10 @@ static int imx_ldb_register(struct drm_device *drm,
 	struct imx_ldb_channel *imx_ldb_ch)
 {
 	struct imx_ldb *ldb = imx_ldb_ch->ldb;
+	struct drm_encoder *encoder = &imx_ldb_ch->encoder;
 	int ret;
 
-	ret = imx_drm_encoder_parse_of(drm, &imx_ldb_ch->encoder,
-				       imx_ldb_ch->child);
+	ret = imx_drm_encoder_parse_of(drm, encoder, imx_ldb_ch->child);
 	if (ret)
 		return ret;
 
@@ -417,9 +462,8 @@ static int imx_ldb_register(struct drm_device *drm,
 			return ret;
 	}
 
-	drm_encoder_helper_add(&imx_ldb_ch->encoder,
-			&imx_ldb_encoder_helper_funcs);
-	drm_encoder_init(drm, &imx_ldb_ch->encoder, &imx_ldb_encoder_funcs,
+	drm_encoder_helper_add(encoder, &imx_ldb_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &imx_ldb_encoder_funcs,
 			 DRM_MODE_ENCODER_LVDS, NULL);
 
 	drm_connector_helper_add(&imx_ldb_ch->connector,
@@ -427,11 +471,14 @@ static int imx_ldb_register(struct drm_device *drm,
 	drm_connector_init(drm, &imx_ldb_ch->connector,
 			   &imx_ldb_connector_funcs, DRM_MODE_CONNECTOR_LVDS);
 
-	if (imx_ldb_ch->panel)
-		drm_panel_attach(imx_ldb_ch->panel, &imx_ldb_ch->connector);
+	if (imx_ldb_ch->panel) {
+		ret = drm_panel_attach(imx_ldb_ch->panel,
+				       &imx_ldb_ch->connector);
+		if (ret)
+			return ret;
+	}
 
-	drm_mode_connector_attach_encoder(&imx_ldb_ch->connector,
-			&imx_ldb_ch->encoder);
+	drm_mode_connector_attach_encoder(&imx_ldb_ch->connector, encoder);
 
 	return 0;
 }
@@ -560,6 +607,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		struct imx_ldb_channel *channel;
 		struct device_node *ddc_node;
 		struct device_node *ep;
+		int bus_format;
 
 		ret = of_property_read_u32(child, "reg", &i);
 		if (ret || i < 0 || i > 1)
@@ -632,21 +680,22 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 			}
 		}
 
-		channel->bus_format = of_get_bus_format(dev, child);
-		if (channel->bus_format == -EINVAL) {
+		bus_format = of_get_bus_format(dev, child);
+		if (bus_format == -EINVAL) {
 			/*
 			 * If no bus format was specified in the device tree,
 			 * we can still get it from the connected panel later.
 			 */
 			if (channel->panel && channel->panel->funcs &&
 			    channel->panel->funcs->get_modes)
-				channel->bus_format = 0;
+				bus_format = 0;
 		}
-		if (channel->bus_format < 0) {
+		if (bus_format < 0) {
 			dev_err(dev, "could not determine data mapping: %d\n",
-				channel->bus_format);
-			return channel->bus_format;
+				bus_format);
+			return bus_format;
 		}
+		channel->bus_format = bus_format;
 
 		ret = imx_ldb_register(drm, channel);
 		if (ret)
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index baf788121287..5e875944ffa2 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/videodev2.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <video/imx-ipu-v3.h>
@@ -97,9 +98,6 @@
 /* TVE_TST_MODE_REG */
 #define TVE_TVDAC_TEST_MODE_MASK	(0x7 << 0)
 
-#define con_to_tve(x) container_of(x, struct imx_tve, connector)
-#define enc_to_tve(x) container_of(x, struct imx_tve, encoder)
-
 enum {
 	TVE_MODE_TVOUT,
 	TVE_MODE_VGA,
@@ -112,6 +110,8 @@ struct imx_tve {
 	spinlock_t lock;	/* register lock */
 	bool enabled;
 	int mode;
+	int di_hsync_pin;
+	int di_vsync_pin;
 
 	struct regmap *regmap;
 	struct regulator *dac_reg;
@@ -120,10 +120,18 @@ struct imx_tve {
 	struct clk *di_sel_clk;
 	struct clk_hw clk_hw_di;
 	struct clk *di_clk;
-	int vsync_pin;
-	int hsync_pin;
 };
 
+static inline struct imx_tve *con_to_tve(struct drm_connector *c)
+{
+	return container_of(c, struct imx_tve, connector);
+}
+
+static inline struct imx_tve *enc_to_tve(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_tve, encoder);
+}
+
 static void tve_lock(void *__tve)
 __acquires(&tve->lock)
 {
@@ -148,8 +156,7 @@ static void tve_enable(struct imx_tve *tve)
 		tve->enabled = true;
 		clk_prepare_enable(tve->clk);
 		ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-					 TVE_IPU_CLK_EN | TVE_EN,
-					 TVE_IPU_CLK_EN | TVE_EN);
+					 TVE_EN, TVE_EN);
 	}
 
 	/* clear interrupt status register */
@@ -172,7 +179,7 @@ static void tve_disable(struct imx_tve *tve)
 	if (tve->enabled) {
 		tve->enabled = false;
 		ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-					 TVE_IPU_CLK_EN | TVE_EN, 0);
+					 TVE_EN, 0);
 		clk_disable_unprepare(tve->clk);
 	}
 }
@@ -275,36 +282,6 @@ static struct drm_encoder *imx_tve_connector_best_encoder(
 	return &tve->encoder;
 }
 
-static void imx_tve_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct imx_tve *tve = enc_to_tve(encoder);
-	int ret;
-
-	ret = regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
-				 TVE_TV_OUT_MODE_MASK, TVE_TV_OUT_DISABLE);
-	if (ret < 0)
-		dev_err(tve->dev, "failed to disable TVOUT: %d\n", ret);
-}
-
-static void imx_tve_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_tve *tve = enc_to_tve(encoder);
-
-	tve_disable(tve);
-
-	switch (tve->mode) {
-	case TVE_MODE_VGA:
-		imx_drm_set_bus_config(encoder, MEDIA_BUS_FMT_GBR888_1X24,
-				       tve->hsync_pin, tve->vsync_pin,
-				       DRM_BUS_FLAG_DE_HIGH |
-				       DRM_BUS_FLAG_PIXDATA_NEGEDGE);
-		break;
-	case TVE_MODE_TVOUT:
-		imx_drm_set_bus_format(encoder, MEDIA_BUS_FMT_YUV8_1X24);
-		break;
-	}
-}
-
 static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 				     struct drm_display_mode *orig_mode,
 				     struct drm_display_mode *mode)
@@ -333,6 +310,9 @@ static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 			ret);
 	}
 
+	regmap_update_bits(tve->regmap, TVE_COM_CONF_REG,
+			   TVE_IPU_CLK_EN, TVE_IPU_CLK_EN);
+
 	if (tve->mode == TVE_MODE_VGA)
 		ret = tve_setup_vga(tve);
 	else
@@ -341,7 +321,7 @@ static void imx_tve_encoder_mode_set(struct drm_encoder *encoder,
 		dev_err(tve->dev, "failed to set configuration: %d\n", ret);
 }
 
-static void imx_tve_encoder_commit(struct drm_encoder *encoder)
+static void imx_tve_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_tve *tve = enc_to_tve(encoder);
 
@@ -355,11 +335,28 @@ static void imx_tve_encoder_disable(struct drm_encoder *encoder)
 	tve_disable(tve);
 }
 
+static int imx_tve_atomic_check(struct drm_encoder *encoder,
+				struct drm_crtc_state *crtc_state,
+				struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct imx_tve *tve = enc_to_tve(encoder);
+
+	imx_crtc_state->bus_format = MEDIA_BUS_FMT_GBR888_1X24;
+	imx_crtc_state->di_hsync_pin = tve->di_hsync_pin;
+	imx_crtc_state->di_vsync_pin = tve->di_vsync_pin;
+
+	return 0;
+}
+
 static const struct drm_connector_funcs imx_tve_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_tve_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_tve_connector_helper_funcs = {
@@ -373,11 +370,10 @@ static const struct drm_encoder_funcs imx_tve_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_tve_encoder_helper_funcs = {
-	.dpms = imx_tve_encoder_dpms,
-	.prepare = imx_tve_encoder_prepare,
 	.mode_set = imx_tve_encoder_mode_set,
-	.commit = imx_tve_encoder_commit,
+	.enable = imx_tve_encoder_enable,
 	.disable = imx_tve_encoder_disable,
+	.atomic_check = imx_tve_atomic_check,
 };
 
 static irqreturn_t imx_tve_irq_handler(int irq, void *data)
@@ -495,8 +491,7 @@ static int imx_tve_register(struct drm_device *drm, struct imx_tve *tve)
 	encoder_type = tve->mode == TVE_MODE_VGA ?
 				DRM_MODE_ENCODER_DAC : DRM_MODE_ENCODER_TVDAC;
 
-	ret = imx_drm_encoder_parse_of(drm, &tve->encoder,
-				       tve->dev->of_node);
+	ret = imx_drm_encoder_parse_of(drm, &tve->encoder, tve->dev->of_node);
 	if (ret)
 		return ret;
 
@@ -587,15 +582,15 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	if (tve->mode == TVE_MODE_VGA) {
 		ret = of_property_read_u32(np, "fsl,hsync-pin",
-					   &tve->hsync_pin);
+					   &tve->di_hsync_pin);
 
 		if (ret < 0) {
-			dev_err(dev, "failed to get vsync pin\n");
+			dev_err(dev, "failed to get hsync pin\n");
 			return ret;
 		}
 
-		ret |= of_property_read_u32(np, "fsl,vsync-pin",
-					    &tve->vsync_pin);
+		ret = of_property_read_u32(np, "fsl,vsync-pin",
+					   &tve->di_vsync_pin);
 
 		if (ret < 0) {
 			dev_err(dev, "failed to get vsync pin\n");
@@ -633,7 +628,9 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	tve->dac_reg = devm_regulator_get(dev, "dac");
 	if (!IS_ERR(tve->dac_reg)) {
-		regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
+		ret = regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
+		if (ret)
+			return ret;
 		ret = regulator_enable(tve->dac_reg);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index fc040417e1e8..08e188bc10fc 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -18,12 +18,12 @@
 #include <linux/device.h>
 #include <linux/platform_device.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <linux/fb.h>
 #include <linux/clk.h>
 #include <linux/errno.h>
-#include <linux/reservation.h>
-#include <linux/dma-buf.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 
@@ -33,23 +33,6 @@
 
 #define DRIVER_DESC		"i.MX IPUv3 Graphics"
 
-enum ipu_flip_status {
-	IPU_FLIP_NONE,
-	IPU_FLIP_PENDING,
-	IPU_FLIP_SUBMITTED,
-};
-
-struct ipu_flip_work {
-	struct work_struct		unref_work;
-	struct drm_gem_object		*bo;
-	struct drm_pending_vblank_event *page_flip_event;
-	struct work_struct		fence_work;
-	struct ipu_crtc			*crtc;
-	struct fence			*excl;
-	unsigned			shared_count;
-	struct fence			**shared;
-};
-
 struct ipu_crtc {
 	struct device		*dev;
 	struct drm_crtc		base;
@@ -60,201 +43,166 @@ struct ipu_crtc {
 
 	struct ipu_dc		*dc;
 	struct ipu_di		*di;
-	int			enabled;
-	enum ipu_flip_status	flip_state;
-	struct workqueue_struct *flip_queue;
-	struct ipu_flip_work	*flip_work;
 	int			irq;
-	u32			bus_format;
-	u32			bus_flags;
-	int			di_hsync_pin;
-	int			di_vsync_pin;
 };
 
-#define to_ipu_crtc(x) container_of(x, struct ipu_crtc, base)
+static inline struct ipu_crtc *to_ipu_crtc(struct drm_crtc *crtc)
+{
+	return container_of(crtc, struct ipu_crtc, base);
+}
 
-static void ipu_fb_enable(struct ipu_crtc *ipu_crtc)
+static void ipu_crtc_enable(struct drm_crtc *crtc)
 {
+	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
 
-	if (ipu_crtc->enabled)
-		return;
-
 	ipu_dc_enable(ipu);
-	ipu_plane_enable(ipu_crtc->plane[0]);
-	/* Start DC channel and DI after IDMAC */
 	ipu_dc_enable_channel(ipu_crtc->dc);
 	ipu_di_enable(ipu_crtc->di);
-	drm_crtc_vblank_on(&ipu_crtc->base);
-
-	ipu_crtc->enabled = 1;
 }
 
-static void ipu_fb_disable(struct ipu_crtc *ipu_crtc)
+static void ipu_crtc_disable(struct drm_crtc *crtc)
 {
+	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
 	struct ipu_soc *ipu = dev_get_drvdata(ipu_crtc->dev->parent);
 
-	if (!ipu_crtc->enabled)
-		return;
-
-	/* Stop DC channel and DI before IDMAC */
 	ipu_dc_disable_channel(ipu_crtc->dc);
 	ipu_di_disable(ipu_crtc->di);
-	ipu_plane_disable(ipu_crtc->plane[0]);
 	ipu_dc_disable(ipu);
-	drm_crtc_vblank_off(&ipu_crtc->base);
 
-	ipu_crtc->enabled = 0;
+	spin_lock_irq(&crtc->dev->event_lock);
+	if (crtc->state->event) {
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+	spin_unlock_irq(&crtc->dev->event_lock);
 }
 
-static void ipu_crtc_dpms(struct drm_crtc *crtc, int mode)
+static void imx_drm_crtc_reset(struct drm_crtc *crtc)
 {
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+	struct imx_crtc_state *state;
 
-	dev_dbg(ipu_crtc->dev, "%s mode: %d\n", __func__, mode);
-
-	switch (mode) {
-	case DRM_MODE_DPMS_ON:
-		ipu_fb_enable(ipu_crtc);
-		break;
-	case DRM_MODE_DPMS_STANDBY:
-	case DRM_MODE_DPMS_SUSPEND:
-	case DRM_MODE_DPMS_OFF:
-		ipu_fb_disable(ipu_crtc);
-		break;
+	if (crtc->state) {
+		if (crtc->state->mode_blob)
+			drm_property_unreference_blob(crtc->state->mode_blob);
+
+		state = to_imx_crtc_state(crtc->state);
+		memset(state, 0, sizeof(*state));
+	} else {
+		state = kzalloc(sizeof(*state), GFP_KERNEL);
+		if (!state)
+			return;
+		crtc->state = &state->base;
 	}
+
+	state->base.crtc = crtc;
 }
 
-static void ipu_flip_unref_work_func(struct work_struct *__work)
+static struct drm_crtc_state *imx_drm_crtc_duplicate_state(struct drm_crtc *crtc)
 {
-	struct ipu_flip_work *work =
-			container_of(__work, struct ipu_flip_work, unref_work);
+	struct imx_crtc_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
 
-	drm_gem_object_unreference_unlocked(work->bo);
-	kfree(work);
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
+
+	WARN_ON(state->base.crtc != crtc);
+	state->base.crtc = crtc;
+
+	return &state->base;
 }
 
-static void ipu_flip_fence_work_func(struct work_struct *__work)
+static void imx_drm_crtc_destroy_state(struct drm_crtc *crtc,
+				       struct drm_crtc_state *state)
 {
-	struct ipu_flip_work *work =
-			container_of(__work, struct ipu_flip_work, fence_work);
-	int i;
-
-	/* wait for all fences attached to the FB obj to signal */
-	if (work->excl) {
-		fence_wait(work->excl, false);
-		fence_put(work->excl);
-	}
-	for (i = 0; i < work->shared_count; i++) {
-		fence_wait(work->shared[i], false);
-		fence_put(work->shared[i]);
-	}
+	__drm_atomic_helper_crtc_destroy_state(state);
+	kfree(to_imx_crtc_state(state));
+}
+
+static const struct drm_crtc_funcs ipu_crtc_funcs = {
+	.set_config = drm_atomic_helper_set_config,
+	.destroy = drm_crtc_cleanup,
+	.page_flip = drm_atomic_helper_page_flip,
+	.reset = imx_drm_crtc_reset,
+	.atomic_duplicate_state = imx_drm_crtc_duplicate_state,
+	.atomic_destroy_state = imx_drm_crtc_destroy_state,
+};
 
-	work->crtc->flip_state = IPU_FLIP_SUBMITTED;
+static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
+{
+	struct ipu_crtc *ipu_crtc = dev_id;
+
+	imx_drm_handle_vblank(ipu_crtc->imx_crtc);
+
+	return IRQ_HANDLED;
 }
 
-static int ipu_page_flip(struct drm_crtc *crtc,
-		struct drm_framebuffer *fb,
-		struct drm_pending_vblank_event *event,
-		uint32_t page_flip_flags)
+static bool ipu_crtc_mode_fixup(struct drm_crtc *crtc,
+				  const struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
 {
-	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-	struct ipu_flip_work *flip_work;
+	struct videomode vm;
 	int ret;
 
-	if (ipu_crtc->flip_state != IPU_FLIP_NONE)
-		return -EBUSY;
-
-	ret = imx_drm_crtc_vblank_get(ipu_crtc->imx_crtc);
-	if (ret) {
-		dev_dbg(ipu_crtc->dev, "failed to acquire vblank counter\n");
-		list_del(&event->base.link);
-
-		return ret;
-	}
+	drm_display_mode_to_videomode(adjusted_mode, &vm);
 
-	flip_work = kzalloc(sizeof *flip_work, GFP_KERNEL);
-	if (!flip_work) {
-		ret = -ENOMEM;
-		goto put_vblank;
-	}
-	INIT_WORK(&flip_work->unref_work, ipu_flip_unref_work_func);
-	flip_work->page_flip_event = event;
+	ret = ipu_di_adjust_videomode(ipu_crtc->di, &vm);
+	if (ret)
+		return false;
 
-	/* get BO backing the old framebuffer and take a reference */
-	flip_work->bo = &drm_fb_cma_get_gem_obj(crtc->primary->fb, 0)->base;
-	drm_gem_object_reference(flip_work->bo);
+	if ((vm.vsync_len == 0) || (vm.hsync_len == 0))
+		return false;
 
-	ipu_crtc->flip_work = flip_work;
-	/*
-	 * If the object has a DMABUF attached, we need to wait on its fences
-	 * if there are any.
-	 */
-	if (cma_obj->base.dma_buf) {
-		INIT_WORK(&flip_work->fence_work, ipu_flip_fence_work_func);
-		flip_work->crtc = ipu_crtc;
+	drm_display_mode_from_videomode(&vm, adjusted_mode);
 
-		ret = reservation_object_get_fences_rcu(
-				cma_obj->base.dma_buf->resv, &flip_work->excl,
-				&flip_work->shared_count, &flip_work->shared);
+	return true;
+}
 
-		if (unlikely(ret)) {
-			DRM_ERROR("failed to get fences for buffer\n");
-			goto free_flip_work;
-		}
+static int ipu_crtc_atomic_check(struct drm_crtc *crtc,
+				 struct drm_crtc_state *state)
+{
+	u32 primary_plane_mask = 1 << drm_plane_index(crtc->primary);
 
-		/* No need to queue the worker if the are no fences */
-		if (!flip_work->excl && !flip_work->shared_count) {
-			ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
-		} else {
-			ipu_crtc->flip_state = IPU_FLIP_PENDING;
-			queue_work(ipu_crtc->flip_queue,
-				   &flip_work->fence_work);
-		}
-	} else {
-		ipu_crtc->flip_state = IPU_FLIP_SUBMITTED;
-	}
+	if (state->active && (primary_plane_mask & state->plane_mask) == 0)
+		return -EINVAL;
 
 	return 0;
-
-free_flip_work:
-	drm_gem_object_unreference_unlocked(flip_work->bo);
-	kfree(flip_work);
-	ipu_crtc->flip_work = NULL;
-put_vblank:
-	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
-
-	return ret;
 }
 
-static const struct drm_crtc_funcs ipu_crtc_funcs = {
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = drm_crtc_cleanup,
-	.page_flip = ipu_page_flip,
-};
+static void ipu_crtc_atomic_begin(struct drm_crtc *crtc,
+				  struct drm_crtc_state *old_crtc_state)
+{
+	spin_lock_irq(&crtc->dev->event_lock);
+	if (crtc->state->event) {
+		WARN_ON(drm_crtc_vblank_get(crtc));
+		drm_crtc_arm_vblank_event(crtc, crtc->state->event);
+		crtc->state->event = NULL;
+	}
+	spin_unlock_irq(&crtc->dev->event_lock);
+}
 
-static int ipu_crtc_mode_set(struct drm_crtc *crtc,
-			       struct drm_display_mode *orig_mode,
-			       struct drm_display_mode *mode,
-			       int x, int y,
-			       struct drm_framebuffer *old_fb)
+static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
 	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
+	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc->state);
 	struct ipu_di_signal_cfg sig_cfg = {};
 	unsigned long encoder_types = 0;
-	int ret;
 
 	dev_dbg(ipu_crtc->dev, "%s: mode->hdisplay: %d\n", __func__,
 			mode->hdisplay);
 	dev_dbg(ipu_crtc->dev, "%s: mode->vdisplay: %d\n", __func__,
 			mode->vdisplay);
 
-	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc)
 			encoder_types |= BIT(encoder->encoder_type);
+	}
 
 	dev_dbg(ipu_crtc->dev, "%s: attached to encoder types 0x%lx\n",
 		__func__, encoder_types);
@@ -272,114 +220,30 @@ static int ipu_crtc_mode_set(struct drm_crtc *crtc,
 	else
 		sig_cfg.clkflags = 0;
 
-	sig_cfg.enable_pol = !(ipu_crtc->bus_flags & DRM_BUS_FLAG_DE_LOW);
+	sig_cfg.enable_pol = !(imx_crtc_state->bus_flags & DRM_BUS_FLAG_DE_LOW);
 	/* Default to driving pixel data on negative clock edges */
-	sig_cfg.clk_pol = !!(ipu_crtc->bus_flags &
+	sig_cfg.clk_pol = !!(imx_crtc_state->bus_flags &
 			     DRM_BUS_FLAG_PIXDATA_POSEDGE);
-	sig_cfg.bus_format = ipu_crtc->bus_format;
+	sig_cfg.bus_format = imx_crtc_state->bus_format;
 	sig_cfg.v_to_h_sync = 0;
-	sig_cfg.hsync_pin = ipu_crtc->di_hsync_pin;
-	sig_cfg.vsync_pin = ipu_crtc->di_vsync_pin;
+	sig_cfg.hsync_pin = imx_crtc_state->di_hsync_pin;
+	sig_cfg.vsync_pin = imx_crtc_state->di_vsync_pin;
 
 	drm_display_mode_to_videomode(mode, &sig_cfg.mode);
 
-	ret = ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di,
-			       mode->flags & DRM_MODE_FLAG_INTERLACE,
-			       ipu_crtc->bus_format, mode->hdisplay);
-	if (ret) {
-		dev_err(ipu_crtc->dev,
-				"initializing display controller failed with %d\n",
-				ret);
-		return ret;
-	}
-
-	ret = ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg);
-	if (ret) {
-		dev_err(ipu_crtc->dev,
-				"initializing panel failed with %d\n", ret);
-		return ret;
-	}
-
-	return ipu_plane_mode_set(ipu_crtc->plane[0], crtc, mode,
-				  crtc->primary->fb,
-				  0, 0, mode->hdisplay, mode->vdisplay,
-				  x, y, mode->hdisplay, mode->vdisplay,
-				  mode->flags & DRM_MODE_FLAG_INTERLACE);
-}
-
-static void ipu_crtc_handle_pageflip(struct ipu_crtc *ipu_crtc)
-{
-	unsigned long flags;
-	struct drm_device *drm = ipu_crtc->base.dev;
-	struct ipu_flip_work *work = ipu_crtc->flip_work;
-
-	spin_lock_irqsave(&drm->event_lock, flags);
-	if (work->page_flip_event)
-		drm_crtc_send_vblank_event(&ipu_crtc->base,
-					   work->page_flip_event);
-	imx_drm_crtc_vblank_put(ipu_crtc->imx_crtc);
-	spin_unlock_irqrestore(&drm->event_lock, flags);
-}
-
-static irqreturn_t ipu_irq_handler(int irq, void *dev_id)
-{
-	struct ipu_crtc *ipu_crtc = dev_id;
-
-	imx_drm_handle_vblank(ipu_crtc->imx_crtc);
-
-	if (ipu_crtc->flip_state == IPU_FLIP_SUBMITTED) {
-		struct ipu_plane *plane = ipu_crtc->plane[0];
-
-		ipu_plane_set_base(plane, ipu_crtc->base.primary->fb,
-				   plane->x, plane->y);
-		ipu_crtc_handle_pageflip(ipu_crtc);
-		queue_work(ipu_crtc->flip_queue,
-			   &ipu_crtc->flip_work->unref_work);
-		ipu_crtc->flip_state = IPU_FLIP_NONE;
-	}
-
-	return IRQ_HANDLED;
-}
-
-static bool ipu_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted_mode)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-	struct videomode vm;
-	int ret;
-
-	drm_display_mode_to_videomode(adjusted_mode, &vm);
-
-	ret = ipu_di_adjust_videomode(ipu_crtc->di, &vm);
-	if (ret)
-		return false;
-
-	drm_display_mode_from_videomode(&vm, adjusted_mode);
-
-	return true;
-}
-
-static void ipu_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_fb_disable(ipu_crtc);
-}
-
-static void ipu_crtc_commit(struct drm_crtc *crtc)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_fb_enable(ipu_crtc);
+	ipu_dc_init_sync(ipu_crtc->dc, ipu_crtc->di,
+			 mode->flags & DRM_MODE_FLAG_INTERLACE,
+			 imx_crtc_state->bus_format, mode->hdisplay);
+	ipu_di_init_sync_panel(ipu_crtc->di, &sig_cfg);
 }
 
 static const struct drm_crtc_helper_funcs ipu_helper_funcs = {
-	.dpms = ipu_crtc_dpms,
 	.mode_fixup = ipu_crtc_mode_fixup,
-	.mode_set = ipu_crtc_mode_set,
-	.prepare = ipu_crtc_prepare,
-	.commit = ipu_crtc_commit,
+	.mode_set_nofb = ipu_crtc_mode_set_nofb,
+	.atomic_check = ipu_crtc_atomic_check,
+	.atomic_begin = ipu_crtc_atomic_begin,
+	.disable = ipu_crtc_disable,
+	.enable = ipu_crtc_enable,
 };
 
 static int ipu_enable_vblank(struct drm_crtc *crtc)
@@ -398,23 +262,9 @@ static void ipu_disable_vblank(struct drm_crtc *crtc)
 	disable_irq_nosync(ipu_crtc->irq);
 }
 
-static int ipu_set_interface_pix_fmt(struct drm_crtc *crtc,
-		u32 bus_format, int hsync_pin, int vsync_pin, u32 bus_flags)
-{
-	struct ipu_crtc *ipu_crtc = to_ipu_crtc(crtc);
-
-	ipu_crtc->bus_format = bus_format;
-	ipu_crtc->bus_flags = bus_flags;
-	ipu_crtc->di_hsync_pin = hsync_pin;
-	ipu_crtc->di_vsync_pin = vsync_pin;
-
-	return 0;
-}
-
 static const struct imx_drm_crtc_helper_funcs ipu_crtc_helper_funcs = {
 	.enable_vblank = ipu_enable_vblank,
 	.disable_vblank = ipu_disable_vblank,
-	.set_interface_pix_fmt = ipu_set_interface_pix_fmt,
 	.crtc_funcs = &ipu_crtc_funcs,
 	.crtc_helper_funcs = &ipu_helper_funcs,
 };
@@ -496,8 +346,16 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
 						IPU_DP_FLOW_SYNC_FG,
 						drm_crtc_mask(&ipu_crtc->base),
 						DRM_PLANE_TYPE_OVERLAY);
-		if (IS_ERR(ipu_crtc->plane[1]))
+		if (IS_ERR(ipu_crtc->plane[1])) {
 			ipu_crtc->plane[1] = NULL;
+		} else {
+			ret = ipu_plane_get_resources(ipu_crtc->plane[1]);
+			if (ret) {
+				dev_err(ipu_crtc->dev, "getting plane 1 "
+					"resources failed with %d.\n", ret);
+				goto err_put_plane0_res;
+			}
+		}
 	}
 
 	ipu_crtc->irq = ipu_plane_irq(ipu_crtc->plane[0]);
@@ -505,16 +363,17 @@ static int ipu_crtc_init(struct ipu_crtc *ipu_crtc,
 			"imx_drm", ipu_crtc);
 	if (ret < 0) {
 		dev_err(ipu_crtc->dev, "irq request failed with %d.\n", ret);
-		goto err_put_plane_res;
+		goto err_put_plane1_res;
 	}
 	/* Only enable IRQ when we actually need it to trigger work. */
 	disable_irq(ipu_crtc->irq);
 
-	ipu_crtc->flip_queue = create_singlethread_workqueue("ipu-crtc-flip");
-
 	return 0;
 
-err_put_plane_res:
+err_put_plane1_res:
+	if (ipu_crtc->plane[1])
+		ipu_plane_put_resources(ipu_crtc->plane[1]);
+err_put_plane0_res:
 	ipu_plane_put_resources(ipu_crtc->plane[0]);
 err_remove_crtc:
 	imx_drm_remove_crtc(ipu_crtc->imx_crtc);
@@ -553,9 +412,10 @@ static void ipu_drm_unbind(struct device *dev, struct device *master,
 
 	imx_drm_remove_crtc(ipu_crtc->imx_crtc);
 
-	destroy_workqueue(ipu_crtc->flip_queue);
-	ipu_plane_put_resources(ipu_crtc->plane[0]);
 	ipu_put_resources(ipu_crtc);
+	if (ipu_crtc->plane[1])
+		ipu_plane_put_resources(ipu_crtc->plane[1]);
+	ipu_plane_put_resources(ipu_crtc->plane[0]);
 }
 
 static const struct component_ops ipu_crtc_ops = {
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.c b/drivers/gpu/drm/imx/ipuv3-plane.c
index a4bb44118d33..4ad67d015ec7 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.c
+++ b/drivers/gpu/drm/imx/ipuv3-plane.c
@@ -14,13 +14,19 @@
  */
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_plane_helper.h>
 
 #include "video/imx-ipu-v3.h"
 #include "ipuv3-plane.h"
 
-#define to_ipu_plane(x)	container_of(x, struct ipu_plane, base)
+static inline struct ipu_plane *to_ipu_plane(struct drm_plane *p)
+{
+	return container_of(p, struct ipu_plane, base);
+}
 
 static const uint32_t ipu_plane_formats[] = {
 	DRM_FORMAT_ARGB1555,
@@ -53,62 +59,67 @@ int ipu_plane_irq(struct ipu_plane *ipu_plane)
 				     IPU_IRQ_EOF);
 }
 
-static int calc_vref(struct drm_display_mode *mode)
+static inline unsigned long
+drm_plane_state_to_eba(struct drm_plane_state *state)
 {
-	unsigned long htotal, vtotal;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
 
-	htotal = mode->htotal;
-	vtotal = mode->vtotal;
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	BUG_ON(!cma_obj);
 
-	if (!htotal || !vtotal)
-		return 60;
-
-	return DIV_ROUND_UP(mode->clock * 1000, vtotal * htotal);
+	return cma_obj->paddr + fb->offsets[0] +
+	       fb->pitches[0] * (state->src_y >> 16) +
+	       (fb->bits_per_pixel >> 3) * (state->src_x >> 16);
 }
 
-static inline int calc_bandwidth(int width, int height, unsigned int vref)
+static inline unsigned long
+drm_plane_state_to_ubo(struct drm_plane_state *state)
 {
-	return width * height * vref;
-}
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
+	unsigned long eba = drm_plane_state_to_eba(state);
 
-int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
-		       int x, int y)
-{
-	struct drm_gem_cma_object *cma_obj[3];
-	unsigned long eba, ubo, vbo;
-	int active, i;
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 1);
+	BUG_ON(!cma_obj);
 
-	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
-		cma_obj[i] = drm_fb_cma_get_gem_obj(fb, i);
-		if (!cma_obj[i]) {
-			DRM_DEBUG_KMS("plane %d entry is null.\n", i);
-			return -EFAULT;
-		}
-	}
+	return cma_obj->paddr + fb->offsets[1] +
+	       fb->pitches[1] * (state->src_y >> 16) / 2 +
+	       (state->src_x >> 16) / 2 - eba;
+}
 
-	eba = cma_obj[0]->paddr + fb->offsets[0] +
-	      fb->pitches[0] * y + (fb->bits_per_pixel >> 3) * x;
+static inline unsigned long
+drm_plane_state_to_vbo(struct drm_plane_state *state)
+{
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *cma_obj;
+	unsigned long eba = drm_plane_state_to_eba(state);
 
-	if (eba & 0x7) {
-		DRM_DEBUG_KMS("base address must be a multiple of 8.\n");
-		return -EINVAL;
-	}
+	cma_obj = drm_fb_cma_get_gem_obj(fb, 2);
+	BUG_ON(!cma_obj);
 
-	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384) {
-		DRM_DEBUG_KMS("pitches out of range.\n");
-		return -EINVAL;
-	}
+	return cma_obj->paddr + fb->offsets[2] +
+	       fb->pitches[2] * (state->src_y >> 16) / 2 +
+	       (state->src_x >> 16) / 2 - eba;
+}
 
-	if (ipu_plane->enabled && fb->pitches[0] != ipu_plane->stride[0]) {
-		DRM_DEBUG_KMS("pitches must not change while plane is enabled.\n");
-		return -EINVAL;
-	}
+static void ipu_plane_atomic_set_base(struct ipu_plane *ipu_plane,
+				      struct drm_plane_state *old_state)
+{
+	struct drm_plane *plane = &ipu_plane->base;
+	struct drm_plane_state *state = plane->state;
+	struct drm_framebuffer *fb = state->fb;
+	unsigned long eba, ubo, vbo;
+	int active;
 
-	ipu_plane->stride[0] = fb->pitches[0];
+	eba = drm_plane_state_to_eba(state);
 
 	switch (fb->pixel_format) {
 	case DRM_FORMAT_YUV420:
 	case DRM_FORMAT_YVU420:
+		if (old_state->fb)
+			break;
+
 		/*
 		 * Multiplanar formats have to meet the following restrictions:
 		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
@@ -117,59 +128,28 @@ int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		 * - Only EBA may be changed while scanout is active
 		 * - The strides of U and V planes must be identical.
 		 */
-		ubo = cma_obj[1]->paddr + fb->offsets[1] +
-		      fb->pitches[1] * y / 2 + x / 2 - eba;
-		vbo = cma_obj[2]->paddr + fb->offsets[2] +
-		      fb->pitches[2] * y / 2 + x / 2 - eba;
+		ubo = drm_plane_state_to_ubo(state);
+		vbo = drm_plane_state_to_vbo(state);
 
-		if ((ubo & 0x7) || (vbo & 0x7)) {
-			DRM_DEBUG_KMS("U/V buffer offsets must be a multiple of 8.\n");
-			return -EINVAL;
-		}
-
-		if ((ubo > 0xfffff8) || (vbo > 0xfffff8)) {
-			DRM_DEBUG_KMS("U/V buffer offsets must be positive and not larger than 0xfffff8.\n");
-			return -EINVAL;
-		}
-
-		if (ipu_plane->enabled && ((ipu_plane->u_offset != ubo) ||
-					   (ipu_plane->v_offset != vbo))) {
-			DRM_DEBUG_KMS("U/V buffer offsets must not change while plane is enabled.\n");
-			return -EINVAL;
-		}
-
-		if (fb->pitches[1] != fb->pitches[2]) {
-			DRM_DEBUG_KMS("U/V pitches must be identical.\n");
-			return -EINVAL;
-		}
-
-		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384) {
-			DRM_DEBUG_KMS("U/V pitches out of range.\n");
-			return -EINVAL;
-		}
-
-		if (ipu_plane->enabled &&
-		    (ipu_plane->stride[1] != fb->pitches[1])) {
-			DRM_DEBUG_KMS("U/V pitches must not change while plane is enabled.\n");
-			return -EINVAL;
-		}
-
-		ipu_plane->u_offset = ubo;
-		ipu_plane->v_offset = vbo;
-		ipu_plane->stride[1] = fb->pitches[1];
+		if (fb->pixel_format == DRM_FORMAT_YUV420)
+			ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+						      fb->pitches[1], ubo, vbo);
+		else
+			ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
+						      fb->pitches[1], vbo, ubo);
 
 		dev_dbg(ipu_plane->base.dev->dev,
-			"phys = %pad %pad %pad, x = %d, y = %d",
-			&cma_obj[0]->paddr, &cma_obj[1]->paddr,
-			&cma_obj[2]->paddr, x, y);
+			"phy = %lu %lu %lu, x = %d, y = %d", eba, ubo, vbo,
+			state->src_x >> 16, state->src_y >> 16);
 		break;
 	default:
-		dev_dbg(ipu_plane->base.dev->dev, "phys = %pad, x = %d, y = %d",
-			&cma_obj[0]->paddr, x, y);
+		dev_dbg(ipu_plane->base.dev->dev, "phys = %lu, x = %d, y = %d",
+			eba, state->src_x >> 16, state->src_y >> 16);
+
 		break;
 	}
 
-	if (ipu_plane->enabled) {
+	if (old_state->fb) {
 		active = ipu_idmac_get_current_buffer(ipu_plane->ipu_ch);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, !active, eba);
 		ipu_idmac_select_buffer(ipu_plane->ipu_ch, !active);
@@ -177,155 +157,6 @@ int ipu_plane_set_base(struct ipu_plane *ipu_plane, struct drm_framebuffer *fb,
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, 0, eba);
 		ipu_cpmem_set_buffer(ipu_plane->ipu_ch, 1, eba);
 	}
-
-	/* cache offsets for subsequent pageflips */
-	ipu_plane->x = x;
-	ipu_plane->y = y;
-
-	return 0;
-}
-
-int ipu_plane_mode_set(struct ipu_plane *ipu_plane, struct drm_crtc *crtc,
-		       struct drm_display_mode *mode,
-		       struct drm_framebuffer *fb, int crtc_x, int crtc_y,
-		       unsigned int crtc_w, unsigned int crtc_h,
-		       uint32_t src_x, uint32_t src_y,
-		       uint32_t src_w, uint32_t src_h, bool interlaced)
-{
-	struct device *dev = ipu_plane->base.dev->dev;
-	int ret;
-
-	/* no scaling */
-	if (src_w != crtc_w || src_h != crtc_h)
-		return -EINVAL;
-
-	/* clip to crtc bounds */
-	if (crtc_x < 0) {
-		if (-crtc_x > crtc_w)
-			return -EINVAL;
-		src_x += -crtc_x;
-		src_w -= -crtc_x;
-		crtc_w -= -crtc_x;
-		crtc_x = 0;
-	}
-	if (crtc_y < 0) {
-		if (-crtc_y > crtc_h)
-			return -EINVAL;
-		src_y += -crtc_y;
-		src_h -= -crtc_y;
-		crtc_h -= -crtc_y;
-		crtc_y = 0;
-	}
-	if (crtc_x + crtc_w > mode->hdisplay) {
-		if (crtc_x > mode->hdisplay)
-			return -EINVAL;
-		crtc_w = mode->hdisplay - crtc_x;
-		src_w = crtc_w;
-	}
-	if (crtc_y + crtc_h > mode->vdisplay) {
-		if (crtc_y > mode->vdisplay)
-			return -EINVAL;
-		crtc_h = mode->vdisplay - crtc_y;
-		src_h = crtc_h;
-	}
-	/* full plane minimum width is 13 pixels */
-	if (crtc_w < 13 && (ipu_plane->dp_flow != IPU_DP_FLOW_SYNC_FG))
-		return -EINVAL;
-	if (crtc_h < 2)
-		return -EINVAL;
-
-	/*
-	 * since we cannot touch active IDMAC channels, we do not support
-	 * resizing the enabled plane or changing its format
-	 */
-	if (ipu_plane->enabled) {
-		if (src_w != ipu_plane->w || src_h != ipu_plane->h ||
-		    fb->pixel_format != ipu_plane->base.fb->pixel_format)
-			return -EINVAL;
-
-		return ipu_plane_set_base(ipu_plane, fb, src_x, src_y);
-	}
-
-	switch (ipu_plane->dp_flow) {
-	case IPU_DP_FLOW_SYNC_BG:
-		ret = ipu_dp_setup_channel(ipu_plane->dp,
-				IPUV3_COLORSPACE_RGB,
-				IPUV3_COLORSPACE_RGB);
-		if (ret) {
-			dev_err(dev,
-				"initializing display processor failed with %d\n",
-				ret);
-			return ret;
-		}
-		ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
-		break;
-	case IPU_DP_FLOW_SYNC_FG:
-		ipu_dp_setup_channel(ipu_plane->dp,
-				ipu_drm_fourcc_to_colorspace(fb->pixel_format),
-				IPUV3_COLORSPACE_UNKNOWN);
-		ipu_dp_set_window_pos(ipu_plane->dp, crtc_x, crtc_y);
-		/* Enable local alpha on partial plane */
-		switch (fb->pixel_format) {
-		case DRM_FORMAT_ARGB1555:
-		case DRM_FORMAT_ABGR1555:
-		case DRM_FORMAT_RGBA5551:
-		case DRM_FORMAT_BGRA5551:
-		case DRM_FORMAT_ARGB4444:
-		case DRM_FORMAT_ARGB8888:
-		case DRM_FORMAT_ABGR8888:
-		case DRM_FORMAT_RGBA8888:
-		case DRM_FORMAT_BGRA8888:
-			ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false);
-			break;
-		default:
-			break;
-		}
-	}
-
-	ret = ipu_dmfc_alloc_bandwidth(ipu_plane->dmfc,
-			calc_bandwidth(crtc_w, crtc_h,
-				       calc_vref(mode)), 64);
-	if (ret) {
-		dev_err(dev, "allocating dmfc bandwidth failed with %d\n", ret);
-		return ret;
-	}
-
-	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, crtc_w);
-
-	ipu_cpmem_zero(ipu_plane->ipu_ch);
-	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, src_w, src_h);
-	ret = ipu_cpmem_set_fmt(ipu_plane->ipu_ch, fb->pixel_format);
-	if (ret < 0) {
-		dev_err(dev, "unsupported pixel format 0x%08x\n",
-			fb->pixel_format);
-		return ret;
-	}
-	ipu_cpmem_set_high_priority(ipu_plane->ipu_ch);
-	ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1);
-	ipu_cpmem_set_stride(ipu_plane->ipu_ch, fb->pitches[0]);
-
-	ret = ipu_plane_set_base(ipu_plane, fb, src_x, src_y);
-	if (ret < 0)
-		return ret;
-	if (interlaced)
-		ipu_cpmem_interlaced_scan(ipu_plane->ipu_ch, fb->pitches[0]);
-
-	if (fb->pixel_format == DRM_FORMAT_YUV420) {
-		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
-					      ipu_plane->stride[1],
-					      ipu_plane->u_offset,
-					      ipu_plane->v_offset);
-	} else if (fb->pixel_format == DRM_FORMAT_YVU420) {
-		ipu_cpmem_set_yuv_planar_full(ipu_plane->ipu_ch,
-					      ipu_plane->stride[1],
-					      ipu_plane->v_offset,
-					      ipu_plane->u_offset);
-	}
-
-	ipu_plane->w = src_w;
-	ipu_plane->h = src_h;
-
-	return 0;
 }
 
 void ipu_plane_put_resources(struct ipu_plane *ipu_plane)
@@ -372,7 +203,7 @@ err_out:
 	return ret;
 }
 
-void ipu_plane_enable(struct ipu_plane *ipu_plane)
+static void ipu_plane_enable(struct ipu_plane *ipu_plane)
 {
 	if (ipu_plane->dp)
 		ipu_dp_enable(ipu_plane->ipu);
@@ -380,14 +211,10 @@ void ipu_plane_enable(struct ipu_plane *ipu_plane)
 	ipu_idmac_enable_channel(ipu_plane->ipu_ch);
 	if (ipu_plane->dp)
 		ipu_dp_enable_channel(ipu_plane->dp);
-
-	ipu_plane->enabled = true;
 }
 
-void ipu_plane_disable(struct ipu_plane *ipu_plane)
+static void ipu_plane_disable(struct ipu_plane *ipu_plane)
 {
-	ipu_plane->enabled = false;
-
 	ipu_idmac_wait_busy(ipu_plane->ipu_ch, 50);
 
 	if (ipu_plane->dp)
@@ -398,74 +225,225 @@ void ipu_plane_disable(struct ipu_plane *ipu_plane)
 		ipu_dp_disable(ipu_plane->ipu);
 }
 
-/*
- * drm_plane API
- */
-
-static int ipu_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
-			    struct drm_framebuffer *fb, int crtc_x, int crtc_y,
-			    unsigned int crtc_w, unsigned int crtc_h,
-			    uint32_t src_x, uint32_t src_y,
-			    uint32_t src_w, uint32_t src_h)
+static int ipu_disable_plane(struct drm_plane *plane)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
-	int ret = 0;
-
-	DRM_DEBUG_KMS("plane - %p\n", plane);
-
-	if (!ipu_plane->enabled)
-		ret = ipu_plane_get_resources(ipu_plane);
-	if (ret < 0)
-		return ret;
-
-	ret = ipu_plane_mode_set(ipu_plane, crtc, &crtc->hwmode, fb,
-			crtc_x, crtc_y, crtc_w, crtc_h,
-			src_x >> 16, src_y >> 16, src_w >> 16, src_h >> 16,
-			false);
-	if (ret < 0) {
-		ipu_plane_put_resources(ipu_plane);
-		return ret;
-	}
 
-	if (crtc != plane->crtc)
-		dev_dbg(plane->dev->dev, "crtc change: %p -> %p\n",
-				plane->crtc, crtc);
+	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
 
-	if (!ipu_plane->enabled)
-		ipu_plane_enable(ipu_plane);
+	ipu_plane_disable(ipu_plane);
 
 	return 0;
 }
 
-static int ipu_disable_plane(struct drm_plane *plane)
+static void ipu_plane_destroy(struct drm_plane *plane)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
 
 	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
 
-	if (ipu_plane->enabled)
-		ipu_plane_disable(ipu_plane);
+	ipu_disable_plane(plane);
+	drm_plane_cleanup(plane);
+	kfree(ipu_plane);
+}
 
-	ipu_plane_put_resources(ipu_plane);
+static const struct drm_plane_funcs ipu_plane_funcs = {
+	.update_plane	= drm_atomic_helper_update_plane,
+	.disable_plane	= drm_atomic_helper_disable_plane,
+	.destroy	= ipu_plane_destroy,
+	.reset		= drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state	= drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+};
+
+static int ipu_plane_atomic_check(struct drm_plane *plane,
+				  struct drm_plane_state *state)
+{
+	struct drm_plane_state *old_state = plane->state;
+	struct drm_crtc_state *crtc_state;
+	struct device *dev = plane->dev->dev;
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_framebuffer *old_fb = old_state->fb;
+	unsigned long eba, ubo, vbo, old_ubo, old_vbo;
+
+	/* Ok to disable */
+	if (!fb)
+		return 0;
+
+	if (!state->crtc)
+		return -EINVAL;
+
+	crtc_state =
+		drm_atomic_get_existing_crtc_state(state->state, state->crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
+	/* CRTC should be enabled */
+	if (!crtc_state->enable)
+		return -EINVAL;
+
+	/* no scaling */
+	if (state->src_w >> 16 != state->crtc_w ||
+	    state->src_h >> 16 != state->crtc_h)
+		return -EINVAL;
+
+	switch (plane->type) {
+	case DRM_PLANE_TYPE_PRIMARY:
+		/* full plane doesn't support partial off screen */
+		if (state->crtc_x || state->crtc_y ||
+		    state->crtc_w != crtc_state->adjusted_mode.hdisplay ||
+		    state->crtc_h != crtc_state->adjusted_mode.vdisplay)
+			return -EINVAL;
+
+		/* full plane minimum width is 13 pixels */
+		if (state->crtc_w < 13)
+			return -EINVAL;
+		break;
+	case DRM_PLANE_TYPE_OVERLAY:
+		if (state->crtc_x < 0 || state->crtc_y < 0)
+			return -EINVAL;
+
+		if (state->crtc_x + state->crtc_w >
+		    crtc_state->adjusted_mode.hdisplay)
+			return -EINVAL;
+		if (state->crtc_y + state->crtc_h >
+		    crtc_state->adjusted_mode.vdisplay)
+			return -EINVAL;
+		break;
+	default:
+		dev_warn(dev, "Unsupported plane type\n");
+		return -EINVAL;
+	}
+
+	if (state->crtc_h < 2)
+		return -EINVAL;
+
+	/*
+	 * since we cannot touch active IDMAC channels, we do not support
+	 * resizing the enabled plane or changing its format
+	 */
+	if (old_fb && (state->src_w != old_state->src_w ||
+			      state->src_h != old_state->src_h ||
+			      fb->pixel_format != old_fb->pixel_format))
+		return -EINVAL;
+
+	eba = drm_plane_state_to_eba(state);
+
+	if (eba & 0x7)
+		return -EINVAL;
+
+	if (fb->pitches[0] < 1 || fb->pitches[0] > 16384)
+		return -EINVAL;
+
+	if (old_fb && fb->pitches[0] != old_fb->pitches[0])
+		return -EINVAL;
+
+	switch (fb->pixel_format) {
+	case DRM_FORMAT_YUV420:
+	case DRM_FORMAT_YVU420:
+		/*
+		 * Multiplanar formats have to meet the following restrictions:
+		 * - The (up to) three plane addresses are EBA, EBA+UBO, EBA+VBO
+		 * - EBA, UBO and VBO are a multiple of 8
+		 * - UBO and VBO are unsigned and not larger than 0xfffff8
+		 * - Only EBA may be changed while scanout is active
+		 * - The strides of U and V planes must be identical.
+		 */
+		ubo = drm_plane_state_to_ubo(state);
+		vbo = drm_plane_state_to_vbo(state);
+
+		if ((ubo & 0x7) || (vbo & 0x7))
+			return -EINVAL;
+
+		if ((ubo > 0xfffff8) || (vbo > 0xfffff8))
+			return -EINVAL;
+
+		if (old_fb) {
+			old_ubo = drm_plane_state_to_ubo(old_state);
+			old_vbo = drm_plane_state_to_vbo(old_state);
+			if (ubo != old_ubo || vbo != old_vbo)
+				return -EINVAL;
+		}
+
+		if (fb->pitches[1] != fb->pitches[2])
+			return -EINVAL;
+
+		if (fb->pitches[1] < 1 || fb->pitches[1] > 16384)
+			return -EINVAL;
+
+		if (old_fb && old_fb->pitches[1] != fb->pitches[1])
+			return -EINVAL;
+	}
 
 	return 0;
 }
 
-static void ipu_plane_destroy(struct drm_plane *plane)
+static void ipu_plane_atomic_disable(struct drm_plane *plane,
+				     struct drm_plane_state *old_state)
+{
+	ipu_disable_plane(plane);
+}
+
+static void ipu_plane_atomic_update(struct drm_plane *plane,
+				    struct drm_plane_state *old_state)
 {
 	struct ipu_plane *ipu_plane = to_ipu_plane(plane);
+	struct drm_plane_state *state = plane->state;
+	enum ipu_color_space ics;
 
-	DRM_DEBUG_KMS("[%d] %s\n", __LINE__, __func__);
+	if (old_state->fb) {
+		ipu_plane_atomic_set_base(ipu_plane, old_state);
+		return;
+	}
 
-	ipu_disable_plane(plane);
-	drm_plane_cleanup(plane);
-	kfree(ipu_plane);
+	switch (ipu_plane->dp_flow) {
+	case IPU_DP_FLOW_SYNC_BG:
+		ipu_dp_setup_channel(ipu_plane->dp,
+					IPUV3_COLORSPACE_RGB,
+					IPUV3_COLORSPACE_RGB);
+		ipu_dp_set_global_alpha(ipu_plane->dp, true, 0, true);
+		break;
+	case IPU_DP_FLOW_SYNC_FG:
+		ics = ipu_drm_fourcc_to_colorspace(state->fb->pixel_format);
+		ipu_dp_setup_channel(ipu_plane->dp, ics,
+					IPUV3_COLORSPACE_UNKNOWN);
+		ipu_dp_set_window_pos(ipu_plane->dp, state->crtc_x,
+					state->crtc_y);
+		/* Enable local alpha on partial plane */
+		switch (state->fb->pixel_format) {
+		case DRM_FORMAT_ARGB1555:
+		case DRM_FORMAT_ABGR1555:
+		case DRM_FORMAT_RGBA5551:
+		case DRM_FORMAT_BGRA5551:
+		case DRM_FORMAT_ARGB4444:
+		case DRM_FORMAT_ARGB8888:
+		case DRM_FORMAT_ABGR8888:
+		case DRM_FORMAT_RGBA8888:
+		case DRM_FORMAT_BGRA8888:
+			ipu_dp_set_global_alpha(ipu_plane->dp, false, 0, false);
+			break;
+		default:
+			break;
+		}
+	}
+
+	ipu_dmfc_config_wait4eot(ipu_plane->dmfc, state->crtc_w);
+
+	ipu_cpmem_zero(ipu_plane->ipu_ch);
+	ipu_cpmem_set_resolution(ipu_plane->ipu_ch, state->src_w >> 16,
+					state->src_h >> 16);
+	ipu_cpmem_set_fmt(ipu_plane->ipu_ch, state->fb->pixel_format);
+	ipu_cpmem_set_high_priority(ipu_plane->ipu_ch);
+	ipu_idmac_set_double_buffer(ipu_plane->ipu_ch, 1);
+	ipu_cpmem_set_stride(ipu_plane->ipu_ch, state->fb->pitches[0]);
+	ipu_plane_atomic_set_base(ipu_plane, old_state);
+	ipu_plane_enable(ipu_plane);
 }
 
-static const struct drm_plane_funcs ipu_plane_funcs = {
-	.update_plane	= ipu_update_plane,
-	.disable_plane	= ipu_disable_plane,
-	.destroy	= ipu_plane_destroy,
+static const struct drm_plane_helper_funcs ipu_plane_helper_funcs = {
+	.atomic_check = ipu_plane_atomic_check,
+	.atomic_disable = ipu_plane_atomic_disable,
+	.atomic_update = ipu_plane_atomic_update,
 };
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
@@ -498,5 +476,7 @@ struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
 		return ERR_PTR(ret);
 	}
 
+	drm_plane_helper_add(&ipu_plane->base, &ipu_plane_helper_funcs);
+
 	return ipu_plane;
 }
diff --git a/drivers/gpu/drm/imx/ipuv3-plane.h b/drivers/gpu/drm/imx/ipuv3-plane.h
index 4448fd4ad4eb..338b88a74eb6 100644
--- a/drivers/gpu/drm/imx/ipuv3-plane.h
+++ b/drivers/gpu/drm/imx/ipuv3-plane.h
@@ -23,17 +23,6 @@ struct ipu_plane {
 
 	int			dma;
 	int			dp_flow;
-
-	int			x;
-	int			y;
-	int			w;
-	int			h;
-
-	unsigned int		u_offset;
-	unsigned int		v_offset;
-	unsigned int		stride[2];
-
-	bool			enabled;
 };
 
 struct ipu_plane *ipu_plane_init(struct drm_device *dev, struct ipu_soc *ipu,
@@ -48,11 +37,6 @@ int ipu_plane_mode_set(struct ipu_plane *plane, struct drm_crtc *crtc,
 		       uint32_t src_x, uint32_t src_y, uint32_t src_w,
 		       uint32_t src_h, bool interlaced);
 
-void ipu_plane_enable(struct ipu_plane *plane);
-void ipu_plane_disable(struct ipu_plane *plane);
-int ipu_plane_set_base(struct ipu_plane *plane, struct drm_framebuffer *fb,
-		       int x, int y);
-
 int ipu_plane_get_resources(struct ipu_plane *plane);
 void ipu_plane_put_resources(struct ipu_plane *plane);
 
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 2d1fd02cd3d6..1dad297b01fd 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -16,6 +16,7 @@
 #include <linux/component.h>
 #include <linux/module.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_panel.h>
@@ -25,9 +26,6 @@
 
 #include "imx-drm.h"
 
-#define con_to_imxpd(x) container_of(x, struct imx_parallel_display, connector)
-#define enc_to_imxpd(x) container_of(x, struct imx_parallel_display, encoder)
-
 struct imx_parallel_display {
 	struct drm_connector connector;
 	struct drm_encoder encoder;
@@ -37,8 +35,19 @@ struct imx_parallel_display {
 	u32 bus_format;
 	struct drm_display_mode mode;
 	struct drm_panel *panel;
+	struct drm_bridge *bridge;
 };
 
+static inline struct imx_parallel_display *con_to_imxpd(struct drm_connector *c)
+{
+	return container_of(c, struct imx_parallel_display, connector);
+}
+
+static inline struct imx_parallel_display *enc_to_imxpd(struct drm_encoder *e)
+{
+	return container_of(e, struct imx_parallel_display, encoder);
+}
+
 static enum drm_connector_status imx_pd_connector_detect(
 		struct drm_connector *connector, bool force)
 {
@@ -53,11 +62,7 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
 
 	if (imxpd->panel && imxpd->panel->funcs &&
 	    imxpd->panel->funcs->get_modes) {
-		struct drm_display_info *di = &connector->display_info;
-
 		num_modes = imxpd->panel->funcs->get_modes(imxpd->panel);
-		if (!imxpd->bus_format && di->num_bus_formats)
-			imxpd->bus_format = di->bus_formats[0];
 		if (num_modes > 0)
 			return num_modes;
 	}
@@ -69,10 +74,16 @@ static int imx_pd_connector_get_modes(struct drm_connector *connector)
 
 	if (np) {
 		struct drm_display_mode *mode = drm_mode_create(connector->dev);
+		int ret;
 
 		if (!mode)
 			return -EINVAL;
-		of_get_drm_display_mode(np, &imxpd->mode, OF_USE_NATIVE_MODE);
+
+		ret = of_get_drm_display_mode(np, &imxpd->mode,
+					      OF_USE_NATIVE_MODE);
+		if (ret)
+			return ret;
+
 		drm_mode_copy(mode, &imxpd->mode);
 		mode->type |= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
 		drm_mode_probed_add(connector, mode);
@@ -90,24 +101,7 @@ static struct drm_encoder *imx_pd_connector_best_encoder(
 	return &imxpd->encoder;
 }
 
-static void imx_pd_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
-
-	if (mode != DRM_MODE_DPMS_ON)
-		drm_panel_disable(imxpd->panel);
-	else
-		drm_panel_enable(imxpd->panel);
-}
-
-static void imx_pd_encoder_prepare(struct drm_encoder *encoder)
-{
-	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
-	imx_drm_set_bus_config(encoder, imxpd->bus_format, 2, 3,
-			       imxpd->connector.display_info.bus_flags);
-}
-
-static void imx_pd_encoder_commit(struct drm_encoder *encoder)
+static void imx_pd_encoder_enable(struct drm_encoder *encoder)
 {
 	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
 
@@ -115,12 +109,6 @@ static void imx_pd_encoder_commit(struct drm_encoder *encoder)
 	drm_panel_enable(imxpd->panel);
 }
 
-static void imx_pd_encoder_mode_set(struct drm_encoder *encoder,
-			 struct drm_display_mode *orig_mode,
-			 struct drm_display_mode *mode)
-{
-}
-
 static void imx_pd_encoder_disable(struct drm_encoder *encoder)
 {
 	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
@@ -129,11 +117,33 @@ static void imx_pd_encoder_disable(struct drm_encoder *encoder)
 	drm_panel_unprepare(imxpd->panel);
 }
 
+static int imx_pd_encoder_atomic_check(struct drm_encoder *encoder,
+				       struct drm_crtc_state *crtc_state,
+				       struct drm_connector_state *conn_state)
+{
+	struct imx_crtc_state *imx_crtc_state = to_imx_crtc_state(crtc_state);
+	struct drm_display_info *di = &conn_state->connector->display_info;
+	struct imx_parallel_display *imxpd = enc_to_imxpd(encoder);
+
+	imx_crtc_state->bus_flags = di->bus_flags;
+	if (!imxpd->bus_format && di->num_bus_formats)
+		imx_crtc_state->bus_format = di->bus_formats[0];
+	else
+		imx_crtc_state->bus_format = imxpd->bus_format;
+	imx_crtc_state->di_hsync_pin = 2;
+	imx_crtc_state->di_vsync_pin = 3;
+
+	return 0;
+}
+
 static const struct drm_connector_funcs imx_pd_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = drm_atomic_helper_connector_dpms,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.detect = imx_pd_connector_detect,
 	.destroy = imx_drm_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
 static const struct drm_connector_helper_funcs imx_pd_connector_helper_funcs = {
@@ -146,20 +156,18 @@ static const struct drm_encoder_funcs imx_pd_encoder_funcs = {
 };
 
 static const struct drm_encoder_helper_funcs imx_pd_encoder_helper_funcs = {
-	.dpms = imx_pd_encoder_dpms,
-	.prepare = imx_pd_encoder_prepare,
-	.commit = imx_pd_encoder_commit,
-	.mode_set = imx_pd_encoder_mode_set,
+	.enable = imx_pd_encoder_enable,
 	.disable = imx_pd_encoder_disable,
+	.atomic_check = imx_pd_encoder_atomic_check,
 };
 
 static int imx_pd_register(struct drm_device *drm,
 	struct imx_parallel_display *imxpd)
 {
+	struct drm_encoder *encoder = &imxpd->encoder;
 	int ret;
 
-	ret = imx_drm_encoder_parse_of(drm, &imxpd->encoder,
-				       imxpd->dev->of_node);
+	ret = imx_drm_encoder_parse_of(drm, encoder, imxpd->dev->of_node);
 	if (ret)
 		return ret;
 
@@ -170,19 +178,33 @@ static int imx_pd_register(struct drm_device *drm,
 	 */
 	imxpd->connector.dpms = DRM_MODE_DPMS_OFF;
 
-	drm_encoder_helper_add(&imxpd->encoder, &imx_pd_encoder_helper_funcs);
-	drm_encoder_init(drm, &imxpd->encoder, &imx_pd_encoder_funcs,
+	drm_encoder_helper_add(encoder, &imx_pd_encoder_helper_funcs);
+	drm_encoder_init(drm, encoder, &imx_pd_encoder_funcs,
 			 DRM_MODE_ENCODER_NONE, NULL);
 
-	drm_connector_helper_add(&imxpd->connector,
-			&imx_pd_connector_helper_funcs);
-	drm_connector_init(drm, &imxpd->connector, &imx_pd_connector_funcs,
-			   DRM_MODE_CONNECTOR_VGA);
+	if (!imxpd->bridge) {
+		drm_connector_helper_add(&imxpd->connector,
+				&imx_pd_connector_helper_funcs);
+		drm_connector_init(drm, &imxpd->connector,
+				   &imx_pd_connector_funcs,
+				   DRM_MODE_CONNECTOR_VGA);
+	}
 
 	if (imxpd->panel)
 		drm_panel_attach(imxpd->panel, &imxpd->connector);
 
-	drm_mode_connector_attach_encoder(&imxpd->connector, &imxpd->encoder);
+	if (imxpd->bridge) {
+		imxpd->bridge->encoder = encoder;
+		encoder->bridge = imxpd->bridge;
+		ret = drm_bridge_attach(drm, imxpd->bridge);
+		if (ret < 0) {
+			dev_err(imxpd->dev, "failed to attach bridge: %d\n",
+				ret);
+			return ret;
+		}
+	} else {
+		drm_mode_connector_attach_encoder(&imxpd->connector, encoder);
+	}
 
 	return 0;
 }
@@ -195,6 +217,7 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 	const u8 *edidp;
 	struct imx_parallel_display *imxpd;
 	int ret;
+	u32 bus_format = 0;
 	const char *fmt;
 
 	imxpd = devm_kzalloc(dev, sizeof(*imxpd), GFP_KERNEL);
@@ -208,14 +231,15 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 	ret = of_property_read_string(np, "interface-pix-fmt", &fmt);
 	if (!ret) {
 		if (!strcmp(fmt, "rgb24"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+			bus_format = MEDIA_BUS_FMT_RGB888_1X24;
 		else if (!strcmp(fmt, "rgb565"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB565_1X16;
+			bus_format = MEDIA_BUS_FMT_RGB565_1X16;
 		else if (!strcmp(fmt, "bgr666"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB666_1X18;
+			bus_format = MEDIA_BUS_FMT_RGB666_1X18;
 		else if (!strcmp(fmt, "lvds666"))
-			imxpd->bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
+			bus_format = MEDIA_BUS_FMT_RGB666_1X24_CPADHI;
 	}
+	imxpd->bus_format = bus_format;
 
 	/* port@1 is the output port */
 	ep = of_graph_get_endpoint_by_regs(np, 1, -1);
@@ -223,13 +247,30 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 		struct device_node *remote;
 
 		remote = of_graph_get_remote_port_parent(ep);
+		if (!remote) {
+			dev_warn(dev, "endpoint %s not connected\n",
+				 ep->full_name);
+			of_node_put(ep);
+			return -ENODEV;
+		}
 		of_node_put(ep);
-		if (remote) {
-			imxpd->panel = of_drm_find_panel(remote);
-			of_node_put(remote);
+
+		imxpd->panel = of_drm_find_panel(remote);
+		if (imxpd->panel) {
+			dev_dbg(dev, "found panel %s\n", remote->full_name);
+		} else {
+			imxpd->bridge = of_drm_find_bridge(remote);
+			if (imxpd->bridge)
+				dev_dbg(dev, "found bridge %s\n",
+					remote->full_name);
 		}
-		if (!imxpd->panel)
+		if (!imxpd->panel && !imxpd->bridge) {
+			dev_dbg(dev, "waiting for panel or bridge %s\n",
+				remote->full_name);
+			of_node_put(remote);
 			return -EPROBE_DEFER;
+		}
+		of_node_put(remote);
 	}
 
 	imxpd->dev = dev;
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index ba812ef2c9d1..334562d06731 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1535,7 +1535,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
  * HDMI audio codec callbacks
  */
 
-static int mtk_hdmi_audio_hw_params(struct device *dev,
+static int mtk_hdmi_audio_hw_params(struct device *dev, void *data,
 				    struct hdmi_codec_daifmt *daifmt,
 				    struct hdmi_codec_params *params)
 {
@@ -1604,7 +1604,7 @@ static int mtk_hdmi_audio_hw_params(struct device *dev,
 	return 0;
 }
 
-static int mtk_hdmi_audio_startup(struct device *dev)
+static int mtk_hdmi_audio_startup(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1615,7 +1615,7 @@ static int mtk_hdmi_audio_startup(struct device *dev)
 	return 0;
 }
 
-static void mtk_hdmi_audio_shutdown(struct device *dev)
+static void mtk_hdmi_audio_shutdown(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1624,7 +1624,7 @@ static void mtk_hdmi_audio_shutdown(struct device *dev)
 	mtk_hdmi_audio_disable(hdmi);
 }
 
-int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
+int mtk_hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1638,7 +1638,7 @@ int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
 	return 0;
 }
 
-static int mtk_hdmi_audio_get_eld(struct device *dev, uint8_t *buf, size_t len)
+static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
index cf8f38d39e10..1c366f8cb2d0 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
@@ -431,7 +431,7 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
 	phy_set_drvdata(phy, mipi_tx);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-	if (IS_ERR(phy)) {
+	if (IS_ERR(phy_provider)) {
 		ret = PTR_ERR(phy_provider);
 		return ret;
 	}
diff --git a/drivers/gpu/drm/mgag200/Kconfig b/drivers/gpu/drm/mgag200/Kconfig
index 3a1c5fbae54a..520e5e668d6c 100644
--- a/drivers/gpu/drm/mgag200/Kconfig
+++ b/drivers/gpu/drm/mgag200/Kconfig
@@ -1,11 +1,7 @@
 config DRM_MGAG200
 	tristate "Kernel modesetting driver for MGA G200 server engines"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	help
 	 This is a KMS driver for the MGA G200 server chips, it
diff --git a/drivers/gpu/drm/mgag200/mgag200_main.c b/drivers/gpu/drm/mgag200/mgag200_main.c
index 615cbb08ba29..13798b3e6beb 100644
--- a/drivers/gpu/drm/mgag200/mgag200_main.c
+++ b/drivers/gpu/drm/mgag200/mgag200_main.c
@@ -17,8 +17,8 @@
 static void mga_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct mga_framebuffer *mga_fb = to_mga_framebuffer(fb);
-	if (mga_fb->obj)
-		drm_gem_object_unreference_unlocked(mga_fb->obj);
+
+	drm_gem_object_unreference_unlocked(mga_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(fb);
 }
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 9d5083d0f1ee..68268e55d595 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -186,17 +186,6 @@ static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_r
 {
 }
 
-static int mgag200_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void mgag200_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.ttm_tt_unpopulate = mgag200_ttm_tt_unpopulate,
 	.init_mem_type = mgag200_bo_init_mem_type,
 	.evict_flags = mgag200_bo_evict_flags,
-	.move = mgag200_bo_move,
+	.move = NULL,
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 167a4971f47c..7c7a0314a756 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -10,6 +10,7 @@ config DRM_MSM
 	select SHMEM
 	select TMPFS
 	select QCOM_SCM
+	select SND_SOC_HDMI_CODEC if SND_SOC
 	default y
 	help
 	  DRM/KMS driver for MSM/snapdragon.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 60cb02624dc0..4e2806cf778c 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -35,6 +35,7 @@ msm-y := \
 	mdp/mdp5/mdp5_crtc.o \
 	mdp/mdp5/mdp5_encoder.o \
 	mdp/mdp5/mdp5_irq.o \
+	mdp/mdp5/mdp5_mdss.o \
 	mdp/mdp5/mdp5_kms.o \
 	mdp/mdp5/mdp5_plane.o \
 	mdp/mdp5/mdp5_smp.o \
@@ -45,6 +46,7 @@ msm-y := \
 	msm_fence.o \
 	msm_gem.o \
 	msm_gem_prime.o \
+	msm_gem_shrinker.o \
 	msm_gem_submit.o \
 	msm_gpu.o \
 	msm_iommu.o \
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index fbe304ee6c80..f386f463278d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -139,7 +139,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct msm_drm_private *priv = gpu->dev->dev_private;
 	struct msm_ringbuffer *ring = gpu->rb;
-	unsigned i, ibs = 0;
+	unsigned i;
 
 	for (i = 0; i < submit->nr_cmds; i++) {
 		switch (submit->cmd[i].type) {
@@ -155,18 +155,11 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 				CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
 			OUT_RING(ring, submit->cmd[i].iova);
 			OUT_RING(ring, submit->cmd[i].size);
-			ibs++;
+			OUT_PKT2(ring);
 			break;
 		}
 	}
 
-	/* on a320, at least, we seem to need to pad things out to an
-	 * even number of qwords to avoid issue w/ CP hanging on wrap-
-	 * around:
-	 */
-	if (ibs % 2)
-		OUT_PKT2(ring);
-
 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
 	OUT_RING(ring, submit->fence->seqno);
 
@@ -407,8 +400,8 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		return ret;
 	}
 
-	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
-	if (!adreno_gpu->memptrs) {
+	adreno_gpu->memptrs = msm_gem_get_vaddr(adreno_gpu->memptrs_bo);
+	if (IS_ERR(adreno_gpu->memptrs)) {
 		dev_err(drm->dev, "could not vmap memptrs\n");
 		return -ENOMEM;
 	}
@@ -426,8 +419,12 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 void adreno_gpu_cleanup(struct adreno_gpu *gpu)
 {
 	if (gpu->memptrs_bo) {
+		if (gpu->memptrs)
+			msm_gem_put_vaddr(gpu->memptrs_bo);
+
 		if (gpu->memptrs_iova)
 			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+
 		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
 	}
 	release_firmware(gpu->pm4);
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 6edcd6f57e70..ec572f8389ed 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -29,7 +29,7 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi)
 	struct platform_device *phy_pdev;
 	struct device_node *phy_node;
 
-	phy_node = of_parse_phandle(pdev->dev.of_node, "qcom,dsi-phy", 0);
+	phy_node = of_parse_phandle(pdev->dev.of_node, "phys", 0);
 	if (!phy_node) {
 		dev_err(&pdev->dev, "cannot find phy device\n");
 		return -ENXIO;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 93c1ee094eac..63436d8ee470 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -29,6 +29,8 @@ static const struct msm_dsi_config apq8064_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_v2_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_v2_bus_clk_names),
+	.io_start = { 0x4700000, 0x5800000 },
+	.num_dsi = 2,
 };
 
 static const char * const dsi_6g_bus_clk_names[] = {
@@ -48,6 +50,8 @@ static const struct msm_dsi_config msm8974_apq8084_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_6g_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
+	.io_start = { 0xfd922800, 0xfd922b00 },
+	.num_dsi = 2,
 };
 
 static const char * const dsi_8916_bus_clk_names[] = {
@@ -66,6 +70,8 @@ static const struct msm_dsi_config msm8916_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_8916_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_8916_bus_clk_names),
+	.io_start = { 0x1a98000 },
+	.num_dsi = 1,
 };
 
 static const struct msm_dsi_config msm8994_dsi_cfg = {
@@ -84,6 +90,8 @@ static const struct msm_dsi_config msm8994_dsi_cfg = {
 	},
 	.bus_clk_names = dsi_6g_bus_clk_names,
 	.num_bus_clks = ARRAY_SIZE(dsi_6g_bus_clk_names),
+	.io_start = { 0xfd998000, 0xfd9a0000 },
+	.num_dsi = 2,
 };
 
 static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index a68c836744a3..eeacc3232494 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -34,6 +34,8 @@ struct msm_dsi_config {
 	struct dsi_reg_config reg_cfg;
 	const char * const *bus_clk_names;
 	const int num_bus_clks;
+	const resource_size_t io_start[DSI_MAX];
+	const int num_dsi;
 };
 
 struct msm_dsi_cfg_handler {
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index a3e47ad83eb3..f05ed0e1f3d6 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -1066,7 +1066,7 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
 	}
 
 	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) {
-		data = msm_gem_vaddr(msm_host->tx_gem_obj);
+		data = msm_gem_get_vaddr(msm_host->tx_gem_obj);
 		if (IS_ERR(data)) {
 			ret = PTR_ERR(data);
 			pr_err("%s: get vaddr failed, %d\n", __func__, ret);
@@ -1094,6 +1094,9 @@ static int dsi_cmd_dma_add(struct msm_dsi_host *msm_host,
 	if (packet.size < len)
 		memset(data + packet.size, 0xff, len - packet.size);
 
+	if (cfg_hnd->major == MSM_DSI_VER_MAJOR_6G)
+		msm_gem_put_vaddr(msm_host->tx_gem_obj);
+
 	return len;
 }
 
@@ -1543,7 +1546,7 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 	u32 lane_map[4];
 	int ret, i, len, num_lanes;
 
-	prop = of_find_property(ep, "qcom,data-lane-map", &len);
+	prop = of_find_property(ep, "data-lanes", &len);
 	if (!prop) {
 		dev_dbg(dev, "failed to find data lane mapping\n");
 		return -EINVAL;
@@ -1558,7 +1561,7 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 
 	msm_host->num_data_lanes = num_lanes;
 
-	ret = of_property_read_u32_array(ep, "qcom,data-lane-map", lane_map,
+	ret = of_property_read_u32_array(ep, "data-lanes", lane_map,
 					 num_lanes);
 	if (ret) {
 		dev_err(dev, "failed to read lane data\n");
@@ -1573,8 +1576,19 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 		const int *swap = supported_data_lane_swaps[i];
 		int j;
 
+		/*
+		 * the data-lanes array we get from DT has a logical->physical
+		 * mapping. The "data lane swap" register field represents
+		 * supported configurations in a physical->logical mapping.
+		 * Translate the DT mapping to what we understand and find a
+		 * configuration that works.
+		 */
 		for (j = 0; j < num_lanes; j++) {
-			if (swap[j] != lane_map[j])
+			if (lane_map[j] < 0 || lane_map[j] > 3)
+				dev_err(dev, "bad physical lane entry %u\n",
+					lane_map[j]);
+
+			if (swap[lane_map[j]] != j)
 				break;
 		}
 
@@ -1594,20 +1608,13 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 	struct device_node *endpoint, *device_node;
 	int ret;
 
-	ret = of_property_read_u32(np, "qcom,dsi-host-index", &msm_host->id);
-	if (ret) {
-		dev_err(dev, "%s: host index not specified, ret=%d\n",
-			__func__, ret);
-		return ret;
-	}
-
 	/*
-	 * Get the first endpoint node. In our case, dsi has one output port
-	 * to which the panel is connected. Don't return an error if a port
-	 * isn't defined. It's possible that there is nothing connected to
-	 * the dsi output.
+	 * Get the endpoint of the output port of the DSI host. In our case,
+	 * this is mapped to port number with reg = 1. Don't return an error if
+	 * the remote endpoint isn't defined. It's possible that there is
+	 * nothing connected to the dsi output.
 	 */
-	endpoint = of_graph_get_next_endpoint(np, NULL);
+	endpoint = of_graph_get_endpoint_by_regs(np, 1, -1);
 	if (!endpoint) {
 		dev_dbg(dev, "%s: no endpoint\n", __func__);
 		return 0;
@@ -1648,6 +1655,25 @@ err:
 	return ret;
 }
 
+static int dsi_host_get_id(struct msm_dsi_host *msm_host)
+{
+	struct platform_device *pdev = msm_host->pdev;
+	const struct msm_dsi_config *cfg = msm_host->cfg_hnd->cfg;
+	struct resource *res;
+	int i;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dsi_ctrl");
+	if (!res)
+		return -EINVAL;
+
+	for (i = 0; i < cfg->num_dsi; i++) {
+		if (cfg->io_start[i] == res->start)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
 int msm_dsi_host_init(struct msm_dsi *msm_dsi)
 {
 	struct msm_dsi_host *msm_host = NULL;
@@ -1684,6 +1710,13 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi)
 		goto fail;
 	}
 
+	msm_host->id = dsi_host_get_id(msm_host);
+	if (msm_host->id < 0) {
+		ret = msm_host->id;
+		pr_err("%s: unable to identify DSI host index\n", __func__);
+		goto fail;
+	}
+
 	/* fixup base address by io offset */
 	msm_host->ctrl_base += msm_host->cfg_hnd->cfg->io_offset;
 
@@ -2245,9 +2278,9 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 	}
 
 	msm_host->mode = drm_mode_duplicate(msm_host->dev, mode);
-	if (IS_ERR(msm_host->mode)) {
+	if (!msm_host->mode) {
 		pr_err("%s: cannot duplicate mode\n", __func__);
-		return PTR_ERR(msm_host->mode);
+		return -ENOMEM;
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index e2f42d8ea294..f39386ed75e4 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -271,6 +271,30 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 	{}
 };
 
+/*
+ * Currently, we only support one SoC for each PHY type. When we have multiple
+ * SoCs for the same PHY, we can try to make the index searching a bit more
+ * clever.
+ */
+static int dsi_phy_get_id(struct msm_dsi_phy *phy)
+{
+	struct platform_device *pdev = phy->pdev;
+	const struct msm_dsi_phy_cfg *cfg = phy->cfg;
+	struct resource *res;
+	int i;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dsi_phy");
+	if (!res)
+		return -EINVAL;
+
+	for (i = 0; i < cfg->num_dsi_phy; i++) {
+		if (cfg->io_start[i] == res->start)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
 static int dsi_phy_driver_probe(struct platform_device *pdev)
 {
 	struct msm_dsi_phy *phy;
@@ -289,10 +313,10 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 	phy->cfg = match->data;
 	phy->pdev = pdev;
 
-	ret = of_property_read_u32(dev->of_node,
-				"qcom,dsi-phy-index", &phy->id);
-	if (ret) {
-		dev_err(dev, "%s: PHY index not specified, %d\n",
+	phy->id = dsi_phy_get_id(phy);
+	if (phy->id < 0) {
+		ret = phy->id;
+		dev_err(dev, "%s: couldn't identify PHY index, %d\n",
 			__func__, ret);
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 0d54ed00386d..f24a85439b94 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -38,6 +38,8 @@ struct msm_dsi_phy_cfg {
 	 * Fill default H/W values in illegal cells, eg. cell {0, 1}.
 	 */
 	bool src_pll_truthtable[DSI_MAX][DSI_MAX];
+	const resource_size_t io_start[DSI_MAX];
+	const int num_dsi_phy;
 };
 
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
index f4bc11af849a..c757e2070cac 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
@@ -145,6 +145,8 @@ const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs = {
 	.ops = {
 		.enable = dsi_20nm_phy_enable,
 		.disable = dsi_20nm_phy_disable,
-	}
+	},
+	.io_start = { 0xfd998300, 0xfd9a0300 },
+	.num_dsi_phy = 2,
 };
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
index 96d1852af418..63d7fba31380 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
@@ -145,6 +145,8 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0xfd922b00, 0xfd923100 },
+	.num_dsi_phy = 2,
 };
 
 const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
@@ -160,5 +162,7 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0x1a98500 },
+	.num_dsi_phy = 1,
 };
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index 213355a3e767..7bdb9de54968 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -192,4 +192,6 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
 	},
+	.io_start = { 0x4700300, 0x5800300 },
+	.num_dsi_phy = 2,
 };
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 51b9ea552f97..973720792236 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -19,6 +19,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_gpio.h>
 
+#include <sound/hdmi-codec.h>
 #include "hdmi.h"
 
 void msm_hdmi_set_mode(struct hdmi *hdmi, bool power_on)
@@ -434,6 +435,111 @@ static int msm_hdmi_get_gpio(struct device_node *of_node, const char *name)
 	return gpio;
 }
 
+/*
+ * HDMI audio codec callbacks
+ */
+static int msm_hdmi_audio_hw_params(struct device *dev, void *data,
+				    struct hdmi_codec_daifmt *daifmt,
+				    struct hdmi_codec_params *params)
+{
+	struct hdmi *hdmi = dev_get_drvdata(dev);
+	unsigned int chan;
+	unsigned int channel_allocation = 0;
+	unsigned int rate;
+	unsigned int level_shift  = 0; /* 0dB */
+	bool down_mix = false;
+
+	dev_dbg(dev, "%u Hz, %d bit, %d channels\n", params->sample_rate,
+		 params->sample_width, params->cea.channels);
+
+	switch (params->cea.channels) {
+	case 2:
+		/* FR and FL speakers */
+		channel_allocation  = 0;
+		chan = MSM_HDMI_AUDIO_CHANNEL_2;
+		break;
+	case 4:
+		/* FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x3;
+		chan = MSM_HDMI_AUDIO_CHANNEL_4;
+		break;
+	case 6:
+		/* RR, RL, FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x0B;
+		chan = MSM_HDMI_AUDIO_CHANNEL_6;
+		break;
+	case 8:
+		/* FRC, FLC, RR, RL, FC, LFE, FR and FL speakers */
+		channel_allocation  = 0x1F;
+		chan = MSM_HDMI_AUDIO_CHANNEL_8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	switch (params->sample_rate) {
+	case 32000:
+		rate = HDMI_SAMPLE_RATE_32KHZ;
+		break;
+	case 44100:
+		rate = HDMI_SAMPLE_RATE_44_1KHZ;
+		break;
+	case 48000:
+		rate = HDMI_SAMPLE_RATE_48KHZ;
+		break;
+	case 88200:
+		rate = HDMI_SAMPLE_RATE_88_2KHZ;
+		break;
+	case 96000:
+		rate = HDMI_SAMPLE_RATE_96KHZ;
+		break;
+	case 176400:
+		rate = HDMI_SAMPLE_RATE_176_4KHZ;
+		break;
+	case 192000:
+		rate = HDMI_SAMPLE_RATE_192KHZ;
+		break;
+	default:
+		dev_err(dev, "rate[%d] not supported!\n",
+			params->sample_rate);
+		return -EINVAL;
+	}
+
+	msm_hdmi_audio_set_sample_rate(hdmi, rate);
+	msm_hdmi_audio_info_setup(hdmi, 1, chan, channel_allocation,
+			      level_shift, down_mix);
+
+	return 0;
+}
+
+static void msm_hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct hdmi *hdmi = dev_get_drvdata(dev);
+
+	msm_hdmi_audio_info_setup(hdmi, 0, 0, 0, 0, 0);
+}
+
+static const struct hdmi_codec_ops msm_hdmi_audio_codec_ops = {
+	.hw_params = msm_hdmi_audio_hw_params,
+	.audio_shutdown = msm_hdmi_audio_shutdown,
+};
+
+static struct hdmi_codec_pdata codec_data = {
+	.ops = &msm_hdmi_audio_codec_ops,
+	.max_i2s_channels = 8,
+	.i2s = 1,
+};
+
+static int msm_hdmi_register_audio_driver(struct hdmi *hdmi, struct device *dev)
+{
+	hdmi->audio_pdev = platform_device_register_data(dev,
+							 HDMI_CODEC_DRV_NAME,
+							 PLATFORM_DEVID_AUTO,
+							 &codec_data,
+							 sizeof(codec_data));
+	return PTR_ERR_OR_ZERO(hdmi->audio_pdev);
+}
+
 static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
@@ -441,7 +547,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 	static struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
 	struct device_node *of_node = dev->of_node;
-	int i;
+	int i, err;
 
 	hdmi_cfg = (struct hdmi_platform_config *)
 			of_device_get_match_data(dev);
@@ -468,6 +574,12 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(hdmi);
 	priv->hdmi = hdmi;
 
+	err = msm_hdmi_register_audio_driver(hdmi, dev);
+	if (err) {
+		DRM_ERROR("Failed to attach an audio codec %d\n", err);
+		hdmi->audio_pdev = NULL;
+	}
+
 	return 0;
 }
 
@@ -477,6 +589,9 @@ static void msm_hdmi_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
 	if (priv->hdmi) {
+		if (priv->hdmi->audio_pdev)
+			platform_device_unregister(priv->hdmi->audio_pdev);
+
 		msm_hdmi_destroy(priv->hdmi);
 		priv->hdmi = NULL;
 	}
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index bc7ba0bdee07..accc9a61611d 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -50,6 +50,7 @@ struct hdmi_hdcp_ctrl;
 struct hdmi {
 	struct drm_device *dev;
 	struct platform_device *pdev;
+	struct platform_device *audio_pdev;
 
 	const struct hdmi_platform_config *config;
 
@@ -210,6 +211,19 @@ static inline int msm_hdmi_pll_8996_init(struct platform_device *pdev)
 /*
  * audio:
  */
+/* Supported HDMI Audio channels and rates */
+#define	MSM_HDMI_AUDIO_CHANNEL_2	0
+#define	MSM_HDMI_AUDIO_CHANNEL_4	1
+#define	MSM_HDMI_AUDIO_CHANNEL_6	2
+#define	MSM_HDMI_AUDIO_CHANNEL_8	3
+
+#define	HDMI_SAMPLE_RATE_32KHZ		0
+#define	HDMI_SAMPLE_RATE_44_1KHZ	1
+#define	HDMI_SAMPLE_RATE_48KHZ		2
+#define	HDMI_SAMPLE_RATE_88_2KHZ	3
+#define	HDMI_SAMPLE_RATE_96KHZ		4
+#define	HDMI_SAMPLE_RATE_176_4KHZ	5
+#define	HDMI_SAMPLE_RATE_192KHZ		6
 
 int msm_hdmi_audio_update(struct hdmi *hdmi);
 int msm_hdmi_audio_info_setup(struct hdmi *hdmi, bool enabled,
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
index 0baaaaabd002..6e767979aab3 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_hdcp.c
@@ -1430,7 +1430,7 @@ struct hdmi_hdcp_ctrl *msm_hdmi_hdcp_init(struct hdmi *hdmi)
 
 void msm_hdmi_hdcp_destroy(struct hdmi *hdmi)
 {
-	if (hdmi && hdmi->hdcp_ctrl) {
+	if (hdmi) {
 		kfree(hdmi->hdcp_ctrl);
 		hdmi->hdcp_ctrl = NULL;
 	}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
index 35ad78a1dc1c..24258e3025e3 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_dtv_encoder.c
@@ -23,7 +23,6 @@
 
 struct mdp4_dtv_encoder {
 	struct drm_encoder base;
-	struct clk *src_clk;
 	struct clk *hdmi_clk;
 	struct clk *mdp_clk;
 	unsigned long int pixclock;
@@ -179,7 +178,6 @@ static void mdp4_dtv_encoder_disable(struct drm_encoder *encoder)
 	 */
 	mdp_irq_wait(&mdp4_kms->base, MDP4_IRQ_EXTERNAL_VSYNC);
 
-	clk_disable_unprepare(mdp4_dtv_encoder->src_clk);
 	clk_disable_unprepare(mdp4_dtv_encoder->hdmi_clk);
 	clk_disable_unprepare(mdp4_dtv_encoder->mdp_clk);
 
@@ -208,19 +206,21 @@ static void mdp4_dtv_encoder_enable(struct drm_encoder *encoder)
 
 	bs_set(mdp4_dtv_encoder, 1);
 
-	DBG("setting src_clk=%lu", pc);
+	DBG("setting mdp_clk=%lu", pc);
 
-	ret = clk_set_rate(mdp4_dtv_encoder->src_clk, pc);
+	ret = clk_set_rate(mdp4_dtv_encoder->mdp_clk, pc);
 	if (ret)
-		dev_err(dev->dev, "failed to set src_clk to %lu: %d\n", pc, ret);
-	clk_prepare_enable(mdp4_dtv_encoder->src_clk);
-	ret = clk_prepare_enable(mdp4_dtv_encoder->hdmi_clk);
-	if (ret)
-		dev_err(dev->dev, "failed to enable hdmi_clk: %d\n", ret);
+		dev_err(dev->dev, "failed to set mdp_clk to %lu: %d\n",
+			pc, ret);
+
 	ret = clk_prepare_enable(mdp4_dtv_encoder->mdp_clk);
 	if (ret)
 		dev_err(dev->dev, "failed to enabled mdp_clk: %d\n", ret);
 
+	ret = clk_prepare_enable(mdp4_dtv_encoder->hdmi_clk);
+	if (ret)
+		dev_err(dev->dev, "failed to enable hdmi_clk: %d\n", ret);
+
 	mdp4_write(mdp4_kms, REG_MDP4_DTV_ENABLE, 1);
 
 	mdp4_dtv_encoder->enabled = true;
@@ -235,7 +235,7 @@ static const struct drm_encoder_helper_funcs mdp4_dtv_encoder_helper_funcs = {
 long mdp4_dtv_round_pixclk(struct drm_encoder *encoder, unsigned long rate)
 {
 	struct mdp4_dtv_encoder *mdp4_dtv_encoder = to_mdp4_dtv_encoder(encoder);
-	return clk_round_rate(mdp4_dtv_encoder->src_clk, rate);
+	return clk_round_rate(mdp4_dtv_encoder->mdp_clk, rate);
 }
 
 /* initialize encoder */
@@ -257,13 +257,6 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 			 DRM_MODE_ENCODER_TMDS, NULL);
 	drm_encoder_helper_add(encoder, &mdp4_dtv_encoder_helper_funcs);
 
-	mdp4_dtv_encoder->src_clk = devm_clk_get(dev->dev, "src_clk");
-	if (IS_ERR(mdp4_dtv_encoder->src_clk)) {
-		dev_err(dev->dev, "failed to get src_clk\n");
-		ret = PTR_ERR(mdp4_dtv_encoder->src_clk);
-		goto fail;
-	}
-
 	mdp4_dtv_encoder->hdmi_clk = devm_clk_get(dev->dev, "hdmi_clk");
 	if (IS_ERR(mdp4_dtv_encoder->hdmi_clk)) {
 		dev_err(dev->dev, "failed to get hdmi_clk\n");
@@ -271,9 +264,9 @@ struct drm_encoder *mdp4_dtv_encoder_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "mdp_clk");
+	mdp4_dtv_encoder->mdp_clk = devm_clk_get(dev->dev, "tv_clk");
 	if (IS_ERR(mdp4_dtv_encoder->mdp_clk)) {
-		dev_err(dev->dev, "failed to get mdp_clk\n");
+		dev_err(dev->dev, "failed to get tv_clk\n");
 		ret = PTR_ERR(mdp4_dtv_encoder->mdp_clk);
 		goto fail;
 	}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index f145d256e332..7b39e89fbc2b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -158,6 +158,7 @@ static const char * const iommu_ports[] = {
 static void mdp4_destroy(struct msm_kms *kms)
 {
 	struct mdp4_kms *mdp4_kms = to_mdp4_kms(to_mdp_kms(kms));
+	struct device *dev = mdp4_kms->dev->dev;
 	struct msm_mmu *mmu = mdp4_kms->mmu;
 
 	if (mmu) {
@@ -167,8 +168,11 @@ static void mdp4_destroy(struct msm_kms *kms)
 
 	if (mdp4_kms->blank_cursor_iova)
 		msm_gem_put_iova(mdp4_kms->blank_cursor_bo, mdp4_kms->id);
-	if (mdp4_kms->blank_cursor_bo)
-		drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo);
+	drm_gem_object_unreference_unlocked(mdp4_kms->blank_cursor_bo);
+
+	if (mdp4_kms->rpm_enabled)
+		pm_runtime_disable(dev);
+
 	kfree(mdp4_kms);
 }
 
@@ -436,7 +440,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	struct mdp4_kms *mdp4_kms;
 	struct msm_kms *kms = NULL;
 	struct msm_mmu *mmu;
-	int ret;
+	int irq, ret;
 
 	mdp4_kms = kzalloc(sizeof(*mdp4_kms), GFP_KERNEL);
 	if (!mdp4_kms) {
@@ -457,6 +461,15 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		dev_err(dev->dev, "failed to get irq: %d\n", ret);
+		goto fail;
+	}
+
+	kms->irq = irq;
+
 	/* NOTE: driver for this regulator still missing upstream.. use
 	 * _get_exclusive() and ignore the error if it does not exist
 	 * (and hope that the bootloader left it on for us)
@@ -492,7 +505,7 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	mdp4_kms->axi_clk = devm_clk_get(&pdev->dev, "mdp_axi_clk");
+	mdp4_kms->axi_clk = devm_clk_get(&pdev->dev, "bus_clk");
 	if (IS_ERR(mdp4_kms->axi_clk)) {
 		dev_err(dev->dev, "failed to get axi_clk\n");
 		ret = PTR_ERR(mdp4_kms->axi_clk);
@@ -502,6 +515,9 @@ struct msm_kms *mdp4_kms_init(struct drm_device *dev)
 	clk_set_rate(mdp4_kms->clk, config->max_clk);
 	clk_set_rate(mdp4_kms->lut_clk, config->max_clk);
 
+	pm_runtime_enable(dev->dev);
+	mdp4_kms->rpm_enabled = true;
+
 	/* make sure things are off before attaching iommu (bootloader could
 	 * have left things on, in which case we'll start getting faults if
 	 * we don't disable):
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
index c5d045d5680d..25fb83997119 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.h
@@ -47,6 +47,8 @@ struct mdp4_kms {
 
 	struct mdp_irq error_handler;
 
+	bool rpm_enabled;
+
 	/* empty/blank cursor bo to use when cursor is "disabled" */
 	struct drm_gem_object *blank_cursor_bo;
 	uint32_t blank_cursor_iova;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
index b275ce11b24b..ca6ca30650a0 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
@@ -8,19 +8,11 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  37194 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  27887 bytes, from 2015-10-22 16:34:52)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
-- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
-
-Copyright (C) 2013-2015 by the following authors:
+- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/mdp/mdp5.xml   (  36965 bytes, from 2016-05-10 05:06:30)
+- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-05-09 06:32:54)
+- /local/mnt/workspace/source_trees/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2016-01-07 08:45:55)
+
+Copyright (C) 2013-2016 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
@@ -198,118 +190,109 @@ static inline uint32_t MDSS_HW_VERSION_MAJOR(uint32_t val)
 #define MDSS_HW_INTR_STATUS_INTR_HDMI				0x00000100
 #define MDSS_HW_INTR_STATUS_INTR_EDP				0x00001000
 
-static inline uint32_t __offset_MDP(uint32_t idx)
-{
-	switch (idx) {
-		case 0: return (mdp5_cfg->mdp.base[0]);
-		default: return INVALID_IDX(idx);
-	}
-}
-static inline uint32_t REG_MDP5_MDP(uint32_t i0) { return 0x00000000 + __offset_MDP(i0); }
-
-static inline uint32_t REG_MDP5_MDP_HW_VERSION(uint32_t i0) { return 0x00000000 + __offset_MDP(i0); }
-#define MDP5_MDP_HW_VERSION_STEP__MASK				0x0000ffff
-#define MDP5_MDP_HW_VERSION_STEP__SHIFT				0
-static inline uint32_t MDP5_MDP_HW_VERSION_STEP(uint32_t val)
+#define REG_MDP5_HW_VERSION					0x00000000
+#define MDP5_HW_VERSION_STEP__MASK				0x0000ffff
+#define MDP5_HW_VERSION_STEP__SHIFT				0
+static inline uint32_t MDP5_HW_VERSION_STEP(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_STEP__SHIFT) & MDP5_MDP_HW_VERSION_STEP__MASK;
+	return ((val) << MDP5_HW_VERSION_STEP__SHIFT) & MDP5_HW_VERSION_STEP__MASK;
 }
-#define MDP5_MDP_HW_VERSION_MINOR__MASK				0x0fff0000
-#define MDP5_MDP_HW_VERSION_MINOR__SHIFT			16
-static inline uint32_t MDP5_MDP_HW_VERSION_MINOR(uint32_t val)
+#define MDP5_HW_VERSION_MINOR__MASK				0x0fff0000
+#define MDP5_HW_VERSION_MINOR__SHIFT				16
+static inline uint32_t MDP5_HW_VERSION_MINOR(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_MINOR__SHIFT) & MDP5_MDP_HW_VERSION_MINOR__MASK;
+	return ((val) << MDP5_HW_VERSION_MINOR__SHIFT) & MDP5_HW_VERSION_MINOR__MASK;
 }
-#define MDP5_MDP_HW_VERSION_MAJOR__MASK				0xf0000000
-#define MDP5_MDP_HW_VERSION_MAJOR__SHIFT			28
-static inline uint32_t MDP5_MDP_HW_VERSION_MAJOR(uint32_t val)
+#define MDP5_HW_VERSION_MAJOR__MASK				0xf0000000
+#define MDP5_HW_VERSION_MAJOR__SHIFT				28
+static inline uint32_t MDP5_HW_VERSION_MAJOR(uint32_t val)
 {
-	return ((val) << MDP5_MDP_HW_VERSION_MAJOR__SHIFT) & MDP5_MDP_HW_VERSION_MAJOR__MASK;
+	return ((val) << MDP5_HW_VERSION_MAJOR__SHIFT) & MDP5_HW_VERSION_MAJOR__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_DISP_INTF_SEL(uint32_t i0) { return 0x00000004 + __offset_MDP(i0); }
-#define MDP5_MDP_DISP_INTF_SEL_INTF0__MASK			0x000000ff
-#define MDP5_MDP_DISP_INTF_SEL_INTF0__SHIFT			0
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF0(enum mdp5_intf_type val)
+#define REG_MDP5_DISP_INTF_SEL					0x00000004
+#define MDP5_DISP_INTF_SEL_INTF0__MASK				0x000000ff
+#define MDP5_DISP_INTF_SEL_INTF0__SHIFT				0
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF0(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF0__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF0__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF0__SHIFT) & MDP5_DISP_INTF_SEL_INTF0__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF1__MASK			0x0000ff00
-#define MDP5_MDP_DISP_INTF_SEL_INTF1__SHIFT			8
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF1(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF1__MASK				0x0000ff00
+#define MDP5_DISP_INTF_SEL_INTF1__SHIFT				8
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF1(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF1__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF1__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF1__SHIFT) & MDP5_DISP_INTF_SEL_INTF1__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF2__MASK			0x00ff0000
-#define MDP5_MDP_DISP_INTF_SEL_INTF2__SHIFT			16
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF2(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF2__MASK				0x00ff0000
+#define MDP5_DISP_INTF_SEL_INTF2__SHIFT				16
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF2(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF2__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF2__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF2__SHIFT) & MDP5_DISP_INTF_SEL_INTF2__MASK;
 }
-#define MDP5_MDP_DISP_INTF_SEL_INTF3__MASK			0xff000000
-#define MDP5_MDP_DISP_INTF_SEL_INTF3__SHIFT			24
-static inline uint32_t MDP5_MDP_DISP_INTF_SEL_INTF3(enum mdp5_intf_type val)
+#define MDP5_DISP_INTF_SEL_INTF3__MASK				0xff000000
+#define MDP5_DISP_INTF_SEL_INTF3__SHIFT				24
+static inline uint32_t MDP5_DISP_INTF_SEL_INTF3(enum mdp5_intf_type val)
 {
-	return ((val) << MDP5_MDP_DISP_INTF_SEL_INTF3__SHIFT) & MDP5_MDP_DISP_INTF_SEL_INTF3__MASK;
+	return ((val) << MDP5_DISP_INTF_SEL_INTF3__SHIFT) & MDP5_DISP_INTF_SEL_INTF3__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_INTR_EN(uint32_t i0) { return 0x00000010 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_EN					0x00000010
 
-static inline uint32_t REG_MDP5_MDP_INTR_STATUS(uint32_t i0) { return 0x00000014 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_STATUS					0x00000014
 
-static inline uint32_t REG_MDP5_MDP_INTR_CLEAR(uint32_t i0) { return 0x00000018 + __offset_MDP(i0); }
+#define REG_MDP5_INTR_CLEAR					0x00000018
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_EN(uint32_t i0) { return 0x0000001c + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_EN					0x0000001c
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_STATUS(uint32_t i0) { return 0x00000020 + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_STATUS				0x00000020
 
-static inline uint32_t REG_MDP5_MDP_HIST_INTR_CLEAR(uint32_t i0) { return 0x00000024 + __offset_MDP(i0); }
+#define REG_MDP5_HIST_INTR_CLEAR				0x00000024
 
-static inline uint32_t REG_MDP5_MDP_SPARE_0(uint32_t i0) { return 0x00000028 + __offset_MDP(i0); }
-#define MDP5_MDP_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN		0x00000001
+#define REG_MDP5_SPARE_0					0x00000028
+#define MDP5_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN			0x00000001
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_W(uint32_t i0, uint32_t i1) { return 0x00000080 + __offset_MDP(i0) + 0x4*i1; }
+static inline uint32_t REG_MDP5_SMP_ALLOC_W(uint32_t i0) { return 0x00000080 + 0x4*i0; }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_W_REG(uint32_t i0, uint32_t i1) { return 0x00000080 + __offset_MDP(i0) + 0x4*i1; }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK			0x000000ff
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__SHIFT			0
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0(uint32_t val)
+static inline uint32_t REG_MDP5_SMP_ALLOC_W_REG(uint32_t i0) { return 0x00000080 + 0x4*i0; }
+#define MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK			0x000000ff
+#define MDP5_SMP_ALLOC_W_REG_CLIENT0__SHIFT			0
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT0(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT0__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK			0x0000ff00
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__SHIFT			8
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1(uint32_t val)
+#define MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK			0x0000ff00
+#define MDP5_SMP_ALLOC_W_REG_CLIENT1__SHIFT			8
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT1(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT1__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK			0x00ff0000
-#define MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__SHIFT			16
-static inline uint32_t MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2(uint32_t val)
+#define MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK			0x00ff0000
+#define MDP5_SMP_ALLOC_W_REG_CLIENT2__SHIFT			16
+static inline uint32_t MDP5_SMP_ALLOC_W_REG_CLIENT2(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__SHIFT) & MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK;
+	return ((val) << MDP5_SMP_ALLOC_W_REG_CLIENT2__SHIFT) & MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK;
 }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_R(uint32_t i0, uint32_t i1) { return 0x00000130 + __offset_MDP(i0) + 0x4*i1; }
+static inline uint32_t REG_MDP5_SMP_ALLOC_R(uint32_t i0) { return 0x00000130 + 0x4*i0; }
 
-static inline uint32_t REG_MDP5_MDP_SMP_ALLOC_R_REG(uint32_t i0, uint32_t i1) { return 0x00000130 + __offset_MDP(i0) + 0x4*i1; }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__MASK			0x000000ff
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__SHIFT			0
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0(uint32_t val)
+static inline uint32_t REG_MDP5_SMP_ALLOC_R_REG(uint32_t i0) { return 0x00000130 + 0x4*i0; }
+#define MDP5_SMP_ALLOC_R_REG_CLIENT0__MASK			0x000000ff
+#define MDP5_SMP_ALLOC_R_REG_CLIENT0__SHIFT			0
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT0(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT0__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT0__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT0__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__MASK			0x0000ff00
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__SHIFT			8
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1(uint32_t val)
+#define MDP5_SMP_ALLOC_R_REG_CLIENT1__MASK			0x0000ff00
+#define MDP5_SMP_ALLOC_R_REG_CLIENT1__SHIFT			8
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT1(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT1__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT1__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT1__MASK;
 }
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__MASK			0x00ff0000
-#define MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__SHIFT			16
-static inline uint32_t MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2(uint32_t val)
+#define MDP5_SMP_ALLOC_R_REG_CLIENT2__MASK			0x00ff0000
+#define MDP5_SMP_ALLOC_R_REG_CLIENT2__SHIFT			16
+static inline uint32_t MDP5_SMP_ALLOC_R_REG_CLIENT2(uint32_t val)
 {
-	return ((val) << MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__SHIFT) & MDP5_MDP_SMP_ALLOC_R_REG_CLIENT2__MASK;
+	return ((val) << MDP5_SMP_ALLOC_R_REG_CLIENT2__SHIFT) & MDP5_SMP_ALLOC_R_REG_CLIENT2__MASK;
 }
 
 static inline uint32_t __offset_IGC(enum mdp5_igc_type idx)
@@ -322,35 +305,35 @@ static inline uint32_t __offset_IGC(enum mdp5_igc_type idx)
 		default: return INVALID_IDX(idx);
 	}
 }
-static inline uint32_t REG_MDP5_MDP_IGC(uint32_t i0, enum mdp5_igc_type i1) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1); }
+static inline uint32_t REG_MDP5_IGC(enum mdp5_igc_type i0) { return 0x00000000 + __offset_IGC(i0); }
 
-static inline uint32_t REG_MDP5_MDP_IGC_LUT(uint32_t i0, enum mdp5_igc_type i1, uint32_t i2) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1) + 0x4*i2; }
+static inline uint32_t REG_MDP5_IGC_LUT(enum mdp5_igc_type i0, uint32_t i1) { return 0x00000000 + __offset_IGC(i0) + 0x4*i1; }
 
-static inline uint32_t REG_MDP5_MDP_IGC_LUT_REG(uint32_t i0, enum mdp5_igc_type i1, uint32_t i2) { return 0x00000000 + __offset_MDP(i0) + __offset_IGC(i1) + 0x4*i2; }
-#define MDP5_MDP_IGC_LUT_REG_VAL__MASK				0x00000fff
-#define MDP5_MDP_IGC_LUT_REG_VAL__SHIFT				0
-static inline uint32_t MDP5_MDP_IGC_LUT_REG_VAL(uint32_t val)
+static inline uint32_t REG_MDP5_IGC_LUT_REG(enum mdp5_igc_type i0, uint32_t i1) { return 0x00000000 + __offset_IGC(i0) + 0x4*i1; }
+#define MDP5_IGC_LUT_REG_VAL__MASK				0x00000fff
+#define MDP5_IGC_LUT_REG_VAL__SHIFT				0
+static inline uint32_t MDP5_IGC_LUT_REG_VAL(uint32_t val)
 {
-	return ((val) << MDP5_MDP_IGC_LUT_REG_VAL__SHIFT) & MDP5_MDP_IGC_LUT_REG_VAL__MASK;
+	return ((val) << MDP5_IGC_LUT_REG_VAL__SHIFT) & MDP5_IGC_LUT_REG_VAL__MASK;
 }
-#define MDP5_MDP_IGC_LUT_REG_INDEX_UPDATE			0x02000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_0			0x10000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_1			0x20000000
-#define MDP5_MDP_IGC_LUT_REG_DISABLE_PIPE_2			0x40000000
+#define MDP5_IGC_LUT_REG_INDEX_UPDATE				0x02000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_0				0x10000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_1				0x20000000
+#define MDP5_IGC_LUT_REG_DISABLE_PIPE_2				0x40000000
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_EN(uint32_t i0) { return 0x000002f4 + __offset_MDP(i0); }
+#define REG_MDP5_SPLIT_DPL_EN					0x000002f4
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_UPPER(uint32_t i0) { return 0x000002f8 + __offset_MDP(i0); }
-#define MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL			0x00000002
-#define MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL_FREE_RUN		0x00000004
-#define MDP5_MDP_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX		0x00000010
-#define MDP5_MDP_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX		0x00000100
+#define REG_MDP5_SPLIT_DPL_UPPER				0x000002f8
+#define MDP5_SPLIT_DPL_UPPER_SMART_PANEL			0x00000002
+#define MDP5_SPLIT_DPL_UPPER_SMART_PANEL_FREE_RUN		0x00000004
+#define MDP5_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX			0x00000010
+#define MDP5_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX			0x00000100
 
-static inline uint32_t REG_MDP5_MDP_SPLIT_DPL_LOWER(uint32_t i0) { return 0x000003f0 + __offset_MDP(i0); }
-#define MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL			0x00000002
-#define MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL_FREE_RUN		0x00000004
-#define MDP5_MDP_SPLIT_DPL_LOWER_INTF1_TG_SYNC			0x00000010
-#define MDP5_MDP_SPLIT_DPL_LOWER_INTF2_TG_SYNC			0x00000100
+#define REG_MDP5_SPLIT_DPL_LOWER				0x000003f0
+#define MDP5_SPLIT_DPL_LOWER_SMART_PANEL			0x00000002
+#define MDP5_SPLIT_DPL_LOWER_SMART_PANEL_FREE_RUN		0x00000004
+#define MDP5_SPLIT_DPL_LOWER_INTF1_TG_SYNC			0x00000010
+#define MDP5_SPLIT_DPL_LOWER_INTF2_TG_SYNC			0x00000100
 
 static inline uint32_t __offset_CTL(uint32_t idx)
 {
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
index 57f73f0c120d..ac9e4cde1380 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
@@ -26,7 +26,6 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	.name = "msm8x74v1",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -41,12 +40,12 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x0003ffff,
 	},
 	.pipe_vig = {
 		.count = 3,
-		.base = { 0x01200, 0x01600, 0x01a00 },
+		.base = { 0x01100, 0x01500, 0x01900 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			MDP_PIPE_CAP_SCALE |
@@ -55,7 +54,7 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.pipe_rgb = {
 		.count = 3,
-		.base = { 0x01e00, 0x02200, 0x02600 },
+		.base = { 0x01d00, 0x02100, 0x02500 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			MDP_PIPE_CAP_SCALE |
@@ -63,26 +62,26 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x02a00, 0x02e00 },
+		.base = { 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP |
 			MDP_PIPE_CAP_VFLIP |
 			0,
 	},
 	.lm = {
 		.count = 5,
-		.base = { 0x03200, 0x03600, 0x03a00, 0x03e00, 0x04200 },
+		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
 		.nb_stages = 5,
 	},
 	.dspp = {
 		.count = 3,
-		.base = { 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x04500, 0x04900, 0x04d00 },
 	},
 	.pp = {
 		.count = 3,
-		.base = { 0x21b00, 0x21c00, 0x21d00 },
+		.base = { 0x21a00, 0x21b00, 0x21c00 },
 	},
 	.intf = {
-		.base = { 0x21100, 0x21300, 0x21500, 0x21700 },
+		.base = { 0x21000, 0x21200, 0x21400, 0x21600 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -97,7 +96,6 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
 	.name = "msm8x74",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -112,48 +110,48 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x0003ffff,
 	},
 	.pipe_vig = {
 		.count = 3,
-		.base = { 0x01200, 0x01600, 0x01a00 },
+		.base = { 0x01100, 0x01500, 0x01900 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 3,
-		.base = { 0x01e00, 0x02200, 0x02600 },
+		.base = { 0x01d00, 0x02100, 0x02500 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x02a00, 0x02e00 },
+		.base = { 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 5,
-		.base = { 0x03200, 0x03600, 0x03a00, 0x03e00, 0x04200 },
+		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 3,
-		.base = { 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x04500, 0x04900, 0x04d00 },
 	},
 	.ad = {
 		.count = 2,
-		.base = { 0x13100, 0x13300 },
+		.base = { 0x13000, 0x13200 },
 	},
 	.pp = {
 		.count = 3,
-		.base = { 0x12d00, 0x12e00, 0x12f00 },
+		.base = { 0x12c00, 0x12d00, 0x12e00 },
 	},
 	.intf = {
-		.base = { 0x12500, 0x12700, 0x12900, 0x12b00 },
+		.base = { 0x12400, 0x12600, 0x12800, 0x12a00 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -168,7 +166,6 @@ const struct mdp5_cfg_hw apq8084_config = {
 	.name = "apq8084",
 	.mdp = {
 		.count = 1,
-		.base = { 0x00100 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -190,49 +187,49 @@ const struct mdp5_cfg_hw apq8084_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x00600, 0x00700, 0x00800, 0x00900, 0x00a00 },
+		.base = { 0x00500, 0x00600, 0x00700, 0x00800, 0x00900 },
 		.flush_hw_mask = 0x003fffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x01200, 0x01600, 0x01a00, 0x01e00 },
+		.base = { 0x01100, 0x01500, 0x01900, 0x01d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x02200, 0x02600, 0x02a00, 0x02e00 },
+		.base = { 0x02100, 0x02500, 0x02900, 0x02d00 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x03200, 0x03600 },
+		.base = { 0x03100, 0x03500 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x03a00, 0x03e00, 0x04200, 0x04600, 0x04a00, 0x04e00 },
+		.base = { 0x03900, 0x03d00, 0x04100, 0x04500, 0x04900, 0x04d00 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 4,
-		.base = { 0x05200, 0x05600, 0x05a00, 0x05e00 },
+		.base = { 0x05100, 0x05500, 0x05900, 0x05d00 },
 
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x13500, 0x13700, 0x13900 },
+		.base = { 0x13400, 0x13600, 0x13800 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x12f00, 0x13000, 0x13100, 0x13200 },
+		.base = { 0x12e00, 0x12f00, 0x13000, 0x13100 },
 	},
 	.intf = {
-		.base = { 0x12500, 0x12700, 0x12900, 0x12b00, 0x12d00 },
+		.base = { 0x12400, 0x12600, 0x12800, 0x12a00, 0x12c00 },
 		.connect = {
 			[0] = INTF_eDP,
 			[1] = INTF_DSI,
@@ -247,7 +244,7 @@ const struct mdp5_cfg_hw msm8x16_config = {
 	.name = "msm8x16",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
+		.base = { 0x0 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -261,41 +258,41 @@ const struct mdp5_cfg_hw msm8x16_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0x4003ffff,
 	},
 	.pipe_vig = {
 		.count = 1,
-		.base = { 0x05000 },
+		.base = { 0x04000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 2,
-		.base = { 0x15000, 0x17000 },
+		.base = { 0x14000, 0x16000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 1,
-		.base = { 0x25000 },
+		.base = { 0x24000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 2, /* LM0 and LM3 */
-		.base = { 0x45000, 0x48000 },
+		.base = { 0x44000, 0x47000 },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 1,
-		.base = { 0x55000 },
+		.base = { 0x54000 },
 
 	},
 	.intf = {
-		.base = { 0x00000, 0x6b800 },
+		.base = { 0x00000, 0x6a800 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
@@ -308,7 +305,6 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	.name = "msm8x94",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
 		.caps = MDP_CAP_SMP |
 			0,
 	},
@@ -330,49 +326,49 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0xf0ffffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x05000, 0x07000, 0x09000, 0x0b000 },
+		.base = { 0x04000, 0x06000, 0x08000, 0x0a000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_CSC |
 				MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x15000, 0x17000, 0x19000, 0x1b000 },
+		.base = { 0x14000, 0x16000, 0x18000, 0x1a000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP |
 				MDP_PIPE_CAP_SCALE | MDP_PIPE_CAP_DECIMATION,
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x25000, 0x27000 },
+		.base = { 0x24000, 0x26000 },
 		.caps = MDP_PIPE_CAP_HFLIP | MDP_PIPE_CAP_VFLIP,
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x45000, 0x46000, 0x47000, 0x48000, 0x49000, 0x4a000 },
+		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
 		.nb_stages = 8,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 4,
-		.base = { 0x55000, 0x57000, 0x59000, 0x5b000 },
+		.base = { 0x54000, 0x56000, 0x58000, 0x5a000 },
 
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x79000, 0x79800, 0x7a000 },
+		.base = { 0x78000, 0x78800, 0x79000 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x71000, 0x71800, 0x72000, 0x72800 },
+		.base = { 0x70000, 0x70800, 0x71000, 0x71800 },
 	},
 	.intf = {
-		.base = { 0x6b000, 0x6b800, 0x6c000, 0x6c800, 0x6d000 },
+		.base = { 0x6a000, 0x6a800, 0x6b000, 0x6b800, 0x6c000 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
@@ -387,19 +383,18 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	.name = "msm8x96",
 	.mdp = {
 		.count = 1,
-		.base = { 0x01000 },
 		.caps = MDP_CAP_DSC |
 			MDP_CAP_CDM |
 			0,
 	},
 	.ctl = {
 		.count = 5,
-		.base = { 0x02000, 0x02200, 0x02400, 0x02600, 0x02800 },
+		.base = { 0x01000, 0x01200, 0x01400, 0x01600, 0x01800 },
 		.flush_hw_mask = 0xf4ffffff,
 	},
 	.pipe_vig = {
 		.count = 4,
-		.base = { 0x05000, 0x07000, 0x09000, 0x0b000 },
+		.base = { 0x04000, 0x06000, 0x08000, 0x0a000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SCALE	|
@@ -410,7 +405,7 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.pipe_rgb = {
 		.count = 4,
-		.base = { 0x15000, 0x17000, 0x19000, 0x1b000 },
+		.base = { 0x14000, 0x16000, 0x18000, 0x1a000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SCALE	|
@@ -420,7 +415,7 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.pipe_dma = {
 		.count = 2,
-		.base = { 0x25000, 0x27000 },
+		.base = { 0x24000, 0x26000 },
 		.caps = MDP_PIPE_CAP_HFLIP	|
 			MDP_PIPE_CAP_VFLIP	|
 			MDP_PIPE_CAP_SW_PIX_EXT	|
@@ -428,33 +423,33 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	},
 	.lm = {
 		.count = 6,
-		.base = { 0x45000, 0x46000, 0x47000, 0x48000, 0x49000, 0x4a000 },
+		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
 		.nb_stages = 8,
 		.max_width = 2560,
 		.max_height = 0xFFFF,
 	},
 	.dspp = {
 		.count = 2,
-		.base = { 0x55000, 0x57000 },
+		.base = { 0x54000, 0x56000 },
 	},
 	.ad = {
 		.count = 3,
-		.base = { 0x79000, 0x79800, 0x7a000 },
+		.base = { 0x78000, 0x78800, 0x79000 },
 	},
 	.pp = {
 		.count = 4,
-		.base = { 0x71000, 0x71800, 0x72000, 0x72800 },
+		.base = { 0x70000, 0x70800, 0x71000, 0x71800 },
 	},
 	.cdm = {
 		.count = 1,
-		.base = { 0x7a200 },
+		.base = { 0x79200 },
 	},
 	.dsc = {
 		.count = 2,
-		.base = { 0x81000, 0x81400 },
+		.base = { 0x80000, 0x80400 },
 	},
 	.intf = {
-		.base = { 0x6b000, 0x6b800, 0x6c000, 0x6c800, 0x6d000 },
+		.base = { 0x6a000, 0x6a800, 0x6b000, 0x6b800, 0x6c000 },
 		.connect = {
 			[0] = INTF_DISABLED,
 			[1] = INTF_DSI,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index 69094cb28103..c627ab6d0061 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -272,22 +272,22 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
 	 * start signal for the slave encoder
 	 */
 	if (intf_num == 1)
-		data |= MDP5_MDP_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX;
+		data |= MDP5_SPLIT_DPL_UPPER_INTF2_SW_TRG_MUX;
 	else if (intf_num == 2)
-		data |= MDP5_MDP_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX;
+		data |= MDP5_SPLIT_DPL_UPPER_INTF1_SW_TRG_MUX;
 	else
 		return -EINVAL;
 
 	/* Smart Panel, Sync mode */
-	data |= MDP5_MDP_SPLIT_DPL_UPPER_SMART_PANEL;
+	data |= MDP5_SPLIT_DPL_UPPER_SMART_PANEL;
 
 	/* Make sure clocks are on when connectors calling this function. */
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_UPPER(0), data);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_UPPER, data);
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_LOWER(0),
-			MDP5_MDP_SPLIT_DPL_LOWER_SMART_PANEL);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_EN(0), 1);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER,
+		   MDP5_SPLIT_DPL_LOWER_SMART_PANEL);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1);
 	mdp5_disable(mdp5_kms);
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 4e8ed739f558..fa2be7ce9468 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -490,8 +490,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 	struct drm_gem_object *cursor_bo, *old_bo = NULL;
 	uint32_t blendcfg, cursor_addr, stride;
-	int ret, bpp, lm;
-	unsigned int depth;
+	int ret, lm;
 	enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL;
 	uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0);
 	uint32_t roi_w, roi_h;
@@ -521,8 +520,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 		return -EINVAL;
 
 	lm = mdp5_crtc->lm;
-	drm_fb_get_bpp_depth(DRM_FORMAT_ARGB8888, &depth, &bpp);
-	stride = width * (bpp >> 3);
+	stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0);
 
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
 	old_bo = mdp5_crtc->cursor.scanout_bo;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
index 4e81ca4f964a..d021edc3b307 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
@@ -118,31 +118,31 @@ static void set_display_intf(struct mdp5_kms *mdp5_kms,
 	u32 intf_sel;
 
 	spin_lock_irqsave(&mdp5_kms->resource_lock, flags);
-	intf_sel = mdp5_read(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0));
+	intf_sel = mdp5_read(mdp5_kms, REG_MDP5_DISP_INTF_SEL);
 
 	switch (intf->num) {
 	case 0:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF0__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF0(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF0__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF0(intf->type);
 		break;
 	case 1:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF1__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF1(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF1__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF1(intf->type);
 		break;
 	case 2:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF2__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF2(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF2__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF2(intf->type);
 		break;
 	case 3:
-		intf_sel &= ~MDP5_MDP_DISP_INTF_SEL_INTF3__MASK;
-		intf_sel |= MDP5_MDP_DISP_INTF_SEL_INTF3(intf->type);
+		intf_sel &= ~MDP5_DISP_INTF_SEL_INTF3__MASK;
+		intf_sel |= MDP5_DISP_INTF_SEL_INTF3(intf->type);
 		break;
 	default:
 		BUG();
 		break;
 	}
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0), intf_sel);
+	mdp5_write(mdp5_kms, REG_MDP5_DISP_INTF_SEL, intf_sel);
 	spin_unlock_irqrestore(&mdp5_kms->resource_lock, flags);
 }
 
@@ -557,7 +557,7 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable)
 	if (!enable) {
 		ctlx->pair = NULL;
 		ctly->pair = NULL;
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SPARE_0(0), 0);
+		mdp5_write(mdp5_kms, REG_MDP5_SPARE_0, 0);
 		return 0;
 	} else if ((ctlx->pair != NULL) || (ctly->pair != NULL)) {
 		dev_err(ctl_mgr->dev->dev, "CTLs already paired\n");
@@ -570,8 +570,8 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable)
 	ctlx->pair = ctly;
 	ctly->pair = ctlx;
 
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPARE_0(0),
-		MDP5_MDP_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN);
+	mdp5_write(mdp5_kms, REG_MDP5_SPARE_0,
+		   MDP5_SPARE_0_SPLIT_DPL_SINGLE_FLUSH_EN);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 1d95f9fd9dc7..fe0c22230883 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -322,18 +322,18 @@ int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
 	 * to use the master's enable signal for the slave encoder.
 	 */
 	if (intf_num == 1)
-		data |= MDP5_MDP_SPLIT_DPL_LOWER_INTF2_TG_SYNC;
+		data |= MDP5_SPLIT_DPL_LOWER_INTF2_TG_SYNC;
 	else if (intf_num == 2)
-		data |= MDP5_MDP_SPLIT_DPL_LOWER_INTF1_TG_SYNC;
+		data |= MDP5_SPLIT_DPL_LOWER_INTF1_TG_SYNC;
 	else
 		return -EINVAL;
 
 	/* Make sure clocks are on when connectors calling this function. */
 	mdp5_enable(mdp5_kms);
 	/* Dumb Panel, Sync mode */
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_UPPER(0), 0);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_LOWER(0), data);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_SPLIT_DPL_EN(0), 1);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_UPPER, 0);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_LOWER, data);
+	mdp5_write(mdp5_kms, REG_MDP5_SPLIT_DPL_EN, 1);
 
 	mdp5_ctl_pair(mdp5_encoder->ctl, mdp5_slave_enc->ctl, true);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
index 73bc3e312fd4..d53e5510fd7c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_irq.c
@@ -15,7 +15,6 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/irqdomain.h>
 #include <linux/irq.h>
 
 #include "msm_drv.h"
@@ -24,9 +23,9 @@
 void mdp5_set_irqmask(struct mdp_kms *mdp_kms, uint32_t irqmask,
 		uint32_t old_irqmask)
 {
-	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_MDP_INTR_CLEAR(0),
-		irqmask ^ (irqmask & old_irqmask));
-	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_MDP_INTR_EN(0), irqmask);
+	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_INTR_CLEAR,
+		   irqmask ^ (irqmask & old_irqmask));
+	mdp5_write(to_mdp5_kms(mdp_kms), REG_MDP5_INTR_EN, irqmask);
 }
 
 static void mdp5_irq_error_handler(struct mdp_irq *irq, uint32_t irqstatus)
@@ -38,8 +37,8 @@ void mdp5_irq_preinstall(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_CLEAR(0), 0xffffffff);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_EN(0), 0x00000000);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, 0xffffffff);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000);
 	mdp5_disable(mdp5_kms);
 }
 
@@ -55,7 +54,9 @@ int mdp5_irq_postinstall(struct msm_kms *kms)
 			MDP5_IRQ_INTF2_UNDER_RUN |
 			MDP5_IRQ_INTF3_UNDER_RUN;
 
+	mdp5_enable(mdp5_kms);
 	mdp_irq_register(mdp_kms, error_handler);
+	mdp5_disable(mdp5_kms);
 
 	return 0;
 }
@@ -64,21 +65,22 @@ void mdp5_irq_uninstall(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	mdp5_enable(mdp5_kms);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_EN(0), 0x00000000);
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_EN, 0x00000000);
 	mdp5_disable(mdp5_kms);
 }
 
-static void mdp5_irq_mdp(struct mdp_kms *mdp_kms)
+irqreturn_t mdp5_irq(struct msm_kms *kms)
 {
+	struct mdp_kms *mdp_kms = to_mdp_kms(kms);
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(mdp_kms);
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	unsigned int id;
 	uint32_t status, enable;
 
-	enable = mdp5_read(mdp5_kms, REG_MDP5_MDP_INTR_EN(0));
-	status = mdp5_read(mdp5_kms, REG_MDP5_MDP_INTR_STATUS(0)) & enable;
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_INTR_CLEAR(0), status);
+	enable = mdp5_read(mdp5_kms, REG_MDP5_INTR_EN);
+	status = mdp5_read(mdp5_kms, REG_MDP5_INTR_STATUS) & enable;
+	mdp5_write(mdp5_kms, REG_MDP5_INTR_CLEAR, status);
 
 	VERB("status=%08x", status);
 
@@ -87,29 +89,6 @@ static void mdp5_irq_mdp(struct mdp_kms *mdp_kms)
 	for (id = 0; id < priv->num_crtcs; id++)
 		if (status & mdp5_crtc_vblank(priv->crtcs[id]))
 			drm_handle_vblank(dev, id);
-}
-
-irqreturn_t mdp5_irq(struct msm_kms *kms)
-{
-	struct mdp_kms *mdp_kms = to_mdp_kms(kms);
-	struct mdp5_kms *mdp5_kms = to_mdp5_kms(mdp_kms);
-	uint32_t intr;
-
-	intr = mdp5_read(mdp5_kms, REG_MDSS_HW_INTR_STATUS);
-
-	VERB("intr=%08x", intr);
-
-	if (intr & MDSS_HW_INTR_STATUS_INTR_MDP) {
-		mdp5_irq_mdp(mdp_kms);
-		intr &= ~MDSS_HW_INTR_STATUS_INTR_MDP;
-	}
-
-	while (intr) {
-		irq_hw_number_t hwirq = fls(intr) - 1;
-		generic_handle_irq(irq_find_mapping(
-				mdp5_kms->irqcontroller.domain, hwirq));
-		intr &= ~(1 << hwirq);
-	}
 
 	return IRQ_HANDLED;
 }
@@ -135,81 +114,3 @@ void mdp5_disable_vblank(struct msm_kms *kms, struct drm_crtc *crtc)
 			mdp5_crtc_vblank(crtc), false);
 	mdp5_disable(mdp5_kms);
 }
-
-/*
- * interrupt-controller implementation, so sub-blocks (hdmi/eDP/dsi/etc)
- * can register to get their irq's delivered
- */
-
-#define VALID_IRQS  (MDSS_HW_INTR_STATUS_INTR_DSI0 | \
-		MDSS_HW_INTR_STATUS_INTR_DSI1 | \
-		MDSS_HW_INTR_STATUS_INTR_HDMI | \
-		MDSS_HW_INTR_STATUS_INTR_EDP)
-
-static void mdp5_hw_mask_irq(struct irq_data *irqd)
-{
-	struct mdp5_kms *mdp5_kms = irq_data_get_irq_chip_data(irqd);
-	smp_mb__before_atomic();
-	clear_bit(irqd->hwirq, &mdp5_kms->irqcontroller.enabled_mask);
-	smp_mb__after_atomic();
-}
-
-static void mdp5_hw_unmask_irq(struct irq_data *irqd)
-{
-	struct mdp5_kms *mdp5_kms = irq_data_get_irq_chip_data(irqd);
-	smp_mb__before_atomic();
-	set_bit(irqd->hwirq, &mdp5_kms->irqcontroller.enabled_mask);
-	smp_mb__after_atomic();
-}
-
-static struct irq_chip mdp5_hw_irq_chip = {
-	.name		= "mdp5",
-	.irq_mask	= mdp5_hw_mask_irq,
-	.irq_unmask	= mdp5_hw_unmask_irq,
-};
-
-static int mdp5_hw_irqdomain_map(struct irq_domain *d,
-		unsigned int irq, irq_hw_number_t hwirq)
-{
-	struct mdp5_kms *mdp5_kms = d->host_data;
-
-	if (!(VALID_IRQS & (1 << hwirq)))
-		return -EPERM;
-
-	irq_set_chip_and_handler(irq, &mdp5_hw_irq_chip, handle_level_irq);
-	irq_set_chip_data(irq, mdp5_kms);
-
-	return 0;
-}
-
-static struct irq_domain_ops mdp5_hw_irqdomain_ops = {
-	.map = mdp5_hw_irqdomain_map,
-	.xlate = irq_domain_xlate_onecell,
-};
-
-
-int mdp5_irq_domain_init(struct mdp5_kms *mdp5_kms)
-{
-	struct device *dev = mdp5_kms->dev->dev;
-	struct irq_domain *d;
-
-	d = irq_domain_add_linear(dev->of_node, 32,
-			&mdp5_hw_irqdomain_ops, mdp5_kms);
-	if (!d) {
-		dev_err(dev, "mdp5 irq domain add failed\n");
-		return -ENXIO;
-	}
-
-	mdp5_kms->irqcontroller.enabled_mask = 0;
-	mdp5_kms->irqcontroller.domain = d;
-
-	return 0;
-}
-
-void mdp5_irq_domain_fini(struct mdp5_kms *mdp5_kms)
-{
-	if (mdp5_kms->irqcontroller.domain) {
-		irq_domain_remove(mdp5_kms->irqcontroller.domain);
-		mdp5_kms->irqcontroller.domain = NULL;
-	}
-}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index f0c285b1c027..ed7143d35b25 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -16,6 +16,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/of_irq.h>
 
 #include "msm_drv.h"
 #include "msm_mmu.h"
@@ -28,10 +29,11 @@ static const char *iommu_ports[] = {
 static int mdp5_hw_init(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
-	struct drm_device *dev = mdp5_kms->dev;
+	struct platform_device *pdev = mdp5_kms->pdev;
 	unsigned long flags;
 
-	pm_runtime_get_sync(dev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+	mdp5_enable(mdp5_kms);
 
 	/* Magic unknown register writes:
 	 *
@@ -58,12 +60,13 @@ static int mdp5_hw_init(struct msm_kms *kms)
 	 */
 
 	spin_lock_irqsave(&mdp5_kms->resource_lock, flags);
-	mdp5_write(mdp5_kms, REG_MDP5_MDP_DISP_INTF_SEL(0), 0);
+	mdp5_write(mdp5_kms, REG_MDP5_DISP_INTF_SEL, 0);
 	spin_unlock_irqrestore(&mdp5_kms->resource_lock, flags);
 
 	mdp5_ctlm_hw_reset(mdp5_kms->ctlm);
 
-	pm_runtime_put_sync(dev->dev);
+	mdp5_disable(mdp5_kms);
+	pm_runtime_put_sync(&pdev->dev);
 
 	return 0;
 }
@@ -111,26 +114,15 @@ static int mdp5_set_split_display(struct msm_kms *kms,
 		return mdp5_encoder_set_split_display(encoder, slave_encoder);
 }
 
-static void mdp5_destroy(struct msm_kms *kms)
+static void mdp5_kms_destroy(struct msm_kms *kms)
 {
 	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
 	struct msm_mmu *mmu = mdp5_kms->mmu;
 
-	mdp5_irq_domain_fini(mdp5_kms);
-
 	if (mmu) {
 		mmu->funcs->detach(mmu, iommu_ports, ARRAY_SIZE(iommu_ports));
 		mmu->funcs->destroy(mmu);
 	}
-
-	if (mdp5_kms->ctlm)
-		mdp5_ctlm_destroy(mdp5_kms->ctlm);
-	if (mdp5_kms->smp)
-		mdp5_smp_destroy(mdp5_kms->smp);
-	if (mdp5_kms->cfg)
-		mdp5_cfg_destroy(mdp5_kms->cfg);
-
-	kfree(mdp5_kms);
 }
 
 static const struct mdp_kms_funcs kms_funcs = {
@@ -148,7 +140,7 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
-		.destroy         = mdp5_destroy,
+		.destroy         = mdp5_kms_destroy,
 	},
 	.set_irqmask         = mdp5_set_irqmask,
 };
@@ -345,13 +337,6 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 
 	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
 
-	/* register our interrupt-controller for hdmi/eDP/dsi/etc
-	 * to use for irqs routed through mdp:
-	 */
-	ret = mdp5_irq_domain_init(mdp5_kms);
-	if (ret)
-		goto fail;
-
 	/* construct CRTCs and their private planes: */
 	for (i = 0; i < hw_cfg->pipe_rgb.count; i++) {
 		struct drm_plane *plane;
@@ -419,17 +404,17 @@ fail:
 	return ret;
 }
 
-static void read_hw_revision(struct mdp5_kms *mdp5_kms,
-		uint32_t *major, uint32_t *minor)
+static void read_mdp_hw_revision(struct mdp5_kms *mdp5_kms,
+				 u32 *major, u32 *minor)
 {
-	uint32_t version;
+	u32 version;
 
 	mdp5_enable(mdp5_kms);
-	version = mdp5_read(mdp5_kms, REG_MDSS_HW_VERSION);
+	version = mdp5_read(mdp5_kms, REG_MDP5_HW_VERSION);
 	mdp5_disable(mdp5_kms);
 
-	*major = FIELD(version, MDSS_HW_VERSION_MAJOR);
-	*minor = FIELD(version, MDSS_HW_VERSION_MINOR);
+	*major = FIELD(version, MDP5_HW_VERSION_MAJOR);
+	*minor = FIELD(version, MDP5_HW_VERSION_MINOR);
 
 	DBG("MDP5 version v%d.%d", *major, *minor);
 }
@@ -574,51 +559,146 @@ static u32 mdp5_get_vblank_counter(struct drm_device *dev, unsigned int pipe)
 
 struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 {
-	struct platform_device *pdev = dev->platformdev;
-	struct mdp5_cfg *config;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct platform_device *pdev;
 	struct mdp5_kms *mdp5_kms;
-	struct msm_kms *kms = NULL;
+	struct mdp5_cfg *config;
+	struct msm_kms *kms;
 	struct msm_mmu *mmu;
-	uint32_t major, minor;
-	int i, ret;
+	int irq, i, ret;
 
-	mdp5_kms = kzalloc(sizeof(*mdp5_kms), GFP_KERNEL);
-	if (!mdp5_kms) {
-		dev_err(dev->dev, "failed to allocate kms\n");
-		ret = -ENOMEM;
+	/* priv->kms would have been populated by the MDP5 driver */
+	kms = priv->kms;
+	if (!kms)
+		return NULL;
+
+	mdp5_kms = to_mdp5_kms(to_mdp_kms(kms));
+
+	mdp_kms_init(&mdp5_kms->base, &kms_funcs);
+
+	pdev = mdp5_kms->pdev;
+
+	irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+	if (irq < 0) {
+		ret = irq;
+		dev_err(&pdev->dev, "failed to get irq: %d\n", ret);
 		goto fail;
 	}
 
-	spin_lock_init(&mdp5_kms->resource_lock);
+	kms->irq = irq;
 
-	mdp_kms_init(&mdp5_kms->base, &kms_funcs);
+	config = mdp5_cfg_get_config(mdp5_kms->cfg);
 
-	kms = &mdp5_kms->base.base;
+	/* make sure things are off before attaching iommu (bootloader could
+	 * have left things on, in which case we'll start getting faults if
+	 * we don't disable):
+	 */
+	mdp5_enable(mdp5_kms);
+	for (i = 0; i < MDP5_INTF_NUM_MAX; i++) {
+		if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) ||
+		    !config->hw->intf.base[i])
+			continue;
+		mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
 
-	mdp5_kms->dev = dev;
+		mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3);
+	}
+	mdp5_disable(mdp5_kms);
+	mdelay(16);
 
-	/* mdp5_kms->mmio actually represents the MDSS base address */
-	mdp5_kms->mmio = msm_ioremap(pdev, "mdp_phys", "MDP5");
-	if (IS_ERR(mdp5_kms->mmio)) {
-		ret = PTR_ERR(mdp5_kms->mmio);
+	if (config->platform.iommu) {
+		mmu = msm_iommu_new(&pdev->dev, config->platform.iommu);
+		if (IS_ERR(mmu)) {
+			ret = PTR_ERR(mmu);
+			dev_err(&pdev->dev, "failed to init iommu: %d\n", ret);
+			iommu_domain_free(config->platform.iommu);
+			goto fail;
+		}
+
+		ret = mmu->funcs->attach(mmu, iommu_ports,
+				ARRAY_SIZE(iommu_ports));
+		if (ret) {
+			dev_err(&pdev->dev, "failed to attach iommu: %d\n",
+				ret);
+			mmu->funcs->destroy(mmu);
+			goto fail;
+		}
+	} else {
+		dev_info(&pdev->dev,
+			 "no iommu, fallback to phys contig buffers for scanout\n");
+		mmu = NULL;
+	}
+	mdp5_kms->mmu = mmu;
+
+	mdp5_kms->id = msm_register_mmu(dev, mmu);
+	if (mdp5_kms->id < 0) {
+		ret = mdp5_kms->id;
+		dev_err(&pdev->dev, "failed to register mdp5 iommu: %d\n", ret);
 		goto fail;
 	}
 
-	mdp5_kms->vbif = msm_ioremap(pdev, "vbif_phys", "VBIF");
-	if (IS_ERR(mdp5_kms->vbif)) {
-		ret = PTR_ERR(mdp5_kms->vbif);
+	ret = modeset_init(mdp5_kms);
+	if (ret) {
+		dev_err(&pdev->dev, "modeset_init failed: %d\n", ret);
 		goto fail;
 	}
 
-	mdp5_kms->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(mdp5_kms->vdd)) {
-		ret = PTR_ERR(mdp5_kms->vdd);
+	dev->mode_config.min_width = 0;
+	dev->mode_config.min_height = 0;
+	dev->mode_config.max_width = config->hw->lm.max_width;
+	dev->mode_config.max_height = config->hw->lm.max_height;
+
+	dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp;
+	dev->driver->get_scanout_position = mdp5_get_scanoutpos;
+	dev->driver->get_vblank_counter = mdp5_get_vblank_counter;
+	dev->max_vblank_count = 0xffffffff;
+	dev->vblank_disable_immediate = true;
+
+	return kms;
+fail:
+	if (kms)
+		mdp5_kms_destroy(kms);
+	return ERR_PTR(ret);
+}
+
+static void mdp5_destroy(struct platform_device *pdev)
+{
+	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
+
+	if (mdp5_kms->ctlm)
+		mdp5_ctlm_destroy(mdp5_kms->ctlm);
+	if (mdp5_kms->smp)
+		mdp5_smp_destroy(mdp5_kms->smp);
+	if (mdp5_kms->cfg)
+		mdp5_cfg_destroy(mdp5_kms->cfg);
+
+	if (mdp5_kms->rpm_enabled)
+		pm_runtime_disable(&pdev->dev);
+}
+
+static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct mdp5_kms *mdp5_kms;
+	struct mdp5_cfg *config;
+	u32 major, minor;
+	int ret;
+
+	mdp5_kms = devm_kzalloc(&pdev->dev, sizeof(*mdp5_kms), GFP_KERNEL);
+	if (!mdp5_kms) {
+		ret = -ENOMEM;
 		goto fail;
 	}
 
-	ret = regulator_enable(mdp5_kms->vdd);
-	if (ret) {
-		dev_err(dev->dev, "failed to enable regulator vdd: %d\n", ret);
+	platform_set_drvdata(pdev, mdp5_kms);
+
+	spin_lock_init(&mdp5_kms->resource_lock);
+
+	mdp5_kms->dev = dev;
+	mdp5_kms->pdev = pdev;
+
+	mdp5_kms->mmio = msm_ioremap(pdev, "mdp_phys", "MDP5");
+	if (IS_ERR(mdp5_kms->mmio)) {
+		ret = PTR_ERR(mdp5_kms->mmio);
 		goto fail;
 	}
 
@@ -629,9 +709,6 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	ret = get_clk(pdev, &mdp5_kms->ahb_clk, "iface_clk", true);
 	if (ret)
 		goto fail;
-	ret = get_clk(pdev, &mdp5_kms->src_clk, "core_clk_src", true);
-	if (ret)
-		goto fail;
 	ret = get_clk(pdev, &mdp5_kms->core_clk, "core_clk", true);
 	if (ret)
 		goto fail;
@@ -646,9 +723,12 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	 * rate first, then figure out hw revision, and then set a
 	 * more optimal rate:
 	 */
-	clk_set_rate(mdp5_kms->src_clk, 200000000);
+	clk_set_rate(mdp5_kms->core_clk, 200000000);
+
+	pm_runtime_enable(&pdev->dev);
+	mdp5_kms->rpm_enabled = true;
 
-	read_hw_revision(mdp5_kms, &major, &minor);
+	read_mdp_hw_revision(mdp5_kms, &major, &minor);
 
 	mdp5_kms->cfg = mdp5_cfg_init(mdp5_kms, major, minor);
 	if (IS_ERR(mdp5_kms->cfg)) {
@@ -661,7 +741,7 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 	mdp5_kms->caps = config->hw->mdp.caps;
 
 	/* TODO: compute core clock rate at runtime */
-	clk_set_rate(mdp5_kms->src_clk, config->hw->max_clk);
+	clk_set_rate(mdp5_kms->core_clk, config->hw->max_clk);
 
 	/*
 	 * Some chipsets have a Shared Memory Pool (SMP), while others
@@ -684,73 +764,76 @@ struct msm_kms *mdp5_kms_init(struct drm_device *dev)
 		goto fail;
 	}
 
-	/* make sure things are off before attaching iommu (bootloader could
-	 * have left things on, in which case we'll start getting faults if
-	 * we don't disable):
-	 */
-	mdp5_enable(mdp5_kms);
-	for (i = 0; i < MDP5_INTF_NUM_MAX; i++) {
-		if (mdp5_cfg_intf_is_virtual(config->hw->intf.connect[i]) ||
-				!config->hw->intf.base[i])
-			continue;
-		mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(i), 0);
+	/* set uninit-ed kms */
+	priv->kms = &mdp5_kms->base.base;
 
-		mdp5_write(mdp5_kms, REG_MDP5_INTF_FRAME_LINE_COUNT_EN(i), 0x3);
-	}
-	mdp5_disable(mdp5_kms);
-	mdelay(16);
+	return 0;
+fail:
+	mdp5_destroy(pdev);
+	return ret;
+}
 
-	if (config->platform.iommu) {
-		mmu = msm_iommu_new(&pdev->dev, config->platform.iommu);
-		if (IS_ERR(mmu)) {
-			ret = PTR_ERR(mmu);
-			dev_err(dev->dev, "failed to init iommu: %d\n", ret);
-			iommu_domain_free(config->platform.iommu);
-			goto fail;
-		}
+static int mdp5_bind(struct device *dev, struct device *master, void *data)
+{
+	struct drm_device *ddev = dev_get_drvdata(master);
+	struct platform_device *pdev = to_platform_device(dev);
 
-		ret = mmu->funcs->attach(mmu, iommu_ports,
-				ARRAY_SIZE(iommu_ports));
-		if (ret) {
-			dev_err(dev->dev, "failed to attach iommu: %d\n", ret);
-			mmu->funcs->destroy(mmu);
-			goto fail;
-		}
-	} else {
-		dev_info(dev->dev, "no iommu, fallback to phys "
-				"contig buffers for scanout\n");
-		mmu = NULL;
-	}
-	mdp5_kms->mmu = mmu;
+	DBG("");
 
-	mdp5_kms->id = msm_register_mmu(dev, mmu);
-	if (mdp5_kms->id < 0) {
-		ret = mdp5_kms->id;
-		dev_err(dev->dev, "failed to register mdp5 iommu: %d\n", ret);
-		goto fail;
-	}
+	return mdp5_init(pdev, ddev);
+}
 
-	ret = modeset_init(mdp5_kms);
-	if (ret) {
-		dev_err(dev->dev, "modeset_init failed: %d\n", ret);
-		goto fail;
-	}
+static void mdp5_unbind(struct device *dev, struct device *master,
+			void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
 
-	dev->mode_config.min_width = 0;
-	dev->mode_config.min_height = 0;
-	dev->mode_config.max_width = config->hw->lm.max_width;
-	dev->mode_config.max_height = config->hw->lm.max_height;
+	mdp5_destroy(pdev);
+}
 
-	dev->driver->get_vblank_timestamp = mdp5_get_vblank_timestamp;
-	dev->driver->get_scanout_position = mdp5_get_scanoutpos;
-	dev->driver->get_vblank_counter = mdp5_get_vblank_counter;
-	dev->max_vblank_count = 0xffffffff;
-	dev->vblank_disable_immediate = true;
+static const struct component_ops mdp5_ops = {
+	.bind   = mdp5_bind,
+	.unbind = mdp5_unbind,
+};
 
-	return kms;
+static int mdp5_dev_probe(struct platform_device *pdev)
+{
+	DBG("");
+	return component_add(&pdev->dev, &mdp5_ops);
+}
 
-fail:
-	if (kms)
-		mdp5_destroy(kms);
-	return ERR_PTR(ret);
+static int mdp5_dev_remove(struct platform_device *pdev)
+{
+	DBG("");
+	component_del(&pdev->dev, &mdp5_ops);
+	return 0;
+}
+
+static const struct of_device_id mdp5_dt_match[] = {
+	{ .compatible = "qcom,mdp5", },
+	/* to support downstream DT files */
+	{ .compatible = "qcom,mdss_mdp", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mdp5_dt_match);
+
+static struct platform_driver mdp5_driver = {
+	.probe = mdp5_dev_probe,
+	.remove = mdp5_dev_remove,
+	.driver = {
+		.name = "msm_mdp",
+		.of_match_table = mdp5_dt_match,
+	},
+};
+
+void __init msm_mdp_register(void)
+{
+	DBG("");
+	platform_driver_register(&mdp5_driver);
+}
+
+void __exit msm_mdp_unregister(void)
+{
+	DBG("");
+	platform_driver_unregister(&mdp5_driver);
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 9a25898239d3..03738927be10 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -31,6 +31,8 @@ struct mdp5_kms {
 
 	struct drm_device *dev;
 
+	struct platform_device *pdev;
+
 	struct mdp5_cfg_handler *cfg;
 	uint32_t caps;	/* MDP capabilities (MDP_CAP_XXX bits) */
 
@@ -43,29 +45,23 @@ struct mdp5_kms {
 	struct mdp5_ctl_manager *ctlm;
 
 	/* io/register spaces: */
-	void __iomem *mmio, *vbif;
-
-	struct regulator *vdd;
+	void __iomem *mmio;
 
 	struct clk *axi_clk;
 	struct clk *ahb_clk;
-	struct clk *src_clk;
 	struct clk *core_clk;
 	struct clk *lut_clk;
 	struct clk *vsync_clk;
 
 	/*
 	 * lock to protect access to global resources: ie., following register:
-	 *	- REG_MDP5_MDP_DISP_INTF_SEL
+	 *	- REG_MDP5_DISP_INTF_SEL
 	 */
 	spinlock_t resource_lock;
 
-	struct mdp_irq error_handler;
+	bool rpm_enabled;
 
-	struct {
-		volatile unsigned long enabled_mask;
-		struct irq_domain *domain;
-	} irqcontroller;
+	struct mdp_irq error_handler;
 };
 #define to_mdp5_kms(x) container_of(x, struct mdp5_kms, base)
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
new file mode 100644
index 000000000000..d444a6901fff
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mdss.c
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irqdomain.h>
+#include <linux/irq.h>
+
+#include "msm_drv.h"
+#include "mdp5_kms.h"
+
+/*
+ * If needed, this can become more specific: something like struct mdp5_mdss,
+ * which contains a 'struct msm_mdss base' member.
+ */
+struct msm_mdss {
+	struct drm_device *dev;
+
+	void __iomem *mmio, *vbif;
+
+	struct regulator *vdd;
+
+	struct {
+		volatile unsigned long enabled_mask;
+		struct irq_domain *domain;
+	} irqcontroller;
+};
+
+static inline void mdss_write(struct msm_mdss *mdss, u32 reg, u32 data)
+{
+	msm_writel(data, mdss->mmio + reg);
+}
+
+static inline u32 mdss_read(struct msm_mdss *mdss, u32 reg)
+{
+	return msm_readl(mdss->mmio + reg);
+}
+
+static irqreturn_t mdss_irq(int irq, void *arg)
+{
+	struct msm_mdss *mdss = arg;
+	u32 intr;
+
+	intr = mdss_read(mdss, REG_MDSS_HW_INTR_STATUS);
+
+	VERB("intr=%08x", intr);
+
+	while (intr) {
+		irq_hw_number_t hwirq = fls(intr) - 1;
+
+		generic_handle_irq(irq_find_mapping(
+				mdss->irqcontroller.domain, hwirq));
+		intr &= ~(1 << hwirq);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * interrupt-controller implementation, so sub-blocks (MDP/HDMI/eDP/DSI/etc)
+ * can register to get their irq's delivered
+ */
+
+#define VALID_IRQS  (MDSS_HW_INTR_STATUS_INTR_MDP | \
+		MDSS_HW_INTR_STATUS_INTR_DSI0 | \
+		MDSS_HW_INTR_STATUS_INTR_DSI1 | \
+		MDSS_HW_INTR_STATUS_INTR_HDMI | \
+		MDSS_HW_INTR_STATUS_INTR_EDP)
+
+static void mdss_hw_mask_irq(struct irq_data *irqd)
+{
+	struct msm_mdss *mdss = irq_data_get_irq_chip_data(irqd);
+
+	smp_mb__before_atomic();
+	clear_bit(irqd->hwirq, &mdss->irqcontroller.enabled_mask);
+	smp_mb__after_atomic();
+}
+
+static void mdss_hw_unmask_irq(struct irq_data *irqd)
+{
+	struct msm_mdss *mdss = irq_data_get_irq_chip_data(irqd);
+
+	smp_mb__before_atomic();
+	set_bit(irqd->hwirq, &mdss->irqcontroller.enabled_mask);
+	smp_mb__after_atomic();
+}
+
+static struct irq_chip mdss_hw_irq_chip = {
+	.name		= "mdss",
+	.irq_mask	= mdss_hw_mask_irq,
+	.irq_unmask	= mdss_hw_unmask_irq,
+};
+
+static int mdss_hw_irqdomain_map(struct irq_domain *d, unsigned int irq,
+				 irq_hw_number_t hwirq)
+{
+	struct msm_mdss *mdss = d->host_data;
+
+	if (!(VALID_IRQS & (1 << hwirq)))
+		return -EPERM;
+
+	irq_set_chip_and_handler(irq, &mdss_hw_irq_chip, handle_level_irq);
+	irq_set_chip_data(irq, mdss);
+
+	return 0;
+}
+
+static struct irq_domain_ops mdss_hw_irqdomain_ops = {
+	.map = mdss_hw_irqdomain_map,
+	.xlate = irq_domain_xlate_onecell,
+};
+
+
+static int mdss_irq_domain_init(struct msm_mdss *mdss)
+{
+	struct device *dev = mdss->dev->dev;
+	struct irq_domain *d;
+
+	d = irq_domain_add_linear(dev->of_node, 32, &mdss_hw_irqdomain_ops,
+				  mdss);
+	if (!d) {
+		dev_err(dev, "mdss irq domain add failed\n");
+		return -ENXIO;
+	}
+
+	mdss->irqcontroller.enabled_mask = 0;
+	mdss->irqcontroller.domain = d;
+
+	return 0;
+}
+
+void msm_mdss_destroy(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_mdss *mdss = priv->mdss;
+
+	if (!mdss)
+		return;
+
+	irq_domain_remove(mdss->irqcontroller.domain);
+	mdss->irqcontroller.domain = NULL;
+
+	regulator_disable(mdss->vdd);
+
+	pm_runtime_put_sync(dev->dev);
+
+	pm_runtime_disable(dev->dev);
+}
+
+int msm_mdss_init(struct drm_device *dev)
+{
+	struct platform_device *pdev = dev->platformdev;
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_mdss *mdss;
+	int ret;
+
+	DBG("");
+
+	if (!of_device_is_compatible(dev->dev->of_node, "qcom,mdss"))
+		return 0;
+
+	mdss = devm_kzalloc(dev->dev, sizeof(*mdss), GFP_KERNEL);
+	if (!mdss) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	mdss->dev = dev;
+
+	mdss->mmio = msm_ioremap(pdev, "mdss_phys", "MDSS");
+	if (IS_ERR(mdss->mmio)) {
+		ret = PTR_ERR(mdss->mmio);
+		goto fail;
+	}
+
+	mdss->vbif = msm_ioremap(pdev, "vbif_phys", "VBIF");
+	if (IS_ERR(mdss->vbif)) {
+		ret = PTR_ERR(mdss->vbif);
+		goto fail;
+	}
+
+	/* Regulator to enable GDSCs in downstream kernels */
+	mdss->vdd = devm_regulator_get(dev->dev, "vdd");
+	if (IS_ERR(mdss->vdd)) {
+		ret = PTR_ERR(mdss->vdd);
+		goto fail;
+	}
+
+	ret = regulator_enable(mdss->vdd);
+	if (ret) {
+		dev_err(dev->dev, "failed to enable regulator vdd: %d\n",
+			ret);
+		goto fail;
+	}
+
+	ret = devm_request_irq(dev->dev, platform_get_irq(pdev, 0),
+			       mdss_irq, 0, "mdss_isr", mdss);
+	if (ret) {
+		dev_err(dev->dev, "failed to init irq: %d\n", ret);
+		goto fail_irq;
+	}
+
+	ret = mdss_irq_domain_init(mdss);
+	if (ret) {
+		dev_err(dev->dev, "failed to init sub-block irqs: %d\n", ret);
+		goto fail_irq;
+	}
+
+	priv->mdss = mdss;
+
+	pm_runtime_enable(dev->dev);
+
+	/*
+	 * TODO: This is needed as the MDSS GDSC is only tied to MDSS's power
+	 * domain. Remove this once runtime PM is adapted for all the devices.
+	 */
+	pm_runtime_get_sync(dev->dev);
+
+	return 0;
+fail_irq:
+	regulator_disable(mdss->vdd);
+fail:
+	return ret;
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
index 6f425c25d9fe..27d7b55b52c9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_smp.c
@@ -42,7 +42,7 @@
  *
  *     configured:
  *     The block is allocated to some client, and assigned to that
- *     client in MDP5_MDP_SMP_ALLOC registers.
+ *     client in MDP5_SMP_ALLOC registers.
  *
  *     inuse:
  *     The block is being actively used by a client.
@@ -59,7 +59,7 @@
  *     mdp5_smp_commit.
  *
  *  2) mdp5_smp_configure():
- *     As hw is programmed, before FLUSH, MDP5_MDP_SMP_ALLOC registers
+ *     As hw is programmed, before FLUSH, MDP5_SMP_ALLOC registers
  *     are configured for the union(pending, inuse)
  *     Current pending is copied to configured.
  *     It is assumed that mdp5_smp_request and mdp5_smp_configure not run
@@ -311,25 +311,25 @@ static void update_smp_state(struct mdp5_smp *smp,
 		int idx = blk / 3;
 		int fld = blk % 3;
 
-		val = mdp5_read(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_W_REG(0, idx));
+		val = mdp5_read(mdp5_kms, REG_MDP5_SMP_ALLOC_W_REG(idx));
 
 		switch (fld) {
 		case 0:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT0(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT0__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT0(cid);
 			break;
 		case 1:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT1(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT1__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT1(cid);
 			break;
 		case 2:
-			val &= ~MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2__MASK;
-			val |= MDP5_MDP_SMP_ALLOC_W_REG_CLIENT2(cid);
+			val &= ~MDP5_SMP_ALLOC_W_REG_CLIENT2__MASK;
+			val |= MDP5_SMP_ALLOC_W_REG_CLIENT2(cid);
 			break;
 		}
 
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_W_REG(0, idx), val);
-		mdp5_write(mdp5_kms, REG_MDP5_MDP_SMP_ALLOC_R_REG(0, idx), val);
+		mdp5_write(mdp5_kms, REG_MDP5_SMP_ALLOC_W_REG(idx), val);
+		mdp5_write(mdp5_kms, REG_MDP5_SMP_ALLOC_R_REG(idx), val);
 	}
 }
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index a02dc2b27739..26f859ec24b3 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -21,6 +21,16 @@
 #include "msm_gpu.h"
 #include "msm_kms.h"
 
+
+/*
+ * MSM driver version:
+ * - 1.0.0 - initial interface
+ * - 1.1.0 - adds madvise, and support for submits with > 4 cmd buffers
+ */
+#define MSM_VERSION_MAJOR	1
+#define MSM_VERSION_MINOR	1
+#define MSM_VERSION_PATCHLEVEL	0
+
 static void msm_fb_output_poll_changed(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -195,6 +205,8 @@ static int msm_drm_uninit(struct device *dev)
 		kfree(vbl_ev);
 	}
 
+	msm_gem_shrinker_cleanup(ddev);
+
 	drm_kms_helper_poll_fini(ddev);
 
 	drm_dev_unregister(ddev);
@@ -215,10 +227,8 @@ static int msm_drm_uninit(struct device *dev)
 	flush_workqueue(priv->atomic_wq);
 	destroy_workqueue(priv->atomic_wq);
 
-	if (kms) {
-		pm_runtime_disable(dev);
+	if (kms)
 		kms->funcs->destroy(kms);
-	}
 
 	if (gpu) {
 		mutex_lock(&ddev->struct_mutex);
@@ -237,6 +247,8 @@ static int msm_drm_uninit(struct device *dev)
 
 	component_unbind_all(dev, ddev);
 
+	msm_mdss_destroy(ddev);
+
 	ddev->dev_private = NULL;
 	drm_dev_unref(ddev);
 
@@ -282,6 +294,7 @@ static int msm_init_vram(struct drm_device *dev)
 	if (node) {
 		struct resource r;
 		ret = of_address_to_resource(node, 0, &r);
+		of_node_put(node);
 		if (ret)
 			return ret;
 		size = r.end - r.start;
@@ -350,6 +363,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	}
 
 	ddev->dev_private = priv;
+	priv->dev = ddev;
+
+	ret = msm_mdss_init(ddev);
+	if (ret) {
+		kfree(priv);
+		drm_dev_unref(ddev);
+		return ret;
+	}
 
 	priv->wq = alloc_ordered_workqueue("msm", 0);
 	priv->atomic_wq = alloc_ordered_workqueue("msm:atomic", 0);
@@ -365,6 +386,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	/* Bind all our sub-components: */
 	ret = component_bind_all(dev, ddev);
 	if (ret) {
+		msm_mdss_destroy(ddev);
 		kfree(priv);
 		drm_dev_unref(ddev);
 		return ret;
@@ -374,9 +396,12 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 	if (ret)
 		goto fail;
 
+	msm_gem_shrinker_init(ddev);
+
 	switch (get_mdp_ver(pdev)) {
 	case 4:
 		kms = mdp4_kms_init(ddev);
+		priv->kms = kms;
 		break;
 	case 5:
 		kms = mdp5_kms_init(ddev);
@@ -398,10 +423,7 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		goto fail;
 	}
 
-	priv->kms = kms;
-
 	if (kms) {
-		pm_runtime_enable(dev);
 		ret = kms->funcs->hw_init(kms);
 		if (ret) {
 			dev_err(dev, "kms hw init failed: %d\n", ret);
@@ -417,12 +439,14 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		goto fail;
 	}
 
-	pm_runtime_get_sync(dev);
-	ret = drm_irq_install(ddev, platform_get_irq(pdev, 0));
-	pm_runtime_put_sync(dev);
-	if (ret < 0) {
-		dev_err(dev, "failed to install IRQ handler\n");
-		goto fail;
+	if (kms) {
+		pm_runtime_get_sync(dev);
+		ret = drm_irq_install(ddev, kms->irq);
+		pm_runtime_put_sync(dev);
+		if (ret < 0) {
+			dev_err(dev, "failed to install IRQ handler\n");
+			goto fail;
+		}
 	}
 
 	ret = drm_dev_register(ddev, 0);
@@ -682,6 +706,44 @@ static int msm_ioctl_wait_fence(struct drm_device *dev, void *data,
 	return msm_wait_fence(priv->gpu->fctx, args->fence, &timeout, true);
 }
 
+static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
+		struct drm_file *file)
+{
+	struct drm_msm_gem_madvise *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	switch (args->madv) {
+	case MSM_MADV_DONTNEED:
+	case MSM_MADV_WILLNEED:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+
+	obj = drm_gem_object_lookup(file, args->handle);
+	if (!obj) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	ret = msm_gem_madvise(obj, args->madv);
+	if (ret >= 0) {
+		args->retained = ret;
+		ret = 0;
+	}
+
+	drm_gem_object_unreference(obj);
+
+unlock:
+	mutex_unlock(&dev->struct_mutex);
+	return ret;
+}
+
 static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GET_PARAM,    msm_ioctl_get_param,    DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_GEM_NEW,      msm_ioctl_gem_new,      DRM_AUTH|DRM_RENDER_ALLOW),
@@ -690,6 +752,7 @@ static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GEM_CPU_FINI, msm_ioctl_gem_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_GEM_SUBMIT,   msm_ioctl_gem_submit,   DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_WAIT_FENCE,   msm_ioctl_wait_fence,   DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(MSM_GEM_MADVISE,  msm_ioctl_gem_madvise,  DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
 static const struct vm_operations_struct vm_ops = {
@@ -755,8 +818,9 @@ static struct drm_driver msm_driver = {
 	.name               = "msm",
 	.desc               = "MSM Snapdragon DRM",
 	.date               = "20130625",
-	.major              = 1,
-	.minor              = 0,
+	.major              = MSM_VERSION_MAJOR,
+	.minor              = MSM_VERSION_MINOR,
+	.patchlevel         = MSM_VERSION_PATCHLEVEL,
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -796,22 +860,146 @@ static int compare_of(struct device *dev, void *data)
 	return dev->of_node == data;
 }
 
-static int add_components(struct device *dev, struct component_match **matchptr,
-		const char *name)
+/*
+ * Identify what components need to be added by parsing what remote-endpoints
+ * our MDP output ports are connected to. In the case of LVDS on MDP4, there
+ * is no external component that we need to add since LVDS is within MDP4
+ * itself.
+ */
+static int add_components_mdp(struct device *mdp_dev,
+			      struct component_match **matchptr)
+{
+	struct device_node *np = mdp_dev->of_node;
+	struct device_node *ep_node;
+	struct device *master_dev;
+
+	/*
+	 * on MDP4 based platforms, the MDP platform device is the component
+	 * master that adds other display interface components to itself.
+	 *
+	 * on MDP5 based platforms, the MDSS platform device is the component
+	 * master that adds MDP5 and other display interface components to
+	 * itself.
+	 */
+	if (of_device_is_compatible(np, "qcom,mdp4"))
+		master_dev = mdp_dev;
+	else
+		master_dev = mdp_dev->parent;
+
+	for_each_endpoint_of_node(np, ep_node) {
+		struct device_node *intf;
+		struct of_endpoint ep;
+		int ret;
+
+		ret = of_graph_parse_endpoint(ep_node, &ep);
+		if (ret) {
+			dev_err(mdp_dev, "unable to parse port endpoint\n");
+			of_node_put(ep_node);
+			return ret;
+		}
+
+		/*
+		 * The LCDC/LVDS port on MDP4 is a speacial case where the
+		 * remote-endpoint isn't a component that we need to add
+		 */
+		if (of_device_is_compatible(np, "qcom,mdp4") &&
+		    ep.port == 0) {
+			of_node_put(ep_node);
+			continue;
+		}
+
+		/*
+		 * It's okay if some of the ports don't have a remote endpoint
+		 * specified. It just means that the port isn't connected to
+		 * any external interface.
+		 */
+		intf = of_graph_get_remote_port_parent(ep_node);
+		if (!intf) {
+			of_node_put(ep_node);
+			continue;
+		}
+
+		component_match_add(master_dev, matchptr, compare_of, intf);
+
+		of_node_put(intf);
+		of_node_put(ep_node);
+	}
+
+	return 0;
+}
+
+static int compare_name_mdp(struct device *dev, void *data)
 {
-	struct device_node *np = dev->of_node;
-	unsigned i;
+	return (strstr(dev_name(dev), "mdp") != NULL);
+}
+
+static int add_display_components(struct device *dev,
+				  struct component_match **matchptr)
+{
+	struct device *mdp_dev;
+	int ret;
+
+	/*
+	 * MDP5 based devices don't have a flat hierarchy. There is a top level
+	 * parent: MDSS, and children: MDP5, DSI, HDMI, eDP etc. Populate the
+	 * children devices, find the MDP5 node, and then add the interfaces
+	 * to our components list.
+	 */
+	if (of_device_is_compatible(dev->of_node, "qcom,mdss")) {
+		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
+		if (ret) {
+			dev_err(dev, "failed to populate children devices\n");
+			return ret;
+		}
 
-	for (i = 0; ; i++) {
-		struct device_node *node;
+		mdp_dev = device_find_child(dev, NULL, compare_name_mdp);
+		if (!mdp_dev) {
+			dev_err(dev, "failed to find MDSS MDP node\n");
+			of_platform_depopulate(dev);
+			return -ENODEV;
+		}
 
-		node = of_parse_phandle(np, name, i);
-		if (!node)
-			break;
+		put_device(mdp_dev);
 
-		component_match_add(dev, matchptr, compare_of, node);
+		/* add the MDP component itself */
+		component_match_add(dev, matchptr, compare_of,
+				    mdp_dev->of_node);
+	} else {
+		/* MDP4 */
+		mdp_dev = dev;
 	}
 
+	ret = add_components_mdp(mdp_dev, matchptr);
+	if (ret)
+		of_platform_depopulate(dev);
+
+	return ret;
+}
+
+/*
+ * We don't know what's the best binding to link the gpu with the drm device.
+ * Fow now, we just hunt for all the possible gpus that we support, and add them
+ * as components.
+ */
+static const struct of_device_id msm_gpu_match[] = {
+	{ .compatible = "qcom,adreno-3xx" },
+	{ .compatible = "qcom,kgsl-3d0" },
+	{ },
+};
+
+static int add_gpu_components(struct device *dev,
+			      struct component_match **matchptr)
+{
+	struct device_node *np;
+
+	np = of_find_matching_node(NULL, msm_gpu_match);
+	if (!np)
+		return 0;
+
+	component_match_add(dev, matchptr, compare_of, np);
+
+	of_node_put(np);
+
 	return 0;
 }
 
@@ -837,9 +1025,15 @@ static const struct component_master_ops msm_drm_ops = {
 static int msm_pdev_probe(struct platform_device *pdev)
 {
 	struct component_match *match = NULL;
+	int ret;
 
-	add_components(&pdev->dev, &match, "connectors");
-	add_components(&pdev->dev, &match, "gpus");
+	ret = add_display_components(&pdev->dev, &match);
+	if (ret)
+		return ret;
+
+	ret = add_gpu_components(&pdev->dev, &match);
+	if (ret)
+		return ret;
 
 	pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32);
 	return component_master_add_with_match(&pdev->dev, &msm_drm_ops, match);
@@ -848,20 +1042,14 @@ static int msm_pdev_probe(struct platform_device *pdev)
 static int msm_pdev_remove(struct platform_device *pdev)
 {
 	component_master_del(&pdev->dev, &msm_drm_ops);
+	of_platform_depopulate(&pdev->dev);
 
 	return 0;
 }
 
-static const struct platform_device_id msm_id[] = {
-	{ "mdp", 0 },
-	{ }
-};
-
 static const struct of_device_id dt_match[] = {
-	{ .compatible = "qcom,mdp4", .data = (void *) 4 },	/* mdp4 */
-	{ .compatible = "qcom,mdp5", .data = (void *) 5 },	/* mdp5 */
-	/* to support downstream DT files */
-	{ .compatible = "qcom,mdss_mdp", .data = (void *) 5 },  /* mdp5 */
+	{ .compatible = "qcom,mdp4", .data = (void *)4 },	/* MDP4 */
+	{ .compatible = "qcom,mdss", .data = (void *)5 },	/* MDP5 MDSS */
 	{}
 };
 MODULE_DEVICE_TABLE(of, dt_match);
@@ -874,12 +1062,12 @@ static struct platform_driver msm_platform_driver = {
 		.of_match_table = dt_match,
 		.pm     = &msm_pm_ops,
 	},
-	.id_table   = msm_id,
 };
 
 static int __init msm_drm_register(void)
 {
 	DBG("init");
+	msm_mdp_register();
 	msm_dsi_register();
 	msm_edp_register();
 	msm_hdmi_register();
@@ -895,6 +1083,7 @@ static void __exit msm_drm_unregister(void)
 	adreno_unregister();
 	msm_edp_unregister();
 	msm_dsi_unregister();
+	msm_mdp_unregister();
 }
 
 module_init(msm_drm_register);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 5b2963f32291..b4bc7f1ef717 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -46,6 +46,7 @@
 struct msm_kms;
 struct msm_gpu;
 struct msm_mmu;
+struct msm_mdss;
 struct msm_rd_state;
 struct msm_perf_state;
 struct msm_gem_submit;
@@ -77,11 +78,16 @@ struct msm_vblank_ctrl {
 
 struct msm_drm_private {
 
+	struct drm_device *dev;
+
 	struct msm_kms *kms;
 
 	/* subordinate devices, if present: */
 	struct platform_device *gpu_pdev;
 
+	/* top level MDSS wrapper device (for MDP5 only) */
+	struct msm_mdss *mdss;
+
 	/* possibly this should be in the kms component, but it is
 	 * shared by both mdp4 and mdp5..
 	 */
@@ -147,6 +153,9 @@ struct msm_drm_private {
 		struct drm_mm mm;
 	} vram;
 
+	struct notifier_block vmap_notifier;
+	struct shrinker shrinker;
+
 	struct msm_vblank_ctrl vblank_ctrl;
 };
 
@@ -165,6 +174,9 @@ void msm_gem_submit_free(struct msm_gem_submit *submit);
 int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file);
 
+void msm_gem_shrinker_init(struct drm_device *dev);
+void msm_gem_shrinker_cleanup(struct drm_device *dev);
+
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
 			struct vm_area_struct *vma);
 int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
@@ -189,8 +201,13 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev,
 		struct dma_buf_attachment *attach, struct sg_table *sg);
 int msm_gem_prime_pin(struct drm_gem_object *obj);
 void msm_gem_prime_unpin(struct drm_gem_object *obj);
-void *msm_gem_vaddr_locked(struct drm_gem_object *obj);
-void *msm_gem_vaddr(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj);
+void *msm_gem_get_vaddr(struct drm_gem_object *obj);
+void msm_gem_put_vaddr_locked(struct drm_gem_object *obj);
+void msm_gem_put_vaddr(struct drm_gem_object *obj);
+int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv);
+void msm_gem_purge(struct drm_gem_object *obj);
+void msm_gem_vunmap(struct drm_gem_object *obj);
 int msm_gem_sync_object(struct drm_gem_object *obj,
 		struct msm_fence_context *fctx, bool exclusive);
 void msm_gem_move_to_active(struct drm_gem_object *obj,
@@ -257,6 +274,9 @@ static inline int msm_dsi_modeset_init(struct msm_dsi *msm_dsi,
 }
 #endif
 
+void __init msm_mdp_register(void);
+void __exit msm_mdp_unregister(void);
+
 #ifdef CONFIG_DEBUG_FS
 void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m);
 void msm_gem_describe_objects(struct list_head *list, struct seq_file *m);
diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c
index 7919c24c6ddd..95cf8fe72ee5 100644
--- a/drivers/gpu/drm/msm/msm_fb.c
+++ b/drivers/gpu/drm/msm/msm_fb.c
@@ -49,8 +49,8 @@ static void msm_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	for (i = 0; i < n; i++) {
 		struct drm_gem_object *bo = msm_fb->planes[i];
-		if (bo)
-			drm_gem_object_unreference_unlocked(bo);
+
+		drm_gem_object_unreference_unlocked(bo);
 	}
 
 	kfree(msm_fb);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 1a061e3e8b9e..ffd4a338ca12 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -158,7 +158,11 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 
 	dev->mode_config.fb_base = paddr;
 
-	fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo);
+	fbi->screen_base = msm_gem_get_vaddr_locked(fbdev->bo);
+	if (IS_ERR(fbi->screen_base)) {
+		ret = PTR_ERR(fbi->screen_base);
+		goto fail_unlock;
+	}
 	fbi->screen_size = fbdev->bo->size;
 	fbi->fix.smem_start = paddr;
 	fbi->fix.smem_len = fbdev->bo->size;
@@ -247,6 +251,7 @@ void msm_fbdev_free(struct drm_device *dev)
 
 	/* this will free the backing object */
 	if (fbdev->fb) {
+		msm_gem_put_vaddr(fbdev->bo);
 		drm_framebuffer_unregister_private(fbdev->fb);
 		drm_framebuffer_remove(fbdev->fb);
 	}
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 7daf4054dd2b..6cd4af443139 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -276,6 +276,26 @@ uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj)
 	return offset;
 }
 
+static void
+put_iova(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_drm_private *priv = obj->dev->dev_private;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	int id;
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
+	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
+		struct msm_mmu *mmu = priv->mmus[id];
+		if (mmu && msm_obj->domain[id].iova) {
+			uint32_t offset = msm_obj->domain[id].iova;
+			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
+			msm_obj->domain[id].iova = 0;
+		}
+	}
+}
+
 /* should be called under struct_mutex.. although it can be called
  * from atomic context without struct_mutex to acquire an extra
  * iova ref if you know one is already held.
@@ -388,7 +408,7 @@ fail:
 	return ret;
 }
 
-void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
+void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
 	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
@@ -398,19 +418,94 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
 			return ERR_CAST(pages);
 		msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
 				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+		if (msm_obj->vaddr == NULL)
+			return ERR_PTR(-ENOMEM);
 	}
+	msm_obj->vmap_count++;
 	return msm_obj->vaddr;
 }
 
-void *msm_gem_vaddr(struct drm_gem_object *obj)
+void *msm_gem_get_vaddr(struct drm_gem_object *obj)
 {
 	void *ret;
 	mutex_lock(&obj->dev->struct_mutex);
-	ret = msm_gem_vaddr_locked(obj);
+	ret = msm_gem_get_vaddr_locked(obj);
 	mutex_unlock(&obj->dev->struct_mutex);
 	return ret;
 }
 
+void msm_gem_put_vaddr_locked(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
+	WARN_ON(msm_obj->vmap_count < 1);
+	msm_obj->vmap_count--;
+}
+
+void msm_gem_put_vaddr(struct drm_gem_object *obj)
+{
+	mutex_lock(&obj->dev->struct_mutex);
+	msm_gem_put_vaddr_locked(obj);
+	mutex_unlock(&obj->dev->struct_mutex);
+}
+
+/* Update madvise status, returns true if not purged, else
+ * false or -errno.
+ */
+int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
+
+	if (msm_obj->madv != __MSM_MADV_PURGED)
+		msm_obj->madv = madv;
+
+	return (msm_obj->madv != __MSM_MADV_PURGED);
+}
+
+void msm_gem_purge(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+	WARN_ON(!is_purgeable(msm_obj));
+	WARN_ON(obj->import_attach);
+
+	put_iova(obj);
+
+	msm_gem_vunmap(obj);
+
+	put_pages(obj);
+
+	msm_obj->madv = __MSM_MADV_PURGED;
+
+	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+	drm_gem_free_mmap_offset(obj);
+
+	/* Our goal here is to return as much of the memory as
+	 * is possible back to the system as we are called from OOM.
+	 * To do this we must instruct the shmfs to drop all of its
+	 * backing pages, *now*.
+	 */
+	shmem_truncate_range(file_inode(obj->filp), 0, (loff_t)-1);
+
+	invalidate_mapping_pages(file_inode(obj->filp)->i_mapping,
+			0, (loff_t)-1);
+}
+
+void msm_gem_vunmap(struct drm_gem_object *obj)
+{
+	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+	if (!msm_obj->vaddr || WARN_ON(!is_vunmapable(msm_obj)))
+		return;
+
+	vunmap(msm_obj->vaddr);
+	msm_obj->vaddr = NULL;
+}
+
 /* must be called before _move_to_active().. */
 int msm_gem_sync_object(struct drm_gem_object *obj,
 		struct msm_fence_context *fctx, bool exclusive)
@@ -462,6 +557,7 @@ void msm_gem_move_to_active(struct drm_gem_object *obj,
 		struct msm_gpu *gpu, bool exclusive, struct fence *fence)
 {
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
+	WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
 	msm_obj->gpu = gpu;
 	if (exclusive)
 		reservation_object_add_excl_fence(msm_obj->resv, fence);
@@ -530,13 +626,27 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	struct reservation_object_list *fobj;
 	struct fence *fence;
 	uint64_t off = drm_vma_node_start(&obj->vma_node);
+	const char *madv;
 
 	WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
 
-	seq_printf(m, "%08x: %c %2d (%2d) %08llx %p %zu\n",
+	switch (msm_obj->madv) {
+	case __MSM_MADV_PURGED:
+		madv = " purged";
+		break;
+	case MSM_MADV_DONTNEED:
+		madv = " purgeable";
+		break;
+	case MSM_MADV_WILLNEED:
+	default:
+		madv = "";
+		break;
+	}
+
+	seq_printf(m, "%08x: %c %2d (%2d) %08llx %p %zu%s\n",
 			msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
 			obj->name, obj->refcount.refcount.counter,
-			off, msm_obj->vaddr, obj->size);
+			off, msm_obj->vaddr, obj->size, madv);
 
 	rcu_read_lock();
 	fobj = rcu_dereference(robj->fence);
@@ -576,9 +686,7 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m)
 void msm_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_device *dev = obj->dev;
-	struct msm_drm_private *priv = obj->dev->dev_private;
 	struct msm_gem_object *msm_obj = to_msm_bo(obj);
-	int id;
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
@@ -587,13 +695,7 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 
 	list_del(&msm_obj->mm_list);
 
-	for (id = 0; id < ARRAY_SIZE(msm_obj->domain); id++) {
-		struct msm_mmu *mmu = priv->mmus[id];
-		if (mmu && msm_obj->domain[id].iova) {
-			uint32_t offset = msm_obj->domain[id].iova;
-			mmu->funcs->unmap(mmu, offset, msm_obj->sgt, obj->size);
-		}
-	}
+	put_iova(obj);
 
 	if (obj->import_attach) {
 		if (msm_obj->vaddr)
@@ -607,7 +709,7 @@ void msm_gem_free_object(struct drm_gem_object *obj)
 
 		drm_prime_gem_destroy(obj, msm_obj->sgt);
 	} else {
-		vunmap(msm_obj->vaddr);
+		msm_gem_vunmap(obj);
 		put_pages(obj);
 	}
 
@@ -686,6 +788,7 @@ static int msm_gem_new_impl(struct drm_device *dev,
 		msm_obj->vram_node = (void *)&msm_obj[1];
 
 	msm_obj->flags = flags;
+	msm_obj->madv = MSM_MADV_WILLNEED;
 
 	if (resv) {
 		msm_obj->resv = resv;
@@ -727,9 +830,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference(obj);
-
+	drm_gem_object_unreference(obj);
 	return ERR_PTR(ret);
 }
 
@@ -772,8 +873,6 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
 	return obj;
 
 fail:
-	if (obj)
-		drm_gem_object_unreference_unlocked(obj);
-
+	drm_gem_object_unreference_unlocked(obj);
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 9facd4b6ffd9..b2f13cfe945e 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -29,6 +29,16 @@ struct msm_gem_object {
 
 	uint32_t flags;
 
+	/**
+	 * Advice: are the backing pages purgeable?
+	 */
+	uint8_t madv;
+
+	/**
+	 * count of active vmap'ing
+	 */
+	uint8_t vmap_count;
+
 	/* And object is either:
 	 *  inactive - on priv->inactive_list
 	 *  active   - on one one of the gpu's active_list..  well, at
@@ -72,7 +82,16 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
 	return msm_obj->gpu != NULL;
 }
 
-#define MAX_CMDS 4
+static inline bool is_purgeable(struct msm_gem_object *msm_obj)
+{
+	return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt &&
+			!msm_obj->base.dma_buf && !msm_obj->base.import_attach;
+}
+
+static inline bool is_vunmapable(struct msm_gem_object *msm_obj)
+{
+	return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
+}
 
 /* Created per submit-ioctl, to track bo's and cmdstream bufs, etc,
  * associated with the cmdstream submission for synchronization (and
@@ -95,7 +114,7 @@ struct msm_gem_submit {
 		uint32_t size;  /* in dwords */
 		uint32_t iova;
 		uint32_t idx;   /* cmdstream buffer idx in bos[] */
-	} cmd[MAX_CMDS];
+	} *cmd;  /* array of size nr_cmds */
 	struct {
 		uint32_t flags;
 		struct msm_gem_object *obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_prime.c b/drivers/gpu/drm/msm/msm_gem_prime.c
index 6b90890faffe..60bb290700ce 100644
--- a/drivers/gpu/drm/msm/msm_gem_prime.c
+++ b/drivers/gpu/drm/msm/msm_gem_prime.c
@@ -33,12 +33,12 @@ struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
 
 void *msm_gem_prime_vmap(struct drm_gem_object *obj)
 {
-	return msm_gem_vaddr(obj);
+	return msm_gem_get_vaddr(obj);
 }
 
 void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 {
-	/* TODO msm_gem_vunmap() */
+	msm_gem_put_vaddr(obj);
 }
 
 int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
new file mode 100644
index 000000000000..283d2841ba58
--- /dev/null
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2016 Red Hat
+ * Author: Rob Clark <robdclark@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "msm_drv.h"
+#include "msm_gem.h"
+
+static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
+{
+	if (!mutex_is_locked(mutex))
+		return false;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_MUTEXES)
+	return mutex->owner == task;
+#else
+	/* Since UP may be pre-empted, we cannot assume that we own the lock */
+	return false;
+#endif
+}
+
+static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
+{
+	if (!mutex_trylock(&dev->struct_mutex)) {
+		if (!mutex_is_locked_by(&dev->struct_mutex, current))
+			return false;
+		*unlock = false;
+	} else {
+		*unlock = true;
+	}
+
+	return true;
+}
+
+
+static unsigned long
+msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct msm_drm_private *priv =
+		container_of(shrinker, struct msm_drm_private, shrinker);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned long count = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return 0;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (is_purgeable(msm_obj))
+			count += msm_obj->base.size >> PAGE_SHIFT;
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	return count;
+}
+
+static unsigned long
+msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+	struct msm_drm_private *priv =
+		container_of(shrinker, struct msm_drm_private, shrinker);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned long freed = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return SHRINK_STOP;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (freed >= sc->nr_to_scan)
+			break;
+		if (is_purgeable(msm_obj)) {
+			msm_gem_purge(&msm_obj->base);
+			freed += msm_obj->base.size >> PAGE_SHIFT;
+		}
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	if (freed > 0)
+		pr_info_ratelimited("Purging %lu bytes\n", freed << PAGE_SHIFT);
+
+	return freed;
+}
+
+static int
+msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+	struct msm_drm_private *priv =
+		container_of(nb, struct msm_drm_private, vmap_notifier);
+	struct drm_device *dev = priv->dev;
+	struct msm_gem_object *msm_obj;
+	unsigned unmapped = 0;
+	bool unlock;
+
+	if (!msm_gem_shrinker_lock(dev, &unlock))
+		return NOTIFY_DONE;
+
+	list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
+		if (is_vunmapable(msm_obj)) {
+			msm_gem_vunmap(&msm_obj->base);
+			/* since we don't know any better, lets bail after a few
+			 * and if necessary the shrinker will be invoked again.
+			 * Seems better than unmapping *everything*
+			 */
+			if (++unmapped >= 15)
+				break;
+		}
+	}
+
+	if (unlock)
+		mutex_unlock(&dev->struct_mutex);
+
+	*(unsigned long *)ptr += unmapped;
+
+	if (unmapped > 0)
+		pr_info_ratelimited("Purging %u vmaps\n", unmapped);
+
+	return NOTIFY_DONE;
+}
+
+/**
+ * msm_gem_shrinker_init - Initialize msm shrinker
+ * @dev_priv: msm device
+ *
+ * This function registers and sets up the msm shrinker.
+ */
+void msm_gem_shrinker_init(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	priv->shrinker.count_objects = msm_gem_shrinker_count;
+	priv->shrinker.scan_objects = msm_gem_shrinker_scan;
+	priv->shrinker.seeks = DEFAULT_SEEKS;
+	WARN_ON(register_shrinker(&priv->shrinker));
+
+	priv->vmap_notifier.notifier_call = msm_gem_shrinker_vmap;
+	WARN_ON(register_vmap_purge_notifier(&priv->vmap_notifier));
+}
+
+/**
+ * msm_gem_shrinker_cleanup - Clean up msm shrinker
+ * @dev_priv: msm device
+ *
+ * This function unregisters the msm shrinker.
+ */
+void msm_gem_shrinker_cleanup(struct drm_device *dev)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	WARN_ON(unregister_vmap_purge_notifier(&priv->vmap_notifier));
+	unregister_shrinker(&priv->shrinker);
+}
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b89ca5174863..9766f9ae4b7d 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -29,10 +29,11 @@
 #define BO_PINNED   0x2000
 
 static struct msm_gem_submit *submit_create(struct drm_device *dev,
-		struct msm_gpu *gpu, int nr)
+		struct msm_gpu *gpu, int nr_bos, int nr_cmds)
 {
 	struct msm_gem_submit *submit;
-	int sz = sizeof(*submit) + (nr * sizeof(submit->bos[0]));
+	int sz = sizeof(*submit) + (nr_bos * sizeof(submit->bos[0])) +
+			(nr_cmds * sizeof(*submit->cmd));
 
 	submit = kmalloc(sz, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
 	if (!submit)
@@ -40,12 +41,15 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 	submit->dev = dev;
 	submit->gpu = gpu;
+	submit->fence = NULL;
 	submit->pid = get_pid(task_pid(current));
+	submit->cmd = (void *)&submit->bos[nr_bos];
 
 	/* initially, until copy_from_user() and bo lookup succeeds: */
 	submit->nr_bos = 0;
 	submit->nr_cmds = 0;
 
+	INIT_LIST_HEAD(&submit->node);
 	INIT_LIST_HEAD(&submit->bo_list);
 	ww_acquire_init(&submit->ticket, &reservation_ww_class);
 
@@ -75,6 +79,11 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		void __user *userptr =
 			u64_to_user_ptr(args->bos + (i * sizeof(submit_bo)));
 
+		/* make sure we don't have garbage flags, in case we hit
+		 * error path before flags is initialized:
+		 */
+		submit->bos[i].flags = 0;
+
 		ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
 		if (ret) {
 			ret = -EFAULT;
@@ -272,7 +281,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	/* For now, just map the entire thing.  Eventually we probably
 	 * to do it page-by-page, w/ kmap() if not vmap()d..
 	 */
-	ptr = msm_gem_vaddr_locked(&obj->base);
+	ptr = msm_gem_get_vaddr_locked(&obj->base);
 
 	if (IS_ERR(ptr)) {
 		ret = PTR_ERR(ptr);
@@ -325,6 +334,8 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		last_offset = off;
 	}
 
+	msm_gem_put_vaddr_locked(&obj->base);
+
 	return 0;
 }
 
@@ -362,14 +373,15 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (args->pipe != MSM_PIPE_3D0)
 		return -EINVAL;
 
-	if (args->nr_cmds > MAX_CMDS)
-		return -EINVAL;
-
-	submit = submit_create(dev, gpu, args->nr_bos);
-	if (!submit)
-		return -ENOMEM;
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
 
-	mutex_lock(&dev->struct_mutex);
+	submit = submit_create(dev, gpu, args->nr_bos, args->nr_cmds);
+	if (!submit) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
 
 	ret = submit_lookup_objects(submit, args, file);
 	if (ret)
@@ -455,6 +467,7 @@ out:
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
+out_unlock:
 	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index a7a0b6d9b057..3a294d0da3a0 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -59,10 +59,10 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint32_t iova,
 		return -EINVAL;
 
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		u32 pa = sg_phys(sg) - sg->offset;
+		dma_addr_t pa = sg_phys(sg) - sg->offset;
 		size_t bytes = sg->length + sg->offset;
 
-		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
+		VERB("map[%d]: %08x %08lx(%zx)", i, da, (unsigned long)pa, bytes);
 
 		ret = iommu_map(domain, da, pa, bytes, prot);
 		if (ret)
@@ -101,7 +101,7 @@ static int msm_iommu_unmap(struct msm_mmu *mmu, uint32_t iova,
 		if (unmapped < bytes)
 			return unmapped;
 
-		VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);
+		VERB("unmap[%d]: %08x(%zx)", i, da, bytes);
 
 		BUG_ON(!PAGE_ALIGNED(bytes));
 
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index e32222c3d44f..40e41e5cdbc6 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -61,10 +61,8 @@ struct msm_kms_funcs {
 struct msm_kms {
 	const struct msm_kms_funcs *funcs;
 
-	/* irq handling: */
-	bool in_irq;
-	struct list_head irq_list;    /* list of mdp4_irq */
-	uint32_t vblank_mask;         /* irq bits set for userspace vblank */
+	/* irq number to be passed on to drm_irq_install */
+	int irq;
 };
 
 static inline void msm_kms_init(struct msm_kms *kms,
@@ -75,5 +73,7 @@ static inline void msm_kms_init(struct msm_kms *kms,
 
 struct msm_kms *mdp4_kms_init(struct drm_device *dev);
 struct msm_kms *mdp5_kms_init(struct drm_device *dev);
+int msm_mdss_init(struct drm_device *dev);
+void msm_mdss_destroy(struct drm_device *dev);
 
 #endif /* __MSM_KMS_H__ */
diff --git a/drivers/gpu/drm/msm/msm_perf.c b/drivers/gpu/drm/msm/msm_perf.c
index 830857c47c86..17fe4e53e0d1 100644
--- a/drivers/gpu/drm/msm/msm_perf.c
+++ b/drivers/gpu/drm/msm/msm_perf.c
@@ -132,7 +132,7 @@ static ssize_t perf_read(struct file *file, char __user *buf,
 		size_t sz, loff_t *ppos)
 {
 	struct msm_perf_state *perf = file->private_data;
-	int n = 0, ret;
+	int n = 0, ret = 0;
 
 	mutex_lock(&perf->read_lock);
 
@@ -143,9 +143,10 @@ static ssize_t perf_read(struct file *file, char __user *buf,
 	}
 
 	n = min((int)sz, perf->buftot - perf->bufpos);
-	ret = copy_to_user(buf, &perf->buf[perf->bufpos], n);
-	if (ret)
+	if (copy_to_user(buf, &perf->buf[perf->bufpos], n)) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	perf->bufpos += n;
 	*ppos += n;
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index b48f73ac6389..3a5fdfcd67ae 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -27,6 +27,11 @@
  * This bypasses drm_debugfs_create_files() mainly because we need to use
  * our own fops for a bit more control.  In particular, we don't want to
  * do anything if userspace doesn't have the debugfs file open.
+ *
+ * The module-param "rd_full", which defaults to false, enables snapshotting
+ * all (non-written) buffers in the submit, rather than just cmdstream bo's.
+ * This is useful to capture the contents of (for example) vbo's or textures,
+ * or shader programs (if not emitted inline in cmdstream).
  */
 
 #ifdef CONFIG_DEBUG_FS
@@ -40,6 +45,10 @@
 #include "msm_gpu.h"
 #include "msm_gem.h"
 
+static bool rd_full = false;
+MODULE_PARM_DESC(rd_full, "If true, $debugfs/.../rd will snapshot all buffer contents");
+module_param_named(rd_full, rd_full, bool, 0600);
+
 enum rd_sect_type {
 	RD_NONE,
 	RD_TEST,       /* ascii text */
@@ -140,9 +149,10 @@ static ssize_t rd_read(struct file *file, char __user *buf,
 		goto out;
 
 	n = min_t(int, sz, circ_count_to_end(&rd->fifo));
-	ret = copy_to_user(buf, fptr, n);
-	if (ret)
+	if (copy_to_user(buf, fptr, n)) {
+		ret = -EFAULT;
 		goto out;
+	}
 
 	fifo->tail = (fifo->tail + n) & (BUF_SZ - 1);
 	*ppos += n;
@@ -277,6 +287,31 @@ void msm_rd_debugfs_cleanup(struct drm_minor *minor)
 	kfree(rd);
 }
 
+static void snapshot_buf(struct msm_rd_state *rd,
+		struct msm_gem_submit *submit, int idx,
+		uint32_t iova, uint32_t size)
+{
+	struct msm_gem_object *obj = submit->bos[idx].obj;
+	const char *buf;
+
+	buf = msm_gem_get_vaddr_locked(&obj->base);
+	if (IS_ERR(buf))
+		return;
+
+	if (iova) {
+		buf += iova - submit->bos[idx].iova;
+	} else {
+		iova = submit->bos[idx].iova;
+		size = obj->base.size;
+	}
+
+	rd_write_section(rd, RD_GPUADDR,
+			(uint32_t[2]){ iova, size }, 8);
+	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
+
+	msm_gem_put_vaddr_locked(&obj->base);
+}
+
 /* called under struct_mutex */
 void msm_rd_dump_submit(struct msm_gem_submit *submit)
 {
@@ -300,24 +335,27 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 
 	rd_write_section(rd, RD_CMD, msg, ALIGN(n, 4));
 
-	/* could be nice to have an option (module-param?) to snapshot
-	 * all the bo's associated with the submit.  Handy to see vtx
-	 * buffers, etc.  For now just the cmdstream bo's is enough.
-	 */
+	if (rd_full) {
+		for (i = 0; i < submit->nr_bos; i++) {
+			/* buffers that are written to probably don't start out
+			 * with anything interesting:
+			 */
+			if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE)
+				continue;
+
+			snapshot_buf(rd, submit, i, 0, 0);
+		}
+	}
 
 	for (i = 0; i < submit->nr_cmds; i++) {
-		uint32_t idx  = submit->cmd[i].idx;
 		uint32_t iova = submit->cmd[i].iova;
 		uint32_t szd  = submit->cmd[i].size; /* in dwords */
-		struct msm_gem_object *obj = submit->bos[idx].obj;
-		const char *buf = msm_gem_vaddr_locked(&obj->base);
 
-		buf += iova - submit->bos[idx].iova;
-
-		rd_write_section(rd, RD_GPUADDR,
-				(uint32_t[2]){ iova, szd * 4 }, 8);
-		rd_write_section(rd, RD_BUFFER_CONTENTS,
-				buf, szd * 4);
+		/* snapshot cmdstream bo's (if we haven't already): */
+		if (!rd_full) {
+			snapshot_buf(rd, submit, submit->cmd[i].idx,
+					submit->cmd[i].iova, szd * 4);
+		}
 
 		switch (submit->cmd[i].type) {
 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 1f14b908b221..f326cf6a32e6 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -39,7 +39,11 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
 		goto fail;
 	}
 
-	ring->start = msm_gem_vaddr_locked(ring->bo);
+	ring->start = msm_gem_get_vaddr_locked(ring->bo);
+	if (IS_ERR(ring->start)) {
+		ret = PTR_ERR(ring->start);
+		goto fail;
+	}
 	ring->end   = ring->start + (size / 4);
 	ring->cur   = ring->start;
 
@@ -55,7 +59,9 @@ fail:
 
 void msm_ringbuffer_destroy(struct msm_ringbuffer *ring)
 {
-	if (ring->bo)
+	if (ring->bo) {
+		msm_gem_put_vaddr(ring->bo);
 		drm_gem_object_unreference_unlocked(ring->bo);
+	}
 	kfree(ring);
 }
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 5ab13e7939db..2922a82cba8e 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -3,13 +3,7 @@ config DRM_NOUVEAU
 	depends on DRM && PCI
         select FW_LOADER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
-	select FB
-	select FRAMEBUFFER_CONSOLE if !EXPERT
 	select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT
 	select ACPI_VIDEO if ACPI && X86 && BACKLIGHT_CLASS_DEVICE && INPUT
 	select X86_PLATFORM_DEVICES if ACPI && X86
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index aea81a547e85..34c0f2f67548 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -125,18 +125,8 @@ nv04_display_destroy(struct drm_device *dev)
 	struct nv04_display *disp = nv04_display(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_encoder *encoder;
-	struct drm_crtc *crtc;
 	struct nouveau_crtc *nv_crtc;
 
-	/* Turn every CRTC off. */
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct drm_mode_set modeset = {
-			.crtc = crtc,
-		};
-
-		drm_mode_set_config_internal(&modeset);
-	}
-
 	/* Restore state */
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, base.base.head)
 		encoder->enc_restore(&encoder->base.base);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index a665b78b2af5..434d1e29f279 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -749,13 +749,8 @@ static int nv17_tv_set_property(struct drm_encoder *encoder,
 
 		/* Disable the crtc to ensure a full modeset is
 		 * performed whenever it's turned on again. */
-		if (crtc) {
-			struct drm_mode_set modeset = {
-				.crtc = crtc,
-			};
-
-			drm_mode_set_config_internal(&modeset);
-		}
+		if (crtc)
+			drm_crtc_force_disable(crtc);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 331620a52afa..287a7d6fa480 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -29,6 +29,7 @@ struct nv_device_info_v0 {
 #define NV_DEVICE_INFO_V0_FERMI                                            0x07
 #define NV_DEVICE_INFO_V0_KEPLER                                           0x08
 #define NV_DEVICE_INFO_V0_MAXWELL                                          0x09
+#define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 982aad8fa645..e6e9537537cf 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -39,6 +39,7 @@
 #define KEPLER_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000a06f
 #define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
+#define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
@@ -50,6 +51,8 @@
 #define GK110_DISP                                    /* cl5070.h */ 0x00009270
 #define GM107_DISP                                    /* cl5070.h */ 0x00009470
 #define GM200_DISP                                    /* cl5070.h */ 0x00009570
+#define GP100_DISP                                    /* cl5070.h */ 0x00009770
+#define GP104_DISP                                    /* cl5070.h */ 0x00009870
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -86,6 +89,8 @@
 #define GK110_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000927d
 #define GM107_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000947d
 #define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
+#define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
+#define GP104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -105,6 +110,8 @@
 #define MAXWELL_A                                     /* cl9097.h */ 0x0000b097
 #define MAXWELL_B                                     /* cl9097.h */ 0x0000b197
 
+#define PASCAL_A                                      /* cl9097.h */ 0x0000c097
+
 #define NV74_BSP                                                     0x000074b0
 
 #define GT212_MSVLD                                                  0x000085b1
@@ -128,6 +135,8 @@
 #define FERMI_DMA                                                    0x000090b5
 #define KEPLER_DMA_COPY_A                                            0x0000a0b5
 #define MAXWELL_DMA_COPY_A                                           0x0000b0b5
+#define PASCAL_DMA_COPY_A                                            0x0000c0b5
+#define PASCAL_DMA_COPY_B                                            0x0000c1b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
@@ -137,6 +146,7 @@
 #define KEPLER_COMPUTE_B                                             0x0000a1c0
 #define MAXWELL_COMPUTE_A                                            0x0000b0c0
 #define MAXWELL_COMPUTE_B                                            0x0000b1c0
+#define PASCAL_COMPUTE_A                                             0x0000c0c0
 
 #define NV74_CIPHER                                                  0x000074c1
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index c612dc1f1eb4..7ea8aa7ca408 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -16,9 +16,9 @@ enum nvkm_devidx {
 	NVKM_SUBDEV_MC,
 	NVKM_SUBDEV_BUS,
 	NVKM_SUBDEV_TIMER,
+	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_FB,
 	NVKM_SUBDEV_LTC,
-	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_MMU,
 	NVKM_SUBDEV_BAR,
 	NVKM_SUBDEV_PMU,
@@ -33,7 +33,10 @@ enum nvkm_devidx {
 	NVKM_ENGINE_CE0,
 	NVKM_ENGINE_CE1,
 	NVKM_ENGINE_CE2,
-	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE2,
+	NVKM_ENGINE_CE3,
+	NVKM_ENGINE_CE4,
+	NVKM_ENGINE_CE5,
+	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE5,
 
 	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_DISP,
@@ -50,7 +53,8 @@ enum nvkm_devidx {
 
 	NVKM_ENGINE_NVENC0,
 	NVKM_ENGINE_NVENC1,
-	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC1,
+	NVKM_ENGINE_NVENC2,
+	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC2,
 
 	NVKM_ENGINE_NVDEC,
 	NVKM_ENGINE_PM,
@@ -102,6 +106,7 @@ struct nvkm_device {
 		NV_C0    = 0xc0,
 		NV_E0    = 0xe0,
 		GM100    = 0x110,
+		GP100    = 0x130,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
@@ -136,7 +141,7 @@ struct nvkm_device {
 	struct nvkm_volt *volt;
 
 	struct nvkm_engine *bsp;
-	struct nvkm_engine *ce[3];
+	struct nvkm_engine *ce[6];
 	struct nvkm_engine *cipher;
 	struct nvkm_disp *disp;
 	struct nvkm_dma *dma;
@@ -149,7 +154,7 @@ struct nvkm_device {
 	struct nvkm_engine *mspdec;
 	struct nvkm_engine *msppp;
 	struct nvkm_engine *msvld;
-	struct nvkm_engine *nvenc[2];
+	struct nvkm_engine *nvenc[3];
 	struct nvkm_engine *nvdec;
 	struct nvkm_pm *pm;
 	struct nvkm_engine *sec;
@@ -170,7 +175,6 @@ struct nvkm_device_func {
 	void (*fini)(struct nvkm_device *, bool suspend);
 	resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
 	resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
-	bool cpu_coherent;
 };
 
 struct nvkm_device_quirk {
@@ -206,7 +210,7 @@ struct nvkm_device_chip {
 	int (*volt    )(struct nvkm_device *, int idx, struct nvkm_volt **);
 
 	int (*bsp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*ce[3]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*ce[6]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*cipher  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*disp    )(struct nvkm_device *, int idx, struct nvkm_disp **);
 	int (*dma     )(struct nvkm_device *, int idx, struct nvkm_dma **);
@@ -219,7 +223,7 @@ struct nvkm_device_chip {
 	int (*mspdec  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msppp   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msvld   )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*nvenc[2])(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*nvenc[3])(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*nvdec   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*pm      )(struct nvkm_device *, int idx, struct nvkm_pm **);
 	int (*sec     )(struct nvkm_device *, int idx, struct nvkm_engine **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index b5370cb56e3c..e5c9b6268dcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -28,6 +28,7 @@ struct nvkm_device_tegra {
 	} iommu;
 
 	int gpu_speedo;
+	int gpu_speedo_id;
 };
 
 struct nvkm_device_tegra_func {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index 594d719ba41e..d3d26a1e215d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -7,4 +7,6 @@ int gf100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gk104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index d4fdce27b297..e82049667ce4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -32,4 +32,6 @@ int gk104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gk110_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 15ddfcf5e8db..ed92fec5292c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -66,4 +66,5 @@ int gk20a_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm107_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm200_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index 6515f5810a26..89cf99307828 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -42,4 +42,5 @@ int gk20a_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm107_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
index e39a1fea930b..a72f3290528a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
@@ -7,6 +7,9 @@ struct nvkm_bios {
 	u32 size;
 	u8 *data;
 
+	u32 image0_size;
+	u32 imaged_addr;
+
 	u32 bmp_offset;
 	u32 bit_offset;
 
@@ -22,10 +25,9 @@ struct nvkm_bios {
 u8  nvbios_checksum(const u8 *data, int size);
 u16 nvbios_findstr(const u8 *data, int size, const char *str, int len);
 int nvbios_memcmp(struct nvkm_bios *, u32 addr, const char *, u32 len);
-
-#define nvbios_rd08(b,o) (b)->data[(o)]
-#define nvbios_rd16(b,o) get_unaligned_le16(&(b)->data[(o)])
-#define nvbios_rd32(b,o) get_unaligned_le32(&(b)->data[(o)])
+u8  nvbios_rd08(struct nvkm_bios *, u32 addr);
+u16 nvbios_rd16(struct nvkm_bios *, u32 addr);
+u32 nvbios_rd32(struct nvkm_bios *, u32 addr);
 
 int nvkm_bios_new(struct nvkm_device *, int, struct nvkm_bios **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
index db10c11f0595..c5a6ebd5a478 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
@@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *);
 
 struct nvbios_ocfg {
-	u16 match;
+	u8  proto;
+	u8  flags;
 	u16 clkcmp[2];
 };
 
@@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
 u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
-u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type,
+u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
 u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 0a734fd06acf..3a410275fa71 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -56,6 +56,8 @@ struct nvkm_fb {
 		int regions;
 	} tile;
 
+	u8 page;
+
 	struct nvkm_memory *mmu_rd;
 	struct nvkm_memory *mmu_wr;
 };
@@ -91,6 +93,8 @@ int gk104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gk20a_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm107_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm200_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index c6b90b6543b3..cd755baf9cab 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -38,4 +38,5 @@ int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 2e80682b2da1..27d25b18d85c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -7,11 +7,14 @@ struct nvkm_mc {
 	struct nvkm_subdev subdev;
 };
 
-void nvkm_mc_intr(struct nvkm_mc *, bool *handled);
-void nvkm_mc_intr_unarm(struct nvkm_mc *);
-void nvkm_mc_intr_rearm(struct nvkm_mc *);
-void nvkm_mc_reset(struct nvkm_mc *, enum nvkm_devidx);
-void nvkm_mc_unk260(struct nvkm_mc *, u32 data);
+void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_intr(struct nvkm_device *, bool *handled);
+void nvkm_mc_intr_unarm(struct nvkm_device *);
+void nvkm_mc_intr_rearm(struct nvkm_device *);
+void nvkm_mc_intr_mask(struct nvkm_device *, enum nvkm_devidx, bool enable);
+void nvkm_mc_unk260(struct nvkm_device *, u32 data);
 
 int nv04_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int nv11_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
@@ -24,4 +27,5 @@ int gt215_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gf100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
+int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index ddb913889d7e..e6523e2cea9f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -47,6 +47,7 @@ int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gk104_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int gp100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 
 /* pcie functions */
 int nvkm_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8 width);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index c6edd95a5b69..b04c38c07761 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -43,9 +43,8 @@ struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
 	struct nvkm_subdev subdev;
 
+	enum nvkm_devidx devidx;
 	u32 base;
-	u32 irq_mask;
-	u32 enable_mask;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 8fb575a92c48..71ebbfd4484f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -8,10 +8,11 @@ struct nvkm_top {
 	struct list_head device;
 };
 
-u32 nvkm_top_reset(struct nvkm_top *, enum nvkm_devidx);
-u32 nvkm_top_intr(struct nvkm_top *, u32 intr, u64 *subdevs);
-enum nvkm_devidx nvkm_top_fault(struct nvkm_top *, int fault);
-enum nvkm_devidx nvkm_top_engine(struct nvkm_top *, int, int *runl, int *engn);
+u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
+u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
+u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
+enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
 int gk104_top_new(struct nvkm_device *, int, struct nvkm_top **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index feff55cff05b..b765f4ffcde6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -12,6 +12,9 @@ struct nvkm_volt {
 		u32 uv;
 		u8 vid;
 	} vid[256];
+
+	u32 max_uv;
+	u32 min_uv;
 };
 
 int nvkm_volt_get(struct nvkm_volt *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index eb7de487a2b3..7bd4683216d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -100,6 +100,7 @@ nouveau_abi16_swclass(struct nouveau_drm *drm)
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		return NVIF_CLASS_SW_GF100;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5e3f3e826476..528bdeffb339 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -209,8 +209,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
-		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+	nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
@@ -424,13 +423,7 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	/*
-	 * TTM buffers allocated using the DMA API already have a mapping, let's
-	 * use it instead.
-	 */
-	if (!nvbo->force_coherent)
-		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
-				  &nvbo->kmap);
+	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
 
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
@@ -442,12 +435,7 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
 	if (!nvbo)
 		return;
 
-	/*
-	 * TTM buffers allocated using the DMA API already had a coherent
-	 * mapping which we used, no need to unmap.
-	 */
-	if (!nvbo->force_coherent)
-		ttm_bo_kunmap(&nvbo->kmap);
+	ttm_bo_kunmap(&nvbo->kmap);
 }
 
 void
@@ -506,35 +494,13 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
-static inline void *
-_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
-{
-	struct ttm_dma_tt *dma_tt;
-	u8 *m = mem;
-
-	index *= sz;
-
-	if (m) {
-		/* kmap'd address, return the corresponding offset */
-		m += index;
-	} else {
-		/* DMA-API mapping, lookup the right address */
-		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
-		m = dma_tt->cpu_address[index / PAGE_SIZE];
-		m += index % PAGE_SIZE;
-	}
-
-	return m;
-}
-#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
-
 void
 nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
@@ -548,7 +514,7 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
@@ -562,7 +528,7 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
@@ -1082,7 +1048,6 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								no_wait_gpu,
 								new_mem);
 				nouveau_fence_unref(&fence);
 			}
@@ -1104,6 +1069,10 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
+		{  "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xb0b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xb0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
@@ -1289,6 +1258,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
+	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
@@ -1324,7 +1297,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, evict, intr, no_wait_gpu, new_mem);
 
 out:
 	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
@@ -1488,14 +1461,6 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached)
-		return ttm_dma_populate(ttm_dma, dev->dev);
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		return ttm_agp_tt_populate(ttm);
@@ -1553,16 +1518,6 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached) {
-		ttm_dma_unpopulate(ttm_dma, dev->dev);
-		return;
-	}
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index b1d2527c5625..f9b3c811187e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -191,7 +191,8 @@ static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		    u32 engine, struct nouveau_channel **pchan)
 {
-	static const u16 oclasses[] = { MAXWELL_CHANNEL_GPFIFO_A,
+	static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A,
+					MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_B,
 					KEPLER_CHANNEL_GPFIFO_A,
 					FERMI_CHANNEL_GPFIFO,
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6072fe292db8..afbf557b23d4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -47,7 +47,7 @@ nouveau_display_vblank_handler(struct nvif_notify *notify)
 {
 	struct nouveau_crtc *nv_crtc =
 		container_of(notify, typeof(*nv_crtc), vblank);
-	drm_handle_vblank(nv_crtc->base.dev, nv_crtc->index);
+	drm_crtc_handle_vblank(&nv_crtc->base);
 	return NVIF_NOTIFY_KEEP;
 }
 
@@ -495,6 +495,8 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
 		static const u16 oclass[] = {
+			GP104_DISP,
+			GP100_DISP,
 			GM200_DISP,
 			GM107_DISP,
 			GK110_DISP,
@@ -554,6 +556,7 @@ nouveau_display_destroy(struct drm_device *dev)
 	nouveau_display_vblank_fini(dev);
 
 	drm_kms_helper_poll_fini(dev);
+	drm_crtc_force_disable_all(dev);
 	drm_mode_config_cleanup(dev);
 
 	if (disp->dtor)
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 295e7621cc68..66c1280c0f1f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -198,6 +198,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		case KEPLER_CHANNEL_GPFIFO_A:
 		case KEPLER_CHANNEL_GPFIFO_B:
 		case MAXWELL_CHANNEL_GPFIFO_A:
+		case PASCAL_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
 		default:
@@ -316,7 +317,16 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	if (vga_switcheroo_client_probe_defer(pdev))
 		return -EPROBE_DEFER;
 
-	/* remove conflicting drivers (vesafb, efifb etc) */
+	/* We need to check that the chipset is supported before booting
+	 * fbdev off the hardware, as there's no way to put it back.
+	 */
+	ret = nvkm_device_pci_new(pdev, NULL, "error", true, false, 0, &device);
+	if (ret)
+		return ret;
+
+	nvkm_device_del(&device);
+
+	/* Remove conflicting drivers (vesafb, efifb etc). */
 	aper = alloc_apertures(3);
 	if (!aper)
 		return -ENOMEM;
@@ -430,6 +440,11 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_vga_init(drm);
 
 	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->device)->mmu) {
+			ret = -ENOSYS;
+			goto fail_device;
+		}
+
 		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
 				  0x1000, NULL, &drm->client.vm);
 		if (ret)
@@ -490,7 +505,11 @@ nouveau_drm_unload(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	pm_runtime_get_sync(dev->dev);
+	if (nouveau_runtime_pm != 0) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
+
 	nouveau_fbcon_fini(dev);
 	nouveau_accel_fini(drm);
 	nouveau_hwmon_fini(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 57aaf98a26f9..d1f248fd3506 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -552,6 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
+	if (fbcon->helper.fbdev)
+		fbcon->helper.fbdev->pixmap.buf_align = 4;
 	return 0;
 
 fini:
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 1ff4166af26e..71f764bf4cc6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -535,6 +535,40 @@ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO,
 			  nouveau_hwmon_get_in0_input, NULL, 0);
 
 static ssize_t
+nouveau_hwmon_get_in0_min(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->min_uv)
+		return -ENODEV;
+
+	return sprintf(buf, "%i\n", volt->min_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_min, S_IRUGO,
+			  nouveau_hwmon_get_in0_min, NULL, 0);
+
+static ssize_t
+nouveau_hwmon_get_in0_max(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->max_uv)
+		return -ENODEV;
+
+	return sprintf(buf, "%i\n", volt->max_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_max, S_IRUGO,
+			  nouveau_hwmon_get_in0_max, NULL, 0);
+
+static ssize_t
 nouveau_hwmon_get_in0_label(struct device *d,
 			    struct device_attribute *a, char *buf)
 {
@@ -594,6 +628,8 @@ static struct attribute *hwmon_pwm_fan_attributes[] = {
 
 static struct attribute *hwmon_in0_attributes[] = {
 	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in0_min.dev_attr.attr,
+	&sensor_dev_attr_in0_max.dev_attr.attr,
 	&sensor_dev_attr_in0_label.dev_attr.attr,
 	NULL
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index bcee91497eb9..1825dbc33192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -164,6 +164,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		node->memtype = (nvbo->tile_flags & 0xff00) >> 8;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 0f3e4bb411cc..7d9248b8c664 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	uint32_t fg;
 	uint32_t bg;
 	uint32_t dsize;
-	uint32_t width;
 	uint32_t *data = (uint32_t *)image->data;
 	int ret;
 
@@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 8);
-	dsize = ALIGN(width * image->height, 32) >> 5;
-
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
 		fg = ((uint32_t *) info->pseudo_palette)[image->fg_color];
@@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 			 ((image->dx + image->width) & 0xffff));
 	OUT_RING(chan, bg);
 	OUT_RING(chan, fg);
-	OUT_RING(chan, (image->height << 16) | width);
+	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
 
+	dsize = ALIGN(image->width * image->height, 32) >> 5;
 	while (dsize) {
 		int iter_len = dsize > 128 ? 128 : dsize;
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7a7788212df7..7d0edcbcfca7 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -297,6 +297,8 @@ nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
 		.pushbuf = 0xb0007d00,
 	};
 	static const s32 oclass[] = {
+		GP104_DISP_CORE_CHANNEL_DMA,
+		GP100_DISP_CORE_CHANNEL_DMA,
 		GM200_DISP_CORE_CHANNEL_DMA,
 		GM107_DISP_CORE_CHANNEL_DMA,
 		GK110_DISP_CORE_CHANNEL_DMA,
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 33d9ee0fac40..1aeb698e9707 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NV04(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING(chan, 0);
 	OUT_RING(chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
index a0913359ac05..839f4c8c1805 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
@@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NVC0(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index b18557858f19..19044aba265e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -57,6 +57,9 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_CE0     ] = "ce0",
 	[NVKM_ENGINE_CE1     ] = "ce1",
 	[NVKM_ENGINE_CE2     ] = "ce2",
+	[NVKM_ENGINE_CE3     ] = "ce3",
+	[NVKM_ENGINE_CE4     ] = "ce4",
+	[NVKM_ENGINE_CE5     ] = "ce5",
 	[NVKM_ENGINE_CIPHER  ] = "cipher",
 	[NVKM_ENGINE_DISP    ] = "disp",
 	[NVKM_ENGINE_DMAOBJ  ] = "dma",
@@ -71,6 +74,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_MSVLD   ] = "msvld",
 	[NVKM_ENGINE_NVENC0  ] = "nvenc0",
 	[NVKM_ENGINE_NVENC1  ] = "nvenc1",
+	[NVKM_ENGINE_NVENC2  ] = "nvenc2",
 	[NVKM_ENGINE_NVDEC   ] = "nvdec",
 	[NVKM_ENGINE_PM      ] = "pm",
 	[NVKM_ENGINE_SEC     ] = "sec",
@@ -105,7 +109,7 @@ nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 		}
 	}
 
-	nvkm_mc_reset(device->mc, subdev->index);
+	nvkm_mc_reset(device, subdev->index);
 
 	time = ktime_to_us(ktime_get()) - time;
 	nvkm_trace(subdev, "%s completed in %lldus\n", action, time);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index 9c19d59b47df..a4458a8eb30a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -3,3 +3,5 @@ nvkm-y += nvkm/engine/ce/gf100.o
 nvkm-y += nvkm/engine/ce/gk104.o
 nvkm-y += nvkm/engine/ce/gm107.o
 nvkm-y += nvkm/engine/ce/gm200.o
+nvkm-y += nvkm/engine/ce/gp100.o
+nvkm-y += nvkm/engine/ce/gp104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
new file mode 100644
index 000000000000..c7710456bc30
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_enum
+gp100_ce_launcherr_report[] = {
+	{ 0x0, "NO_ERR" },
+	{ 0x1, "2D_LAYER_EXCEEDS_DEPTH" },
+	{ 0x2, "INVALID_ALIGNMENT" },
+	{ 0x3, "MEM2MEM_RECT_OUT_OF_BOUNDS" },
+	{ 0x4, "SRC_LINE_EXCEEDS_PITCH" },
+	{ 0x5, "SRC_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x6, "DST_LINE_EXCEEDS_PITCH" },
+	{ 0x7, "DST_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x8, "BAD_SRC_PIXEL_COMP_REF" },
+	{ 0x9, "INVALID_VALUE" },
+	{ 0xa, "UNUSED_FIELD" },
+	{ 0xb, "INVALID_OPERATION" },
+	{ 0xc, "NO_RESOURCES" },
+	{ 0xd, "INVALID_CONFIG" },
+	{}
+};
+
+static void
+gp100_ce_intr_launcherr(struct nvkm_engine *ce, const u32 base)
+{
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x104418 + base);
+	const struct nvkm_enum *en =
+		nvkm_enum_find(gp100_ce_launcherr_report, stat & 0x0000000f);
+	nvkm_warn(subdev, "LAUNCHERR %08x [%s]\n", stat, en ? en->name : "");
+}
+
+void
+gp100_ce_intr(struct nvkm_engine *ce)
+{
+	const u32 base = (ce->subdev.index - NVKM_ENGINE_CE0) * 0x80;
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mask = nvkm_rd32(device, 0x10440c + base);
+	u32 intr = nvkm_rd32(device, 0x104410 + base) & mask;
+	if (intr & 0x00000001) { //XXX: guess
+		nvkm_warn(subdev, "BLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000001);
+		intr &= ~0x00000001;
+	}
+	if (intr & 0x00000002) { //XXX: guess
+		nvkm_warn(subdev, "NONBLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000002);
+		intr &= ~0x00000002;
+	}
+	if (intr & 0x00000004) {
+		gp100_ce_intr_launcherr(ce, base);
+		nvkm_wr32(device, 0x104410 + base, 0x00000004);
+		intr &= ~0x00000004;
+	}
+	if (intr) {
+		nvkm_warn(subdev, "intr %08x\n", intr);
+		nvkm_wr32(device, 0x104410 + base, intr);
+	}
+}
+
+static const struct nvkm_engine_func
+gp100_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gp100_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp100_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
new file mode 100644
index 000000000000..20e019788a53
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_engine_func
+gp104_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_B },
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+gp104_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp104_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
index e2fa8b161943..2dce405976ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
@@ -4,4 +4,5 @@
 
 void gt215_ce_intr(struct nvkm_falcon *, struct nvkm_fifo_chan *);
 void gk104_ce_intr(struct nvkm_engine *);
+void gp100_ce_intr(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 4572debcb0c9..7218a067a6c5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2148,6 +2148,67 @@ nv12b_chipset = {
 	.sw = gf100_sw_new,
 };
 
+static const struct nvkm_device_chip
+nv130_chipset = {
+	.name = "GP100",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.secboot = gm200_secboot_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp100_ce_new,
+	.ce[1] = gp100_ce_new,
+	.ce[2] = gp100_ce_new,
+	.ce[3] = gp100_ce_new,
+	.ce[4] = gp100_ce_new,
+	.ce[5] = gp100_ce_new,
+	.dma = gf119_dma_new,
+	.disp = gp100_disp_new,
+	.fifo = gp100_fifo_new,
+	.gr = gp100_gr_new,
+	.sw = gf100_sw_new,
+};
+
+static const struct nvkm_device_chip
+nv134_chipset = {
+	.name = "GP104",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp104_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp104_ce_new,
+	.ce[1] = gp104_ce_new,
+	.ce[2] = gp104_ce_new,
+	.ce[3] = gp104_ce_new,
+	.disp = gp104_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2221,6 +2282,9 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(CE0    , device->ce[0]   ,  device->ce[0]);
 	_(CE1    , device->ce[1]   ,  device->ce[1]);
 	_(CE2    , device->ce[2]   ,  device->ce[2]);
+	_(CE3    , device->ce[3]   ,  device->ce[3]);
+	_(CE4    , device->ce[4]   ,  device->ce[4]);
+	_(CE5    , device->ce[5]   ,  device->ce[5]);
 	_(CIPHER , device->cipher  ,  device->cipher);
 	_(DISP   , device->disp    , &device->disp->engine);
 	_(DMAOBJ , device->dma     , &device->dma->engine);
@@ -2235,6 +2299,7 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(MSVLD  , device->msvld   ,  device->msvld);
 	_(NVENC0 , device->nvenc[0],  device->nvenc[0]);
 	_(NVENC1 , device->nvenc[1],  device->nvenc[1]);
+	_(NVENC2 , device->nvenc[2],  device->nvenc[2]);
 	_(NVDEC  , device->nvdec   ,  device->nvdec);
 	_(PM     , device->pm      , &device->pm->engine);
 	_(SEC    , device->sec     ,  device->sec);
@@ -2492,6 +2557,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 			case 0x100: device->card_type = NV_E0; break;
 			case 0x110:
 			case 0x120: device->card_type = GM100; break;
+			case 0x130: device->card_type = GP100; break;
 			default:
 				break;
 			}
@@ -2576,6 +2642,8 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x124: device->chip = &nv124_chipset; break;
 		case 0x126: device->chip = &nv126_chipset; break;
 		case 0x12b: device->chip = &nv12b_chipset; break;
+		case 0x130: device->chip = &nv130_chipset; break;
+		case 0x134: device->chip = &nv134_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
@@ -2659,6 +2727,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_CE0     ,    ce[0]);
 		_(NVKM_ENGINE_CE1     ,    ce[1]);
 		_(NVKM_ENGINE_CE2     ,    ce[2]);
+		_(NVKM_ENGINE_CE3     ,    ce[3]);
+		_(NVKM_ENGINE_CE4     ,    ce[4]);
+		_(NVKM_ENGINE_CE5     ,    ce[5]);
 		_(NVKM_ENGINE_CIPHER  ,   cipher);
 		_(NVKM_ENGINE_DISP    ,     disp);
 		_(NVKM_ENGINE_DMAOBJ  ,      dma);
@@ -2673,6 +2744,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_MSVLD   ,    msvld);
 		_(NVKM_ENGINE_NVENC0  , nvenc[0]);
 		_(NVKM_ENGINE_NVENC1  , nvenc[1]);
+		_(NVKM_ENGINE_NVENC2  , nvenc[2]);
 		_(NVKM_ENGINE_NVDEC   ,    nvdec);
 		_(NVKM_ENGINE_PM      ,       pm);
 		_(NVKM_ENGINE_SEC     ,      sec);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 18fab3973ce5..b1b693219db3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,7 +1614,6 @@ nvkm_device_pci_func = {
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
-	.cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index ec12efb4689a..939682f18788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -191,13 +191,11 @@ static irqreturn_t
 nvkm_device_tegra_intr(int irq, void *arg)
 {
 	struct nvkm_device_tegra *tdev = arg;
-	struct nvkm_mc *mc = tdev->device.mc;
+	struct nvkm_device *device = &tdev->device;
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	nvkm_mc_intr(device, &handled);
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -247,7 +245,6 @@ nvkm_device_tegra_func = {
 	.fini = nvkm_device_tegra_fini,
 	.resource_addr = nvkm_device_tegra_resource_addr,
 	.resource_size = nvkm_device_tegra_resource_size,
-	.cpu_coherent = false,
 };
 
 int
@@ -313,6 +310,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 		goto remove;
 
 	tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
+	tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id;
 	ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
 			       NVKM_DEVICE_TEGRA, pdev->id, NULL,
 			       cfg, dbg, detect, mmio, subdev_mask,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 137066426ed7..79a8f71cf788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -102,6 +102,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 	case NV_C0: args->v0.family = NV_DEVICE_INFO_V0_FERMI; break;
 	case NV_E0: args->v0.family = NV_DEVICE_INFO_V0_KEPLER; break;
 	case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break;
+	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
 	default:
 		args->v0.family = 0;
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index a74c5dd27dc0..77a52b54a31e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -10,6 +10,8 @@ nvkm-y += nvkm/engine/disp/gk104.o
 nvkm-y += nvkm/engine/disp/gk110.o
 nvkm-y += nvkm/engine/disp/gm107.o
 nvkm-y += nvkm/engine/disp/gm200.o
+nvkm-y += nvkm/engine/disp/gp100.o
+nvkm-y += nvkm/engine/disp/gp104.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/outpdp.o
@@ -18,6 +20,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o
 nvkm-y += nvkm/engine/disp/sornv50.o
 nvkm-y += nvkm/engine/disp/sorg94.o
 nvkm-y += nvkm/engine/disp/sorgf119.o
+nvkm-y += nvkm/engine/disp/sorgm107.o
 nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/dport.o
 
@@ -44,12 +47,15 @@ nvkm-y += nvkm/engine/disp/rootgk104.o
 nvkm-y += nvkm/engine/disp/rootgk110.o
 nvkm-y += nvkm/engine/disp/rootgm107.o
 nvkm-y += nvkm/engine/disp/rootgm200.o
+nvkm-y += nvkm/engine/disp/rootgp100.o
+nvkm-y += nvkm/engine/disp/rootgp104.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
 
 nvkm-y += nvkm/engine/disp/dmacnv50.o
 nvkm-y += nvkm/engine/disp/dmacgf119.o
+nvkm-y += nvkm/engine/disp/dmacgp104.o
 
 nvkm-y += nvkm/engine/disp/basenv50.o
 nvkm-y += nvkm/engine/disp/baseg84.o
@@ -58,6 +64,7 @@ nvkm-y += nvkm/engine/disp/basegt215.o
 nvkm-y += nvkm/engine/disp/basegf119.o
 nvkm-y += nvkm/engine/disp/basegk104.o
 nvkm-y += nvkm/engine/disp/basegk110.o
+nvkm-y += nvkm/engine/disp/basegp104.o
 
 nvkm-y += nvkm/engine/disp/corenv50.o
 nvkm-y += nvkm/engine/disp/coreg84.o
@@ -69,6 +76,8 @@ nvkm-y += nvkm/engine/disp/coregk104.o
 nvkm-y += nvkm/engine/disp/coregk110.o
 nvkm-y += nvkm/engine/disp/coregm107.o
 nvkm-y += nvkm/engine/disp/coregm200.o
+nvkm-y += nvkm/engine/disp/coregp100.o
+nvkm-y += nvkm/engine/disp/coregp104.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
@@ -76,6 +85,7 @@ nvkm-y += nvkm/engine/disp/ovlygt200.o
 nvkm-y += nvkm/engine/disp/ovlygt215.o
 nvkm-y += nvkm/engine/disp/ovlygf119.o
 nvkm-y += nvkm/engine/disp/ovlygk104.o
+nvkm-y += nvkm/engine/disp/ovlygp104.o
 
 nvkm-y += nvkm/engine/disp/piocnv50.o
 nvkm-y += nvkm/engine/disp/piocgf119.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
new file mode 100644
index 000000000000..51688e37c54e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_base_oclass = {
+	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_base_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gf119_disp_base_chan_mthd,
+	.chid = 1,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
index aee374884c96..f5f683d9fd20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
@@ -85,6 +85,7 @@ extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_pior;
 extern const struct nv50_disp_chan_mthd gf119_disp_base_chan_mthd;
 
 extern const struct nv50_disp_chan_mthd gk104_disp_core_chan_mthd;
+extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd;
 
 struct nv50_disp_pioc_oclass {
 	int (*ctor)(const struct nv50_disp_chan_func *,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
index 6b1dc703dac7..21fbf89b6319 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
@@ -171,7 +171,7 @@ gf119_disp_core_chan_mthd = {
 	}
 };
 
-static void
+void
 gf119_disp_core_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
new file mode 100644
index 000000000000..d5dff6619d4d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp100_disp_core_oclass = {
+	.base.oclass = GP100_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gf119_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.chid = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
new file mode 100644
index 000000000000..6922f4007b61
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+#include <nvif/class.h>
+
+static int
+gp104_disp_core_init(struct nv50_disp_dmac *chan)
+{
+	struct nv50_disp *disp = chan->base.root->disp;
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494, chan->push);
+	nvkm_wr32(device, 0x611498, 0x00010000);
+	nvkm_wr32(device, 0x61149c, 0x00000001);
+	nvkm_mask(device, 0x610490, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000, 0x00000000);
+	nvkm_wr32(device, 0x610490, 0x01000013);
+
+	/* wait for it to go inactive */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x610490) & 0x80000000))
+			break;
+	) < 0) {
+		nvkm_error(subdev, "core init: %08x\n",
+			   nvkm_rd32(device, 0x610490));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+const struct nv50_disp_dmac_func
+gp104_disp_core_func = {
+	.init = gp104_disp_core_init,
+	.fini = gf119_disp_core_fini,
+	.bind = gf119_disp_dmac_bind,
+};
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_core_oclass = {
+	.base.oclass = GP104_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gp104_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.chid = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
index 876b14549a58..a57f7cef307a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
@@ -36,7 +36,7 @@ gf119_disp_dmac_bind(struct nv50_disp_dmac *chan,
 				 chan->base.chid << 27 | 0x00000001);
 }
 
-static void
+void
 gf119_disp_dmac_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
new file mode 100644
index 000000000000..ad24c2c57696
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+static int
+gp104_disp_dmac_init(struct nv50_disp_dmac *chan)
+{
+	struct nv50_disp *disp = chan->base.root->disp;
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int chid = chan->base.chid;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494 + (chid * 0x0010), chan->push);
+	nvkm_wr32(device, 0x611498 + (chid * 0x0010), 0x00010000);
+	nvkm_wr32(device, 0x61149c + (chid * 0x0010), 0x00000001);
+	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
+	nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013);
+
+	/* wait for it to go inactive */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000))
+			break;
+	) < 0) {
+		nvkm_error(subdev, "ch %d init: %08x\n", chid,
+			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+const struct nv50_disp_dmac_func
+gp104_disp_dmac_func = {
+	.init = gp104_disp_dmac_init,
+	.fini = gf119_disp_dmac_fini,
+	.bind = gf119_disp_dmac_bind,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
index fc84eb8b5c45..43ac05857853 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
@@ -25,8 +25,12 @@ int nv50_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func nv50_disp_core_func;
 
 extern const struct nv50_disp_dmac_func gf119_disp_dmac_func;
+void gf119_disp_dmac_fini(struct nv50_disp_dmac *);
 int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func gf119_disp_core_func;
+void gf119_disp_core_fini(struct nv50_disp_dmac *);
+
+extern const struct nv50_disp_dmac_func gp104_disp_dmac_func;
 
 struct nv50_disp_dmac_oclass {
 	int (*ctor)(const struct nv50_disp_dmac_func *,
@@ -88,4 +92,10 @@ extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass;
 extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass;
 
 extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp104_disp_core_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_base_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_ovly_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index f0314664349c..29e84b241cca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -79,8 +79,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -155,25 +154,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	switch (outp->info.type) {
 	case DCB_OUTPUT_TMDS:
-		*conf = (ctrl & 0x00000f00) >> 8;
 		if (*conf == 5)
 			*conf |= 0x0100;
 		break;
 	case DCB_OUTPUT_LVDS:
-		*conf = disp->sor.lvdsconf;
+		*conf |= disp->sor.lvdsconf;
 		break;
-	case DCB_OUTPUT_DP:
-		*conf = (ctrl & 0x00000f00) >> 8;
-		break;
-	case DCB_OUTPUT_ANALOG:
 	default:
-		*conf = 0x00ff;
 		break;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -418,7 +413,7 @@ gf119_disp_intr_supervisor(struct work_struct *work)
 	nvkm_wr32(device, 0x6101d0, 0x80000000);
 }
 
-static void
+void
 gf119_disp_intr_error(struct nv50_disp *disp, int chid)
 {
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
@@ -466,7 +461,7 @@ gf119_disp_intr(struct nv50_disp *disp)
 		u32 stat = nvkm_rd32(device, 0x61009c);
 		int chid = ffs(stat) - 1;
 		if (chid >= 0)
-			gf119_disp_intr_error(disp, chid);
+			disp->func->intr_error(disp, chid);
 		intr &= ~0x00000002;
 	}
 
@@ -510,6 +505,7 @@ gf119_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device,
 static const struct nv50_disp_func
 gf119_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gf119_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
index a86384b8e388..37f145cf30d7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk104_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk104_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
index 0d574c7e594a..e14ac946608c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk110_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk110_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index b6944142d616..2f2437cc5891 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm107_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm107_disp_root_oclass,
@@ -36,7 +37,7 @@ gm107_disp = {
 	.outp.internal.crt = nv50_dac_output_new,
 	.outp.internal.tmds = nv50_sor_output_new,
 	.outp.internal.lvds = nv50_sor_output_new,
-	.outp.internal.dp = gf119_sor_dp_new,
+	.outp.internal.dp = gm107_sor_dp_new,
 	.dac.nr = 3,
 	.dac.power = nv50_dac_power,
 	.dac.sense = nv50_dac_sense,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 67eec8620719..9f368d4ee61e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm200_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm200_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
new file mode 100644
index 000000000000..4f81bf31435e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+static const struct nv50_disp_func
+gp100_disp = {
+	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
+	.uevent = &gf119_disp_chan_uevent,
+	.super = gf119_disp_intr_supervisor,
+	.root = &gp100_disp_root_oclass,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+};
+
+int
+gp100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp100_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
new file mode 100644
index 000000000000..3bf3380336e4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+static void
+gp104_disp_intr_error(struct nv50_disp *disp, int chid)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mthd = nvkm_rd32(device, 0x6111f0 + (chid * 12));
+	u32 data = nvkm_rd32(device, 0x6111f4 + (chid * 12));
+	u32 unkn = nvkm_rd32(device, 0x6111f8 + (chid * 12));
+
+	nvkm_error(subdev, "chid %d mthd %04x data %08x %08x %08x\n",
+		   chid, (mthd & 0x0000ffc), data, mthd, unkn);
+
+	if (chid < ARRAY_SIZE(disp->chan)) {
+		switch (mthd & 0xffc) {
+		case 0x0080:
+			nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR);
+			break;
+		default:
+			break;
+		}
+	}
+
+	nvkm_wr32(device, 0x61009c, (1 << chid));
+	nvkm_wr32(device, 0x6111f0 + (chid * 12), 0x90000000);
+}
+
+static const struct nv50_disp_func
+gp104_disp = {
+	.intr = gf119_disp_intr,
+	.intr_error = gp104_disp_intr_error,
+	.uevent = &gf119_disp_chan_uevent,
+	.super = gf119_disp_intr_supervisor,
+	.root = &gp104_disp_root_oclass,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+};
+
+int
+gp104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp104_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 4226d2153b9c..fbb8c7dc18fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -32,6 +32,7 @@
 #include <subdev/bios/init.h>
 #include <subdev/bios/pll.h>
 #include <subdev/devinit.h>
+#include <subdev/timer.h>
 
 static const struct nvkm_disp_oclass *
 nv50_disp_root_(struct nvkm_disp *base)
@@ -269,8 +270,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	if (outp->info.location == 0) {
 		switch (outp->info.type) {
 		case DCB_OUTPUT_TMDS:
-			*conf = (ctrl & 0x00000f00) >> 8;
 			if (*conf == 5)
 				*conf |= 0x0100;
 			break;
 		case DCB_OUTPUT_LVDS:
-			*conf = disp->sor.lvdsconf;
+			*conf |= disp->sor.lvdsconf;
 			break;
-		case DCB_OUTPUT_DP:
-			*conf = (ctrl & 0x00000f00) >> 8;
-			break;
-		case DCB_OUTPUT_ANALOG:
 		default:
-			*conf = 0x00ff;
 			break;
 		}
 	} else {
@@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 		pclk = pclk / 2;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -430,6 +426,134 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	return outp;
 }
 
+static bool
+nv50_disp_dptmds_war(struct nvkm_device *device)
+{
+	switch (device->chipset) {
+	case 0x94:
+	case 0x96:
+	case 0x98:
+	case 0xaa:
+	case 0xac:
+		return true;
+	default:
+		break;
+	}
+	return false;
+}
+
+static bool
+nv50_disp_dptmds_war_needed(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+	if (nv50_disp_dptmds_war(device) && outp->type == DCB_OUTPUT_TMDS) {
+		switch (nvkm_rd32(device, 0x614300 + soff) & 0x00030000) {
+		case 0x00000000:
+		case 0x00030000:
+			return true;
+		default:
+			break;
+		}
+	}
+	return false;
+
+}
+
+static void
+nv50_disp_dptmds_war_2(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	nvkm_mask(device, 0x00e840, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x614300 + soff, 0x03000000, 0x03000000);
+	nvkm_mask(device, 0x61c10c + soff, 0x00000001, 0x00000001);
+
+	nvkm_mask(device, 0x61c00c + soff, 0x0f000000, 0x00000000);
+	nvkm_mask(device, 0x61c008 + soff, 0xff000000, 0x14000000);
+	nvkm_usec(device, 400, NVKM_DELAY);
+	nvkm_mask(device, 0x61c008 + soff, 0xff000000, 0x00000000);
+	nvkm_mask(device, 0x61c00c + soff, 0x0f000000, 0x01000000);
+
+	if (nvkm_rd32(device, 0x61c004 + soff) & 0x00000001) {
+		u32 seqctl = nvkm_rd32(device, 0x61c030 + soff);
+		u32  pu_pc = seqctl & 0x0000000f;
+		nvkm_wr32(device, 0x61c040 + soff + pu_pc * 4, 0x1f008000);
+	}
+}
+
+static void
+nv50_disp_dptmds_war_3(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+	u32 sorpwr;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	sorpwr = nvkm_rd32(device, 0x61c004 + soff);
+	if (sorpwr & 0x00000001) {
+		u32 seqctl = nvkm_rd32(device, 0x61c030 + soff);
+		u32  pd_pc = (seqctl & 0x00000f00) >> 8;
+		u32  pu_pc =  seqctl & 0x0000000f;
+
+		nvkm_wr32(device, 0x61c040 + soff + pd_pc * 4, 0x1f008000);
+
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + soff) & 0x10000000))
+				break;
+		);
+		nvkm_mask(device, 0x61c004 + soff, 0x80000001, 0x80000000);
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + soff) & 0x10000000))
+				break;
+		);
+
+		nvkm_wr32(device, 0x61c040 + soff + pd_pc * 4, 0x00002000);
+		nvkm_wr32(device, 0x61c040 + soff + pu_pc * 4, 0x1f000000);
+	}
+
+	nvkm_mask(device, 0x61c10c + soff, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x614300 + soff, 0x03000000, 0x00000000);
+
+	if (sorpwr & 0x00000001) {
+		nvkm_mask(device, 0x61c004 + soff, 0x80000001, 0x80000001);
+	}
+}
+
+static void
+nv50_disp_update_sppll1(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	bool used = false;
+	int sor;
+
+	if (!nv50_disp_dptmds_war(device))
+		return;
+
+	for (sor = 0; sor < disp->func->sor.nr; sor++) {
+		u32 clksor = nvkm_rd32(device, 0x614300 + (sor * 0x800));
+		switch (clksor & 0x03000000) {
+		case 0x02000000:
+		case 0x03000000:
+			used = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	if (used)
+		return;
+
+	nvkm_mask(device, 0x00e840, 0x80000000, 0x00000000);
+}
+
 static void
 nv50_disp_intr_unk10_0(struct nv50_disp *disp, int head)
 {
@@ -683,6 +807,8 @@ nv50_disp_intr_unk20_2(struct nv50_disp *disp, int head)
 
 	nvkm_mask(device, hreg, 0x0000000f, hval);
 	nvkm_mask(device, oreg, mask, oval);
+
+	nv50_disp_dptmds_war_2(disp, &outp->info);
 }
 
 /* If programming a TMDS output on a SOR that can also be configured for
@@ -724,6 +850,7 @@ nv50_disp_intr_unk40_0(struct nv50_disp *disp, int head)
 
 	if (outp->info.location == 0 && outp->info.type == DCB_OUTPUT_TMDS)
 		nv50_disp_intr_unk40_0_tmds(disp, &outp->info);
+	nv50_disp_dptmds_war_3(disp, &outp->info);
 }
 
 void
@@ -771,6 +898,7 @@ nv50_disp_intr_supervisor(struct work_struct *work)
 				continue;
 			nv50_disp_intr_unk40_0(disp, head);
 		}
+		nv50_disp_update_sppll1(disp);
 	}
 
 	nvkm_wr32(device, 0x610030, 0x80000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index aecebd8717e5..1e1de6bfe85a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -68,6 +68,7 @@ struct nv50_disp_func_outp {
 
 struct nv50_disp_func {
 	void (*intr)(struct nv50_disp *);
+	void (*intr_error)(struct nv50_disp *, int chid);
 
 	const struct nvkm_event_func *uevent;
 	void (*super)(struct work_struct *);
@@ -114,4 +115,5 @@ void gf119_disp_vblank_init(struct nv50_disp *, int);
 void gf119_disp_vblank_fini(struct nv50_disp *, int);
 void gf119_disp_intr(struct nv50_disp *);
 void gf119_disp_intr_supervisor(struct work_struct *);
+void gf119_disp_intr_error(struct nv50_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
index e9067ba4e179..4e983f6d7032 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
@@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int);
 int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
 int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
+int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int);
 
-int  gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
-		      struct nvkm_output **);
+int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
+int gm107_sor_dp_pattern(struct nvkm_output_dp *, int);
+
+int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
index 2e2dc0641ef2..2f0220b39f34 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
@@ -80,7 +80,7 @@ gk104_disp_ovly_mthd_base = {
 	}
 };
 
-static const struct nv50_disp_chan_mthd
+const struct nv50_disp_chan_mthd
 gk104_disp_ovly_chan_mthd = {
 	.name = "Overlay",
 	.addr = 0x001000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
new file mode 100644
index 000000000000..97e2dd2d908e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_dmac_oclass
+gp104_disp_ovly_oclass = {
+	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_ovly_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gk104_disp_ovly_chan_mthd,
+	.chid = 5,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
new file mode 100644
index 000000000000..ac8fdd728ec6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gp100_disp_root = {
+	.init = gf119_disp_root_init,
+	.fini = gf119_disp_root_fini,
+	.dmac = {
+		&gp100_disp_core_oclass,
+		&gk110_disp_base_oclass,
+		&gk104_disp_ovly_oclass,
+	},
+	.pioc = {
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+};
+
+static int
+gp100_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gp100_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gp100_disp_root_oclass = {
+	.base.oclass = GP100_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = gp100_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
new file mode 100644
index 000000000000..8443e04dc626
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gp104_disp_root = {
+	.init = gf119_disp_root_init,
+	.fini = gf119_disp_root_fini,
+	.dmac = {
+		&gp104_disp_core_oclass,
+		&gp104_disp_base_oclass,
+		&gp104_disp_ovly_oclass,
+	},
+	.pioc = {
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+};
+
+static int
+gp104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gp104_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gp104_disp_root_oclass = {
+	.base.oclass = GP104_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = gp104_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index cb449ed8d92c..ad00f1724b72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -40,4 +40,6 @@ extern const struct nvkm_disp_oclass gk104_disp_root_oclass;
 extern const struct nvkm_disp_oclass gk110_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm107_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp104_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index b4b41b135643..49bd5da194e1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -40,8 +40,8 @@ static int
 gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 loff = gf119_sor_loff(outp);
-	nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
+	const u32 soff = gf119_sor_soff(outp);
+	nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern);
 	return 0;
 }
 
@@ -64,7 +64,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 	return 0;
 }
 
-static int
+int
 gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 		     int ln, int vs, int pe, int pc)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
new file mode 100644
index 000000000000..37790b2617c5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "outpdp.h"
+
+int
+gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+{
+	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+	const u32 soff = outp->base.or * 0x800;
+	const u32 data = 0x01010101 * pattern;
+	if (outp->base.info.sorconf.link & 1)
+		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
+	else
+		nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
+	return 0;
+}
+
+static const struct nvkm_output_dp_func
+gm107_sor_dp_func = {
+	.pattern = gm107_sor_dp_pattern,
+	.lnk_pwr = g94_sor_dp_lnk_pwr,
+	.lnk_ctl = gf119_sor_dp_lnk_ctl,
+	.drv_ctl = gf119_sor_dp_drv_ctl,
+};
+
+int
+gm107_sor_dp_new(struct nvkm_disp *disp, int index,
+		 struct dcb_output *dcbE, struct nvkm_output **poutp)
+{
+	return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index 2cfbef9c344f..c44fa7ea672a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -57,19 +57,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
 }
 
 static int
-gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
-{
-	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 soff = gm200_sor_soff(outp);
-	const u32 data = 0x01010101 * pattern;
-	if (outp->base.info.sorconf.link & 1)
-		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
-	else
-		nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
-	return 0;
-}
-
-static int
 gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
@@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 
 static const struct nvkm_output_dp_func
 gm200_sor_dp_func = {
-	.pattern = gm200_sor_dp_pattern,
+	.pattern = gm107_sor_dp_pattern,
 	.lnk_pwr = gm200_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gm200_sor_dp_drv_ctl,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 65e5d291ecda..98651a43bc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -13,6 +13,7 @@ nvkm-y += nvkm/engine/fifo/gk20a.o
 nvkm-y += nvkm/engine/fifo/gm107.o
 nvkm-y += nvkm/engine/fifo/gm200.o
 nvkm-y += nvkm/engine/fifo/gm20b.o
+nvkm-y += nvkm/engine/fifo/gp100.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
@@ -31,3 +32,4 @@ nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
 nvkm-y += nvkm/engine/fifo/gpfifogk110.o
 nvkm-y += nvkm/engine/fifo/gpfifogm200.o
+nvkm-y += nvkm/engine/fifo/gpfifogp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index e06f4d46f802..230f64e5f731 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -27,4 +27,5 @@ int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
 extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gp100_fifo_gpfifo_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 743f3a189f28..103c0afaaa6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -329,7 +329,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	}
 
 	if (eu == NULL) {
-		enum nvkm_devidx engidx = nvkm_top_fault(device->top, unit);
+		enum nvkm_devidx engidx = nvkm_top_fault(device, unit);
 		if (engidx < NVKM_SUBDEV_NR) {
 			const char *src = nvkm_subdev_name[engidx];
 			char *dst = en;
@@ -589,7 +589,6 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 	struct gk104_fifo *fifo = gk104_fifo(base);
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_top *top = device->top;
 	int engn, runl, pbid, ret, i, j;
 	enum nvkm_devidx engidx;
 	u32 *map;
@@ -608,7 +607,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 
 	/* Determine runlist configuration from topology device info. */
 	i = 0;
-	while ((int)(engidx = nvkm_top_engine(top, i++, &runl, &engn)) >= 0) {
+	while ((int)(engidx = nvkm_top_engine(device, i++, &runl, &engn)) >= 0) {
 		/* Determine which PBDMA handles requests for this engine. */
 		for (j = 0, pbid = -1; j < fifo->pbdma_nr; j++) {
 			if (map[j] & (1 << runl)) {
@@ -617,8 +616,8 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 			}
 		}
 
-		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d\n",
-			   engn, runl, pbid);
+		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d (%s)\n",
+			   engn, runl, pbid, nvkm_subdev_name[engidx]);
 
 		fifo->engine[engn].engine = nvkm_device_engine(device, engidx);
 		fifo->engine[engn].runl = runl;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
new file mode 100644
index 000000000000..eff83f7fb705
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "gk104.h"
+#include "changk104.h"
+
+static const struct nvkm_enum
+gp100_fifo_fault_engine[] = {
+	{ 0x01, "DISPLAY" },
+	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
+	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
+	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
+	{ 0x06, "HOST0" },
+	{ 0x07, "HOST1" },
+	{ 0x08, "HOST2" },
+	{ 0x09, "HOST3" },
+	{ 0x0a, "HOST4" },
+	{ 0x0b, "HOST5" },
+	{ 0x0c, "HOST6" },
+	{ 0x0d, "HOST7" },
+	{ 0x0e, "HOST8" },
+	{ 0x0f, "HOST9" },
+	{ 0x10, "HOST10" },
+	{ 0x13, "PERF" },
+	{ 0x17, "PMU" },
+	{ 0x18, "PTP" },
+	{ 0x1f, "PHYSICAL" },
+	{}
+};
+
+static const struct gk104_fifo_func
+gp100_fifo = {
+	.fault.engine = gp100_fifo_fault_engine,
+	.fault.reason = gk104_fifo_fault_reason,
+	.fault.hubclient = gk104_fifo_fault_hubclient,
+	.fault.gpcclient = gk104_fifo_fault_gpcclient,
+	.chan = {
+		&gp100_fifo_gpfifo_oclass,
+		NULL
+	},
+};
+
+int
+gp100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&gp100_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
new file mode 100644
index 000000000000..1530a9217aea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "changk104.h"
+
+#include <nvif/class.h>
+
+const struct nvkm_fifo_chan_oclass
+gp100_fifo_gpfifo_oclass = {
+	.base.oclass = PASCAL_CHANNEL_GPFIFO_A,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = gk104_fifo_gpfifo_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 290ed0db8047..f1c494182248 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -31,6 +31,7 @@ nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gm107.o
 nvkm-y += nvkm/engine/gr/gm200.o
 nvkm-y += nvkm/engine/gr/gm20b.o
+nvkm-y += nvkm/engine/gr/gp100.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
 nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -48,3 +49,4 @@ nvkm-y += nvkm/engine/gr/ctxgk20a.o
 nvkm-y += nvkm/engine/gr/ctxgm107.o
 nvkm-y += nvkm/engine/gr/ctxgm200.o
 nvkm-y += nvkm/engine/gr/ctxgm20b.o
+nvkm-y += nvkm/engine/gr/ctxgp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index b02d8f50ea6a..bc77eea351a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1240,7 +1240,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -1264,7 +1264,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index ac895edce164..52048b5a5274 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -101,6 +101,8 @@ void gm200_grctx_generate_405b60(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gm20b_grctx;
 
+extern const struct gf100_grctx_func gp100_grctx;
+
 /* context init value lists */
 
 extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index f521de11a299..c925ade5880e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -226,7 +226,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -253,7 +253,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 const struct gf100_grctx_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 9ba337778ef5..c46b3fdf7203 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -950,7 +950,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -979,7 +979,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
 	nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
new file mode 100644
index 000000000000..3d1ae7ddf7dd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "ctxgf100.h"
+
+#include <subdev/fb.h>
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+static void
+gp100_grctx_generate_pagepool(struct gf100_grctx *info)
+{
+	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
+	const u32 access = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS;
+	const int s = 8;
+	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), access);
+	mmio_refn(info, 0x40800c, 0x00000000, s, b);
+	mmio_wr32(info, 0x408010, 0x80000000);
+	mmio_refn(info, 0x419004, 0x00000000, s, b);
+	mmio_wr32(info, 0x419008, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_attrib(struct gf100_grctx *info)
+{
+	struct gf100_gr *gr = info->gr;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	const u32  alpha = grctx->alpha_nr;
+	const u32 attrib = grctx->attrib_nr;
+	const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
+	const u32   size = roundup(gr->tpc_total * pertpc, 0x80);
+	const u32 access = NV_MEM_ACCESS_RW;
+	const int s = 12;
+	const int b = mmio_vram(info, size, (1 << s), access);
+	const int max_batches = 0xffff;
+	u32 ao = 0;
+	u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
+	int gpc, ppc, n = 0;
+
+	mmio_refn(info, 0x418810, 0x80000000, s, b);
+	mmio_refn(info, 0x419848, 0x10000000, s, b);
+	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
+	mmio_refn(info, 0x419b00, 0x00000000, s, b);
+	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
+	mmio_wr32(info, 0x405830, attrib);
+	mmio_wr32(info, 0x40585c, alpha);
+	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 u = 0x418ea0 + (n * 0x04);
+			const u32 o = PPC_UNIT(gpc, ppc, 0);
+			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+				continue;
+			mmio_wr32(info, o + 0xc0, bs);
+			mmio_wr32(info, o + 0xf4, bo);
+			mmio_wr32(info, o + 0xf0, bs);
+			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, o + 0xe4, as);
+			mmio_wr32(info, o + 0xf8, ao);
+			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, u, bs);
+		}
+	}
+
+	mmio_wr32(info, 0x418eec, 0x00000000);
+	mmio_wr32(info, 0x41befc, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_405b60(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
+	u32 dist[TPC_MAX / 4] = {};
+	u32 gpcs[GPC_MAX * 2] = {};
+	u8  tpcnr[GPC_MAX];
+	int tpc, gpc, i;
+
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+
+	/* won't result in the same distribution as the binary driver where
+	 * some of the gpcs have more tpcs than others, but this shall do
+	 * for the moment.  the code for earlier gpus has this issue too.
+	 */
+	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while(!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+		gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
+	}
+
+	for (i = 0; i < dist_nr; i++)
+		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
+	for (i = 0; i < gr->gpc_nr * 2; i++)
+		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
+}
+
+static void
+gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 idle_timeout, tmp;
+	int i;
+
+	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+
+	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
+
+	grctx->pagepool(info);
+	grctx->bundle(info);
+	grctx->attrib(info);
+	grctx->unkn(gr);
+
+	gm200_grctx_generate_tpcid(gr);
+	gf100_grctx_generate_r406028(gr);
+	gk104_grctx_generate_r418bb8(gr);
+
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000000);
+
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+
+	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)
+		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
+	nvkm_wr32(device, 0x4041c4, tmp);
+
+	gp100_grctx_generate_405b60(gr);
+
+	gf100_gr_icmd(gr, gr->fuc_bundle);
+	nvkm_wr32(device, 0x404154, idle_timeout);
+	gf100_gr_mthd(gr, gr->fuc_method);
+}
+
+const struct gf100_grctx_func
+gp100_grctx = {
+	.main  = gp100_grctx_generate_main,
+	.unkn  = gk104_grctx_generate_unkn,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x1080,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib = gp100_grctx_generate_attrib,
+	.attrib_nr_max = 0x660,
+	.attrib_nr = 0x440,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 9513badb8220..157919c788e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -949,22 +949,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
 }
 
 static const struct nvkm_enum gf100_mp_warp_error[] = {
-	{ 0x00, "NO_ERROR" },
-	{ 0x01, "STACK_MISMATCH" },
+	{ 0x01, "STACK_ERROR" },
+	{ 0x02, "API_STACK_ERROR" },
+	{ 0x03, "RET_EMPTY_STACK_ERROR" },
+	{ 0x04, "PC_WRAP" },
 	{ 0x05, "MISALIGNED_PC" },
-	{ 0x08, "MISALIGNED_GPR" },
-	{ 0x09, "INVALID_OPCODE" },
-	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
-	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
-	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+	{ 0x06, "PC_OVERFLOW" },
+	{ 0x07, "MISALIGNED_IMMC_ADDR" },
+	{ 0x08, "MISALIGNED_REG" },
+	{ 0x09, "ILLEGAL_INSTR_ENCODING" },
+	{ 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
+	{ 0x0b, "ILLEGAL_INSTR_PARAM" },
+	{ 0x0c, "INVALID_CONST_ADDR" },
+	{ 0x0d, "OOR_REG" },
+	{ 0x0e, "OOR_ADDR" },
+	{ 0x0f, "MISALIGNED_ADDR" },
 	{ 0x10, "INVALID_ADDR_SPACE" },
-	{ 0x11, "INVALID_PARAM" },
+	{ 0x11, "ILLEGAL_INSTR_PARAM2" },
+	{ 0x12, "INVALID_CONST_ADDR_LDC" },
+	{ 0x13, "GEOMETRY_SM_ERROR" },
+	{ 0x14, "DIVERGENT" },
+	{ 0x15, "WARP_EXIT" },
 	{}
 };
 
 static const struct nvkm_bitfield gf100_mp_global_error[] = {
+	{ 0x00000001, "SM_TO_SM_FAULT" },
+	{ 0x00000002, "L1_ERROR" },
 	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
-	{ 0x00000008, "OUT_OF_STACK_SPACE" },
+	{ 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
+	{ 0x00000010, "BPT_INT" },
+	{ 0x00000020, "BPT_PAUSE" },
+	{ 0x00000040, "SINGLE_STEP_COMPLETE" },
+	{ 0x20000000, "ECC_SEC_ERROR" },
+	{ 0x40000000, "ECC_DED_ERROR" },
+	{ 0x80000000, "TIMEOUT" },
 	{}
 };
 
@@ -1438,24 +1457,30 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
 	int i;
+	int ret = 0;
 
 	if (gr->firmware) {
 		/* load fuc microcode */
-		nvkm_mc_unk260(device->mc, 0);
+		nvkm_mc_unk260(device, 0);
 
 		/* securely-managed falcons must be reset using secure boot */
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
 		else
 			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
 					 &gr->fuc409d);
+		if (ret)
+			return ret;
+
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
 		else
 			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
 					 &gr->fuc41ad);
+		if (ret)
+			return ret;
 
-		nvkm_mc_unk260(device->mc, 1);
+		nvkm_mc_unk260(device, 1);
 
 		/* start both of them running */
 		nvkm_wr32(device, 0x409840, 0xffffffff);
@@ -1557,7 +1582,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	}
 
 	/* load HUB microcode */
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 	nvkm_wr32(device, 0x4091c0, 0x01000000);
 	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
 		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
@@ -1580,7 +1605,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 			nvkm_wr32(device, 0x41a188, i >> 6);
 		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
 	}
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
 	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 2b98abdb9270..268b8d60ff73 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -292,4 +292,6 @@ extern const struct gf100_gr_init gm107_gr_init_l1c_0[];
 extern const struct gf100_gr_init gm107_gr_init_wwdx_0[];
 extern const struct gf100_gr_init gm107_gr_init_cbm_0[];
 void gm107_gr_init_bios(struct gf100_gr *);
+
+void gm200_gr_init_gpc_mmu(struct gf100_gr *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 4ca8ed15191c..de8b806b88fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -361,6 +361,5 @@ gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
 	if (ret)
 		return ret;
 
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 4dfa4513bb6c..6435f1257572 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -38,7 +38,7 @@ gm200_gr_rops(struct gf100_gr *gr)
 	return nvkm_rd32(gr->base.engine.subdev.device, 0x12006c);
 }
 
-static void
+void
 gm200_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
new file mode 100644
index 000000000000..26ad79def0ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+/*******************************************************************************
+ * PGRAPH engine/subdev functions
+ ******************************************************************************/
+
+static void
+gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/*XXX: otherwise identical to gm200 aside from mask.. do everywhere? */
+	const u32 fbp_count = nvkm_rd32(device, 0x12006c) & 0x0000000f;
+	nvkm_mask(device, 0x408850, 0x0000000f, fbp_count); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, fbp_count); /* crop */
+}
+
+static int
+gp100_gr_init(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+	u32 data[TPC_MAX / 8] = {};
+	u8  tpcnr[GPC_MAX];
+	int gpc, tpc, rop;
+	int i;
+
+	gr->func->init_gpc_mmu(gr);
+
+	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
+
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
+
+	memset(data, 0x00, sizeof(data));
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+							 gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+
+	gr->func->init_rop_active_fbps(gr);
+
+	nvkm_wr32(device, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000f0002);
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+	nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
+	nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
+
+	gr->func->init_ppc_exceptions(gr);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
+		}
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+	}
+
+	for (rop = 0; rop < gr->rop_nr; rop++) {
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
+
+	gf100_gr_zbc_init(gr);
+
+	return gf100_gr_init_ctxctl(gr);
+}
+
+static const struct gf100_gr_func
+gp100_gr = {
+	.init = gp100_gr_init,
+	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.rops = gm200_gr_rops,
+	.ppc_nr = 2,
+	.grctx = &gp100_grctx,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, PASCAL_A, &gf100_fermi },
+		{ -1, -1, PASCAL_COMPUTE_A },
+		{}
+	}
+};
+
+int
+gp100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gp100_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
index e15b9627b07e..f3c30b2a788e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
@@ -26,6 +26,49 @@
 #include <subdev/bios.h>
 #include <subdev/bios/bmp.h>
 #include <subdev/bios/bit.h>
+#include <subdev/bios/image.h>
+
+static bool
+nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
+{
+	u32 p = *addr;
+
+	if (*addr > bios->image0_size && bios->imaged_addr) {
+		*addr -= bios->image0_size;
+		*addr += bios->imaged_addr;
+	}
+
+	if (unlikely(*addr + size >= bios->size)) {
+		nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
+		return false;
+	}
+
+	return true;
+}
+
+u8
+nvbios_rd08(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 1)))
+		return bios->data[addr];
+	return 0x00;
+}
+
+u16
+nvbios_rd16(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 2)))
+		return get_unaligned_le16(&bios->data[addr]);
+	return 0x0000;
+}
+
+u32
+nvbios_rd32(struct nvkm_bios *bios, u32 addr)
+{
+	if (likely(nvbios_addr(bios, &addr, 4)))
+		return get_unaligned_le32(&bios->data[addr]);
+	return 0x00000000;
+}
 
 u8
 nvbios_checksum(const u8 *data, int size)
@@ -100,8 +143,9 @@ int
 nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 {
 	struct nvkm_bios *bios;
+	struct nvbios_image image;
 	struct bit_entry bit_i;
-	int ret;
+	int ret, idx = 0;
 
 	if (!(bios = *pbios = kzalloc(sizeof(*bios), GFP_KERNEL)))
 		return -ENOMEM;
@@ -111,6 +155,19 @@ nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 	if (ret)
 		return ret;
 
+	/* Some tables have weird pointers that need adjustment before
+	 * they're dereferenced.  I'm not entirely sure why...
+	 */
+	if (nvbios_image(bios, idx++, &image)) {
+		bios->image0_size = image.size;
+		while (nvbios_image(bios, idx++, &image)) {
+			if (image.type == 0xe0) {
+				bios->imaged_addr = image.base;
+				break;
+			}
+		}
+	}
+
 	/* detect type of vbios we're dealing with */
 	bios->bmp_offset = nvbios_findstr(bios->data, bios->size,
 					  "\xff\x7f""NV\0", 5);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
index a5e92135cd77..9efb1b48cd54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
@@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 {
 	u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len);
 	if (data) {
-		info->match     = nvbios_rd16(bios, data + 0x00);
+		info->proto     = nvbios_rd08(bios, data + 0x00);
+		info->flags     = nvbios_rd16(bios, data + 0x01);
 		info->clkcmp[0] = nvbios_rd16(bios, data + 0x02);
 		info->clkcmp[1] = nvbios_rd16(bios, data + 0x04);
 	}
@@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 }
 
 u16
-nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type,
+nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags,
 		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info)
 {
 	u16 data, idx = 0;
 	while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) {
-		if (info->match == type)
+		if ((info->proto == proto || info->proto == 0xff) &&
+		    (info->flags == flags))
 			break;
 	}
 	return data;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 05332476354a..d89e78c4e689 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -40,6 +40,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 				case 0x30:
 				case 0x40:
 				case 0x41:
+				case 0x42:
 					*hdr = nvbios_rd08(bios, data + 0x01);
 					*len = nvbios_rd08(bios, data + 0x02);
 					*cnt = nvbios_rd08(bios, data + 0x03);
@@ -70,6 +71,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			*hdr = nvbios_rd08(bios, data + 0x04);
 			*cnt = 0;
 			*len = 0;
@@ -109,6 +111,7 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			info->flags     = nvbios_rd08(bios, data + 0x04);
 			info->script[0] = nvbios_rd16(bios, data + 0x05);
 			info->script[1] = nvbios_rd16(bios, data + 0x07);
@@ -180,6 +183,11 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 			info->pe    = nvbios_rd08(bios, data + 0x02);
 			info->tx_pu = nvbios_rd08(bios, data + 0x03);
 			break;
+		case 0x42:
+			info->dc    = nvbios_rd08(bios, data + 0x00);
+			info->pe    = nvbios_rd08(bios, data + 0x01);
+			info->tx_pu = nvbios_rd08(bios, data + 0x02);
+			break;
 		default:
 			data = 0x0000;
 			break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
index 74b14cf09308..1dbff7aeafec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
@@ -68,11 +68,16 @@ nvbios_imagen(struct nvkm_bios *bios, struct nvbios_image *image)
 bool
 nvbios_image(struct nvkm_bios *bios, int idx, struct nvbios_image *image)
 {
+	u32 imaged_addr = bios->imaged_addr;
 	memset(image, 0x00, sizeof(*image));
+	bios->imaged_addr = 0;
 	do {
 		image->base += image->size;
-		if (image->last || !nvbios_imagen(bios, image))
+		if (image->last || !nvbios_imagen(bios, image)) {
+			bios->imaged_addr = imaged_addr;
 			return false;
+		}
 	} while(idx--);
+	bios->imaged_addr = imaged_addr;
 	return true;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
index 91a7dc56e406..2ca23a9157ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
@@ -77,15 +77,17 @@ g84_pll_mapping[] = {
 	{}
 };
 
-static u16
+static u32
 pll_limits_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_C;
-	u16 data = 0x0000;
+	u32 data = 0x0000;
 
 	if (!bit_entry(bios, 'C', &bit_C)) {
 		if (bit_C.version == 1 && bit_C.length >= 10)
 			data = nvbios_rd16(bios, bit_C.offset + 8);
+		if (bit_C.version == 2 && bit_C.length >= 4)
+			data = nvbios_rd32(bios, bit_C.offset + 0);
 		if (data) {
 			*ver = nvbios_rd08(bios, data + 0);
 			*hdr = nvbios_rd08(bios, data + 1);
@@ -137,12 +139,12 @@ pll_map(struct nvkm_bios *bios)
 	}
 }
 
-static u16
+static u32
 pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -160,7 +162,7 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->reg == reg && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*type = map->type;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -179,12 +181,12 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	return 0x0000;
 }
 
-static u16
+static u32
 pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -202,7 +204,7 @@ pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->type == type && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*reg = map->reg;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -228,7 +230,7 @@ nvbios_pll_parse(struct nvkm_bios *bios, u32 type, struct nvbios_pll *info)
 	struct nvkm_device *device = subdev->device;
 	u8  ver, len;
 	u32 reg = type;
-	u16 data;
+	u32 data;
 
 	if (type > PLL_MAX) {
 		reg  = type;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
index c268e5afe852..b4a308f3cf7b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
@@ -26,21 +26,6 @@
 #include <subdev/bios/image.h>
 #include <subdev/bios/pmu.h>
 
-static u32
-weirdo_pointer(struct nvkm_bios *bios, u32 data)
-{
-	struct nvbios_image image;
-	int idx = 0;
-	if (nvbios_image(bios, idx++, &image)) {
-		data -= image.size;
-		while (nvbios_image(bios, idx++, &image)) {
-			if (image.type == 0xe0)
-				return image.base + data;
-		}
-	}
-	return 0;
-}
-
 u32
 nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
@@ -50,7 +35,7 @@ nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 	if (!bit_entry(bios, 'p', &bit_p)) {
 		if (bit_p.version == 2 && bit_p.length >= 4)
 			data = nvbios_rd32(bios, bit_p.offset + 0x00);
-		if ((data = weirdo_pointer(bios, data))) {
+		if (data) {
 			*ver = nvbios_rd08(bios, data + 0x00); /* maybe? */
 			*hdr = nvbios_rd08(bios, data + 0x01);
 			*len = nvbios_rd08(bios, data + 0x02);
@@ -97,8 +82,7 @@ nvbios_pmuRm(struct nvkm_bios *bios, u8 type, struct nvbios_pmuR *info)
 	u32 data;
 	memset(info, 0x00, sizeof(*info));
 	while ((data = nvbios_pmuEp(bios, idx++, &ver, &hdr, &pmuE))) {
-		if ( pmuE.type == type &&
-		    (data = weirdo_pointer(bios, pmuE.data))) {
+		if (pmuE.type == type && (data = pmuE.data)) {
 			info->init_addr_pmu = nvbios_rd32(bios, data + 0x08);
 			info->args_addr_pmu = nvbios_rd32(bios, data + 0x0c);
 			info->boot_addr     = data + 0x30;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index d0ae7454764e..b57c370c725d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -30,11 +30,11 @@ nvbios_rammapTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr,
 		u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 rammap = 0x0000;
+	u32 rammap = 0x0000;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			rammap = nvbios_rd16(bios, bit_P.offset + 4);
+			rammap = nvbios_rd32(bios, bit_P.offset + 4);
 
 		if (rammap) {
 			*ver = nvbios_rd08(bios, rammap + 0);
@@ -61,7 +61,7 @@ nvbios_rammapEe(struct nvkm_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (rammap && idx < *cnt) {
 		rammap = rammap + *hdr + (idx * (*len + (snr * ssz)));
 		*hdr = *len;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
index 78c449b417b7..89d5543118cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
@@ -99,7 +99,7 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 	u32 ssrc = nvkm_rd32(device, dsrc + (doff * 4));
-	u32 sctl = nvkm_rd32(device, dctl + (doff * 4));
+	u32 sclk, sctl, sdiv = 2;
 
 	switch (ssrc & 0x00000003) {
 	case 0:
@@ -109,13 +109,21 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 	case 2:
 		return 100000;
 	case 3:
-		if (sctl & 0x80000000) {
-			u32 sclk = read_vco(clk, dsrc + (doff * 4));
-			u32 sdiv = (sctl & 0x0000003f) + 2;
-			return (sclk * 2) / sdiv;
+		sclk = read_vco(clk, dsrc + (doff * 4));
+
+		/* Memclk has doff of 0 despite its alt. location */
+		if (doff <= 2) {
+			sctl = nvkm_rd32(device, dctl + (doff * 4));
+
+			if (sctl & 0x80000000) {
+				if (ssrc & 0x100)
+					sctl >>= 8;
+
+				sdiv = (sctl & 0x3f) + 2;
+			}
 		}
 
-		return read_vco(clk, dsrc + (doff * 4));
+		return (sclk * 2) / sdiv;
 	default:
 		return 0;
 	}
@@ -366,11 +374,17 @@ gf100_clk_prog_2(struct gf100_clk *clk, int idx)
 		if (info->coef) {
 			nvkm_wr32(device, addr + 0x04, info->coef);
 			nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+			/* Test PLL lock */
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 			nvkm_msec(device, 2000,
 				if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 					break;
 			);
-			nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+			/* Enable sync mode */
+			nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
index 975c401bccab..06bc0d2d6ae1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
@@ -393,11 +393,17 @@ gk104_clk_prog_2(struct gk104_clk *clk, int idx)
 	if (info->coef) {
 		nvkm_wr32(device, addr + 0x04, info->coef);
 		nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+		/* Test PLL lock */
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 		nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 				break;
 		);
-		nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+		/* Enable sync mode */
+		nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
index 5f0ee24e31b8..218893e3e5f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -28,69 +28,6 @@
 #include <core/tegra.h>
 #include <subdev/timer.h>
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
-
-#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
-#define GPCPLL_CFG_ENABLE	BIT(0)
-#define GPCPLL_CFG_IDDQ		BIT(1)
-#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
-#define GPCPLL_CFG_LOCK		BIT(17)
-
-#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
-#define GPCPLL_COEFF_M_SHIFT	0
-#define GPCPLL_COEFF_M_WIDTH	8
-#define GPCPLL_COEFF_N_SHIFT	8
-#define GPCPLL_COEFF_N_WIDTH	8
-#define GPCPLL_COEFF_P_SHIFT	16
-#define GPCPLL_COEFF_P_WIDTH	6
-
-#define GPCPLL_CFG2			(SYS_GPCPLL_CFG_BASE + 0xc)
-#define GPCPLL_CFG2_SETUP2_SHIFT	16
-#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
-
-#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
-#define GPCPLL_CFG3_PLL_STEPB_SHIFT	16
-
-#define GPC_BCASE_GPCPLL_CFG_BASE		0x00132800
-#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
-#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
-#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
-#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
-
-#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
-#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
-
-#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
-#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
-#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
-#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
-#define GPC2CLK_OUT_VCODIV_WIDTH	6
-#define GPC2CLK_OUT_VCODIV_SHIFT	8
-#define GPC2CLK_OUT_VCODIV1		0
-#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
-					GPC2CLK_OUT_VCODIV_SHIFT)
-#define GPC2CLK_OUT_BYPDIV_WIDTH	6
-#define GPC2CLK_OUT_BYPDIV_SHIFT	0
-#define GPC2CLK_OUT_BYPDIV31		0x3c
-#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
-		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
-		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
-#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
-		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
-		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
-
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCASE_GPCPLL_CFG_BASE + 0xa0)
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
-	    (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
-
 static const u8 _pl_to_div[] = {
 /* PL:   0, 1, 2, 3, 4, 5, 6,  7,  8,  9, 10, 11, 12, 13, 14 */
 /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32,
@@ -124,7 +61,7 @@ static const struct gk20a_clk_pllg_params gk20a_pllg_params = {
 	.min_pl = 1, .max_pl = 32,
 };
 
-static void
+void
 gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
@@ -136,20 +73,33 @@ gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 	pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
 }
 
-static u32
-gk20a_pllg_calc_rate(struct gk20a_clk *clk)
+void
+gk20a_pllg_write_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
+
+	val = (pll->m & MASK(GPCPLL_COEFF_M_WIDTH)) << GPCPLL_COEFF_M_SHIFT;
+	val |= (pll->n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	val |= (pll->pl & MASK(GPCPLL_COEFF_P_WIDTH)) << GPCPLL_COEFF_P_SHIFT;
+	nvkm_wr32(device, GPCPLL_COEFF, val);
+}
+
+u32
+gk20a_pllg_calc_rate(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	u32 rate;
 	u32 divider;
 
-	rate = clk->parent_rate * clk->pll.n;
-	divider = clk->pll.m * clk->pl_to_div(clk->pll.pl);
+	rate = clk->parent_rate * pll->n;
+	divider = pll->m * clk->pl_to_div(pll->pl);
 
 	return rate / divider / 2;
 }
 
-static int
-gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
+int
+gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate,
+		    struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	u32 target_clk_f, ref_clk_f, target_freq;
@@ -163,16 +113,13 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 	target_clk_f = rate * 2 / KHZ;
 	ref_clk_f = clk->parent_rate / KHZ;
 
-	max_vco_f = clk->params->max_vco;
+	target_vco_f = target_clk_f + target_clk_f / 50;
+	max_vco_f = max(clk->params->max_vco, target_vco_f);
 	min_vco_f = clk->params->min_vco;
 	best_m = clk->params->max_m;
 	best_n = clk->params->min_n;
 	best_pl = clk->params->min_pl;
 
-	target_vco_f = target_clk_f + target_clk_f / 50;
-	if (max_vco_f < target_vco_f)
-		max_vco_f = target_vco_f;
-
 	/* min_pl <= high_pl <= max_pl */
 	high_pl = (max_vco_f + target_vco_f - 1) / target_vco_f;
 	high_pl = min(high_pl, clk->params->max_pl);
@@ -195,9 +142,7 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 		target_vco_f = target_clk_f * clk->pl_to_div(pl);
 
 		for (m = clk->params->min_m; m <= clk->params->max_m; m++) {
-			u32 u_f, vco_f;
-
-			u_f = ref_clk_f / m;
+			u32 u_f = ref_clk_f / m;
 
 			if (u_f < clk->params->min_u)
 				break;
@@ -211,6 +156,8 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 				break;
 
 			for (; n <= n2; n++) {
+				u32 vco_f;
+
 				if (n < clk->params->min_n)
 					continue;
 				if (n > clk->params->max_n)
@@ -247,16 +194,16 @@ found_match:
 			   "no best match for target @ %dMHz on gpc_pll",
 			   target_clk_f / KHZ);
 
-	clk->pll.m = best_m;
-	clk->pll.n = best_n;
-	clk->pll.pl = best_pl;
+	pll->m = best_m;
+	pll->n = best_n;
+	pll->pl = best_pl;
 
-	target_freq = gk20a_pllg_calc_rate(clk);
+	target_freq = gk20a_pllg_calc_rate(clk, pll);
 
 	nvkm_debug(subdev,
-		   "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n",
-		   target_freq / MHZ, clk->pll.m, clk->pll.n, clk->pll.pl,
-		   clk->pl_to_div(clk->pll.pl));
+		   "actual target freq %d KHz, M %d, N %d, PL %d(div%d)\n",
+		   target_freq / KHZ, pll->m, pll->n, pll->pl,
+		   clk->pl_to_div(pll->pl));
 	return 0;
 }
 
@@ -265,45 +212,36 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val;
-	int ramp_timeout;
+	struct gk20a_pll pll;
+	int ret = 0;
 
 	/* get old coefficients */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
+	gk20a_pllg_read_mnp(clk, &pll);
 	/* do nothing if NDIV is the same */
-	if (n == ((val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH)))
+	if (n == pll.n)
 		return 0;
 
-	/* setup */
-	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
-		0x2b << GPCPLL_CFG2_PLL_STEPA_SHIFT);
-	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
-		0xb << GPCPLL_CFG3_PLL_STEPB_SHIFT);
-
 	/* pll slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
 
 	/* new ndiv ready for ramp */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
-	val &= ~(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT);
-	val |= (n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	pll.n = n;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, &pll);
 
 	/* dynamic ramp to new ndiv */
-	val = nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
-	val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_NDIV_SLOWDOWN, val);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
 
-	for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) {
-		udelay(1);
-		val = nvkm_rd32(device, GPC_BCAST_NDIV_SLOWDOWN_DEBUG);
-		if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK)
-			break;
-	}
+	/* wait for ramping to complete */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
 
 	/* exit slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
@@ -311,21 +249,35 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
 	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
 
-	if (ramp_timeout <= 0) {
-		nvkm_error(subdev, "gpcpll dynamic ramp timeout\n");
-		return -ETIMEDOUT;
-	}
-
-	return 0;
+	return ret;
 }
 
-static void
+static int
 gk20a_pllg_enable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
 
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
 	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* enable lock detection */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
+		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+	}
+
+	/* wait for lock */
+	if (nvkm_wait_usec(device, 300, GPCPLL_CFG, GPCPLL_CFG_LOCK,
+			   GPCPLL_CFG_LOCK) < 0)
+		return -ETIMEDOUT;
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
 }
 
 static void
@@ -333,117 +285,81 @@ gk20a_pllg_disable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
 	nvkm_rd32(device, GPCPLL_CFG);
 }
 
 static int
-_gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
+gk20a_pllg_program_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val, cfg;
-	struct gk20a_pll old_pll;
-	u32 n_lo;
-
-	/* get old coefficients */
-	gk20a_pllg_read_mnp(clk, &old_pll);
-
-	/* do NDIV slide if there is no change in M and PL */
-	cfg = nvkm_rd32(device, GPCPLL_CFG);
-	if (allow_slide && clk->pll.m == old_pll.m &&
-	    clk->pll.pl == old_pll.pl && (cfg & GPCPLL_CFG_ENABLE)) {
-		return gk20a_pllg_slide(clk, clk->pll.n);
-	}
-
-	/* slide down to NDIV_LO */
-	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) {
-		int ret;
-
-		n_lo = DIV_ROUND_UP(old_pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
-		ret = gk20a_pllg_slide(clk, n_lo);
+	struct gk20a_pll cur_pll;
+	int ret;
 
-		if (ret)
-			return ret;
-	}
+	gk20a_pllg_read_mnp(clk, &cur_pll);
 
-	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
-		0x2 << GPC2CLK_OUT_VCODIV_SHIFT);
-
-	/* put PLL in bypass before programming it */
-	val = nvkm_rd32(device, SEL_VCO);
-	val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 	udelay(2);
-	nvkm_wr32(device, SEL_VCO, val);
-
-	/* get out from IDDQ */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_IDDQ) {
-		val &= ~GPCPLL_CFG_IDDQ;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-		nvkm_rd32(device, GPCPLL_CFG);
-		udelay(2);
-	}
 
 	gk20a_pllg_disable(clk);
 
-	nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__,
-		   clk->pll.m, clk->pll.n, clk->pll.pl);
-
-	n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco,
-			    clk->parent_rate / KHZ);
-	val = clk->pll.m << GPCPLL_COEFF_M_SHIFT;
-	val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT;
-	val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT;
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, pll);
 
-	gk20a_pllg_enable(clk);
-
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
-		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-	}
-
-	if (nvkm_usec(device, 300,
-		if (nvkm_rd32(device, GPCPLL_CFG) & GPCPLL_CFG_LOCK)
-			break;
-	) < 0)
-		return -ETIMEDOUT;
-
-	/* switch to VCO mode */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
-		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	ret = gk20a_pllg_enable(clk);
+	if (ret)
+		return ret;
 
 	/* restore out divider 1:1 */
-	val = nvkm_rd32(device, GPC2CLK_OUT);
-	if ((val & GPC2CLK_OUT_VCODIV_MASK) !=
-	    (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) {
-		val &= ~GPC2CLK_OUT_VCODIV_MASK;
-		val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT;
-		udelay(2);
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		/* Intentional 2nd write to assure linear divider operation */
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		nvkm_rd32(device, GPC2CLK_OUT);
-	}
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 
-	/* slide up to new NDIV */
-	return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0;
+	return 0;
 }
 
 static int
-gk20a_pllg_program_mnp(struct gk20a_clk *clk)
+gk20a_pllg_program_mnp_slide(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
-	int err;
+	struct gk20a_pll cur_pll;
+	int ret;
 
-	err = _gk20a_pllg_program_mnp(clk, true);
-	if (err)
-		err = _gk20a_pllg_program_mnp(clk, false);
+	if (gk20a_pllg_is_enabled(clk)) {
+		gk20a_pllg_read_mnp(clk, &cur_pll);
+
+		/* just do NDIV slide if there is no change to M and PL */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gk20a_pllg_slide(clk, pll->n);
+
+		/* slide down to current NDIV_LO */
+		cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+		ret = gk20a_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program MNP with the new clock parameters and new NDIV_LO */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+	ret = gk20a_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
 
-	return err;
+	/* slide up to new NDIV */
+	return gk20a_pllg_slide(clk, pll->n);
 }
 
 static struct nvkm_pstate
@@ -546,13 +462,14 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	struct gk20a_clk *clk = gk20a_clk(base);
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
+	struct gk20a_pll pll;
 
 	switch (src) {
 	case nv_clk_src_crystal:
 		return device->crystal;
 	case nv_clk_src_gpc:
-		gk20a_pllg_read_mnp(clk, &clk->pll);
-		return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV;
+		gk20a_pllg_read_mnp(clk, &pll);
+		return gk20a_pllg_calc_rate(clk, &pll) / GK20A_CLK_GPC_MDIV;
 	default:
 		nvkm_error(subdev, "invalid clock source %d\n", src);
 		return -EINVAL;
@@ -565,15 +482,20 @@ gk20a_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
 	struct gk20a_clk *clk = gk20a_clk(base);
 
 	return gk20a_pllg_calc_mnp(clk, cstate->domain[nv_clk_src_gpc] *
-					 GK20A_CLK_GPC_MDIV);
+					 GK20A_CLK_GPC_MDIV, &clk->pll);
 }
 
 int
 gk20a_clk_prog(struct nvkm_clk *base)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
+	int ret;
+
+	ret = gk20a_pllg_program_mnp_slide(clk, &clk->pll);
+	if (ret)
+		ret = gk20a_pllg_program_mnp(clk, &clk->pll);
 
-	return gk20a_pllg_program_mnp(clk);
+	return ret;
 }
 
 void
@@ -581,29 +503,62 @@ gk20a_clk_tidy(struct nvkm_clk *base)
 {
 }
 
+int
+gk20a_clk_setup_slide(struct gk20a_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 step_a, step_b;
+
+	switch (clk->parent_rate) {
+	case 12000000:
+	case 12800000:
+	case 13000000:
+		step_a = 0x2b;
+		step_b = 0x0b;
+		break;
+	case 19200000:
+		step_a = 0x12;
+		step_b = 0x08;
+		break;
+	case 38400000:
+		step_a = 0x04;
+		step_b = 0x05;
+		break;
+	default:
+		nvkm_error(subdev, "invalid parent clock rate %u KHz",
+			   clk->parent_rate / KHZ);
+		return -EINVAL;
+	}
+
+	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
+		step_a << GPCPLL_CFG2_PLL_STEPA_SHIFT);
+	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
+		step_b << GPCPLL_CFG3_PLL_STEPB_SHIFT);
+
+	return 0;
+}
+
 void
 gk20a_clk_fini(struct nvkm_clk *base)
 {
 	struct nvkm_device *device = base->subdev.device;
 	struct gk20a_clk *clk = gk20a_clk(base);
-	u32 val;
 
 	/* slide to VCO min */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_ENABLE) {
+	if (gk20a_pllg_is_enabled(clk)) {
 		struct gk20a_pll pll;
 		u32 n_lo;
 
 		gk20a_pllg_read_mnp(clk, &pll);
-		n_lo = DIV_ROUND_UP(pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
+		n_lo = gk20a_pllg_n_lo(clk, &pll);
 		gk20a_pllg_slide(clk, n_lo);
 	}
 
-	/* put PLL in bypass before disabling it */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
-
 	gk20a_pllg_disable(clk);
+
+	/* set IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 1);
 }
 
 static int
@@ -614,9 +569,18 @@ gk20a_clk_init(struct nvkm_clk *base)
 	struct nvkm_device *device = subdev->device;
 	int ret;
 
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
 		  GPC2CLK_OUT_INIT_VAL);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
 	ret = base->func->prog(&clk->base);
@@ -646,7 +610,7 @@ gk20a_clk = {
 };
 
 int
-_gk20a_clk_ctor(struct nvkm_device *device, int index,
+gk20a_clk_ctor(struct nvkm_device *device, int index,
 		const struct nvkm_clk_func *func,
 		const struct gk20a_clk_pllg_params *params,
 		struct gk20a_clk *clk)
@@ -685,7 +649,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
+	ret = gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
 			      clk);
 
 	clk->pl_to_div = pl_to_div;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
index 13c46740197d..0d1450972162 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
@@ -24,9 +24,79 @@
 #ifndef __NVKM_CLK_GK20A_H__
 #define __NVKM_CLK_GK20A_H__
 
+#define KHZ (1000)
+#define MHZ (KHZ * 1000)
+
+#define MASK(w)	((1 << (w)) - 1)
+
 #define GK20A_CLK_GPC_MDIV 1000
 
 #define SYS_GPCPLL_CFG_BASE	0x00137000
+#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
+#define GPCPLL_CFG_ENABLE	BIT(0)
+#define GPCPLL_CFG_IDDQ		BIT(1)
+#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
+#define GPCPLL_CFG_LOCK		BIT(17)
+
+#define GPCPLL_CFG2		(SYS_GPCPLL_CFG_BASE + 0xc)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
+#define GPCPLL_CFG3_VCO_CTRL_SHIFT		0
+#define GPCPLL_CFG3_VCO_CTRL_WIDTH		9
+#define GPCPLL_CFG3_VCO_CTRL_MASK		\
+	(MASK(GPCPLL_CFG3_VCO_CTRL_WIDTH) << GPCPLL_CFG3_VCO_CTRL_SHIFT)
+#define GPCPLL_CFG3_PLL_STEPB_SHIFT		16
+#define GPCPLL_CFG3_PLL_STEPB_WIDTH		8
+
+#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
+#define GPCPLL_COEFF_M_SHIFT	0
+#define GPCPLL_COEFF_M_WIDTH	8
+#define GPCPLL_COEFF_N_SHIFT	8
+#define GPCPLL_COEFF_N_WIDTH	8
+#define GPCPLL_COEFF_N_MASK	\
+	(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT)
+#define GPCPLL_COEFF_P_SHIFT	16
+#define GPCPLL_COEFF_P_WIDTH	6
+
+#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
+#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
+#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
+#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
+
+#define GPC_BCAST_GPCPLL_CFG_BASE		0x00132800
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCAST_GPCPLL_CFG_BASE + 0xa0)
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
+	(0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
+
+#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
+#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
+
+#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
+#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
+#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
+#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
+#define GPC2CLK_OUT_VCODIV_WIDTH	6
+#define GPC2CLK_OUT_VCODIV_SHIFT	8
+#define GPC2CLK_OUT_VCODIV1		0
+#define GPC2CLK_OUT_VCODIV2		2
+#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
+					GPC2CLK_OUT_VCODIV_SHIFT)
+#define GPC2CLK_OUT_BYPDIV_WIDTH	6
+#define GPC2CLK_OUT_BYPDIV_SHIFT	0
+#define GPC2CLK_OUT_BYPDIV31		0x3c
+#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
+		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
+		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
+#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
+		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
+		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
 
 /* All frequencies in Khz */
 struct gk20a_clk_pllg_params {
@@ -54,7 +124,29 @@ struct gk20a_clk {
 };
 #define gk20a_clk(p) container_of((p), struct gk20a_clk, base)
 
-int _gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
+u32 gk20a_pllg_calc_rate(struct gk20a_clk *, struct gk20a_pll *);
+int gk20a_pllg_calc_mnp(struct gk20a_clk *, unsigned long, struct gk20a_pll *);
+void gk20a_pllg_read_mnp(struct gk20a_clk *, struct gk20a_pll *);
+void gk20a_pllg_write_mnp(struct gk20a_clk *, const struct gk20a_pll *);
+
+static inline bool
+gk20a_pllg_is_enabled(struct gk20a_clk *clk)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
+
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	return val & GPCPLL_CFG_ENABLE;
+}
+
+static inline u32
+gk20a_pllg_n_lo(struct gk20a_clk *clk, struct gk20a_pll *pll)
+{
+	return DIV_ROUND_UP(pll->m * clk->params->min_vco,
+			    clk->parent_rate / KHZ);
+}
+
+int gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
 		    const struct gk20a_clk_pllg_params *, struct gk20a_clk *);
 void gk20a_clk_fini(struct nvkm_clk *);
 int gk20a_clk_read(struct nvkm_clk *, enum nv_clk_src);
@@ -62,4 +154,6 @@ int gk20a_clk_calc(struct nvkm_clk *, struct nvkm_cstate *);
 int gk20a_clk_prog(struct nvkm_clk *);
 void gk20a_clk_tidy(struct nvkm_clk *);
 
+int gk20a_clk_setup_slide(struct gk20a_clk *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
index 71b2bbb61973..b284e949f732 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -21,20 +21,123 @@
  */
 
 #include <subdev/clk.h>
+#include <subdev/volt.h>
+#include <subdev/timer.h>
 #include <core/device.h>
+#include <core/tegra.h>
 
 #include "priv.h"
 #include "gk20a.h"
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
+#define GPCPLL_CFG_SYNC_MODE	BIT(2)
 
 #define BYPASSCTRL_SYS	(SYS_GPCPLL_CFG_BASE + 0x340)
 #define BYPASSCTRL_SYS_GPCPLL_SHIFT	0
 #define BYPASSCTRL_SYS_GPCPLL_WIDTH	1
 
+#define GPCPLL_CFG2_SDM_DIN_SHIFT	0
+#define GPCPLL_CFG2_SDM_DIN_WIDTH	8
+#define GPCPLL_CFG2_SDM_DIN_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_WIDTH) << GPCPLL_CFG2_SDM_DIN_SHIFT)
+#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT	8
+#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH	15
+#define GPCPLL_CFG2_SDM_DIN_NEW_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_NEW_WIDTH) << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_DVFS0	(SYS_GPCPLL_CFG_BASE + 0x10)
+#define GPCPLL_DVFS0_DFS_COEFF_SHIFT	0
+#define GPCPLL_DVFS0_DFS_COEFF_WIDTH	7
+#define GPCPLL_DVFS0_DFS_COEFF_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << GPCPLL_DVFS0_DFS_COEFF_SHIFT)
+#define GPCPLL_DVFS0_DFS_DET_MAX_SHIFT	8
+#define GPCPLL_DVFS0_DFS_DET_MAX_WIDTH	7
+#define GPCPLL_DVFS0_DFS_DET_MAX_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_DET_MAX_WIDTH) << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT)
+
+#define GPCPLL_DVFS1		(SYS_GPCPLL_CFG_BASE + 0x14)
+#define GPCPLL_DVFS1_DFS_EXT_DET_SHIFT		0
+#define GPCPLL_DVFS1_DFS_EXT_DET_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_SHIFT		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_WIDTH		1
+#define GPCPLL_DVFS1_DFS_EXT_CAL_SHIFT		8
+#define GPCPLL_DVFS1_DFS_EXT_CAL_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_SEL_SHIFT		15
+#define GPCPLL_DVFS1_DFS_EXT_SEL_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CTRL_SHIFT		16
+#define GPCPLL_DVFS1_DFS_CTRL_WIDTH		12
+#define GPCPLL_DVFS1_EN_SDM_SHIFT		28
+#define GPCPLL_DVFS1_EN_SDM_WIDTH		1
+#define GPCPLL_DVFS1_EN_SDM_BIT			BIT(28)
+#define GPCPLL_DVFS1_EN_DFS_SHIFT		29
+#define GPCPLL_DVFS1_EN_DFS_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_BIT			BIT(29)
+#define GPCPLL_DVFS1_EN_DFS_CAL_SHIFT		30
+#define GPCPLL_DVFS1_EN_DFS_CAL_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_CAL_BIT		BIT(30)
+#define GPCPLL_DVFS1_DFS_CAL_DONE_SHIFT		31
+#define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CAL_DONE_BIT		BIT(31)
+
+#define GPC_BCAST_GPCPLL_DVFS2	(GPC_BCAST_GPCPLL_CFG_BASE + 0x20)
+#define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT	BIT(16)
+
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT	24
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH	7
+
+#define DFS_DET_RANGE	6	/* -2^6 ... 2^6-1 */
+#define SDM_DIN_RANGE	12	/* -2^12 ... 2^12-1 */
+
+struct gm20b_clk_dvfs_params {
+	s32 coeff_slope;
+	s32 coeff_offs;
+	u32 vco_ctrl;
+};
+
+static const struct gm20b_clk_dvfs_params gm20b_dvfs_params = {
+	.coeff_slope = -165230,
+	.coeff_offs = 214007,
+	.vco_ctrl = 0x7 << 3,
+};
+
+/*
+ * base.n is now the *integer* part of the N factor.
+ * sdm_din contains n's decimal part.
+ */
+struct gm20b_pll {
+	struct gk20a_pll base;
+	u32 sdm_din;
+};
+
+struct gm20b_clk_dvfs {
+	u32 dfs_coeff;
+	s32 dfs_det_max;
+	s32 dfs_ext_cal;
+};
+
+struct gm20b_clk {
+	/* currently applied parameters */
+	struct gk20a_clk base;
+	struct gm20b_clk_dvfs dvfs;
+	u32 uv;
+
+	/* new parameters to apply */
+	struct gk20a_pll new_pll;
+	struct gm20b_clk_dvfs new_dvfs;
+	u32 new_uv;
+
+	const struct gm20b_clk_dvfs_params *dvfs_params;
+
+	/* fused parameters */
+	s32 uvdet_slope;
+	s32 uvdet_offs;
+
+	/* safe frequency we can use at minimum voltage */
+	u32 safe_fmax_vmin;
+};
+#define gm20b_clk(p) container_of((gk20a_clk(p)), struct gm20b_clk, base)
+
 static u32 pl_to_div(u32 pl)
 {
 	return pl;
@@ -53,6 +156,484 @@ static const struct gk20a_clk_pllg_params gm20b_pllg_params = {
 	.min_pl = 1, .max_pl = 31,
 };
 
+static void
+gm20b_pllg_read_mnp(struct gm20b_clk *clk, struct gm20b_pll *pll)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 val;
+
+	gk20a_pllg_read_mnp(&clk->base, &pll->base);
+	val = nvkm_rd32(device, GPCPLL_CFG2);
+	pll->sdm_din = (val >> GPCPLL_CFG2_SDM_DIN_SHIFT) &
+		       MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+}
+
+static void
+gm20b_pllg_write_mnp(struct gm20b_clk *clk, const struct gm20b_pll *pll)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK,
+		  pll->sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+	gk20a_pllg_write_mnp(&clk->base, &pll->base);
+}
+
+/*
+ * Determine DFS_COEFF for the requested voltage. Always select external
+ * calibration override equal to the voltage, and set maximum detection
+ * limit "0" (to make sure that PLL output remains under F/V curve when
+ * voltage increases).
+ */
+static void
+gm20b_dvfs_calc_det_coeff(struct gm20b_clk *clk, s32 uv,
+			  struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gm20b_clk_dvfs_params *p = clk->dvfs_params;
+	u32 coeff;
+	/* Work with mv as uv would likely trigger an overflow */
+	s32 mv = DIV_ROUND_CLOSEST(uv, 1000);
+
+	/* coeff = slope * voltage + offset */
+	coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
+	coeff = DIV_ROUND_CLOSEST(coeff, 1000);
+	dvfs->dfs_coeff = min_t(u32, coeff, MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH));
+
+	dvfs->dfs_ext_cal = DIV_ROUND_CLOSEST(uv - clk->uvdet_offs,
+					     clk->uvdet_slope);
+	/* should never happen */
+	if (abs(dvfs->dfs_ext_cal) >= BIT(DFS_DET_RANGE))
+		nvkm_error(subdev, "dfs_ext_cal overflow!\n");
+
+	dvfs->dfs_det_max = 0;
+
+	nvkm_debug(subdev, "%s uv: %d coeff: %x, ext_cal: %d, det_max: %d\n",
+		   __func__, uv, dvfs->dfs_coeff, dvfs->dfs_ext_cal,
+		   dvfs->dfs_det_max);
+}
+
+/*
+ * Solve equation for integer and fractional part of the effective NDIV:
+ *
+ * n_eff = n_int + 1/2 + (SDM_DIN / 2^(SDM_DIN_RANGE + 1)) +
+ *         (DVFS_COEFF * DVFS_DET_DELTA) / 2^DFS_DET_RANGE
+ *
+ * The SDM_DIN LSB is finally shifted out, since it is not accessible by sw.
+ */
+static void
+gm20b_dvfs_calc_ndiv(struct gm20b_clk *clk, u32 n_eff, u32 *n_int, u32 *sdm_din)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gk20a_clk_pllg_params *p = clk->base.params;
+	u32 n;
+	s32 det_delta;
+	u32 rem, rem_range;
+
+	/* calculate current ext_cal and subtract previous one */
+	det_delta = DIV_ROUND_CLOSEST(((s32)clk->uv) - clk->uvdet_offs,
+				      clk->uvdet_slope);
+	det_delta -= clk->dvfs.dfs_ext_cal;
+	det_delta = min(det_delta, clk->dvfs.dfs_det_max);
+	det_delta *= clk->dvfs.dfs_coeff;
+
+	/* integer part of n */
+	n = (n_eff << DFS_DET_RANGE) - det_delta;
+	/* should never happen! */
+	if (n <= 0) {
+		nvkm_error(subdev, "ndiv <= 0 - setting to 1...\n");
+		n = 1 << DFS_DET_RANGE;
+	}
+	if (n >> DFS_DET_RANGE > p->max_n) {
+		nvkm_error(subdev, "ndiv > max_n - setting to max_n...\n");
+		n = p->max_n << DFS_DET_RANGE;
+	}
+	*n_int = n >> DFS_DET_RANGE;
+
+	/* fractional part of n */
+	rem = ((u32)n) & MASK(DFS_DET_RANGE);
+	rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
+	/* subtract 2^SDM_DIN_RANGE to account for the 1/2 of the equation */
+	rem = (rem << rem_range) - BIT(SDM_DIN_RANGE);
+	/* lose 8 LSB and clip - sdm_din only keeps the most significant byte */
+	*sdm_din = (rem >> BITS_PER_BYTE) & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+
+	nvkm_debug(subdev, "%s n_eff: %d, n_int: %d, sdm_din: %d\n", __func__,
+		   n_eff, *n_int, *sdm_din);
+}
+
+static int
+gm20b_pllg_slide(struct gm20b_clk *clk, u32 n)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll pll;
+	u32 n_int, sdm_din;
+	int ret = 0;
+
+	/* calculate the new n_int/sdm_din for this n/uv */
+	gm20b_dvfs_calc_ndiv(clk, n, &n_int, &sdm_din);
+
+	/* get old coefficients */
+	gm20b_pllg_read_mnp(clk, &pll);
+	/* do nothing if NDIV is the same */
+	if (n_int == pll.base.n && sdm_din == pll.sdm_din)
+		return 0;
+
+	/* pll slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
+
+	/* new ndiv ready for ramp */
+	/* in DVFS mode SDM is updated via "new" field */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_NEW_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT);
+	pll.base.n = n_int;
+	udelay(1);
+	gk20a_pllg_write_mnp(&clk->base, &pll.base);
+
+	/* dynamic ramp to new ndiv */
+	udelay(1);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
+
+	/* wait for ramping to complete */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
+
+	/* in DVFS mode complete SDM update */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+
+	/* exit slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) |
+		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
+	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
+
+	return ret;
+}
+
+static int
+gm20b_pllg_enable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* In DVFS mode lock cannot be used - so just delay */
+	udelay(40);
+
+	/* set SYNC_MODE for glitchless switch out of bypass */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE,
+		       GPCPLL_CFG_SYNC_MODE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
+}
+
+static void
+gm20b_pllg_disable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
+	/* clear SYNC_MODE before disabling PLL */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE, 0);
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+}
+
+static int
+gm20b_pllg_program_mnp(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll cur_pll;
+	u32 n_int, sdm_din;
+	/* if we only change pdiv, we can do a glitchless transition */
+	bool pdiv_only;
+	int ret;
+
+	gm20b_dvfs_calc_ndiv(clk, pll->n, &n_int, &sdm_din);
+	gm20b_pllg_read_mnp(clk, &cur_pll);
+	pdiv_only = cur_pll.base.n == n_int && cur_pll.sdm_din == sdm_din &&
+		    cur_pll.base.m == pll->m;
+
+	/* need full sequence if clock not enabled yet */
+	if (!gk20a_pllg_is_enabled(&clk->base))
+		pdiv_only = false;
+
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+	udelay(2);
+
+	if (pdiv_only) {
+		u32 old = cur_pll.base.pl;
+		u32 new = pll->pl;
+
+		/*
+		 * we can do a glitchless transition only if the old and new PL
+		 * parameters share at least one bit set to 1. If this is not
+		 * the case, calculate and program an interim PL that will allow
+		 * us to respect that rule.
+		 */
+		if ((old & new) == 0) {
+			cur_pll.base.pl = min(old | BIT(ffs(new) - 1),
+					      new | BIT(ffs(old) - 1));
+			gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+		}
+
+		cur_pll.base.pl = new;
+		gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+	} else {
+		/* disable before programming if more than pdiv changes */
+		gm20b_pllg_disable(clk);
+
+		cur_pll.base = *pll;
+		cur_pll.base.n = n_int;
+		cur_pll.sdm_din = sdm_din;
+		gm20b_pllg_write_mnp(clk, &cur_pll);
+
+		ret = gm20b_pllg_enable(clk);
+		if (ret)
+			return ret;
+	}
+
+	/* restore out divider 1:1 */
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+
+	return 0;
+}
+
+static int
+gm20b_pllg_program_mnp_slide(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct gk20a_pll cur_pll;
+	int ret;
+
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		gk20a_pllg_read_mnp(&clk->base, &cur_pll);
+
+		/* just do NDIV slide if there is no change to M and PL */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gm20b_pllg_slide(clk, pll->n);
+
+		/* slide down to current NDIV_LO */
+		cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+		ret = gm20b_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program MNP with the new clock parameters and new NDIV_LO */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+	ret = gm20b_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
+
+	/* slide up to new NDIV */
+	return gm20b_pllg_slide(clk, pll->n);
+}
+
+static int
+gm20b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	struct nvkm_subdev *subdev = &base->subdev;
+	struct nvkm_volt *volt = base->subdev.device->volt;
+	int ret;
+
+	ret = gk20a_pllg_calc_mnp(&clk->base, cstate->domain[nv_clk_src_gpc] *
+					     GK20A_CLK_GPC_MDIV, &clk->new_pll);
+	if (ret)
+		return ret;
+
+	clk->new_uv = volt->vid[cstate->voltage].uv;
+	gm20b_dvfs_calc_det_coeff(clk, clk->new_uv, &clk->new_dvfs);
+
+	nvkm_debug(subdev, "%s uv: %d uv\n", __func__, clk->new_uv);
+
+	return 0;
+}
+
+/*
+ * Compute PLL parameters that are always safe for the current voltage
+ */
+static void
+gm20b_dvfs_calc_safe_pll(struct gm20b_clk *clk, struct gk20a_pll *pll)
+{
+	u32 rate = gk20a_pllg_calc_rate(&clk->base, pll) / KHZ;
+	u32 parent_rate = clk->base.parent_rate / KHZ;
+	u32 nmin, nsafe;
+
+	/* remove a safe margin of 10% */
+	if (rate > clk->safe_fmax_vmin)
+		rate = rate * (100 - 10) / 100;
+
+	/* gpc2clk */
+	rate *= 2;
+
+	nmin = DIV_ROUND_UP(pll->m * clk->base.params->min_vco, parent_rate);
+	nsafe = pll->m * rate / (clk->base.parent_rate);
+
+	if (nsafe < nmin) {
+		pll->pl = DIV_ROUND_UP(nmin * parent_rate, pll->m * rate);
+		nsafe = nmin;
+	}
+
+	pll->n = nsafe;
+}
+
+static void
+gm20b_dvfs_program_coeff(struct gm20b_clk *clk, u32 coeff)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* strobe to read external DFS coefficient */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0, GPCPLL_DVFS0_DFS_COEFF_MASK,
+		  coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+}
+
+static void
+gm20b_dvfs_program_ext_cal(struct gm20b_clk *clk, u32 dfs_det_cal)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+	u32 val;
+
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, MASK(DFS_DET_RANGE + 1),
+		  dfs_det_cal);
+	udelay(1);
+
+	val = nvkm_rd32(device, GPCPLL_DVFS1);
+	if (!(val & BIT(25))) {
+		/* Use external value to overwrite calibration value */
+		val |= BIT(25) | BIT(16);
+		nvkm_wr32(device, GPCPLL_DVFS1, val);
+	}
+}
+
+static void
+gm20b_dvfs_program_dfs_detection(struct gm20b_clk *clk,
+				 struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* strobe to read external DFS coefficient */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0,
+		  GPCPLL_DVFS0_DFS_COEFF_MASK | GPCPLL_DVFS0_DFS_DET_MAX_MASK,
+		  dvfs->dfs_coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT |
+		  dvfs->dfs_det_max << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+
+	gm20b_dvfs_program_ext_cal(clk, dvfs->dfs_ext_cal);
+}
+
+static int
+gm20b_clk_prog(struct nvkm_clk *base)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	u32 cur_freq;
+	int ret;
+
+	/* No change in DVFS settings? */
+	if (clk->uv == clk->new_uv)
+		goto prog;
+
+	/*
+	 * Interim step for changing DVFS detection settings: low enough
+	 * frequency to be safe at at DVFS coeff = 0.
+	 *
+	 * 1. If voltage is increasing:
+	 * - safe frequency target matches the lowest - old - frequency
+	 * - DVFS settings are still old
+	 * - Voltage already increased to new level by volt, but maximum
+	 *   detection limit assures PLL output remains under F/V curve
+	 *
+	 * 2. If voltage is decreasing:
+	 * - safe frequency target matches the lowest - new - frequency
+	 * - DVFS settings are still old
+	 * - Voltage is also old, it will be lowered by volt afterwards
+	 *
+	 * Interim step can be skipped if old frequency is below safe minimum,
+	 * i.e., it is low enough to be safe at any voltage in operating range
+	 * with zero DVFS coefficient.
+	 */
+	cur_freq = nvkm_clk_read(&clk->base.base, nv_clk_src_gpc);
+	if (cur_freq > clk->safe_fmax_vmin) {
+		struct gk20a_pll pll_safe;
+
+		if (clk->uv < clk->new_uv)
+			/* voltage will raise: safe frequency is current one */
+			pll_safe = clk->base.pll;
+		else
+			/* voltage will drop: safe frequency is new one */
+			pll_safe = clk->new_pll;
+
+		gm20b_dvfs_calc_safe_pll(clk, &pll_safe);
+		ret = gm20b_pllg_program_mnp_slide(clk, &pll_safe);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * DVFS detection settings transition:
+	 * - Set DVFS coefficient zero
+	 * - Set calibration level to new voltage
+	 * - Set DVFS coefficient to match new voltage
+	 */
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->new_dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->new_dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->new_dvfs);
+
+prog:
+	clk->uv = clk->new_uv;
+	clk->dvfs = clk->new_dvfs;
+	clk->base.pll = clk->new_pll;
+
+	return gm20b_pllg_program_mnp_slide(clk, &clk->base.pll);
+}
+
 static struct nvkm_pstate
 gm20b_pstates[] = {
 	{
@@ -133,9 +714,99 @@ gm20b_pstates[] = {
 			.voltage = 12,
 		},
 	},
-
 };
 
+static void
+gm20b_clk_fini(struct nvkm_clk *base)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct gm20b_clk *clk = gm20b_clk(base);
+
+	/* slide to VCO min */
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		struct gk20a_pll pll;
+		u32 n_lo;
+
+		gk20a_pllg_read_mnp(&clk->base, &pll);
+		n_lo = gk20a_pllg_n_lo(&clk->base, &pll);
+		gm20b_pllg_slide(clk, n_lo);
+	}
+
+	gm20b_pllg_disable(clk);
+
+	/* set IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 1);
+}
+
+static int
+gm20b_clk_init_dvfs(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	bool fused = clk->uvdet_offs && clk->uvdet_slope;
+	static const s32 ADC_SLOPE_UV = 10000; /* default ADC detection slope */
+	u32 data;
+	int ret;
+
+	/* Enable NA DVFS */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_BIT,
+		  GPCPLL_DVFS1_EN_DFS_BIT);
+
+	/* Set VCO_CTRL */
+	if (clk->dvfs_params->vco_ctrl)
+		nvkm_mask(device, GPCPLL_CFG3, GPCPLL_CFG3_VCO_CTRL_MASK,
+		      clk->dvfs_params->vco_ctrl << GPCPLL_CFG3_VCO_CTRL_SHIFT);
+
+	if (fused) {
+		/* Start internal calibration, but ignore results */
+		nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+		/* got uvdev parameters from fuse, skip calibration */
+		goto calibrated;
+	}
+
+	/*
+	 * If calibration parameters are not fused, start internal calibration,
+	 * wait for completion, and use results along with default slope to
+	 * calculate ADC offset during boot.
+	 */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+	/* Wait for internal calibration done (spec < 2us). */
+	ret = nvkm_wait_usec(device, 10, GPCPLL_DVFS1,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT);
+	if (ret < 0) {
+		nvkm_error(subdev, "GPCPLL calibration timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	data = nvkm_rd32(device, GPCPLL_CFG3) >>
+			 GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT;
+	data &= MASK(GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH);
+
+	clk->uvdet_slope = ADC_SLOPE_UV;
+	clk->uvdet_offs = ((s32)clk->uv) - data * ADC_SLOPE_UV;
+
+	nvkm_debug(subdev, "calibrated DVFS parameters: offs %d, slope %d\n",
+		   clk->uvdet_offs, clk->uvdet_slope);
+
+calibrated:
+	/* Compute and apply initial DVFS parameters */
+	gm20b_dvfs_calc_det_coeff(clk, clk->uv, &clk->dvfs);
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->new_dvfs);
+
+	return 0;
+}
+
+/* Forward declaration to detect speedo >=1 in gm20b_clk_init() */
+static const struct nvkm_clk_func gm20b_clk;
+
 static int
 gm20b_clk_init(struct nvkm_clk *base)
 {
@@ -143,15 +814,56 @@ gm20b_clk_init(struct nvkm_clk *base)
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ret;
+	u32 data;
+
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
+		  GPC2CLK_OUT_INIT_VAL);
 
 	/* Set the global bypass control to VCO */
 	nvkm_mask(device, BYPASSCTRL_SYS,
 	       MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT,
 	       0);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
+	/* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */
+	data = nvkm_rd32(device, 0x021944);
+	if (!(data & 0x3)) {
+		data |= 0x2;
+		nvkm_wr32(device, 0x021944, data);
+
+		data = nvkm_rd32(device, 0x021948);
+		data |=  0x1;
+		nvkm_wr32(device, 0x021948, data);
+	}
+
+	/* Disable idle slow down  */
+	nvkm_mask(device, 0x20160, 0x003f0000, 0x0);
+
+	/* speedo >= 1? */
+	if (clk->base.func == &gm20b_clk) {
+		struct gm20b_clk *_clk = gm20b_clk(base);
+		struct nvkm_volt *volt = device->volt;
+
+		/* Get current voltage */
+		_clk->uv = nvkm_volt_get(volt);
+
+		/* Initialize DVFS */
+		ret = gm20b_clk_init_dvfs(_clk);
+		if (ret)
+			return ret;
+	}
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
-	ret = base->func->prog(&clk->base);
+	ret = base->func->prog(base);
 	if (ret) {
 		nvkm_error(subdev, "cannot initialize clock\n");
 		return ret;
@@ -169,6 +881,7 @@ gm20b_clk_speedo0 = {
 	.prog = gk20a_clk_prog,
 	.tidy = gk20a_clk_tidy,
 	.pstates = gm20b_pstates,
+	/* Speedo 0 only supports 12 voltages */
 	.nr_pstates = ARRAY_SIZE(gm20b_pstates) - 1,
 	.domains = {
 		{ nv_clk_src_crystal, 0xff },
@@ -177,8 +890,26 @@ gm20b_clk_speedo0 = {
 	},
 };
 
-int
-gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+static const struct nvkm_clk_func
+gm20b_clk = {
+	.init = gm20b_clk_init,
+	.fini = gm20b_clk_fini,
+	.read = gk20a_clk_read,
+	.calc = gm20b_clk_calc,
+	.prog = gm20b_clk_prog,
+	.tidy = gk20a_clk_tidy,
+	.pstates = gm20b_pstates,
+	.nr_pstates = ARRAY_SIZE(gm20b_pstates),
+	.domains = {
+		{ nv_clk_src_crystal, 0xff },
+		{ nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV },
+		{ nv_clk_src_max },
+	},
+};
+
+static int
+gm20b_clk_new_speedo0(struct nvkm_device *device, int index,
+		      struct nvkm_clk **pclk)
 {
 	struct gk20a_clk *clk;
 	int ret;
@@ -188,11 +919,156 @@ gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
-			      &gm20b_pllg_params, clk);
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
+			     &gm20b_pllg_params, clk);
 
 	clk->pl_to_div = pl_to_div;
 	clk->div_to_pl = div_to_pl;
 
 	return ret;
 }
+
+/* FUSE register */
+#define FUSE_RESERVED_CALIB0	0x204
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT	0
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH	10
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT		14
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH		10
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT		24
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH		6
+#define FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT		30
+#define FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH		2
+
+static int
+gm20b_clk_init_fused_params(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	u32 val = 0;
+	u32 rev = 0;
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA)
+	tegra_fuse_readl(FUSE_RESERVED_CALIB0, &val);
+	rev = (val >> FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT) &
+	      MASK(FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH);
+#endif
+
+	/* No fused parameters, we will calibrate later */
+	if (rev == 0)
+		return -EINVAL;
+
+	/* Integer part in mV + fractional part in uV */
+	clk->uvdet_slope = ((val >> FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH)) * 1000 +
+			((val >> FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH));
+
+	/* Integer part in mV + fractional part in 100uV */
+	clk->uvdet_offs = ((val >> FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT) &
+			MASK(FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH)) * 1000 +
+			((val >> FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT) &
+			 MASK(FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH)) * 100;
+
+	nvkm_debug(subdev, "fused calibration data: slope %d, offs %d\n",
+		   clk->uvdet_slope, clk->uvdet_offs);
+	return 0;
+}
+
+static int
+gm20b_clk_init_safe_fmax(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_volt *volt = subdev->device->volt;
+	struct nvkm_pstate *pstates = clk->base.base.func->pstates;
+	int nr_pstates = clk->base.base.func->nr_pstates;
+	int vmin, id = 0;
+	u32 fmax = 0;
+	int i;
+
+	/* find lowest voltage we can use */
+	vmin = volt->vid[0].uv;
+	for (i = 1; i < volt->vid_nr; i++) {
+		if (volt->vid[i].uv <= vmin) {
+			vmin = volt->vid[i].uv;
+			id = volt->vid[i].vid;
+		}
+	}
+
+	/* find max frequency at this voltage */
+	for (i = 0; i < nr_pstates; i++)
+		if (pstates[i].base.voltage == id)
+			fmax = max(fmax,
+				   pstates[i].base.domain[nv_clk_src_gpc]);
+
+	if (!fmax) {
+		nvkm_error(subdev, "failed to evaluate safe fmax\n");
+		return -EINVAL;
+	}
+
+	/* we are safe at 90% of the max frequency */
+	clk->safe_fmax_vmin = fmax * (100 - 10) / 100;
+	nvkm_debug(subdev, "safe fmax @ vmin = %u Khz\n", clk->safe_fmax_vmin);
+
+	return 0;
+}
+
+int
+gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+{
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
+	struct gm20b_clk *clk;
+	struct nvkm_subdev *subdev;
+	struct gk20a_clk_pllg_params *clk_params;
+	int ret;
+
+	/* Speedo 0 GPUs cannot use noise-aware PLL */
+	if (tdev->gpu_speedo_id == 0)
+		return gm20b_clk_new_speedo0(device, index, pclk);
+
+	/* Speedo >= 1, use NAPLL */
+	clk = kzalloc(sizeof(*clk) + sizeof(*clk_params), GFP_KERNEL);
+	if (!clk)
+		return -ENOMEM;
+	*pclk = &clk->base.base;
+	subdev = &clk->base.base.subdev;
+
+	/* duplicate the clock parameters since we will patch them below */
+	clk_params = (void *) (clk + 1);
+	*clk_params = gm20b_pllg_params;
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk, clk_params,
+			     &clk->base);
+	if (ret)
+		return ret;
+
+	/*
+	 * NAPLL can only work with max_u, clamp the m range so
+	 * gk20a_pllg_calc_mnp always uses it
+	 */
+	clk_params->max_m = clk_params->min_m = DIV_ROUND_UP(clk_params->max_u,
+						(clk->base.parent_rate / KHZ));
+	if (clk_params->max_m == 0) {
+		nvkm_warn(subdev, "cannot use NAPLL, using legacy clock...\n");
+		kfree(clk);
+		return gm20b_clk_new_speedo0(device, index, pclk);
+	}
+
+	clk->base.pl_to_div = pl_to_div;
+	clk->base.div_to_pl = div_to_pl;
+
+	clk->dvfs_params = &gm20b_dvfs_params;
+
+	ret = gm20b_clk_init_fused_params(clk);
+	/*
+	 * we will calibrate during init - should never happen on
+	 * prod parts
+	 */
+	if (ret)
+		nvkm_warn(subdev, "no fused calibration parameters\n");
+
+	ret = gm20b_clk_init_safe_fmax(clk);
+	if (ret)
+		return ret;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 842d5de96d73..edcc157e6ac8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -24,6 +24,8 @@ nvkm-y += nvkm/subdev/fb/gk104.o
 nvkm-y += nvkm/subdev/fb/gk20a.o
 nvkm-y += nvkm/subdev/fb/gm107.o
 nvkm-y += nvkm/subdev/fb/gm200.o
+nvkm-y += nvkm/subdev/fb/gp100.o
+nvkm-y += nvkm/subdev/fb/gp104.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
@@ -41,6 +43,7 @@ nvkm-y += nvkm/subdev/fb/rammcp77.o
 nvkm-y += nvkm/subdev/fb/ramgf100.o
 nvkm-y += nvkm/subdev/fb/ramgk104.o
 nvkm-y += nvkm/subdev/fb/ramgm107.o
+nvkm-y += nvkm/subdev/fb/ramgp100.o
 nvkm-y += nvkm/subdev/fb/sddr2.o
 nvkm-y += nvkm/subdev/fb/sddr3.o
 nvkm-y += nvkm/subdev/fb/gddr3.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index ce90242b8cce..a7049c041594 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -25,6 +25,7 @@
 #include "ram.h"
 
 #include <core/memory.h>
+#include <core/option.h>
 #include <subdev/bios.h>
 #include <subdev/bios/M0203.h>
 #include <engine/gr.h>
@@ -134,6 +135,10 @@ nvkm_fb_init(struct nvkm_subdev *subdev)
 
 	if (fb->func->init)
 		fb->func->init(fb);
+	if (fb->func->init_page)
+		fb->func->init_page(fb);
+	if (fb->func->init_unkn)
+		fb->func->init_unkn(fb);
 	return 0;
 }
 
@@ -171,6 +176,7 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_fb, device, index, &fb->subdev);
 	fb->func = func;
 	fb->tile.regions = fb->func->tile.regions;
+	fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", 0);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index e649ead5ccfc..76433cc66fff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -72,6 +72,22 @@ gf100_fb_oneinit(struct nvkm_fb *fb)
 }
 
 void
+gf100_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+	switch (fb->page) {
+	case 16:
+		nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001);
+		break;
+	case 17:
+	default:
+		nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000);
+		fb->page = 17;
+		break;
+	}
+}
+
+void
 gf100_fb_init(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
@@ -79,8 +95,6 @@ gf100_fb_init(struct nvkm_fb *base)
 
 	if (fb->r100c10_page)
 		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 }
 
 void *
@@ -125,6 +139,7 @@ gf100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gf100_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
index 2160e5a39c9a..449f431644b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
@@ -14,4 +14,6 @@ int gf100_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *,
 void *gf100_fb_dtor(struct nvkm_fb *);
 void gf100_fb_init(struct nvkm_fb *);
 void gf100_fb_intr(struct nvkm_fb *);
+
+void gp100_fb_init(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
index b41f0f70038c..4245e2e6e604 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
@@ -29,6 +29,7 @@ gk104_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gk104_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
index 7306f7dfc3b9..f815fe2bbf08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
@@ -27,7 +27,6 @@ static void
 gk20a_fb_init(struct nvkm_fb *fb)
 {
 	struct nvkm_device *device = fb->subdev.device;
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->mmu_rd) >> 8);
 }
@@ -36,6 +35,7 @@ static const struct nvkm_fb_func
 gk20a_fb = {
 	.oneinit = gf100_fb_oneinit,
 	.init = gk20a_fb_init,
+	.init_page = gf100_fb_init_page,
 	.memtype_valid = gf100_fb_memtype_valid,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
index 4869fdb753c9..db699025f546 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
@@ -29,6 +29,7 @@ gm107_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 44f5716f64d8..62f653240be3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -26,6 +26,24 @@
 
 #include <core/memory.h>
 
+void
+gm200_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+	switch (fb->page) {
+	case 16:
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000001);
+		break;
+	case 17:
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000000);
+		break;
+	default:
+		nvkm_mask(device, 0x100c80, 0x00000800, 0x00000800);
+		fb->page = 0;
+		break;
+	}
+}
+
 static void
 gm200_fb_init(struct nvkm_fb *base)
 {
@@ -48,6 +66,7 @@ gm200_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gm200_fb_init,
+	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
new file mode 100644
index 000000000000..98474aec1921
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static void
+gp100_fb_init_unkn(struct nvkm_fb *base)
+{
+	struct nvkm_device *device = gf100_fb(base)->base.subdev.device;
+	nvkm_wr32(device, 0x1fac80, nvkm_rd32(device, 0x100c80));
+	nvkm_wr32(device, 0x1facc4, nvkm_rd32(device, 0x100cc4));
+	nvkm_wr32(device, 0x1facc8, nvkm_rd32(device, 0x100cc8));
+	nvkm_wr32(device, 0x1faccc, nvkm_rd32(device, 0x100ccc));
+}
+
+void
+gp100_fb_init(struct nvkm_fb *base)
+{
+	struct gf100_fb *fb = gf100_fb(base);
+	struct nvkm_device *device = fb->base.subdev.device;
+
+	if (fb->r100c10_page)
+		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
+
+	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
+	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
+	nvkm_mask(device, 0x100cc4, 0x00060000,
+		  max(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
+}
+
+static const struct nvkm_fb_func
+gp100_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page,
+	.init_unkn = gp100_fb_init_unkn,
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp100_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
new file mode 100644
index 000000000000..92cb71861bec
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static const struct nvkm_fb_func
+gp104_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page,
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp104_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp104_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index d97d640e60a0..e905d44fa1d5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -8,6 +8,8 @@ struct nvkm_fb_func {
 	void *(*dtor)(struct nvkm_fb *);
 	int (*oneinit)(struct nvkm_fb *);
 	void (*init)(struct nvkm_fb *);
+	void (*init_page)(struct nvkm_fb *);
+	void (*init_unkn)(struct nvkm_fb *);
 	void (*intr)(struct nvkm_fb *);
 
 	struct {
@@ -60,5 +62,8 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nvkm_fb_tile *);
 
 int gf100_fb_oneinit(struct nvkm_fb *);
+void gf100_fb_init_page(struct nvkm_fb *);
 bool gf100_fb_memtype_valid(struct nvkm_fb *, u32);
+
+void gm200_fb_init_page(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index f816cbf2ced3..b9ec0ae6723a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -47,4 +47,5 @@ int mcp77_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gf100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **);
+int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
new file mode 100644
index 000000000000..f3be408b5e5e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "ram.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/rammap.h>
+
+static int
+gp100_ram_init(struct nvkm_ram *ram)
+{
+	struct nvkm_subdev *subdev = &ram->fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_bios *bios = device->bios;
+	u8  ver, hdr, cnt, len, snr, ssz;
+	u32 data;
+	int i;
+
+	/* run a bunch of tables from rammap table.  there's actually
+	 * individual pointers for each rammap entry too, but, nvidia
+	 * seem to just run the last two entries' scripts early on in
+	 * their init, and never again.. we'll just run 'em all once
+	 * for now.
+	 *
+	 * i strongly suspect that each script is for a separate mode
+	 * (likely selected by 0x9a065c's lower bits?), and the
+	 * binary driver skips the one that's already been setup by
+	 * the init tables.
+	 */
+	data = nvbios_rammapTe(bios, &ver, &hdr, &cnt, &len, &snr, &ssz);
+	if (!data || hdr < 0x15)
+		return -EINVAL;
+
+	cnt  = nvbios_rd08(bios, data + 0x14); /* guess at count */
+	data = nvbios_rd32(bios, data + 0x10); /* guess u32... */
+	if (cnt) {
+		u32 save = nvkm_rd32(device, 0x9a065c) & 0x000000f0;
+		for (i = 0; i < cnt; i++, data += 4) {
+			if (i != save >> 4) {
+				nvkm_mask(device, 0x9a065c, 0x000000f0, i << 4);
+				nvbios_exec(&(struct nvbios_init) {
+						.subdev = subdev,
+						.bios = bios,
+						.offset = nvbios_rd32(bios, data),
+						.execute = 1,
+					    });
+			}
+		}
+		nvkm_mask(device, 0x9a065c, 0x000000f0, save);
+	}
+
+	nvkm_mask(device, 0x9a0584, 0x11000000, 0x00000000);
+	nvkm_wr32(device, 0x10ecc0, 0xffffffff);
+	nvkm_mask(device, 0x9a0160, 0x00000010, 0x00000010);
+	return 0;
+}
+
+static const struct nvkm_ram_func
+gp100_ram_func = {
+	.init = gp100_ram_init,
+	.get = gf100_ram_get,
+	.put = gf100_ram_put,
+};
+
+int
+gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
+{
+	struct nvkm_ram *ram;
+	struct nvkm_subdev *subdev = &fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
+	const u32 rsvd_head = ( 256 * 1024); /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
+	u32 fbpa_num = nvkm_rd32(device, 0x022438), fbpa;
+	u32 fbio_opt = nvkm_rd32(device, 0x021c14);
+	u64 part, size = 0, comm = ~0ULL;
+	bool mixed = false;
+	int ret;
+
+	nvkm_debug(subdev, "022438: %08x\n", fbpa_num);
+	nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
+	for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
+		if (!(fbio_opt & (1 << fbpa))) {
+			part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
+			nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part);
+			part = part << 20;
+			if (part != comm) {
+				if (comm != ~0ULL)
+					mixed = true;
+				comm = min(comm, part);
+			}
+			size = size + part;
+		}
+	}
+
+	ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram);
+	*pram = ram;
+	if (ret)
+		return ret;
+
+	nvkm_mm_fini(&ram->vram);
+
+	if (mixed) {
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   ((comm * fbpa_num) - rsvd_head) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+
+		ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >>
+				   NVKM_RAM_MM_SHIFT,
+				   (size - (comm * fbpa_num) - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	} else {
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   (size - rsvd_head - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index 323c79abe468..41bd5d0f7692 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -276,6 +276,8 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		struct pwr_rail_t *r = &stbl.rail[i];
 		struct nvkm_iccsense_rail *rail;
 		struct nvkm_iccsense_sensor *sensor;
+		int (*read)(struct nvkm_iccsense *,
+			    struct nvkm_iccsense_rail *);
 
 		if (!r->mode || r->resistor_mohm == 0)
 			continue;
@@ -284,31 +286,31 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		if (!sensor)
 			continue;
 
-		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
-		if (!rail)
-			return -ENOMEM;
-
 		switch (sensor->type) {
 		case NVBIOS_EXTDEV_INA209:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina209_read;
+			read = nvkm_iccsense_ina209_read;
 			break;
 		case NVBIOS_EXTDEV_INA219:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina219_read;
+			read = nvkm_iccsense_ina219_read;
 			break;
 		case NVBIOS_EXTDEV_INA3221:
 			if (r->rail >= 3)
 				continue;
-			rail->read = nvkm_iccsense_ina3221_read;
+			read = nvkm_iccsense_ina3221_read;
 			break;
 		default:
 			continue;
 		}
 
+		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
+		if (!rail)
+			return -ENOMEM;
 		sensor->rail_mask |= 1 << r->rail;
+		rail->read = read;
 		rail->sensor = sensor;
 		rail->idx = r->rail;
 		rail->mohm = r->resistor_mohm;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index 932b366598aa..12d6f4f102cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/ltc/gf100.o
 nvkm-y += nvkm/subdev/ltc/gk104.o
 nvkm-y += nvkm/subdev/ltc/gm107.o
 nvkm-y += nvkm/subdev/ltc/gm200.o
+nvkm-y += nvkm/subdev/ltc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index c9eb677967a8..4a0fa0a9b802 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -23,7 +23,6 @@
  */
 #include "priv.h"
 
-#include <core/enum.h>
 #include <subdev/fb.h>
 #include <subdev/timer.h>
 
@@ -71,7 +70,7 @@ gf100_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17ea58, depth);
 }
 
-static const struct nvkm_bitfield
+const struct nvkm_bitfield
 gf100_ltc_lts_intr_name[] = {
 	{ 0x00000001, "IDLE_ERROR_IQ" },
 	{ 0x00000002, "IDLE_ERROR_CBC" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index e292f5679418..ec0a3844b2d1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -68,18 +68,22 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17e34c, depth);
 }
 
-static void
-gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s)
+void
+gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s)
 {
 	struct nvkm_subdev *subdev = &ltc->subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 base = 0x140000 + (c * 0x2000) + (s * 0x200);
-	u32 stat = nvkm_rd32(device, base + 0x00c);
+	u32 base = 0x140400 + (c * 0x2000) + (s * 0x200);
+	u32 intr = nvkm_rd32(device, base + 0x00c);
+	u16 stat = intr & 0x0000ffff;
+	char msg[128];
 
 	if (stat) {
-		nvkm_error(subdev, "LTC%d_LTS%d: %08x\n", c, s, stat);
-		nvkm_wr32(device, base + 0x00c, stat);
+		nvkm_snprintbf(msg, sizeof(msg), gf100_ltc_lts_intr_name, stat);
+		nvkm_error(subdev, "LTC%d_LTS%d: %08x [%s]\n", c, s, intr, msg);
 	}
+
+	nvkm_wr32(device, base + 0x00c, intr);
 }
 
 void
@@ -92,7 +96,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc)
 	while (mask) {
 		u32 s, c = __ffs(mask);
 		for (s = 0; s < ltc->lts_nr; s++)
-			gm107_ltc_lts_isr(ltc, c, s);
+			gm107_ltc_intr_lts(ltc, c, s);
 		mask &= ~(1 << c);
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
index 2a29bfd5125a..e18e0dc19ec8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
@@ -46,7 +46,7 @@ static const struct nvkm_ltc_func
 gm200_ltc = {
 	.oneinit = gm200_ltc_oneinit,
 	.init = gm200_ltc_init,
-	.intr = gm107_ltc_intr, /*XXX: not validated */
+	.intr = gm107_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
 	.zbc = 16,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
new file mode 100644
index 000000000000..0bdfb2f40266
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+
+static void
+gp100_ltc_intr(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	u32 mask;
+
+	mask = nvkm_rd32(device, 0x0001c0);
+	while (mask) {
+		u32 s, c = __ffs(mask);
+		for (s = 0; s < ltc->lts_nr; s++)
+			gm107_ltc_intr_lts(ltc, c, s);
+		mask &= ~(1 << c);
+	}
+}
+
+static int
+gp100_ltc_oneinit(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	ltc->ltc_nr = nvkm_rd32(device, 0x12006c);
+	ltc->lts_nr = nvkm_rd32(device, 0x17e280) >> 28;
+	/*XXX: tagram allocation - TBD */
+	return nvkm_mm_init(&ltc->tags, 0, 0, 1);
+}
+
+static void
+gp100_ltc_init(struct nvkm_ltc *ltc)
+{
+	/*XXX: PMU LS call to setup tagram address */
+}
+
+static const struct nvkm_ltc_func
+gp100_ltc = {
+	.oneinit = gp100_ltc_oneinit,
+	.init = gp100_ltc_init,
+	.intr = gp100_ltc_intr,
+	.cbc_clear = gm107_ltc_cbc_clear,
+	.cbc_wait = gm107_ltc_cbc_wait,
+	.zbc = 16,
+	.zbc_clear_color = gm107_ltc_zbc_clear_color,
+	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
+};
+
+int
+gp100_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
+{
+	return nvkm_ltc_new_(&gp100_ltc, device, index, pltc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 6d81c695ed0d..8b95f96e3ffa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -2,6 +2,7 @@
 #define __NVKM_LTC_PRIV_H__
 #define nvkm_ltc(p) container_of((p), struct nvkm_ltc, subdev)
 #include <subdev/ltc.h>
+#include <core/enum.h>
 
 int nvkm_ltc_new_(const struct nvkm_ltc_func *, struct nvkm_device *,
 		  int index, struct nvkm_ltc **);
@@ -31,8 +32,10 @@ void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
 void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
 void gf100_ltc_invalidate(struct nvkm_ltc *);
 void gf100_ltc_flush(struct nvkm_ltc *);
+extern const struct nvkm_bitfield gf100_ltc_lts_intr_name[];
 
 void gm107_ltc_intr(struct nvkm_ltc *);
+void gm107_ltc_intr_lts(struct nvkm_ltc *, int ltc, int lts);
 void gm107_ltc_cbc_clear(struct nvkm_ltc *, u32, u32);
 void gm107_ltc_cbc_wait(struct nvkm_ltc *);
 void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index 49695ac7be2e..12943f92c206 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/mc/gt215.o
 nvkm-y += nvkm/subdev/mc/gf100.o
 nvkm-y += nvkm/subdev/mc/gk104.o
 nvkm-y += nvkm/subdev/mc/gk20a.o
+nvkm-y += nvkm/subdev/mc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 350a8caa84c8..6b25e25f9eba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -27,43 +27,67 @@
 #include <subdev/top.h>
 
 void
-nvkm_mc_unk260(struct nvkm_mc *mc, u32 data)
+nvkm_mc_unk260(struct nvkm_device *device, u32 data)
 {
-	if (mc->func->unk260)
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc) && mc->func->unk260)
 		mc->func->unk260(mc, data);
 }
 
 void
-nvkm_mc_intr_unarm(struct nvkm_mc *mc)
+nvkm_mc_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx, bool en)
 {
-	return mc->func->intr_unarm(mc);
+	struct nvkm_mc *mc = device->mc;
+	const struct nvkm_mc_map *map;
+	if (likely(mc) && mc->func->intr_mask) {
+		u32 mask = nvkm_top_intr_mask(device, devidx);
+		for (map = mc->func->intr; !mask && map->stat; map++) {
+			if (map->unit == devidx)
+				mask = map->stat;
+		}
+		mc->func->intr_mask(mc, mask, en ? mask : 0);
+	}
+}
+
+void
+nvkm_mc_intr_unarm(struct nvkm_device *device)
+{
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc))
+		mc->func->intr_unarm(mc);
 }
 
 void
-nvkm_mc_intr_rearm(struct nvkm_mc *mc)
+nvkm_mc_intr_rearm(struct nvkm_device *device)
 {
-	return mc->func->intr_rearm(mc);
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc))
+		mc->func->intr_rearm(mc);
 }
 
 static u32
-nvkm_mc_intr_mask(struct nvkm_mc *mc)
+nvkm_mc_intr_stat(struct nvkm_mc *mc)
 {
-	u32 intr = mc->func->intr_mask(mc);
+	u32 intr = mc->func->intr_stat(mc);
 	if (WARN_ON_ONCE(intr == 0xffffffff))
 		intr = 0; /* likely fallen off the bus */
 	return intr;
 }
 
 void
-nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
+nvkm_mc_intr(struct nvkm_device *device, bool *handled)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	struct nvkm_subdev *subdev;
-	const struct nvkm_mc_map *map = mc->func->intr;
-	u32 stat, intr = nvkm_mc_intr_mask(mc);
+	const struct nvkm_mc_map *map;
+	u32 stat, intr;
 	u64 subdevs;
 
-	stat = nvkm_top_intr(device->top, intr, &subdevs);
+	if (unlikely(!mc))
+		return;
+
+	intr = nvkm_mc_intr_stat(mc);
+	stat = nvkm_top_intr(device, intr, &subdevs);
 	while (subdevs) {
 		enum nvkm_devidx subidx = __ffs64(subdevs);
 		subdev = nvkm_device_subdev(device, subidx);
@@ -72,14 +96,13 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 		subdevs &= ~BIT_ULL(subidx);
 	}
 
-	while (map->stat) {
+	for (map = mc->func->intr; map->stat; map++) {
 		if (intr & map->stat) {
 			subdev = nvkm_device_subdev(device, map->unit);
 			if (subdev)
 				nvkm_subdev_intr(subdev);
 			stat &= ~map->stat;
 		}
-		map++;
 	}
 
 	if (stat)
@@ -87,22 +110,32 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 	*handled = intr != 0;
 }
 
-static void
-nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+static u32
+nvkm_mc_reset_mask(struct nvkm_device *device, bool isauto,
+		   enum nvkm_devidx devidx)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	const struct nvkm_mc_map *map;
-	u64 pmc_enable;
-
-	if (!(pmc_enable = nvkm_top_reset(device->top, devidx))) {
-		for (map = mc->func->reset; map && map->stat; map++) {
-			if (map->unit == devidx) {
-				pmc_enable = map->stat;
-				break;
+	u64 pmc_enable = 0;
+	if (likely(mc)) {
+		if (!(pmc_enable = nvkm_top_reset(device, devidx))) {
+			for (map = mc->func->reset; map && map->stat; map++) {
+				if (!isauto || !map->noauto) {
+					if (map->unit == devidx) {
+						pmc_enable = map->stat;
+						break;
+					}
+				}
 			}
 		}
 	}
+	return pmc_enable;
+}
 
+void
+nvkm_mc_reset(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, true, devidx);
 	if (pmc_enable) {
 		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
 		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
@@ -111,17 +144,27 @@ nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
 }
 
 void
-nvkm_mc_reset(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+nvkm_mc_disable(struct nvkm_device *device, enum nvkm_devidx devidx)
 {
-	if (likely(mc))
-		nvkm_mc_reset_(mc, devidx);
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+	if (pmc_enable)
+		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
+}
+
+void
+nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+	if (pmc_enable) {
+		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
+		nvkm_rd32(device, 0x000200);
+	}
 }
 
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
-	struct nvkm_mc *mc = nvkm_mc(subdev);
-	nvkm_mc_intr_unarm(mc);
+	nvkm_mc_intr_unarm(subdev->device);
 	return 0;
 }
 
@@ -131,7 +174,7 @@ nvkm_mc_init(struct nvkm_subdev *subdev)
 	struct nvkm_mc *mc = nvkm_mc(subdev);
 	if (mc->func->init)
 		mc->func->init(mc);
-	nvkm_mc_intr_rearm(mc);
+	nvkm_mc_intr_rearm(subdev->device);
 	return 0;
 }
 
@@ -148,16 +191,21 @@ nvkm_mc = {
 	.fini = nvkm_mc_fini,
 };
 
+void
+nvkm_mc_ctor(const struct nvkm_mc_func *func, struct nvkm_device *device,
+	     int index, struct nvkm_mc *mc)
+{
+	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev);
+	mc->func = func;
+}
+
 int
 nvkm_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
 	     int index, struct nvkm_mc **pmc)
 {
 	struct nvkm_mc *mc;
-
 	if (!(mc = *pmc = kzalloc(sizeof(*mc), GFP_KERNEL)))
 		return -ENOMEM;
-
-	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev);
-	mc->func = func;
+	nvkm_mc_ctor(func, device, index, *pmc);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
index 5c85b47f071d..c3d66ef5dc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
@@ -57,7 +57,7 @@ g84_mc = {
 	.intr = g84_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g84_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
index 0280b43cc10c..93ad4982ce5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
@@ -57,7 +57,7 @@ g98_mc = {
 	.intr = g98_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g98_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
index 8397e223bd43..d2c4d6033abb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
@@ -76,7 +76,7 @@ gf100_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-gf100_mc_intr_mask(struct nvkm_mc *mc)
+gf100_mc_intr_stat(struct nvkm_mc *mc)
 {
 	struct nvkm_device *device = mc->subdev.device;
 	u32 intr0 = nvkm_rd32(device, 0x000100);
@@ -85,6 +85,14 @@ gf100_mc_intr_mask(struct nvkm_mc *mc)
 }
 
 void
+gf100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
+{
+	struct nvkm_device *device = mc->subdev.device;
+	nvkm_mask(device, 0x000640, mask, stat);
+	nvkm_mask(device, 0x000644, mask, stat);
+}
+
+void
 gf100_mc_unk260(struct nvkm_mc *mc, u32 data)
 {
 	nvkm_wr32(mc->subdev.device, 0x000260, data);
@@ -97,6 +105,7 @@ gf100_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gf100_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
index 317464212c7d..7b8c6ecad1a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
@@ -26,6 +26,7 @@
 const struct nvkm_mc_map
 gk104_mc_reset[] = {
 	{ 0x00000100, NVKM_ENGINE_FIFO },
+	{ 0x00002000, NVKM_SUBDEV_PMU, true },
 	{}
 };
 
@@ -53,6 +54,7 @@ gk104_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
index 60b044f517ed..ca1bf3279dbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
@@ -30,6 +30,7 @@ gk20a_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
new file mode 100644
index 000000000000..4d22f4abd6de
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#define gp100_mc(p) container_of((p), struct gp100_mc, base)
+#include "priv.h"
+
+struct gp100_mc {
+	struct nvkm_mc base;
+	spinlock_t lock;
+	bool intr;
+	u32 mask;
+};
+
+static void
+gp100_mc_intr_update(struct gp100_mc *mc)
+{
+	struct nvkm_device *device = mc->base.subdev.device;
+	u32 mask = mc->intr ? mc->mask : 0, i;
+	for (i = 0; i < 2; i++) {
+		nvkm_wr32(device, 0x000180 + (i * 0x04), ~mask);
+		nvkm_wr32(device, 0x000160 + (i * 0x04),  mask);
+	}
+}
+
+static void
+gp100_mc_intr_unarm(struct nvkm_mc *base)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = false;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_rearm(struct nvkm_mc *base)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = true;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr)
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->mask = (mc->mask & ~mask) | intr;
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static const struct nvkm_mc_func
+gp100_mc = {
+	.init = nv50_mc_init,
+	.intr = gk104_mc_intr,
+	.intr_unarm = gp100_mc_intr_unarm,
+	.intr_rearm = gp100_mc_intr_rearm,
+	.intr_mask = gp100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
+	.reset = gk104_mc_reset,
+};
+
+int
+gp100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc)
+{
+	struct gp100_mc *mc;
+
+	if (!(mc = kzalloc(sizeof(*mc), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_mc_ctor(&gp100_mc, device, index, &mc->base);
+	*pmc = &mc->base;
+
+	spin_lock_init(&mc->lock);
+	mc->intr = false;
+	mc->mask = 0x7fffffff;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
index aad0ba95bf18..99d50a3d956f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
@@ -53,13 +53,20 @@ gt215_mc_intr[] = {
 	{},
 };
 
+static void
+gt215_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
+{
+	nvkm_mask(mc->subdev.device, 0x000640, mask, stat);
+}
+
 static const struct nvkm_mc_func
 gt215_mc = {
 	.init = nv50_mc_init,
 	.intr = gt215_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_mask = gt215_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = gt215_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
index a062624e906b..6509defd1460 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
@@ -56,7 +56,7 @@ nv04_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-nv04_mc_intr_mask(struct nvkm_mc *mc)
+nv04_mc_intr_stat(struct nvkm_mc *mc)
 {
 	return nvkm_rd32(mc->subdev.device, 0x000100);
 }
@@ -75,7 +75,7 @@ nv04_mc = {
 	.intr = nv04_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
index 55f0b9166b52..9213107901e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
@@ -39,7 +39,7 @@ nv11_mc = {
 	.intr = nv11_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
index c40fa67f79a5..64bf5bbf8146 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
@@ -48,7 +48,7 @@ nv17_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
index cc56271db564..65fa44a64b98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
@@ -43,7 +43,7 @@ nv44_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
index 343b6078580d..fe93b4fd7100 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
@@ -50,7 +50,7 @@ nv50_mc = {
 	.intr = nv50_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index a12038118512..4f0576a06d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -3,12 +3,15 @@
 #define nvkm_mc(p) container_of((p), struct nvkm_mc, subdev)
 #include <subdev/mc.h>
 
+void nvkm_mc_ctor(const struct nvkm_mc_func *, struct nvkm_device *,
+		  int index, struct nvkm_mc *);
 int nvkm_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *,
 		 int index, struct nvkm_mc **);
 
 struct nvkm_mc_map {
 	u32 stat;
 	u32 unit;
+	bool noauto;
 };
 
 struct nvkm_mc_func {
@@ -18,8 +21,10 @@ struct nvkm_mc_func {
 	void (*intr_unarm)(struct nvkm_mc *);
 	/* enable reporting of interrupts to host */
 	void (*intr_rearm)(struct nvkm_mc *);
+	/* (un)mask delivery of specific interrupts */
+	void (*intr_mask)(struct nvkm_mc *, u32 mask, u32 stat);
 	/* retrieve pending interrupt mask (NV_PMC_INTR) */
-	u32 (*intr_mask)(struct nvkm_mc *);
+	u32 (*intr_stat)(struct nvkm_mc *);
 	const struct nvkm_mc_map *reset;
 	void (*unk260)(struct nvkm_mc *, u32);
 };
@@ -27,7 +32,7 @@ struct nvkm_mc_func {
 void nv04_mc_init(struct nvkm_mc *);
 void nv04_mc_intr_unarm(struct nvkm_mc *);
 void nv04_mc_intr_rearm(struct nvkm_mc *);
-u32 nv04_mc_intr_mask(struct nvkm_mc *);
+u32 nv04_mc_intr_stat(struct nvkm_mc *);
 extern const struct nvkm_mc_map nv04_mc_reset[];
 
 extern const struct nvkm_mc_map nv17_mc_intr[];
@@ -39,7 +44,8 @@ void nv50_mc_init(struct nvkm_mc *);
 
 void gf100_mc_intr_unarm(struct nvkm_mc *);
 void gf100_mc_intr_rearm(struct nvkm_mc *);
-u32 gf100_mc_intr_mask(struct nvkm_mc *);
+void gf100_mc_intr_mask(struct nvkm_mc *, u32, u32);
+u32 gf100_mc_intr_stat(struct nvkm_mc *);
 void gf100_mc_unk260(struct nvkm_mc *, u32);
 
 extern const struct nvkm_mc_map gk104_mc_intr[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 3c2519fdeb81..2a31b7d66a6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
 nvkm-y += nvkm/subdev/pci/gk104.o
+nvkm-y += nvkm/subdev/pci/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index 6b0328bd7eed..eb9b278198b2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -69,15 +69,13 @@ static irqreturn_t
 nvkm_pci_intr(int irq, void *arg)
 {
 	struct nvkm_pci *pci = arg;
-	struct nvkm_mc *mc = pci->subdev.device->mc;
+	struct nvkm_device *device = pci->subdev.device;
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		if (pci->msi)
-			pci->func->msi_rearm(pci);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	if (pci->msi)
+		pci->func->msi_rearm(pci);
+	nvkm_mc_intr(device, &handled);
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
new file mode 100644
index 000000000000..82c5234a06ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static void
+gp100_pci_msi_rearm(struct nvkm_pci *pci)
+{
+	nvkm_pci_wr32(pci, 0x0704, 0x00000000);
+}
+
+static const struct nvkm_pci_func
+gp100_pci_func = {
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = gp100_pci_msi_rearm,
+};
+
+int
+gp100_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&gp100_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 213fdba6cfa0..314be2192b7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,8 +19,9 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-
 #include "priv.h"
+
+#include <subdev/mc.h>
 #include <subdev/timer.h>
 
 static const char *
@@ -70,12 +71,11 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 	int ret;
 
 	/* enable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, sb->enable_mask);
-	nvkm_rd32(device, 0x200);
+	nvkm_mc_enable(device, sb->devidx);
 	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
 	if (ret < 0) {
-		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
 		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
+		nvkm_mc_disable(device, sb->devidx);
 		return ret;
 	}
 
@@ -85,8 +85,7 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 
 	/* enable IRQs */
 	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mask(device, 0x640, sb->irq_mask, sb->irq_mask);
-	nvkm_mask(device, 0x644, sb->irq_mask, sb->irq_mask);
+	nvkm_mc_intr_mask(device, sb->devidx, true);
 
 	return 0;
 }
@@ -97,14 +96,13 @@ nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
 	struct nvkm_device *device = sb->subdev.device;
 
 	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mask(device, 0x644, sb->irq_mask, 0x0);
-	nvkm_mask(device, 0x640, sb->irq_mask, 0x0);
+	nvkm_mc_intr_mask(device, sb->devidx, false);
 	nvkm_wr32(device, sb->base + 0x014, 0xff);
 
 	falcon_wait_idle(device, sb->base);
 
 	/* disable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+	nvkm_mc_disable(device, sb->devidx);
 
 	return 0;
 }
@@ -216,14 +214,7 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 		return ret;
 	}
 
-	/*
-	 * Build all blobs - the same blobs can be used to perform secure boot
-	 * multiple times
-	 */
-	if (sb->func->prepare_blobs)
-		ret = sb->func->prepare_blobs(sb);
-
-	return ret;
+	return 0;
 }
 
 static int
@@ -270,9 +261,8 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 	/* setup the performing falcon's base address and masks */
 	switch (func->boot_falcon) {
 	case NVKM_SECBOOT_FALCON_PMU:
+		sb->devidx = NVKM_SUBDEV_PMU;
 		sb->base = 0x10a000;
-		sb->irq_mask = 0x1000000;
-		sb->enable_mask = 0x2000;
 		break;
 	default:
 		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index cc100dc940ea..f1e2dc914366 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -860,6 +860,8 @@ gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
 
 	/* Write LS blob */
 	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
+	if (ret)
+		nvkm_gpuobj_del(&gsb->ls_blob);
 
 cleanup:
 	ls_ucode_mgr_cleanup(&mgr);
@@ -1023,29 +1025,34 @@ gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
 	int ret;
 
 	/* Load and prepare the managed falcon's firmwares */
-	ret = gm200_secboot_prepare_ls_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->ls_blob) {
+		ret = gm200_secboot_prepare_ls_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware that will load the LS firmwares */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-					    &gsb->acr_load_blob,
-					    &gsb->acr_load_bl_desc, true);
-	if (ret)
-		return ret;
+	if (!gsb->acr_load_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
+						&gsb->acr_load_blob,
+						&gsb->acr_load_bl_desc, true);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware bootloader */
-	ret = gm200_secboot_prepare_hsbl_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->hsbl_blob) {
+		ret = gm200_secboot_prepare_hsbl_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
 static int
-gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
+gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
 	ret = gm20x_secboot_prepare_blobs(gsb);
@@ -1053,15 +1060,37 @@ gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
 		return ret;
 
 	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					    &gsb->acr_unload_blob,
-					    &gsb->acr_unload_bl_desc, false);
-	if (ret)
-		return ret;
+	if (!gsb->acr_unload_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
+					       &gsb->acr_unload_blob,
+					       &gsb->acr_unload_bl_desc, false);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
+static int
+gm200_secboot_blobs_ready(struct gm200_secboot *gsb)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int ret;
+
+	/* firmware already loaded, nothing to do... */
+	if (gsb->firmware_ok)
+		return 0;
+
+	ret = gsb->func->prepare_blobs(gsb);
+	if (ret) {
+		nvkm_error(subdev, "failed to load secure firmware\n");
+		return ret;
+	}
+
+	gsb->firmware_ok = true;
+
+	return 0;
+}
 
 
 /*
@@ -1234,6 +1263,11 @@ gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
+	/* Make sure all blobs are ready */
+	ret = gm200_secboot_blobs_ready(gsb);
+	if (ret)
+		return ret;
+
 	/*
 	 * Dummy GM200 implementation: perform secure boot each time we are
 	 * called on FECS. Since only FECS and GPCCS are managed and started
@@ -1373,7 +1407,6 @@ gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm200_secboot_init,
 	.fini = gm200_secboot_fini,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
@@ -1415,6 +1448,7 @@ gm200_secboot_func = {
 	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
 	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
+	.prepare_blobs = gm200_secboot_prepare_blobs,
 };
 
 int
@@ -1487,3 +1521,19 @@ MODULE_FIRMWARE("nvidia/gm206/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/gp100/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_method_init.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index 684320484b70..d5395ebfe8d3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -42,6 +42,32 @@ struct gm20b_flcn_bl_desc {
 	u32 data_size;
 };
 
+static int
+gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb)
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int acr_size;
+	int ret;
+
+	ret = gm20x_secboot_prepare_blobs(gsb);
+	if (ret)
+		return ret;
+
+	acr_size = gsb->acr_load_blob->size;
+	/*
+	 * On Tegra the WPR region is set by the bootloader. It is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	if (acr_size > gsb->wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", acr_size);
+		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 /**
  * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
  *
@@ -88,6 +114,7 @@ gm20b_secboot_func = {
 	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
 	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
+	.prepare_blobs = gm20b_secboot_prepare_blobs,
 };
 
 
@@ -147,32 +174,6 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_prepare_blobs(struct nvkm_secboot *sb)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(&sb->subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(&sb->subdev, "required: %dB\n", acr_size);
-		nvkm_error(&sb->subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-static int
 gm20b_secboot_init(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
@@ -189,7 +190,6 @@ static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm20b_secboot_init,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index f2b09dee7c5d..a9a8a0e1017e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -30,7 +30,6 @@ struct nvkm_secboot_func {
 	int (*init)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*prepare_blobs)(struct nvkm_secboot *);
 	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
@@ -147,10 +146,8 @@ struct hsflcn_acr_desc {
  * @inst:		instance block for HS falcon
  * @pgd:		page directory for the HS falcon
  * @vm:			address space used by the HS falcon
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
+ * @falcon_state:	current state of the managed falcons
+ * @firmware_ok:	whether the firmware blobs have been created
  */
 struct gm200_secboot {
 	struct nvkm_secboot base;
@@ -196,9 +193,19 @@ struct gm200_secboot {
 		RUNNING,
 	} falcon_state[NVKM_SECBOOT_FALCON_END];
 
+	bool firmware_ok;
 };
 #define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
 
+/**
+ * Contains functions we wish to abstract between GM200-like implementations
+ * @bl_desc_size:	size of the BL descriptor used by this chip.
+ * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
+ *			the generic GM200 format into a data array of size
+ *			bl_desc_size
+ * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
+ * @prepare_blobs:	prepares the various blobs needed for secure booting
+ */
 struct gm200_secboot_func {
 	/*
 	 * Size of the bootloader descriptor for this chip. A block of this
@@ -214,6 +221,7 @@ struct gm200_secboot_func {
 	 * we want the HS FW to set up.
 	 */
 	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
+	int (*prepare_blobs)(struct gm200_secboot *);
 };
 
 int gm200_secboot_init(struct nvkm_secboot *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index a1b264664aad..fe063d5728e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -41,8 +41,9 @@ nvkm_top_device_new(struct nvkm_top *top)
 }
 
 u32
-nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
+nvkm_top_reset(struct nvkm_device *device, enum nvkm_devidx index)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	if (top) {
@@ -56,8 +57,25 @@ nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
 }
 
 u32
-nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
+nvkm_top_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx)
 {
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	if (top) {
+		list_for_each_entry(info, &top->device, head) {
+			if (info->index == devidx && info->intr >= 0)
+				return BIT(info->intr);
+		}
+	}
+
+	return 0;
+}
+
+u32
+nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
+{
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	u64 subdevs = 0;
 	u32 handled = 0;
@@ -78,8 +96,9 @@ nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
 }
 
 enum nvkm_devidx
-nvkm_top_fault(struct nvkm_top *top, int fault)
+nvkm_top_fault(struct nvkm_device *device, int fault)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	list_for_each_entry(info, &top->device, head) {
@@ -91,8 +110,9 @@ nvkm_top_fault(struct nvkm_top *top, int fault)
 }
 
 enum nvkm_devidx
-nvkm_top_engine(struct nvkm_top *top, int index, int *runl, int *engn)
+nvkm_top_engine(struct nvkm_device *device, int index, int *runl, int *engn)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	int n = 0;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index e06acc340e99..efac3402f9dd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -29,7 +29,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 	struct nvkm_subdev *subdev = &top->subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_top_device *info = NULL;
-	u32 data, type;
+	u32 data, type, inst;
 	int i;
 
 	for (i = 0; i < 64; i++) {
@@ -37,6 +37,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 			if (!(info = nvkm_top_device_new(top)))
 				return -ENOMEM;
 			type = ~0;
+			inst = 0;
 		}
 
 		data = nvkm_rd32(device, 0x022700 + (i * 0x04));
@@ -45,6 +46,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 		case 0x00000000: /* NOT_VALID */
 			continue;
 		case 0x00000001: /* DATA */
+			inst        = (data & 0x3c000000) >> 26;
 			info->addr  = (data & 0x00fff000);
 			info->fault = (data & 0x000000f8) >> 3;
 			break;
@@ -67,27 +69,32 @@ gk104_top_oneinit(struct nvkm_top *top)
 			continue;
 
 		/* Translate engine type to NVKM engine identifier. */
+#define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A
+#define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1)      \
+		info->index = NVKM_ENGINE_##A##0 + inst
 		switch (type) {
-		case 0x00000000: info->index = NVKM_ENGINE_GR; break;
-		case 0x00000001: info->index = NVKM_ENGINE_CE0; break;
-		case 0x00000002: info->index = NVKM_ENGINE_CE1; break;
-		case 0x00000003: info->index = NVKM_ENGINE_CE2; break;
-		case 0x00000008: info->index = NVKM_ENGINE_MSPDEC; break;
-		case 0x00000009: info->index = NVKM_ENGINE_MSPPP; break;
-		case 0x0000000a: info->index = NVKM_ENGINE_MSVLD; break;
-		case 0x0000000b: info->index = NVKM_ENGINE_MSENC; break;
-		case 0x0000000c: info->index = NVKM_ENGINE_VIC; break;
-		case 0x0000000d: info->index = NVKM_ENGINE_SEC; break;
-		case 0x0000000e: info->index = NVKM_ENGINE_NVENC0; break;
-		case 0x0000000f: info->index = NVKM_ENGINE_NVENC1; break;
-		case 0x00000010: info->index = NVKM_ENGINE_NVDEC; break;
+		case 0x00000000: A_(GR    ); break;
+		case 0x00000001: A_(CE0   ); break;
+		case 0x00000002: A_(CE1   ); break;
+		case 0x00000003: A_(CE2   ); break;
+		case 0x00000008: A_(MSPDEC); break;
+		case 0x00000009: A_(MSPPP ); break;
+		case 0x0000000a: A_(MSVLD ); break;
+		case 0x0000000b: A_(MSENC ); break;
+		case 0x0000000c: A_(VIC   ); break;
+		case 0x0000000d: A_(SEC   ); break;
+		case 0x0000000e: B_(NVENC ); break;
+		case 0x0000000f: A_(NVENC1); break;
+		case 0x00000010: A_(NVDEC ); break;
+		case 0x00000013: B_(CE    ); break;
 			break;
 		default:
 			break;
 		}
 
-		nvkm_debug(subdev, "%02x (%8s): addr %06x fault %2d engine %2d "
-				   "runlist %2d intr %2d reset %2d\n", type,
+		nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d "
+				   "engine %2d runlist %2d intr %2d "
+				   "reset %2d\n", type, inst,
 			   info->index == NVKM_SUBDEV_NR ? NULL :
 					  nvkm_subdev_name[info->index],
 			   info->addr, info->fault, info->engine, info->runlist,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
index 6b2d7531a7ff..1c3d23b0e84a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
@@ -120,6 +120,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 
 	data = nvbios_volt_parse(bios, &ver, &hdr, &cnt, &len, &info);
 	if (data && info.vidmask && info.base && info.step) {
+		volt->min_uv = info.min;
+		volt->max_uv = info.max;
 		for (i = 0; i < info.vidmask + 1; i++) {
 			if (info.base >= info.min &&
 				info.base <= info.max) {
@@ -131,6 +133,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 		}
 		volt->vid_mask = info.vidmask;
 	} else if (data && info.vidmask) {
+		volt->min_uv = 0xffffffff;
+		volt->max_uv = 0;
 		for (i = 0; i < cnt; i++) {
 			data = nvbios_volt_entry_parse(bios, i, &ver, &hdr,
 						       &ivid);
@@ -138,9 +142,14 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 				volt->vid[volt->vid_nr].uv = ivid.voltage;
 				volt->vid[volt->vid_nr].vid = ivid.vid;
 				volt->vid_nr++;
+				volt->min_uv = min(volt->min_uv, ivid.voltage);
+				volt->max_uv = max(volt->max_uv, ivid.voltage);
 			}
 		}
 		volt->vid_mask = info.vidmask;
+	} else if (data && info.type == NVBIOS_VOLT_PWM) {
+		volt->min_uv = info.base;
+		volt->max_uv = info.base + info.pwm_range;
 	}
 }
 
@@ -181,8 +190,11 @@ nvkm_volt_ctor(const struct nvkm_volt_func *func, struct nvkm_device *device,
 	volt->func = func;
 
 	/* Assuming the non-bios device should build the voltage table later */
-	if (bios)
+	if (bios) {
 		nvkm_volt_parse_bios(bios, volt);
+		nvkm_debug(&volt->subdev, "min: %iuv max: %iuv\n",
+			   volt->min_uv, volt->max_uv);
+	}
 
 	if (volt->vid_nr) {
 		for (i = 0; i < volt->vid_nr; i++) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
index d554455326da..ce5d83cdc7cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
@@ -77,18 +77,19 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale,
 	return mv;
 }
 
-int
+static int
 gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo)
 {
+	static const int v_scale = 1000;
 	int mv;
 
 	mv = gk20a_volt_get_cvb_t_voltage(speedo, -10, 100, 10, coef);
-	mv = DIV_ROUND_UP(mv, 1000);
+	mv = DIV_ROUND_UP(mv, v_scale);
 
 	return mv * 1000;
 }
 
-int
+static int
 gk20a_volt_vid_get(struct nvkm_volt *base)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -103,7 +104,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base)
 	return -EINVAL;
 }
 
-int
+static int
 gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -113,7 +114,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 	return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000);
 }
 
-int
+static int
 gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -143,9 +144,9 @@ gk20a_volt = {
 };
 
 int
-_gk20a_volt_ctor(struct nvkm_device *device, int index,
-		 const struct cvb_coef *coefs, int nb_coefs,
-		 struct gk20a_volt *volt)
+gk20a_volt_ctor(struct nvkm_device *device, int index,
+		const struct cvb_coef *coefs, int nb_coefs,
+		int vmin, struct gk20a_volt *volt)
 {
 	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	int i, uv;
@@ -160,9 +161,9 @@ _gk20a_volt_ctor(struct nvkm_device *device, int index,
 	volt->base.vid_nr = nb_coefs;
 	for (i = 0; i < volt->base.vid_nr; i++) {
 		volt->base.vid[i].vid = i;
-		volt->base.vid[i].uv =
-			gk20a_volt_calc_voltage(&coefs[i],
-						tdev->gpu_speedo);
+		volt->base.vid[i].uv = max(
+			gk20a_volt_calc_voltage(&coefs[i], tdev->gpu_speedo),
+			vmin);
 		nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i,
 			   volt->base.vid[i].vid, volt->base.vid[i].uv);
 	}
@@ -180,6 +181,6 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gk20a_cvb_coef,
-				ARRAY_SIZE(gk20a_cvb_coef), volt);
+	return gk20a_volt_ctor(device, index, gk20a_cvb_coef,
+			       ARRAY_SIZE(gk20a_cvb_coef), 0, volt);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
index 0fa3b502bcf8..6a6c97f9684e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
@@ -37,13 +37,8 @@ struct gk20a_volt {
 	struct regulator *vdd;
 };
 
-int _gk20a_volt_ctor(struct nvkm_device *device, int index,
-		     const struct cvb_coef *coefs, int nb_coefs,
-		     struct gk20a_volt *volt);
-
-int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo);
-int gk20a_volt_vid_get(struct nvkm_volt *volt);
-int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid);
-int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition);
+int gk20a_volt_ctor(struct nvkm_device *device, int index,
+		    const struct cvb_coef *coefs, int nb_coefs,
+		    int vmin, struct gk20a_volt *volt);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
index 49b5ecb701e4..74db4d28930f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
@@ -41,16 +41,52 @@ const struct cvb_coef gm20b_cvb_coef[] = {
 	/* 921600 */ { 2647676, -106455, 1632 },
 };
 
+static const struct cvb_coef gm20b_na_cvb_coef[] = {
+	/* KHz,         c0,     c1,   c2,    c3,     c4,   c5 */
+	/*  76800 */ {  814294, 8144, -940, 808, -21583, 226 },
+	/* 153600 */ {  856185, 8144, -940, 808, -21583, 226 },
+	/* 230400 */ {  898077, 8144, -940, 808, -21583, 226 },
+	/* 307200 */ {  939968, 8144, -940, 808, -21583, 226 },
+	/* 384000 */ {  981860, 8144, -940, 808, -21583, 226 },
+	/* 460800 */ { 1023751, 8144, -940, 808, -21583, 226 },
+	/* 537600 */ { 1065642, 8144, -940, 808, -21583, 226 },
+	/* 614400 */ { 1107534, 8144, -940, 808, -21583, 226 },
+	/* 691200 */ { 1149425, 8144, -940, 808, -21583, 226 },
+	/* 768000 */ { 1191317, 8144, -940, 808, -21583, 226 },
+	/* 844800 */ { 1233208, 8144, -940, 808, -21583, 226 },
+	/* 921600 */ { 1275100, 8144, -940, 808, -21583, 226 },
+	/* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 },
+};
+
+const u32 speedo_to_vmin[] = {
+	/*   0,      1,      2,      3,      4, */
+	950000, 840000, 818750, 840000, 810000,
+};
+
 int
 gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 {
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	struct gk20a_volt *volt;
+	u32 vmin;
+
+	if (tdev->gpu_speedo_id >= ARRAY_SIZE(speedo_to_vmin)) {
+		nvdev_error(device, "unsupported speedo %d\n",
+			    tdev->gpu_speedo_id);
+		return -EINVAL;
+	}
 
 	volt = kzalloc(sizeof(*volt), GFP_KERNEL);
 	if (!volt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gm20b_cvb_coef,
-				ARRAY_SIZE(gm20b_cvb_coef), volt);
+	vmin = speedo_to_vmin[tdev->gpu_speedo_id];
+
+	if (tdev->gpu_speedo_id >= 1)
+		return gk20a_volt_ctor(device, index, gm20b_na_cvb_coef,
+				     ARRAY_SIZE(gm20b_na_cvb_coef), vmin, volt);
+	else
+		return gk20a_volt_ctor(device, index, gm20b_cvb_coef,
+					ARRAY_SIZE(gm20b_cvb_coef), vmin, volt);
 }
diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig
index 336ad4de9981..556f81f6b2c7 100644
--- a/drivers/gpu/drm/omapdrm/Kconfig
+++ b/drivers/gpu/drm/omapdrm/Kconfig
@@ -4,11 +4,6 @@ config DRM_OMAP
 	depends on ARCH_OMAP2PLUS || ARCH_MULTIPLATFORM
 	select OMAP2_DSS
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_SYS_FOPS
 	default n
 	help
 	  DRM display driver for OMAP2/3/4 based boards.
diff --git a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
index 75f7827525cf..684b7aeda411 100644
--- a/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
+++ b/drivers/gpu/drm/omapdrm/displays/connector-dvi.c
@@ -255,6 +255,7 @@ static int dvic_probe_of(struct platform_device *pdev)
 	adapter_node = of_parse_phandle(node, "ddc-i2c-bus", 0);
 	if (adapter_node) {
 		adapter = of_get_i2c_adapter_by_node(adapter_node);
+		of_node_put(adapter_node);
 		if (adapter == NULL) {
 			dev_err(&pdev->dev, "failed to parse ddc-i2c-bus\n");
 			omap_dss_put_device(ddata->in);
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index 1b0cf2d8224b..0eae8afaed90 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -1284,8 +1284,7 @@ static int dsicm_probe(struct platform_device *pdev)
 	return 0;
 
 err_sysfs_create:
-	if (bldev != NULL)
-		backlight_device_unregister(bldev);
+	backlight_device_unregister(bldev);
 err_bl:
 	destroy_workqueue(ddata->workqueue);
 err_reg:
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 6f45e9d00b41..e1be5e795cd8 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	struct regulator *vdds_dsi;
-	int r;
 
 	if (dsi->vdds_dsi_reg != NULL)
 		return 0;
diff --git a/drivers/gpu/drm/omapdrm/dss/dss-of.c b/drivers/gpu/drm/omapdrm/dss/dss-of.c
index dfd4e9621e3b..e256d879b25c 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss-of.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss-of.c
@@ -125,15 +125,16 @@ u32 dss_of_port_get_port_number(struct device_node *port)
 
 static struct device_node *omapdss_of_get_remote_port(const struct device_node *node)
 {
-	struct device_node *np;
+	struct device_node *np, *np_parent;
 
 	np = of_parse_phandle(node, "remote-endpoint", 0);
 	if (!np)
 		return NULL;
 
-	np = of_get_next_parent(np);
+	np_parent = of_get_next_parent(np);
+	of_node_put(np);
 
-	return np;
+	return np_parent;
 }
 
 struct device_node *
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index 061f9bab4c9b..0c0a5139a301 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -120,7 +120,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data)
 
 static int hdmi_init_regulator(void)
 {
-	int r;
 	struct regulator *reg;
 
 	if (hdmi.vdda_reg != NULL)
diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c
index 983c8cf2441c..31f5178c22c7 100644
--- a/drivers/gpu/drm/omapdrm/omap_fb.c
+++ b/drivers/gpu/drm/omapdrm/omap_fb.c
@@ -115,8 +115,8 @@ static void omap_framebuffer_destroy(struct drm_framebuffer *fb)
 
 	for (i = 0; i < n; i++) {
 		struct plane *plane = &omap_fb->planes[i];
-		if (plane->bo)
-			drm_gem_object_unreference_unlocked(plane->bo);
+
+		drm_gem_object_unreference_unlocked(plane->bo);
 	}
 
 	kfree(omap_fb);
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 3a7bdf1c842b..85143d1b9b31 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -168,6 +168,7 @@ static int panel_simple_disable(struct drm_panel *panel)
 
 	if (p->backlight) {
 		p->backlight->props.power = FB_BLANK_POWERDOWN;
+		p->backlight->props.state |= BL_CORE_FBBLANK;
 		backlight_update_status(p->backlight);
 	}
 
@@ -235,6 +236,7 @@ static int panel_simple_enable(struct drm_panel *panel)
 		msleep(p->desc->delay.enable);
 
 	if (p->backlight) {
+		p->backlight->props.state &= ~BL_CORE_FBBLANK;
 		p->backlight->props.power = FB_BLANK_UNBLANK;
 		backlight_update_status(p->backlight);
 	}
@@ -964,8 +966,8 @@ static const struct panel_desc innolux_zj070na_01p = {
 	.num_modes = 1,
 	.bpc = 6,
 	.size = {
-		.width = 1024,
-		.height = 600,
+		.width = 154,
+		.height = 90,
 	},
 };
 
@@ -1017,6 +1019,51 @@ static const struct panel_desc lg_lb070wv8 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode lg_lp079qx1_sp0v_mode = {
+	.clock = 200000,
+	.hdisplay = 1536,
+	.hsync_start = 1536 + 12,
+	.hsync_end = 1536 + 12 + 16,
+	.htotal = 1536 + 12 + 16 + 48,
+	.vdisplay = 2048,
+	.vsync_start = 2048 + 8,
+	.vsync_end = 2048 + 8 + 4,
+	.vtotal = 2048 + 8 + 4 + 8,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc lg_lp079qx1_sp0v = {
+	.modes = &lg_lp079qx1_sp0v_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 129,
+		.height = 171,
+	},
+};
+
+static const struct drm_display_mode lg_lp097qx1_spa1_mode = {
+	.clock = 205210,
+	.hdisplay = 2048,
+	.hsync_start = 2048 + 150,
+	.hsync_end = 2048 + 150 + 5,
+	.htotal = 2048 + 150 + 5 + 5,
+	.vdisplay = 1536,
+	.vsync_start = 1536 + 3,
+	.vsync_end = 1536 + 3 + 1,
+	.vtotal = 1536 + 3 + 1 + 9,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc lg_lp097qx1_spa1 = {
+	.modes = &lg_lp097qx1_spa1_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 208,
+		.height = 147,
+	},
+};
+
 static const struct drm_display_mode lg_lp120up1_mode = {
 	.clock = 162300,
 	.hdisplay = 1920,
@@ -1224,6 +1271,28 @@ static const struct panel_desc qd43003c0_40 = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 };
 
+static const struct drm_display_mode samsung_lsn122dl01_c01_mode = {
+	.clock = 271560,
+	.hdisplay = 2560,
+	.hsync_start = 2560 + 48,
+	.hsync_end = 2560 + 48 + 32,
+	.htotal = 2560 + 48 + 32 + 80,
+	.vdisplay = 1600,
+	.vsync_start = 1600 + 2,
+	.vsync_end = 1600 + 2 + 5,
+	.vtotal = 1600 + 2 + 5 + 57,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc samsung_lsn122dl01_c01 = {
+	.modes = &samsung_lsn122dl01_c01_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 263,
+		.height = 164,
+	},
+};
+
 static const struct drm_display_mode samsung_ltn101nt05_mode = {
 	.clock = 54030,
 	.hdisplay = 1024,
@@ -1242,8 +1311,8 @@ static const struct panel_desc samsung_ltn101nt05 = {
 	.num_modes = 1,
 	.bpc = 6,
 	.size = {
-		.width = 1024,
-		.height = 600,
+		.width = 223,
+		.height = 125,
 	},
 };
 
@@ -1270,6 +1339,53 @@ static const struct panel_desc samsung_ltn140at29_301 = {
 	},
 };
 
+static const struct display_timing sharp_lq101k1ly04_timing = {
+	.pixelclock = { 60000000, 65000000, 80000000 },
+	.hactive = { 1280, 1280, 1280 },
+	.hfront_porch = { 20, 20, 20 },
+	.hback_porch = { 20, 20, 20 },
+	.hsync_len = { 10, 10, 10 },
+	.vactive = { 800, 800, 800 },
+	.vfront_porch = { 4, 4, 4 },
+	.vback_porch = { 4, 4, 4 },
+	.vsync_len = { 4, 4, 4 },
+	.flags = DISPLAY_FLAGS_PIXDATA_POSEDGE,
+};
+
+static const struct panel_desc sharp_lq101k1ly04 = {
+	.timings = &sharp_lq101k1ly04_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 217,
+		.height = 136,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA,
+};
+
+static const struct drm_display_mode sharp_lq123p1jx31_mode = {
+	.clock = 252750,
+	.hdisplay = 2400,
+	.hsync_start = 2400 + 48,
+	.hsync_end = 2400 + 48 + 32,
+	.htotal = 2400 + 48 + 32 + 80,
+	.vdisplay = 1600,
+	.vsync_start = 1600 + 3,
+	.vsync_end = 1600 + 3 + 10,
+	.vtotal = 1600 + 3 + 10 + 33,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc sharp_lq123p1jx31 = {
+	.modes = &sharp_lq123p1jx31_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 259,
+		.height = 173,
+	},
+};
+
 static const struct drm_display_mode shelly_sca07010_bfn_lnn_mode = {
 	.clock = 33300,
 	.hdisplay = 800,
@@ -1293,6 +1409,29 @@ static const struct panel_desc shelly_sca07010_bfn_lnn = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct drm_display_mode starry_kr122ea0sra_mode = {
+	.clock = 147000,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 16,
+	.hsync_end = 1920 + 16 + 16,
+	.htotal = 1920 + 16 + 16 + 32,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 15,
+	.vsync_end = 1200 + 15 + 2,
+	.vtotal = 1200 + 15 + 2 + 18,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc starry_kr122ea0sra = {
+	.modes = &starry_kr122ea0sra_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 263,
+		.height = 164,
+	},
+};
+
 static const struct drm_display_mode tpk_f07a_0102_mode = {
 	.clock = 33260,
 	.hdisplay = 800,
@@ -1457,6 +1596,12 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "lg,lb070wv8",
 		.data = &lg_lb070wv8,
 	}, {
+		.compatible = "lg,lp079qx1-sp0v",
+		.data = &lg_lp079qx1_sp0v,
+	}, {
+		.compatible = "lg,lp097qx1-spa1",
+		.data = &lg_lp097qx1_spa1,
+	}, {
 		.compatible = "lg,lp120up1",
 		.data = &lg_lp120up1,
 	}, {
@@ -1481,15 +1626,27 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "qiaodian,qd43003c0-40",
 		.data = &qd43003c0_40,
 	}, {
+		.compatible = "samsung,lsn122dl01-c01",
+		.data = &samsung_lsn122dl01_c01,
+	}, {
 		.compatible = "samsung,ltn101nt05",
 		.data = &samsung_ltn101nt05,
 	}, {
 		.compatible = "samsung,ltn140at29-301",
 		.data = &samsung_ltn140at29_301,
 	}, {
+		.compatible = "sharp,lq101k1ly04",
+		.data = &sharp_lq101k1ly04,
+	}, {
+		.compatible = "sharp,lq123p1jx31",
+		.data = &sharp_lq123p1jx31,
+	}, {
 		.compatible = "shelly,sca07010-bfn-lnn",
 		.data = &shelly_sca07010_bfn_lnn,
 	}, {
+		.compatible = "starry,kr122ea0sra",
+		.data = &starry_kr122ea0sra,
+	}, {
 		.compatible = "tpk,f07a-0102",
 		.data = &tpk_f07a_0102,
 	}, {
@@ -1701,7 +1858,6 @@ static const struct panel_desc_dsi panasonic_vvx10f004b00 = {
 	.lanes = 4,
 };
 
-
 static const struct of_device_id dsi_of_match[] = {
 	{
 		.compatible = "auo,b080uan01",
diff --git a/drivers/gpu/drm/qxl/Kconfig b/drivers/gpu/drm/qxl/Kconfig
index 38c2bb72e456..da45b11b66b8 100644
--- a/drivers/gpu/drm/qxl/Kconfig
+++ b/drivers/gpu/drm/qxl/Kconfig
@@ -1,12 +1,7 @@
 config DRM_QXL
 	tristate "QXL virtual GPU"
 	depends on DRM && PCI
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_DEFERRED_IO
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_TTM
 	select CRC32
 	help
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index b5d4b41361bd..04270f5d110c 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -203,7 +203,7 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas
 bool qxl_queue_garbage_collect(struct qxl_device *qdev, bool flush)
 {
 	if (!qxl_check_idle(qdev->release_ring)) {
-		queue_work(qdev->gc_queue, &qdev->gc_work);
+		schedule_work(&qdev->gc_work);
 		if (flush)
 			flush_work(&qdev->gc_work);
 		return true;
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index ad429683fef7..3aef12742a53 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -468,8 +468,7 @@ void qxl_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct qxl_framebuffer *qxl_fb = to_qxl_framebuffer(fb);
 
-	if (qxl_fb->obj)
-		drm_gem_object_unreference_unlocked(qxl_fb->obj);
+	drm_gem_object_unreference_unlocked(qxl_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(qxl_fb);
 }
diff --git a/drivers/gpu/drm/qxl/qxl_draw.c b/drivers/gpu/drm/qxl/qxl_draw.c
index 56e1d633875e..ffe885395145 100644
--- a/drivers/gpu/drm/qxl/qxl_draw.c
+++ b/drivers/gpu/drm/qxl/qxl_draw.c
@@ -37,7 +37,6 @@ static int alloc_clips(struct qxl_device *qdev,
  * the qxl_clip_rects. This is *not* the same as the memory allocated
  * on the device, it is offset to qxl_clip_rects.chunk.data */
 static struct qxl_rect *drawable_set_clipping(struct qxl_device *qdev,
-					      struct qxl_drawable *drawable,
 					      unsigned num_clips,
 					      struct qxl_bo *clips_bo)
 {
@@ -136,6 +135,8 @@ static int qxl_palette_create_1bit(struct qxl_bo *palette_bo,
 				 * correctly globaly, since that would require
 				 * tracking all of our palettes. */
 	ret = qxl_bo_kmap(palette_bo, (void **)&pal);
+	if (ret)
+		return ret;
 	pal->num_ents = 2;
 	pal->unique = unique++;
 	if (visual == FB_VISUAL_TRUECOLOR || visual == FB_VISUAL_DIRECTCOLOR) {
@@ -349,7 +350,7 @@ void qxl_draw_dirty_fb(struct qxl_device *qdev,
 	if (ret)
 		goto out_release_backoff;
 
-	rects = drawable_set_clipping(qdev, drawable, num_clips, clips_bo);
+	rects = drawable_set_clipping(qdev, num_clips, clips_bo);
 	if (!rects)
 		goto out_release_backoff;
 
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 3ad6604b34ce..8e633caa4078 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -321,7 +321,6 @@ struct qxl_device {
 	struct qxl_bo *current_release_bo[3];
 	int current_release_bo_offset[3];
 
-	struct workqueue_struct *gc_queue;
 	struct work_struct gc_work;
 
 	struct drm_property *hotplug_mode_update_property;
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index 2319800b7add..e642242728c0 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -258,7 +258,6 @@ static int qxl_device_init(struct qxl_device *qdev,
 		 (unsigned long)qdev->surfaceram_size);
 
 
-	qdev->gc_queue = create_singlethread_workqueue("qxl_gc");
 	INIT_WORK(&qdev->gc_work, qxl_gc_work);
 
 	return 0;
@@ -270,10 +269,7 @@ static void qxl_device_fini(struct qxl_device *qdev)
 		qxl_bo_unref(&qdev->current_release_bo[0]);
 	if (qdev->current_release_bo[1])
 		qxl_bo_unref(&qdev->current_release_bo[1]);
-	flush_workqueue(qdev->gc_queue);
-	destroy_workqueue(qdev->gc_queue);
-	qdev->gc_queue = NULL;
-
+	flush_work(&qdev->gc_work);
 	qxl_ring_free(qdev->command_ring);
 	qxl_ring_free(qdev->cursor_ring);
 	qxl_ring_free(qdev->release_ring);
@@ -310,10 +306,6 @@ int qxl_driver_load(struct drm_device *dev, unsigned long flags)
 	struct qxl_device *qdev;
 	int r;
 
-	/* require kms */
-	if (!drm_core_check_feature(dev, DRIVER_MODESET))
-		return -ENODEV;
-
 	qdev = kzalloc(sizeof(struct qxl_device), GFP_KERNEL);
 	if (qdev == NULL)
 		return -ENOMEM;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 0738d74c8d04..d50c9679e631 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -350,11 +350,19 @@ static int qxl_bo_move(struct ttm_buffer_object *bo,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_mem_reg *old_mem = &bo->mem;
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		qxl_move_null(bo, new_mem);
 		return 0;
 	}
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+	return ttm_bo_move_memcpy(bo, evict, interruptible,
+				  no_wait_gpu, new_mem);
 }
 
 static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e91763d5d800..a97abc8af657 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -589,7 +589,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev) || ASIC_IS_DCE8(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		/* use frac fb div on RS780/RS880 */
-		if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		    && !radeon_crtc->ss_enabled)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
@@ -626,7 +627,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			if (radeon_crtc->ss.refdiv) {
 				radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 				radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-				if (ASIC_IS_AVIVO(rdev))
+				if (rdev->family >= CHIP_RV770)
 					radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index 587cae4e73c9..56bb758f4e33 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -120,6 +120,7 @@ atombios_set_backlight_level(struct radeon_encoder *radeon_encoder, u8 level)
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 			if (dig->backlight_level == 0)
 				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
 			else {
diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c
index 35e0fc3ae8a7..7ba450832e6b 100644
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@@ -3843,7 +3843,10 @@ static void ci_find_dpm_states_clocks_in_dpm_table(struct radeon_device *rdev,
 	if (i >= sclk_table->count) {
 		pi->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
 	} else {
-		/* XXX check display min clock requirements */
+		/* XXX The current code always reprogrammed the sclk levels,
+		 * but we don't currently handle disp sclk requirements
+		 * so just skip it.
+		 */
 		if (CISLAND_MINIMUM_ENGINE_CLOCK != CISLAND_MINIMUM_ENGINE_CLOCK)
 			pi->need_update_smu7_dpm_table |= DPMTABLE_UPDATE_SCLK;
 	}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ba192a35c607..0c1b9ff433af 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -53,6 +53,7 @@ MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
@@ -72,6 +73,7 @@ MODULE_FIRMWARE("radeon/hawaii_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
@@ -1990,12 +1992,17 @@ static int cik_init_microcode(struct radeon_device *rdev)
 	int new_fw = 0;
 	int err;
 	int num_fw;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
 		chip_name = "BONAIRE";
+		if ((rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x665f))
+			new_smc = true;
 		new_chip_name = "bonaire";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2010,6 +2017,8 @@ static int cik_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAWAII:
 		chip_name = "HAWAII";
+		if (rdev->pdev->revision == 0x80)
+			new_smc = true;
 		new_chip_name = "hawaii";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2259,7 +2268,10 @@ static int cik_init_microcode(struct radeon_device *rdev)
 			}
 		}
 
-		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+		if (new_smc)
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+		else
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 		if (err) {
 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
@@ -8354,7 +8366,8 @@ static int cik_startup(struct radeon_device *rdev)
 		}
 	}
 	rdev->rlc.cs_data = ci_cs_data;
-	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
 	r = sumo_rlc_init(rdev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 0d3f744de35a..d960d3915408 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2209,6 +2209,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 		break;
 	}
+	case PACKET3_PFP_SYNC_ME:
+		if (pkt->count) {
+			DRM_ERROR("bad PFP_SYNC_ME\n");
+			return -EINVAL;
+		}
+		break;
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
@@ -3381,6 +3387,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 	case PACKET3_MPEG_INDEX:
 	case PACKET3_WAIT_REG_MEM:
 	case PACKET3_MEM_WRITE:
+	case PACKET3_PFP_SYNC_ME:
 	case PACKET3_SURFACE_SYNC:
 	case PACKET3_EVENT_WRITE:
 	case PACKET3_EVENT_WRITE_EOP:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 0b174e14e9a6..c8e3d394cde7 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -1624,6 +1624,7 @@
 		 */
 #              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
 #              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
+#define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
diff --git a/drivers/gpu/drm/radeon/radeon_acpi.c b/drivers/gpu/drm/radeon/radeon_acpi.c
index 59acd0e5c2c6..31c9a92d6a1b 100644
--- a/drivers/gpu/drm/radeon/radeon_acpi.c
+++ b/drivers/gpu/drm/radeon/radeon_acpi.c
@@ -741,13 +741,6 @@ int radeon_acpi_init(struct radeon_device *rdev)
 		}
 
 		atif->encoder_for_bl = target;
-		if (!target) {
-			/* Brightness change notification is enabled, but we
-			 * didn't find a backlight controller, this should
-			 * never happen.
-			 */
-			DRM_ERROR("Cannot find a backlight controller\n");
-		}
 	}
 
 	if (atif->functions.sbios_requests && !atif->functions.system_params) {
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index f8097a0e7a79..5df3ec73021b 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1155,7 +1155,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		    le16_to_cpu(firmware_info->info.usReferenceClock);
 		p1pll->reference_div = 0;
 
-		if (crev < 2)
+		if ((frev < 2) && (crev < 2))
 			p1pll->pll_out_min =
 				le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
 		else
@@ -1164,7 +1164,7 @@ bool radeon_atom_get_clock_info(struct drm_device *dev)
 		p1pll->pll_out_max =
 		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);
 
-		if (crev >= 4) {
+		if (((frev < 2) && (crev >= 4)) || (frev >= 2)) {
 			p1pll->lcd_pll_out_min =
 				le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
 			if (p1pll->lcd_pll_out_min == 0)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 95f4fea89302..6de342861202 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "radeon_acpi.h"
 
@@ -27,6 +28,7 @@ struct radeon_atpx_functions {
 struct radeon_atpx {
 	acpi_handle handle;
 	struct radeon_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct radeon_atpx_priv {
@@ -62,6 +64,14 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool radeon_is_atpx_hybrid(void) {
+	return radeon_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,18 +151,12 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = radeon_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -171,19 +175,42 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -258,6 +285,10 @@ static int radeon_atpx_set_discrete_state(struct radeon_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
@@ -505,7 +536,6 @@ static int radeon_atpx_get_client_id(struct pci_dev *pdev)
 static const struct vga_switcheroo_handler radeon_atpx_handler = {
 	.switchto = radeon_atpx_switchto,
 	.power_state = radeon_atpx_power_state,
-	.init = radeon_atpx_init,
 	.get_client_id = radeon_atpx_get_client_id,
 };
 
@@ -541,6 +571,7 @@ static bool radeon_atpx_detect(void)
 		printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		radeon_atpx_priv.atpx_detected = true;
+		radeon_atpx_init();
 		return true;
 	}
 	return false;
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 81a63d7f5cd9..b79f3b002471 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -2064,7 +2064,6 @@ radeon_add_atom_connector(struct drm_device *dev,
 							   RADEON_OUTPUT_CSC_BYPASS);
 			/* no HPD on analog connectors */
 			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -2314,8 +2313,10 @@ radeon_add_atom_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
@@ -2391,7 +2392,6 @@ radeon_add_legacy_connector(struct drm_device *dev,
 					      1);
 		/* no HPD on analog connectors */
 		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 		break;
@@ -2476,10 +2476,13 @@ radeon_add_legacy_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
+
 	connector->display_info.subpixel_order = subpixel_order;
 	drm_connector_register(connector);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e721e6b2766e..a00dd2f74527 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -30,6 +30,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/radeon_drm.h>
+#include <linux/pm_runtime.h>
 #include <linux/vgaarb.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
@@ -630,6 +631,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 /*
  * GPU helpers function.
  */
+
+/**
+ * radeon_device_is_virtual - check if we are running is a virtual environment
+ *
+ * Check if the asic has been passed through to a VM (all asics).
+ * Used at driver startup.
+ * Returns true if virtual or false if not.
+ */
+static bool radeon_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * radeon_card_posted - check if the hw has already been initialized
  *
@@ -643,6 +661,10 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
+	/* for pass through, always force asic_init */
+	if (radeon_device_is_virtual())
+		return false;
+
 	/* required for EFI mode on macbook2,1 which uses an r5xx asic */
 	if (efi_enabled(EFI_BOOT) &&
 	    (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
@@ -1505,6 +1527,9 @@ int radeon_device_init(struct radeon_device *rdev,
 	return 0;
 
 failed:
+	/* balance pm_runtime_get_sync() in radeon_driver_unload_kms() */
+	if (radeon_is_px(ddev))
+		pm_runtime_put_noidle(ddev->dev);
 	if (runtime)
 		vga_switcheroo_fini_domain_pm_ops(rdev->dev);
 	return r;
@@ -1631,7 +1656,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
 	radeon_agp_suspend(rdev);
 
 	pci_save_state(dev->pdev);
-	if (freeze && rdev->family >= CHIP_R600) {
+	if (freeze && rdev->family >= CHIP_CEDAR) {
 		rdev->asic->asic_reset(rdev, true);
 		pci_restore_state(dev->pdev);
 	} else if (suspend) {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 3965d1916b9c..c3206fb8f4cf 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -714,7 +714,7 @@ static void radeon_crtc_init(struct drm_device *dev, int index)
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
-	radeon_crtc->flip_queue = create_singlethread_workqueue("radeon-crtc");
+	radeon_crtc->flip_queue = alloc_workqueue("radeon-crtc", WQ_HIGHPRI, 0);
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
@@ -1324,9 +1324,7 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb);
 
-	if (radeon_fb->obj) {
-		drm_gem_object_unreference_unlocked(radeon_fb->obj);
-	}
+	drm_gem_object_unreference_unlocked(radeon_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(radeon_fb);
 }
@@ -1711,6 +1709,7 @@ void radeon_modeset_fini(struct radeon_device *rdev)
 		radeon_afmt_fini(rdev);
 		drm_kms_helper_poll_fini(rdev->ddev);
 		radeon_hpd_fini(rdev);
+		drm_crtc_force_disable_all(rdev->ddev);
 		drm_mode_config_cleanup(rdev->ddev);
 		rdev->mode_info.mode_config_initialized = false;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a455dc7d4aa1..c01a7c6abb49 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -93,9 +93,10 @@
  *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
  *   2.44.0 - SET_APPEND_CNT packet3 support
  *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
+ *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	45
+#define KMS_DRIVER_MINOR	46
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -162,9 +163,13 @@ void radeon_debugfs_cleanup(struct drm_minor *minor);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void radeon_register_atpx_handler(void);
 void radeon_unregister_atpx_handler(void);
+bool radeon_has_atpx_dgpu_power_cntl(void);
+bool radeon_is_atpx_hybrid(void);
 #else
 static inline void radeon_register_atpx_handler(void) {}
 static inline void radeon_unregister_atpx_handler(void) {}
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool radeon_is_atpx_hybrid(void) { return false; }
 #endif
 
 int radeon_no_wb;
@@ -404,7 +409,10 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (radeon_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -421,7 +429,9 @@ static int radeon_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (radeon_is_atpx_hybrid() ||
+	    !radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index 414953c46a38..835563c1f0ed 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -63,7 +63,10 @@ int radeon_driver_unload_kms(struct drm_device *dev)
 	if (rdev->rmmio == NULL)
 		goto done_free;
 
-	pm_runtime_get_sync(dev->dev);
+	if (radeon_is_px(dev)) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}
 
 	radeon_kfd_device_fini(rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 590b0377fbe2..ffdad81ef964 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -300,8 +300,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
-	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
-				      evict, no_wait_gpu, new_mem);
+	r = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
 }
@@ -403,6 +402,10 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	r = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (r)
+		return r;
+
 	/* Can't move a pinned BO */
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	if (WARN_ON_ONCE(rbo->pin_count > 0))
@@ -441,7 +444,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index b30e719dd56d..2523ca96c6c7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -50,6 +50,7 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin");
 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
+MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
@@ -65,6 +66,7 @@ MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
+MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
 MODULE_FIRMWARE("radeon/VERDE_me.bin");
@@ -80,6 +82,7 @@ MODULE_FIRMWARE("radeon/verde_ce.bin");
 MODULE_FIRMWARE("radeon/verde_mc.bin");
 MODULE_FIRMWARE("radeon/verde_rlc.bin");
 MODULE_FIRMWARE("radeon/verde_smc.bin");
+MODULE_FIRMWARE("radeon/verde_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
 MODULE_FIRMWARE("radeon/OLAND_me.bin");
@@ -95,6 +98,7 @@ MODULE_FIRMWARE("radeon/oland_ce.bin");
 MODULE_FIRMWARE("radeon/oland_mc.bin");
 MODULE_FIRMWARE("radeon/oland_rlc.bin");
 MODULE_FIRMWARE("radeon/oland_smc.bin");
+MODULE_FIRMWARE("radeon/oland_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
@@ -110,6 +114,7 @@ MODULE_FIRMWARE("radeon/hainan_ce.bin");
 MODULE_FIRMWARE("radeon/hainan_mc.bin");
 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
 MODULE_FIRMWARE("radeon/hainan_smc.bin");
+MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 
 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
@@ -1653,12 +1658,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 	char fw_name[30];
 	int err;
 	int new_fw = 0;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
 		chip_name = "TAHITI";
+		/* XXX: figure out which Tahitis need the new ucode */
+		if (0)
+			new_smc = true;
 		new_chip_name = "tahiti";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1670,6 +1679,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_PITCAIRN:
 		chip_name = "PITCAIRN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x6810) ||
+		    (rdev->pdev->device == 0x6811) ||
+		    (rdev->pdev->device == 0x6816) ||
+		    (rdev->pdev->device == 0x6817) ||
+		    (rdev->pdev->device == 0x6806))
+			new_smc = true;
 		new_chip_name = "pitcairn";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1681,6 +1697,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_VERDE:
 		chip_name = "VERDE";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0x87) ||
+		    (rdev->pdev->device == 0x6820) ||
+		    (rdev->pdev->device == 0x6821) ||
+		    (rdev->pdev->device == 0x6822) ||
+		    (rdev->pdev->device == 0x6823) ||
+		    (rdev->pdev->device == 0x682A) ||
+		    (rdev->pdev->device == 0x682B))
+			new_smc = true;
 		new_chip_name = "verde";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1692,6 +1718,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_OLAND:
 		chip_name = "OLAND";
+		if ((rdev->pdev->revision == 0xC7) ||
+		    (rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->device == 0x6604) ||
+		    (rdev->pdev->device == 0x6605))
+			new_smc = true;
 		new_chip_name = "oland";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1702,6 +1735,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAINAN:
 		chip_name = "HAINAN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0xC3) ||
+		    (rdev->pdev->device == 0x6664) ||
+		    (rdev->pdev->device == 0x6665) ||
+		    (rdev->pdev->device == 0x6667))
+			new_smc = true;
 		new_chip_name = "hainan";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1847,7 +1887,10 @@ static int si_init_microcode(struct radeon_device *rdev)
 		}
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+	if (new_smc)
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 	if (err) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index 7fc3ca5ce6c7..4c2fd056dd6d 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -6,7 +6,6 @@ config DRM_RCAR_DU
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
-	select DRM_KMS_FB_HELPER
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have an R-Car chipset.
diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile
index 827711e28226..d3b44651061a 100644
--- a/drivers/gpu/drm/rcar-du/Makefile
+++ b/drivers/gpu/drm/rcar-du/Makefile
@@ -7,8 +7,8 @@ rcar-du-drm-y := rcar_du_crtc.o \
 		 rcar_du_plane.o \
 		 rcar_du_vgacon.o
 
-rcar-du-drm-$(CONFIG_DRM_RCAR_HDMI)	+= rcar_du_hdmicon.o \
-					   rcar_du_hdmienc.o
+rcar-du-drm-$(CONFIG_DRM_RCAR_HDMI)	+= rcar_du_hdmienc.o
+
 rcar-du-drm-$(CONFIG_DRM_RCAR_LVDS)	+= rcar_du_lvdsenc.o
 
 rcar-du-drm-$(CONFIG_DRM_RCAR_VSP)	+= rcar_du_vsp.o
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index 0d8bdda736f9..e39fcef2e033 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -552,7 +552,7 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
 	rcar_du_crtc_write(rcrtc, DSRCR, status & DSRCR_MASK);
 
 	if (status & DSSR_FRM) {
-		drm_handle_vblank(rcrtc->crtc.dev, rcrtc->index);
+		drm_crtc_handle_vblank(&rcrtc->crtc);
 		rcar_du_crtc_finish_page_flip(rcrtc);
 		ret = IRQ_HANDLED;
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
index 55149e9ce28e..ab8645c57e2d 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c
@@ -19,7 +19,6 @@
 
 #include "rcar_du_drv.h"
 #include "rcar_du_encoder.h"
-#include "rcar_du_hdmicon.h"
 #include "rcar_du_hdmienc.h"
 #include "rcar_du_kms.h"
 #include "rcar_du_lvdscon.h"
@@ -174,7 +173,7 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
 		break;
 
 	case DRM_MODE_ENCODER_TMDS:
-		ret = rcar_du_hdmi_connector_init(rcdu, renc);
+		/* connector managed by the bridge driver */
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
index a8669c3e0dd5..7fc10a9c34c3 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.h
@@ -15,7 +15,6 @@
 #define __RCAR_DU_ENCODER_H__
 
 #include <drm/drm_crtc.h>
-#include <drm/drm_encoder_slave.h>
 
 struct rcar_du_device;
 struct rcar_du_hdmienc;
@@ -30,16 +29,16 @@ enum rcar_du_encoder_type {
 };
 
 struct rcar_du_encoder {
-	struct drm_encoder_slave slave;
+	struct drm_encoder base;
 	enum rcar_du_output output;
 	struct rcar_du_hdmienc *hdmi;
 	struct rcar_du_lvdsenc *lvds;
 };
 
 #define to_rcar_encoder(e) \
-	container_of(e, struct rcar_du_encoder, slave.base)
+	container_of(e, struct rcar_du_encoder, base)
 
-#define rcar_encoder_to_drm_encoder(e)	(&(e)->slave.base)
+#define rcar_encoder_to_drm_encoder(e)	(&(e)->base)
 
 struct rcar_du_connector {
 	struct drm_connector connector;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
deleted file mode 100644
index 612b4d5ae098..000000000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.c
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * R-Car Display Unit HDMI Connector
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <drm/drmP.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_encoder_slave.h>
-
-#include "rcar_du_drv.h"
-#include "rcar_du_encoder.h"
-#include "rcar_du_hdmicon.h"
-#include "rcar_du_kms.h"
-
-#define to_slave_funcs(e)	(to_rcar_encoder(e)->slave.slave_funcs)
-
-static int rcar_du_hdmi_connector_get_modes(struct drm_connector *connector)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->get_modes == NULL)
-		return 0;
-
-	return sfuncs->get_modes(encoder, connector);
-}
-
-static int rcar_du_hdmi_connector_mode_valid(struct drm_connector *connector,
-					     struct drm_display_mode *mode)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->mode_valid == NULL)
-		return MODE_OK;
-
-	return sfuncs->mode_valid(encoder, mode);
-}
-
-static const struct drm_connector_helper_funcs connector_helper_funcs = {
-	.get_modes = rcar_du_hdmi_connector_get_modes,
-	.mode_valid = rcar_du_hdmi_connector_mode_valid,
-};
-
-static enum drm_connector_status
-rcar_du_hdmi_connector_detect(struct drm_connector *connector, bool force)
-{
-	struct rcar_du_connector *con = to_rcar_connector(connector);
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(con->encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->detect == NULL)
-		return connector_status_unknown;
-
-	return sfuncs->detect(encoder, connector);
-}
-
-static const struct drm_connector_funcs connector_funcs = {
-	.dpms = drm_atomic_helper_connector_dpms,
-	.reset = drm_atomic_helper_connector_reset,
-	.detect = rcar_du_hdmi_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = drm_connector_cleanup,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
-};
-
-int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-				struct rcar_du_encoder *renc)
-{
-	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(renc);
-	struct rcar_du_connector *rcon;
-	struct drm_connector *connector;
-	int ret;
-
-	rcon = devm_kzalloc(rcdu->dev, sizeof(*rcon), GFP_KERNEL);
-	if (rcon == NULL)
-		return -ENOMEM;
-
-	connector = &rcon->connector;
-	connector->display_info.width_mm = 0;
-	connector->display_info.height_mm = 0;
-	connector->interlace_allowed = true;
-	connector->polled = DRM_CONNECTOR_POLL_HPD;
-
-	ret = drm_connector_init(rcdu->ddev, connector, &connector_funcs,
-				 DRM_MODE_CONNECTOR_HDMIA);
-	if (ret < 0)
-		return ret;
-
-	drm_connector_helper_add(connector, &connector_helper_funcs);
-
-	connector->dpms = DRM_MODE_DPMS_OFF;
-	drm_object_property_set_value(&connector->base,
-		rcdu->ddev->mode_config.dpms_property, DRM_MODE_DPMS_OFF);
-
-	ret = drm_mode_connector_attach_encoder(connector, encoder);
-	if (ret < 0)
-		return ret;
-
-	rcon->encoder = renc;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h b/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h
deleted file mode 100644
index 87daa949227f..000000000000
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmicon.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * R-Car Display Unit HDMI Connector
- *
- * Copyright (C) 2014 Renesas Electronics Corporation
- *
- * Contact: Laurent Pinchart (laurent.pinchart@ideasonboard.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#ifndef __RCAR_DU_HDMICON_H__
-#define __RCAR_DU_HDMICON_H__
-
-struct rcar_du_device;
-struct rcar_du_encoder;
-
-#if IS_ENABLED(CONFIG_DRM_RCAR_HDMI)
-int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-				struct rcar_du_encoder *renc);
-#else
-static inline int rcar_du_hdmi_connector_init(struct rcar_du_device *rcdu,
-					      struct rcar_du_encoder *renc)
-{
-	return -ENOSYS;
-}
-#endif
-
-#endif /* __RCAR_DU_HDMICON_H__ */
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
index 461662d231e2..4de3ff0dbebd 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_hdmienc.c
@@ -16,7 +16,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_encoder_slave.h>
 
 #include "rcar_du_drv.h"
 #include "rcar_du_encoder.h"
@@ -25,20 +24,14 @@
 
 struct rcar_du_hdmienc {
 	struct rcar_du_encoder *renc;
-	struct device *dev;
 	bool enabled;
 };
 
 #define to_rcar_hdmienc(e)	(to_rcar_encoder(e)->hdmi)
-#define to_slave_funcs(e)	(to_rcar_encoder(e)->slave.slave_funcs)
 
 static void rcar_du_hdmienc_disable(struct drm_encoder *encoder)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->dpms)
-		sfuncs->dpms(encoder, DRM_MODE_DPMS_OFF);
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_enable(hdmienc->renc->lvds, encoder->crtc,
@@ -50,15 +43,11 @@ static void rcar_du_hdmienc_disable(struct drm_encoder *encoder)
 static void rcar_du_hdmienc_enable(struct drm_encoder *encoder)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_enable(hdmienc->renc->lvds, encoder->crtc,
 				       true);
 
-	if (sfuncs->dpms)
-		sfuncs->dpms(encoder, DRM_MODE_DPMS_ON);
-
 	hdmienc->enabled = true;
 }
 
@@ -67,29 +56,21 @@ static int rcar_du_hdmienc_atomic_check(struct drm_encoder *encoder,
 					struct drm_connector_state *conn_state)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
 	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
-	const struct drm_display_mode *mode = &crtc_state->mode;
 
 	if (hdmienc->renc->lvds)
 		rcar_du_lvdsenc_atomic_check(hdmienc->renc->lvds,
 					     adjusted_mode);
 
-	if (sfuncs->mode_fixup == NULL)
-		return 0;
-
-	return sfuncs->mode_fixup(encoder, mode, adjusted_mode) ? 0 : -EINVAL;
+	return 0;
 }
 
+
 static void rcar_du_hdmienc_mode_set(struct drm_encoder *encoder,
 				     struct drm_display_mode *mode,
 				     struct drm_display_mode *adjusted_mode)
 {
 	struct rcar_du_hdmienc *hdmienc = to_rcar_hdmienc(encoder);
-	const struct drm_encoder_slave_funcs *sfuncs = to_slave_funcs(encoder);
-
-	if (sfuncs->mode_set)
-		sfuncs->mode_set(encoder, mode, adjusted_mode);
 
 	rcar_du_crtc_route_output(encoder->crtc, hdmienc->renc->output);
 }
@@ -109,7 +90,6 @@ static void rcar_du_hdmienc_cleanup(struct drm_encoder *encoder)
 		rcar_du_hdmienc_disable(encoder);
 
 	drm_encoder_cleanup(encoder);
-	put_device(hdmienc->dev);
 }
 
 static const struct drm_encoder_funcs encoder_funcs = {
@@ -120,8 +100,7 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 			 struct rcar_du_encoder *renc, struct device_node *np)
 {
 	struct drm_encoder *encoder = rcar_encoder_to_drm_encoder(renc);
-	struct drm_i2c_encoder_driver *driver;
-	struct i2c_client *i2c_slave;
+	struct drm_bridge *bridge;
 	struct rcar_du_hdmienc *hdmienc;
 	int ret;
 
@@ -129,44 +108,29 @@ int rcar_du_hdmienc_init(struct rcar_du_device *rcdu,
 	if (hdmienc == NULL)
 		return -ENOMEM;
 
-	/* Locate the slave I2C device and driver. */
-	i2c_slave = of_find_i2c_device_by_node(np);
-	if (!i2c_slave || !i2c_get_clientdata(i2c_slave)) {
-		dev_dbg(rcdu->dev,
-			"can't get I2C slave for %s, deferring probe\n",
-			of_node_full_name(np));
+	/* Locate drm bridge from the hdmi encoder DT node */
+	bridge = of_drm_find_bridge(np);
+	if (!bridge)
 		return -EPROBE_DEFER;
-	}
-
-	hdmienc->dev = &i2c_slave->dev;
-
-	if (hdmienc->dev->driver == NULL) {
-		dev_dbg(rcdu->dev,
-			"I2C slave %s not probed yet, deferring probe\n",
-			dev_name(hdmienc->dev));
-		ret = -EPROBE_DEFER;
-		goto error;
-	}
-
-	/* Initialize the slave encoder. */
-	driver = to_drm_i2c_encoder_driver(to_i2c_driver(hdmienc->dev->driver));
-	ret = driver->encoder_init(i2c_slave, rcdu->ddev, &renc->slave);
-	if (ret < 0)
-		goto error;
 
 	ret = drm_encoder_init(rcdu->ddev, encoder, &encoder_funcs,
 			       DRM_MODE_ENCODER_TMDS, NULL);
 	if (ret < 0)
-		goto error;
+		return ret;
 
 	drm_encoder_helper_add(encoder, &encoder_helper_funcs);
 
 	renc->hdmi = hdmienc;
 	hdmienc->renc = renc;
 
-	return 0;
+	/* Link drm_bridge to encoder */
+	bridge->encoder = encoder;
+
+	ret = drm_bridge_attach(rcdu->ddev, bridge);
+	if (ret) {
+		drm_encoder_cleanup(encoder);
+		return ret;
+	}
 
-error:
-	put_device(hdmienc->dev);
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index e48611e83c03..3c58669a06ce 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -4,11 +4,7 @@ config DRM_ROCKCHIP
 	depends on RESET_CONTROLLER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_PANEL
-	select FB_CFB_FILLRECT
-	select FB_CFB_COPYAREA
-	select FB_CFB_IMAGEBLIT
 	select VIDEOMODE_HELPERS
 	help
 	  Choose this option if you have a Rockchip soc chipset.
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index c120172add5c..89aadbf465f8 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -14,6 +14,7 @@
 
 #include <linux/component.h>
 #include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
@@ -33,13 +34,28 @@
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_vop.h"
 
+#define RK3288_GRF_SOC_CON6		0x25c
+#define RK3288_EDP_LCDC_SEL		BIT(5)
+#define RK3399_GRF_SOC_CON20		0x6250
+#define RK3399_EDP_LCDC_SEL		BIT(5)
+
+#define HIWORD_UPDATE(val, mask)	(val | (mask) << 16)
+
 #define to_dp(nm)	container_of(nm, struct rockchip_dp_device, nm)
 
-/* dp grf register offset */
-#define GRF_SOC_CON6                            0x025c
-#define GRF_EDP_LCD_SEL_MASK                    BIT(5)
-#define GRF_EDP_SEL_VOP_LIT                     BIT(5)
-#define GRF_EDP_SEL_VOP_BIG                     0
+/**
+ * struct rockchip_dp_chip_data - splite the grf setting of kind of chips
+ * @lcdsel_grf_reg: grf register offset of lcdc select
+ * @lcdsel_big: reg value of selecting vop big for eDP
+ * @lcdsel_lit: reg value of selecting vop little for eDP
+ * @chip_type: specific chip type
+ */
+struct rockchip_dp_chip_data {
+	u32	lcdsel_grf_reg;
+	u32	lcdsel_big;
+	u32	lcdsel_lit;
+	u32	chip_type;
+};
 
 struct rockchip_dp_device {
 	struct drm_device        *drm_dev;
@@ -48,9 +64,12 @@ struct rockchip_dp_device {
 	struct drm_display_mode  mode;
 
 	struct clk               *pclk;
+	struct clk               *grfclk;
 	struct regmap            *grf;
 	struct reset_control     *rst;
 
+	const struct rockchip_dp_chip_data *data;
+
 	struct analogix_dp_plat_data plat_data;
 };
 
@@ -77,6 +96,7 @@ static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data)
 	ret = rockchip_dp_pre_init(dp);
 	if (ret < 0) {
 		dev_err(dp->dev, "failed to dp pre init %d\n", ret);
+		clk_disable_unprepare(dp->pclk);
 		return ret;
 	}
 
@@ -92,6 +112,23 @@ static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data)
 	return 0;
 }
 
+static int rockchip_dp_get_modes(struct analogix_dp_plat_data *plat_data,
+				 struct drm_connector *connector)
+{
+	struct drm_display_info *di = &connector->display_info;
+	/* VOP couldn't output YUV video format for eDP rightly */
+	u32 mask = DRM_COLOR_FORMAT_YCRCB444 | DRM_COLOR_FORMAT_YCRCB422;
+
+	if ((di->color_formats & mask)) {
+		DRM_DEBUG_KMS("Swapping display color format from YUV to RGB\n");
+		di->color_formats &= ~mask;
+		di->color_formats |= DRM_COLOR_FORMAT_RGB444;
+		di->bpc = 8;
+	}
+
+	return 0;
+}
+
 static bool
 rockchip_dp_drm_encoder_mode_fixup(struct drm_encoder *encoder,
 				   const struct drm_display_mode *mode,
@@ -119,17 +156,23 @@ static void rockchip_dp_drm_encoder_enable(struct drm_encoder *encoder)
 		return;
 
 	if (ret)
-		val = GRF_EDP_SEL_VOP_LIT | (GRF_EDP_LCD_SEL_MASK << 16);
+		val = dp->data->lcdsel_lit;
 	else
-		val = GRF_EDP_SEL_VOP_BIG | (GRF_EDP_LCD_SEL_MASK << 16);
+		val = dp->data->lcdsel_big;
 
 	dev_dbg(dp->dev, "vop %s output to dp\n", (ret) ? "LIT" : "BIG");
 
-	ret = regmap_write(dp->grf, GRF_SOC_CON6, val);
-	if (ret != 0) {
-		dev_err(dp->dev, "Could not write to GRF: %d\n", ret);
+	ret = clk_prepare_enable(dp->grfclk);
+	if (ret < 0) {
+		dev_err(dp->dev, "failed to enable grfclk %d\n", ret);
 		return;
 	}
+
+	ret = regmap_write(dp->grf, dp->data->lcdsel_grf_reg, val);
+	if (ret != 0)
+		dev_err(dp->dev, "Could not write to GRF: %d\n", ret);
+
+	clk_disable_unprepare(dp->grfclk);
 }
 
 static void rockchip_dp_drm_encoder_nop(struct drm_encoder *encoder)
@@ -143,22 +186,29 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder,
 				      struct drm_connector_state *conn_state)
 {
 	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+	struct rockchip_dp_device *dp = to_dp(encoder);
+	int ret;
 
 	/*
-	 * FIXME(Yakir): driver should configure the CRTC output video
-	 * mode with the display information which indicated the monitor
-	 * support colorimetry.
-	 *
-	 * But don't know why the CRTC driver seems could only output the
-	 * RGBaaa rightly. For example, if connect the "innolux,n116bge"
-	 * eDP screen, EDID would indicated that screen only accepted the
-	 * 6bpc mode. But if I configure CRTC to RGB666 output, then eDP
-	 * screen would show a blue picture (RGB888 show a green picture).
-	 * But if I configure CTRC to RGBaaa, and eDP driver still keep
-	 * RGB666 input video mode, then screen would works prefect.
+	 * The hardware IC designed that VOP must output the RGB10 video
+	 * format to eDP controller, and if eDP panel only support RGB8,
+	 * then eDP controller should cut down the video data, not via VOP
+	 * controller, that's why we need to hardcode the VOP output mode
+	 * to RGA10 here.
 	 */
+
 	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
 	s->output_type = DRM_MODE_CONNECTOR_eDP;
+	if (dp->data->chip_type == RK3399_EDP) {
+		/*
+		 * For RK3399, VOP Lit must code the out mode to RGB888,
+		 * VOP Big must code the out mode to RGB10.
+		 */
+		ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node,
+							encoder);
+		if (ret > 0)
+			s->output_mode = ROCKCHIP_OUT_MODE_P888;
+	}
 
 	return 0;
 }
@@ -192,6 +242,16 @@ static int rockchip_dp_init(struct rockchip_dp_device *dp)
 		return PTR_ERR(dp->grf);
 	}
 
+	dp->grfclk = devm_clk_get(dev, "grf");
+	if (PTR_ERR(dp->grfclk) == -ENOENT) {
+		dp->grfclk = NULL;
+	} else if (PTR_ERR(dp->grfclk) == -EPROBE_DEFER) {
+		return -EPROBE_DEFER;
+	} else if (IS_ERR(dp->grfclk)) {
+		dev_err(dev, "failed to get grf clock\n");
+		return PTR_ERR(dp->grfclk);
+	}
+
 	dp->pclk = devm_clk_get(dev, "pclk");
 	if (IS_ERR(dp->pclk)) {
 		dev_err(dev, "failed to get pclk property\n");
@@ -213,6 +273,7 @@ static int rockchip_dp_init(struct rockchip_dp_device *dp)
 	ret = rockchip_dp_pre_init(dp);
 	if (ret < 0) {
 		dev_err(dp->dev, "failed to pre init %d\n", ret);
+		clk_disable_unprepare(dp->pclk);
 		return ret;
 	}
 
@@ -246,6 +307,7 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 			    void *data)
 {
 	struct rockchip_dp_device *dp = dev_get_drvdata(dev);
+	const struct rockchip_dp_chip_data *dp_data;
 	struct drm_device *drm_dev = data;
 	int ret;
 
@@ -256,10 +318,15 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 	 */
 	dev_set_drvdata(dev, NULL);
 
+	dp_data = of_device_get_match_data(dev);
+	if (!dp_data)
+		return -ENODEV;
+
 	ret = rockchip_dp_init(dp);
 	if (ret < 0)
 		return ret;
 
+	dp->data = dp_data;
 	dp->drm_dev = drm_dev;
 
 	ret = rockchip_dp_drm_create_encoder(dp);
@@ -270,9 +337,10 @@ static int rockchip_dp_bind(struct device *dev, struct device *master,
 
 	dp->plat_data.encoder = &dp->encoder;
 
-	dp->plat_data.dev_type = RK3288_DP;
+	dp->plat_data.dev_type = dp->data->chip_type;
 	dp->plat_data.power_on = rockchip_dp_poweron;
 	dp->plat_data.power_off = rockchip_dp_powerdown;
+	dp->plat_data.get_modes = rockchip_dp_get_modes;
 
 	return analogix_dp_bind(dev, dp->drm_dev, &dp->plat_data);
 }
@@ -292,38 +360,33 @@ static int rockchip_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *panel_node, *port, *endpoint;
+	struct drm_panel *panel = NULL;
 	struct rockchip_dp_device *dp;
-	struct drm_panel *panel;
 
 	port = of_graph_get_port_by_id(dev->of_node, 1);
-	if (!port) {
-		dev_err(dev, "can't find output port\n");
-		return -EINVAL;
-	}
-
-	endpoint = of_get_child_by_name(port, "endpoint");
-	of_node_put(port);
-	if (!endpoint) {
-		dev_err(dev, "no output endpoint found\n");
-		return -EINVAL;
-	}
-
-	panel_node = of_graph_get_remote_port_parent(endpoint);
-	of_node_put(endpoint);
-	if (!panel_node) {
-		dev_err(dev, "no output node found\n");
-		return -EINVAL;
-	}
-
-	panel = of_drm_find_panel(panel_node);
-	if (!panel) {
-		DRM_ERROR("failed to find panel\n");
+	if (port) {
+		endpoint = of_get_child_by_name(port, "endpoint");
+		of_node_put(port);
+		if (!endpoint) {
+			dev_err(dev, "no output endpoint found\n");
+			return -EINVAL;
+		}
+
+		panel_node = of_graph_get_remote_port_parent(endpoint);
+		of_node_put(endpoint);
+		if (!panel_node) {
+			dev_err(dev, "no output node found\n");
+			return -EINVAL;
+		}
+
+		panel = of_drm_find_panel(panel_node);
 		of_node_put(panel_node);
-		return -EPROBE_DEFER;
+		if (!panel) {
+			DRM_ERROR("failed to find panel\n");
+			return -EPROBE_DEFER;
+		}
 	}
 
-	of_node_put(panel_node);
-
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
 	if (!dp)
 		return -ENOMEM;
@@ -356,8 +419,23 @@ static const struct dev_pm_ops rockchip_dp_pm_ops = {
 #endif
 };
 
+static const struct rockchip_dp_chip_data rk3399_edp = {
+	.lcdsel_grf_reg = RK3399_GRF_SOC_CON20,
+	.lcdsel_big = HIWORD_UPDATE(0, RK3399_EDP_LCDC_SEL),
+	.lcdsel_lit = HIWORD_UPDATE(RK3399_EDP_LCDC_SEL, RK3399_EDP_LCDC_SEL),
+	.chip_type = RK3399_EDP,
+};
+
+static const struct rockchip_dp_chip_data rk3288_dp = {
+	.lcdsel_grf_reg = RK3288_GRF_SOC_CON6,
+	.lcdsel_big = HIWORD_UPDATE(0, RK3288_EDP_LCDC_SEL),
+	.lcdsel_lit = HIWORD_UPDATE(RK3288_EDP_LCDC_SEL, RK3288_EDP_LCDC_SEL),
+	.chip_type = RK3288_DP,
+};
+
 static const struct of_device_id rockchip_dp_dt_ids[] = {
-	{.compatible = "rockchip,rk3288-dp",},
+	{.compatible = "rockchip,rk3288-dp", .data = &rk3288_dp },
+	{.compatible = "rockchip,rk3399-edp", .data = &rk3399_edp },
 	{}
 };
 MODULE_DEVICE_TABLE(of, rockchip_dp_dt_ids);
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index 801110f65a63..0665fb915579 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_encoder_slave.h>
 #include <drm/bridge/dw_hdmi.h>
 
 #include "rockchip_drm_drv.h"
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index d665fb04d264..a822d49a255a 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -79,7 +79,7 @@ int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
 	int pipe = drm_crtc_index(crtc);
 	struct rockchip_drm_private *priv = crtc->dev->dev_private;
 
-	if (pipe > ROCKCHIP_MAX_CRTC)
+	if (pipe >= ROCKCHIP_MAX_CRTC)
 		return -EINVAL;
 
 	priv->crtc_funcs[pipe] = crtc_funcs;
@@ -92,7 +92,7 @@ void rockchip_unregister_crtc_funcs(struct drm_crtc *crtc)
 	int pipe = drm_crtc_index(crtc);
 	struct rockchip_drm_private *priv = crtc->dev->dev_private;
 
-	if (pipe > ROCKCHIP_MAX_CRTC)
+	if (pipe >= ROCKCHIP_MAX_CRTC)
 		return;
 
 	priv->crtc_funcs[pipe] = NULL;
@@ -257,7 +257,7 @@ static void rockchip_drm_unbind(struct device *dev)
 	dev_set_drvdata(dev, NULL);
 }
 
-void rockchip_drm_lastclose(struct drm_device *dev)
+static void rockchip_drm_lastclose(struct drm_device *dev)
 {
 	struct rockchip_drm_private *priv = dev->dev_private;
 
@@ -433,6 +433,7 @@ static int rockchip_drm_platform_probe(struct platform_device *pdev)
 			is_support_iommu = false;
 		}
 
+		of_node_put(iommu);
 		component_match_add(dev, &match, compare_of, port->parent);
 		of_node_put(port);
 	}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 20f12bc5a386..55c52734c52d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -20,6 +20,7 @@
 #include <drm/drm_crtc_helper.h>
 
 #include "rockchip_drm_drv.h"
+#include "rockchip_drm_fb.h"
 #include "rockchip_drm_gem.h"
 
 #define to_rockchip_fb(x) container_of(x, struct rockchip_drm_fb, fb)
@@ -43,14 +44,10 @@ struct drm_gem_object *rockchip_fb_get_gem_obj(struct drm_framebuffer *fb,
 static void rockchip_drm_fb_destroy(struct drm_framebuffer *fb)
 {
 	struct rockchip_drm_fb *rockchip_fb = to_rockchip_fb(fb);
-	struct drm_gem_object *obj;
 	int i;
 
-	for (i = 0; i < ROCKCHIP_MAX_FB_BUFFER; i++) {
-		obj = rockchip_fb->obj[i];
-		if (obj)
-			drm_gem_object_unreference_unlocked(obj);
-	}
+	for (i = 0; i < ROCKCHIP_MAX_FB_BUFFER; i++)
+		drm_gem_object_unreference_unlocked(rockchip_fb->obj[i]);
 
 	drm_framebuffer_cleanup(fb);
 	kfree(rockchip_fb);
@@ -245,7 +242,7 @@ rockchip_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_cleanup_planes(dev, state);
 }
 
-struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
+static struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = {
 	.atomic_commit_tail = rockchip_atomic_commit_tail,
 };
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 6255e5bcd954..91305eb7d312 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -328,9 +328,9 @@ static void scl_vop_cal_scl_fac(struct vop *vop, const struct vop_win_data *win,
 			    scl_cal_scale2(src_h, dst_h));
 		if (is_yuv) {
 			VOP_SCL_SET(vop, win, scale_cbcr_x,
-				    scl_cal_scale2(src_w, dst_w));
+				    scl_cal_scale2(cbcr_src_w, dst_w));
 			VOP_SCL_SET(vop, win, scale_cbcr_y,
-				    scl_cal_scale2(src_h, dst_h));
+				    scl_cal_scale2(cbcr_src_h, dst_h));
 		}
 		return;
 	}
@@ -798,7 +798,7 @@ static const struct drm_plane_helper_funcs plane_helper_funcs = {
 	.atomic_disable = vop_plane_atomic_disable,
 };
 
-void vop_atomic_plane_reset(struct drm_plane *plane)
+static void vop_atomic_plane_reset(struct drm_plane *plane)
 {
 	struct vop_plane_state *vop_plane_state =
 					to_vop_plane_state(plane->state);
@@ -815,7 +815,7 @@ void vop_atomic_plane_reset(struct drm_plane *plane)
 	plane->state->plane = plane;
 }
 
-struct drm_plane_state *
+static struct drm_plane_state *
 vop_atomic_plane_duplicate_state(struct drm_plane *plane)
 {
 	struct vop_plane_state *old_vop_plane_state;
@@ -1052,6 +1052,17 @@ static void vop_crtc_destroy(struct drm_crtc *crtc)
 	drm_crtc_cleanup(crtc);
 }
 
+static void vop_crtc_reset(struct drm_crtc *crtc)
+{
+	if (crtc->state)
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+	kfree(crtc->state);
+
+	crtc->state = kzalloc(sizeof(struct rockchip_crtc_state), GFP_KERNEL);
+	if (crtc->state)
+		crtc->state->crtc = crtc;
+}
+
 static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
 {
 	struct rockchip_crtc_state *rockchip_state;
@@ -1077,7 +1088,7 @@ static const struct drm_crtc_funcs vop_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.page_flip = drm_atomic_helper_page_flip,
 	.destroy = vop_crtc_destroy,
-	.reset = drm_atomic_helper_crtc_reset,
+	.reset = vop_crtc_reset,
 	.atomic_duplicate_state = vop_crtc_duplicate_state,
 	.atomic_destroy_state = vop_crtc_destroy_state,
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 3166b46a5893..919992cdc97e 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -190,7 +190,7 @@ static const struct vop_data rk3288_vop = {
 	.win_size = ARRAY_SIZE(rk3288_vop_win_data),
 };
 
-static const struct vop_scl_regs rk3066_win_scl = {
+static const struct vop_scl_regs rk3036_win_scl = {
 	.scale_yrgb_x = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 0x0),
 	.scale_yrgb_y = VOP_REG(RK3036_WIN0_SCL_FACTOR_YRGB, 0xffff, 16),
 	.scale_cbcr_x = VOP_REG(RK3036_WIN0_SCL_FACTOR_CBR, 0xffff, 0x0),
@@ -198,7 +198,7 @@ static const struct vop_scl_regs rk3066_win_scl = {
 };
 
 static const struct vop_win_phy rk3036_win0_data = {
-	.scl = &rk3066_win_scl,
+	.scl = &rk3036_win_scl,
 	.data_formats = formats_win_full,
 	.nformats = ARRAY_SIZE(formats_win_full),
 	.enable = VOP_REG(RK3036_SYS_CTRL, 0x1, 0),
@@ -210,6 +210,7 @@ static const struct vop_win_phy rk3036_win0_data = {
 	.yrgb_mst = VOP_REG(RK3036_WIN0_YRGB_MST, 0xffffffff, 0),
 	.uv_mst = VOP_REG(RK3036_WIN0_CBR_MST, 0xffffffff, 0),
 	.yrgb_vir = VOP_REG(RK3036_WIN0_VIR, 0xffff, 0),
+	.uv_vir = VOP_REG(RK3036_WIN0_VIR, 0x1fff, 16),
 };
 
 static const struct vop_win_phy rk3036_win1_data = {
@@ -299,7 +300,7 @@ static int vop_remove(struct platform_device *pdev)
 	return 0;
 }
 
-struct platform_driver vop_platform_driver = {
+static struct platform_driver vop_platform_driver = {
 	.probe = vop_probe,
 	.remove = vop_remove,
 	.driver = {
diff --git a/drivers/gpu/drm/shmobile/Kconfig b/drivers/gpu/drm/shmobile/Kconfig
index 8d17d00ddb4b..c987c826daa3 100644
--- a/drivers/gpu/drm/shmobile/Kconfig
+++ b/drivers/gpu/drm/shmobile/Kconfig
@@ -6,7 +6,6 @@ config DRM_SHMOBILE
 	select BACKLIGHT_CLASS_DEVICE
 	select BACKLIGHT_LCD_SUPPORT
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	help
diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig
index 5ad43a1bb260..494ab257f77c 100644
--- a/drivers/gpu/drm/sti/Kconfig
+++ b/drivers/gpu/drm/sti/Kconfig
@@ -7,5 +7,6 @@ config DRM_STI
 	select DRM_KMS_CMA_HELPER
 	select DRM_PANEL
 	select FW_LOADER
+	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
 	  Choose this option to enable DRM on STM stiH41x chipset
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index a516eb869f6f..2da7d6866d5d 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -6,6 +6,8 @@
 
 #include "sti_awg_utils.h"
 
+#define AWG_DELAY (-5)
+
 #define AWG_OPCODE_OFFSET 10
 #define AWG_MAX_ARG       0x3ff
 
@@ -125,7 +127,7 @@ static int awg_generate_line_signal(
 		val = timing->blanking_level;
 		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
-		val = timing->trailing_pixels - 1;
+		val = timing->trailing_pixels - 1 + AWG_DELAY;
 		ret |= awg_generate_instr(SKIP, val, 0, 0, fwparams);
 	}
 
diff --git a/drivers/gpu/drm/sti/sti_compositor.c b/drivers/gpu/drm/sti/sti_compositor.c
index 794148ff0e57..134201ecc6fd 100644
--- a/drivers/gpu/drm/sti/sti_compositor.c
+++ b/drivers/gpu/drm/sti/sti_compositor.c
@@ -254,12 +254,12 @@ static int sti_compositor_probe(struct platform_device *pdev)
 	}
 
 	/* Get reset resources */
-	compo->rst_main = devm_reset_control_get(dev, "compo-main");
+	compo->rst_main = devm_reset_control_get_shared(dev, "compo-main");
 	/* Take compo main out of reset */
 	if (!IS_ERR(compo->rst_main))
 		reset_control_deassert(compo->rst_main);
 
-	compo->rst_aux = devm_reset_control_get(dev, "compo-aux");
+	compo->rst_aux = devm_reset_control_get_shared(dev, "compo-aux");
 	/* Take compo aux out of reset */
 	if (!IS_ERR(compo->rst_aux))
 		reset_control_deassert(compo->rst_aux);
@@ -267,10 +267,12 @@ static int sti_compositor_probe(struct platform_device *pdev)
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 0);
 	if (vtg_np)
 		compo->vtg_main = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 1);
 	if (vtg_np)
 		compo->vtg_aux = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	platform_set_drvdata(pdev, compo);
 
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 7fab3af7473b..c7d734dc3cf4 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -23,22 +23,11 @@
 static void sti_crtc_enable(struct drm_crtc *crtc)
 {
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
-	struct device *dev = mixer->dev;
-	struct sti_compositor *compo = dev_get_drvdata(dev);
 
 	DRM_DEBUG_DRIVER("\n");
 
 	mixer->status = STI_MIXER_READY;
 
-	/* Prepare and enable the compo IP clock */
-	if (mixer->id == STI_MIXER_MAIN) {
-		if (clk_prepare_enable(compo->clk_compo_main))
-			DRM_INFO("Failed to prepare/enable compo_main clk\n");
-	} else {
-		if (clk_prepare_enable(compo->clk_compo_aux))
-			DRM_INFO("Failed to prepare/enable compo_aux clk\n");
-	}
-
 	drm_crtc_vblank_on(crtc);
 }
 
@@ -57,9 +46,8 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct device *dev = mixer->dev;
 	struct sti_compositor *compo = dev_get_drvdata(dev);
-	struct clk *clk;
+	struct clk *compo_clk, *pix_clk;
 	int rate = mode->clock * 1000;
-	int res;
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) mode:%d (%s)\n",
 		      crtc->base.id, sti_mixer_to_str(mixer),
@@ -74,32 +62,46 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		      mode->vsync_start, mode->vsync_end,
 		      mode->vtotal, mode->type, mode->flags);
 
-	/* Set rate and prepare/enable pixel clock */
-	if (mixer->id == STI_MIXER_MAIN)
-		clk = compo->clk_pix_main;
-	else
-		clk = compo->clk_pix_aux;
+	if (mixer->id == STI_MIXER_MAIN) {
+		compo_clk = compo->clk_compo_main;
+		pix_clk = compo->clk_pix_main;
+	} else {
+		compo_clk = compo->clk_compo_aux;
+		pix_clk = compo->clk_pix_aux;
+	}
+
+	/* Prepare and enable the compo IP clock */
+	if (clk_prepare_enable(compo_clk)) {
+		DRM_INFO("Failed to prepare/enable compositor clk\n");
+		goto compo_error;
+	}
 
-	res = clk_set_rate(clk, rate);
-	if (res < 0) {
+	/* Set rate and prepare/enable pixel clock */
+	if (clk_set_rate(pix_clk, rate) < 0) {
 		DRM_ERROR("Cannot set rate (%dHz) for pix clk\n", rate);
-		return -EINVAL;
+		goto pix_error;
 	}
-	if (clk_prepare_enable(clk)) {
+	if (clk_prepare_enable(pix_clk)) {
 		DRM_ERROR("Failed to prepare/enable pix clk\n");
-		return -EINVAL;
+		goto pix_error;
 	}
 
 	sti_vtg_set_config(mixer->id == STI_MIXER_MAIN ?
 			compo->vtg_main : compo->vtg_aux, &crtc->mode);
 
-	res = sti_mixer_active_video_area(mixer, &crtc->mode);
-	if (res) {
+	if (sti_mixer_active_video_area(mixer, &crtc->mode)) {
 		DRM_ERROR("Can't set active video area\n");
-		return -EINVAL;
+		goto mixer_error;
 	}
 
-	return res;
+	return 0;
+
+mixer_error:
+	clk_disable_unprepare(pix_clk);
+pix_error:
+	clk_disable_unprepare(compo_clk);
+compo_error:
+	return -EINVAL;
 }
 
 static void sti_crtc_disable(struct drm_crtc *crtc)
@@ -130,7 +132,6 @@ static void sti_crtc_disable(struct drm_crtc *crtc)
 static void
 sti_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
-	sti_crtc_enable(crtc);
 	sti_crtc_mode_set(crtc, &crtc->state->adjusted_mode);
 }
 
@@ -221,9 +222,7 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 static const struct drm_crtc_helper_funcs sti_crtc_helper_funcs = {
 	.enable = sti_crtc_enable,
 	.disable = sti_crtc_disabling,
-	.mode_set = drm_helper_crtc_mode_set,
 	.mode_set_nofb = sti_crtc_mode_set_nofb,
-	.mode_set_base = drm_helper_crtc_mode_set_base,
 	.atomic_begin = sti_crtc_atomic_begin,
 	.atomic_flush = sti_crtc_atomic_flush,
 };
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index ec3108074350..00881eb4536e 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -580,6 +580,7 @@ static int sti_dvo_probe(struct platform_device *pdev)
 	dvo->panel_node = of_parse_phandle(np, "sti,panel", 0);
 	if (!dvo->panel_node)
 		DRM_ERROR("No panel associated to the dvo output\n");
+	of_node_put(dvo->panel_node);
 
 	platform_set_drvdata(pdev, dvo);
 
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 8d1402b245bf..fedc17f98d9b 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -18,6 +18,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
+#include <sound/hdmi-codec.h>
+
 #include "sti_hdmi.h"
 #include "sti_hdmi_tx3g4c28phy.h"
 #include "sti_hdmi_tx3g0c55phy.h"
@@ -35,6 +37,8 @@
 #define HDMI_DFLT_CHL0_DAT              0x0110
 #define HDMI_DFLT_CHL1_DAT              0x0114
 #define HDMI_DFLT_CHL2_DAT              0x0118
+#define HDMI_AUDIO_CFG                  0x0200
+#define HDMI_SPDIF_FIFO_STATUS          0x0204
 #define HDMI_SW_DI_1_HEAD_WORD          0x0210
 #define HDMI_SW_DI_1_PKT_WORD0          0x0214
 #define HDMI_SW_DI_1_PKT_WORD1          0x0218
@@ -44,6 +48,9 @@
 #define HDMI_SW_DI_1_PKT_WORD5          0x0228
 #define HDMI_SW_DI_1_PKT_WORD6          0x022C
 #define HDMI_SW_DI_CFG                  0x0230
+#define HDMI_SAMPLE_FLAT_MASK           0x0244
+#define HDMI_AUDN                       0x0400
+#define HDMI_AUD_CTS                    0x0404
 #define HDMI_SW_DI_2_HEAD_WORD          0x0600
 #define HDMI_SW_DI_2_PKT_WORD0          0x0604
 #define HDMI_SW_DI_2_PKT_WORD1          0x0608
@@ -103,6 +110,7 @@
 #define HDMI_INT_DLL_LCK                BIT(5)
 #define HDMI_INT_NEW_FRAME              BIT(6)
 #define HDMI_INT_GENCTRL_PKT            BIT(7)
+#define HDMI_INT_AUDIO_FIFO_XRUN        BIT(8)
 #define HDMI_INT_SINK_TERM_PRESENT      BIT(11)
 
 #define HDMI_DEFAULT_INT (HDMI_INT_SINK_TERM_PRESENT \
@@ -111,6 +119,7 @@
 			| HDMI_INT_GLOBAL)
 
 #define HDMI_WORKING_INT (HDMI_INT_SINK_TERM_PRESENT \
+			| HDMI_INT_AUDIO_FIFO_XRUN \
 			| HDMI_INT_GENCTRL_PKT \
 			| HDMI_INT_NEW_FRAME \
 			| HDMI_INT_DLL_LCK \
@@ -121,6 +130,27 @@
 
 #define HDMI_STA_SW_RST                 BIT(1)
 
+#define HDMI_AUD_CFG_8CH		BIT(0)
+#define HDMI_AUD_CFG_SPDIF_DIV_2	BIT(1)
+#define HDMI_AUD_CFG_SPDIF_DIV_3	BIT(2)
+#define HDMI_AUD_CFG_SPDIF_CLK_DIV_4	(BIT(1) | BIT(2))
+#define HDMI_AUD_CFG_CTS_CLK_256FS	BIT(12)
+#define HDMI_AUD_CFG_DTS_INVALID	BIT(16)
+#define HDMI_AUD_CFG_ONE_BIT_INVALID	(BIT(18) | BIT(19) | BIT(20) |  BIT(21))
+#define HDMI_AUD_CFG_CH12_VALID	BIT(28)
+#define HDMI_AUD_CFG_CH34_VALID	BIT(29)
+#define HDMI_AUD_CFG_CH56_VALID	BIT(30)
+#define HDMI_AUD_CFG_CH78_VALID	BIT(31)
+
+/* sample flat mask */
+#define HDMI_SAMPLE_FLAT_NO	 0
+#define HDMI_SAMPLE_FLAT_SP0 BIT(0)
+#define HDMI_SAMPLE_FLAT_SP1 BIT(1)
+#define HDMI_SAMPLE_FLAT_SP2 BIT(2)
+#define HDMI_SAMPLE_FLAT_SP3 BIT(3)
+#define HDMI_SAMPLE_FLAT_ALL (HDMI_SAMPLE_FLAT_SP0 | HDMI_SAMPLE_FLAT_SP1 |\
+			      HDMI_SAMPLE_FLAT_SP2 | HDMI_SAMPLE_FLAT_SP3)
+
 #define HDMI_INFOFRAME_HEADER_TYPE(x)    (((x) & 0xff) <<  0)
 #define HDMI_INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) <<  8)
 #define HDMI_INFOFRAME_HEADER_LEN(x)     (((x) & 0x0f) << 16)
@@ -171,6 +201,10 @@ static irqreturn_t hdmi_irq_thread(int irq, void *arg)
 		wake_up_interruptible(&hdmi->wait_event);
 	}
 
+	/* Audio FIFO underrun IRQ */
+	if (hdmi->irq_status & HDMI_INT_AUDIO_FIFO_XRUN)
+		DRM_INFO("Warning: audio FIFO underrun occurs!");
+
 	return IRQ_HANDLED;
 }
 
@@ -441,26 +475,29 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
  */
 static int hdmi_audio_infoframe_config(struct sti_hdmi *hdmi)
 {
-	struct hdmi_audio_infoframe infofame;
+	struct hdmi_audio_params *audio = &hdmi->audio;
 	u8 buffer[HDMI_INFOFRAME_SIZE(AUDIO)];
-	int ret;
-
-	ret = hdmi_audio_infoframe_init(&infofame);
-	if (ret < 0) {
-		DRM_ERROR("failed to setup audio infoframe: %d\n", ret);
-		return ret;
-	}
-
-	infofame.channels = 2;
-
-	ret = hdmi_audio_infoframe_pack(&infofame, buffer, sizeof(buffer));
-	if (ret < 0) {
-		DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
-		return ret;
+	int ret, val;
+
+	DRM_DEBUG_DRIVER("enter %s, AIF %s\n", __func__,
+			 audio->enabled ? "enable" : "disable");
+	if (audio->enabled) {
+		/* set audio parameters stored*/
+		ret = hdmi_audio_infoframe_pack(&audio->cea, buffer,
+						sizeof(buffer));
+		if (ret < 0) {
+			DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
+			return ret;
+		}
+		hdmi_infoframe_write_infopack(hdmi, buffer, ret);
+	} else {
+		/*disable audio info frame transmission */
+		val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
+		val &= ~HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK,
+					     HDMI_IFRAME_SLOT_AUDIO);
+		hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
-
 	return 0;
 }
 
@@ -650,6 +687,10 @@ static int hdmi_dbg_show(struct seq_file *s, void *data)
 	DBGFS_DUMP("", HDMI_SW_DI_CFG);
 	hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG));
 
+	DBGFS_DUMP("\n", HDMI_AUDIO_CFG);
+	DBGFS_DUMP("\n", HDMI_SPDIF_FIFO_STATUS);
+	DBGFS_DUMP("\n", HDMI_AUDN);
+
 	seq_printf(s, "\n AVI Infoframe (Data Island slot N=%d):",
 		   HDMI_IFRAME_SLOT_AVI);
 	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AVI);
@@ -854,6 +895,7 @@ static int sti_hdmi_connector_get_modes(struct drm_connector *connector)
 
 	count = drm_add_edid_modes(connector, edid);
 	drm_mode_connector_update_edid_property(connector, edid);
+	drm_edid_to_eld(connector, edid);
 
 	kfree(edid);
 	return count;
@@ -1036,6 +1078,207 @@ static struct drm_encoder *sti_hdmi_find_encoder(struct drm_device *dev)
 	return NULL;
 }
 
+/**
+ * sti_hdmi_audio_get_non_coherent_n() - get N parameter for non-coherent
+ * clocks. None-coherent clocks means that audio and TMDS clocks have not the
+ * same source (drifts between clocks). In this case assumption is that CTS is
+ * automatically calculated by hardware.
+ *
+ * @audio_fs: audio frame clock frequency in Hz
+ *
+ * Values computed are based on table described in HDMI specification 1.4b
+ *
+ * Returns n value.
+ */
+static int sti_hdmi_audio_get_non_coherent_n(unsigned int audio_fs)
+{
+	unsigned int n;
+
+	switch (audio_fs) {
+	case 32000:
+		n = 4096;
+		break;
+	case 44100:
+		n = 6272;
+		break;
+	case 48000:
+		n = 6144;
+		break;
+	case 88200:
+		n = 6272 * 2;
+		break;
+	case 96000:
+		n = 6144 * 2;
+		break;
+	case 176400:
+		n = 6272 * 4;
+		break;
+	case 192000:
+		n = 6144 * 4;
+		break;
+	default:
+		/* Not pre-defined, recommended value: 128 * fs / 1000 */
+		n = (audio_fs * 128) / 1000;
+	}
+
+	return n;
+}
+
+static int hdmi_audio_configure(struct sti_hdmi *hdmi,
+				struct hdmi_audio_params *params)
+{
+	int audio_cfg, n;
+	struct hdmi_audio_infoframe *info = &params->cea;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	/* update N parameter */
+	n = sti_hdmi_audio_get_non_coherent_n(params->sample_rate);
+
+	DRM_DEBUG_DRIVER("Audio rate = %d Hz, TMDS clock = %d Hz, n = %d\n",
+			 params->sample_rate, hdmi->mode.clock * 1000, n);
+	hdmi_write(hdmi, n, HDMI_AUDN);
+
+	/* update HDMI registers according to configuration */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+
+	switch (info->channels) {
+	case 8:
+		audio_cfg |= HDMI_AUD_CFG_CH78_VALID;
+	case 6:
+		audio_cfg |= HDMI_AUD_CFG_CH56_VALID;
+	case 4:
+		audio_cfg |= HDMI_AUD_CFG_CH34_VALID | HDMI_AUD_CFG_8CH;
+	case 2:
+		audio_cfg |= HDMI_AUD_CFG_CH12_VALID;
+		break;
+	default:
+		DRM_ERROR("ERROR: Unsupported number of channels (%d)!\n",
+			  info->channels);
+		return -EINVAL;
+	}
+
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	hdmi->audio = *params;
+
+	return hdmi_audio_infoframe_config(hdmi);
+}
+
+static void hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int audio_cfg;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	/* disable audio */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	hdmi->audio.enabled = 0;
+	hdmi_audio_infoframe_config(hdmi);
+}
+
+static int hdmi_audio_hw_params(struct device *dev,
+				void *data,
+				struct hdmi_codec_daifmt *daifmt,
+				struct hdmi_codec_params *params)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int ret;
+	struct hdmi_audio_params audio = {
+		.sample_width = params->sample_width,
+		.sample_rate = params->sample_rate,
+		.cea = params->cea,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	if ((daifmt->fmt != HDMI_I2S) || daifmt->bit_clk_inv ||
+	    daifmt->frame_clk_inv || daifmt->bit_clk_master ||
+	    daifmt->frame_clk_master) {
+		dev_err(dev, "%s: Bad flags %d %d %d %d\n", __func__,
+			daifmt->bit_clk_inv, daifmt->frame_clk_inv,
+			daifmt->bit_clk_master,
+			daifmt->frame_clk_master);
+		return -EINVAL;
+	}
+
+	audio.enabled = 1;
+
+	ret = hdmi_audio_configure(hdmi, &audio);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+
+	DRM_DEBUG_DRIVER("%s\n", enable ? "enable" : "disable");
+
+	if (enable)
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_ALL, HDMI_SAMPLE_FLAT_MASK);
+	else
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_NO, HDMI_SAMPLE_FLAT_MASK);
+
+	return 0;
+}
+
+static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	struct drm_connector *connector = hdmi->drm_connector;
+
+	DRM_DEBUG_DRIVER("\n");
+	memcpy(buf, connector->eld, min(sizeof(connector->eld), len));
+
+	return 0;
+}
+
+static const struct hdmi_codec_ops audio_codec_ops = {
+	.hw_params = hdmi_audio_hw_params,
+	.audio_shutdown = hdmi_audio_shutdown,
+	.digital_mute = hdmi_audio_digital_mute,
+	.get_eld = hdmi_audio_get_eld,
+};
+
+static int sti_hdmi_register_audio_driver(struct device *dev,
+					  struct sti_hdmi *hdmi)
+{
+	struct hdmi_codec_pdata codec_data = {
+		.ops = &audio_codec_ops,
+		.max_i2s_channels = 8,
+		.i2s = 1,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	hdmi->audio.enabled = 0;
+
+	hdmi->audio_pdev = platform_device_register_data(
+		dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO,
+		&codec_data, sizeof(codec_data));
+
+	if (IS_ERR(hdmi->audio_pdev))
+		return PTR_ERR(hdmi->audio_pdev);
+
+	DRM_INFO("%s Driver bound %s\n", HDMI_CODEC_DRV_NAME, dev_name(dev));
+
+	return 0;
+}
+
 static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
@@ -1082,12 +1325,27 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	/* initialise property */
 	sti_hdmi_connector_init_property(drm_dev, drm_connector);
 
+	hdmi->drm_connector = drm_connector;
+
 	err = drm_mode_connector_attach_encoder(drm_connector, encoder);
 	if (err) {
 		DRM_ERROR("Failed to attach a connector to a encoder\n");
 		goto err_sysfs;
 	}
 
+	err = sti_hdmi_register_audio_driver(dev, hdmi);
+	if (err) {
+		DRM_ERROR("Failed to attach an audio codec\n");
+		goto err_sysfs;
+	}
+
+	/* Initialize audio infoframe */
+	err = hdmi_audio_infoframe_init(&hdmi->audio.cea);
+	if (err) {
+		DRM_ERROR("Failed to init audio infoframe\n");
+		goto err_sysfs;
+	}
+
 	/* Enable default interrupts */
 	hdmi_write(hdmi, HDMI_DEFAULT_INT, HDMI_INT_EN);
 
@@ -1095,6 +1353,7 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 
 err_sysfs:
 	drm_bridge_remove(bridge);
+	hdmi->drm_connector = NULL;
 	return -EINVAL;
 }
 
@@ -1244,6 +1503,8 @@ static int sti_hdmi_remove(struct platform_device *pdev)
 	struct sti_hdmi *hdmi = dev_get_drvdata(&pdev->dev);
 
 	i2c_put_adapter(hdmi->ddc_adapt);
+	if (hdmi->audio_pdev)
+		platform_device_unregister(hdmi->audio_pdev);
 	component_del(&pdev->dev, &sti_hdmi_ops);
 
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index ef3a94583bbd..119bc3582ac7 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -23,6 +23,13 @@ struct hdmi_phy_ops {
 	void (*stop)(struct sti_hdmi *hdmi);
 };
 
+struct hdmi_audio_params {
+	bool enabled;
+	unsigned int sample_width;
+	unsigned int sample_rate;
+	struct hdmi_audio_infoframe cea;
+};
+
 /* values for the framing mode property */
 enum sti_hdmi_modes {
 	HDMI_MODE_HDMI,
@@ -67,6 +74,9 @@ static const struct drm_prop_enum_list colorspace_mode_names[] = {
  * @ddc_adapt: i2c ddc adapter
  * @colorspace: current colorspace selected
  * @hdmi_mode: select framing for HDMI or DVI
+ * @audio_pdev: ASoC hdmi-codec platform device
+ * @audio: hdmi audio parameters.
+ * @drm_connector: hdmi connector
  */
 struct sti_hdmi {
 	struct device dev;
@@ -89,6 +99,9 @@ struct sti_hdmi {
 	struct i2c_adapter *ddc_adapt;
 	enum hdmi_colorspace colorspace;
 	enum sti_hdmi_modes hdmi_mode;
+	struct platform_device *audio_pdev;
+	struct hdmi_audio_params audio;
+	struct drm_connector *drm_connector;
 };
 
 u32 hdmi_read(struct sti_hdmi *hdmi, int offset);
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 33d2f42550cc..b03232247966 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -1363,6 +1363,7 @@ static int sti_hqvdp_probe(struct platform_device *pdev)
 	vtg_np = of_parse_phandle(pdev->dev.of_node, "st,vtg", 0);
 	if (vtg_np)
 		hqvdp->vtg = of_vtg_find(vtg_np);
+	of_node_put(vtg_np);
 
 	platform_set_drvdata(pdev, hqvdp);
 
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index 85cee9098439..0cf3335ef37c 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -45,25 +45,15 @@ const char *sti_plane_to_str(struct sti_plane *plane)
 
 #define STI_FPS_INTERVAL_MS     3000
 
-static int sti_plane_timespec_ms_diff(struct timespec lhs, struct timespec rhs)
-{
-	struct timespec tmp_ts = timespec_sub(lhs, rhs);
-	u64 tmp_ns = (u64)timespec_to_ns(&tmp_ts);
-
-	do_div(tmp_ns, NSEC_PER_MSEC);
-
-	return (u32)tmp_ns;
-}
-
 void sti_plane_update_fps(struct sti_plane *plane,
 			  bool new_frame,
 			  bool new_field)
 {
-	struct timespec now;
+	ktime_t now;
 	struct sti_fps_info *fps;
 	int fpks, fipks, ms_since_last, num_frames, num_fields;
 
-	getrawmonotonic(&now);
+	now = ktime_get();
 
 	/* Compute number of frame updates */
 	fps = &plane->fps_info;
@@ -76,7 +66,7 @@ void sti_plane_update_fps(struct sti_plane *plane,
 		return;
 
 	fps->curr_frame_counter++;
-	ms_since_last = sti_plane_timespec_ms_diff(now, fps->last_timestamp);
+	ms_since_last = ktime_to_ms(ktime_sub(now, fps->last_timestamp));
 	num_frames = fps->curr_frame_counter - fps->last_frame_counter;
 
 	if (num_frames <= 0  || ms_since_last < STI_FPS_INTERVAL_MS)
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index 39d39f5b7dd9..e0ea1dd3bb88 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -55,7 +55,7 @@ struct sti_fps_info {
 	unsigned int last_frame_counter;
 	unsigned int curr_field_counter;
 	unsigned int last_field_counter;
-	struct timespec last_timestamp;
+	ktime_t	     last_timestamp;
 	char fps_str[FPS_LENGTH];
 	char fips_str[FPS_LENGTH];
 };
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index 6bf4ce466d20..0bdc385eec17 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -65,7 +65,7 @@
 #define HDMI_DELAY          (5)
 
 /* Delay introduced by the DVO in nb of pixel */
-#define DVO_DELAY           (2)
+#define DVO_DELAY           (7)
 
 /* delay introduced by the Arbitrary Waveform Generator in nb of pixels */
 #define AWG_DELAY_HD        (-9)
@@ -432,6 +432,7 @@ static int vtg_probe(struct platform_device *pdev)
 	np = of_parse_phandle(pdev->dev.of_node, "st,slave", 0);
 	if (np) {
 		vtg->slave = of_vtg_find(np);
+		of_node_put(np);
 
 		if (!vtg->slave)
 			return -EPROBE_DEFER;
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 99510e64e91a..a4b357db8856 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -1,6 +1,6 @@
 config DRM_SUN4I
 	tristate "DRM Support for Allwinner A10 Display Engine"
-	depends on DRM && ARM
+	depends on DRM && ARM && COMMON_CLK
 	depends on ARCH_SUNXI || COMPILE_TEST
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index f7a15c1a93bf..3ab560450a82 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	/* Get the physical address of the buffer in memory */
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
+	DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr);
 
 	/* Compute the start of the displayed memory */
 	bpp = drm_format_plane_cpp(fb->pixel_format, 0);
@@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr += (state->src_x >> 16) * bpp;
 	paddr += (state->src_y >> 16) * fb->pitches[0];
 
-	DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);
+	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
 	/* Write the 32 lower bits of the address (in bits) */
 	lo_paddr = paddr << 3;
diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c
index f628b6d8f23f..4a192210574f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_crtc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c
@@ -77,6 +77,14 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc)
 	DRM_DEBUG_DRIVER("Disabling the CRTC\n");
 
 	sun4i_tcon_disable(drv->tcon);
+
+	if (crtc->state->event && !crtc->state->active) {
+		spin_lock_irq(&crtc->dev->event_lock);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+
+		crtc->state->event = NULL;
+	}
 }
 
 static void sun4i_crtc_enable(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 3ff668cb463c..5b3463197c48 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
-	return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate);
+	unsigned long best_parent = 0;
+	u8 best_div = 1;
+	int i;
+
+	for (i = 6; i < 127; i++) {
+		unsigned long ideal = rate * i;
+		unsigned long rounded;
+
+		rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
+					    ideal);
+
+		if (rounded == ideal) {
+			best_parent = rounded;
+			best_div = i;
+			goto out;
+		}
+
+		if ((rounded < ideal) && (rounded > best_parent)) {
+			best_parent = rounded;
+			best_div = i;
+		}
+	}
+
+out:
+	*parent_rate = best_parent;
+
+	return best_parent / best_div;
 }
 
 static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
-	int div = DIV_ROUND_CLOSEST(parent_rate, rate);
+	u8 div = parent_rate / rate;
 
 	return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG,
 				  GENMASK(6, 0), div);
@@ -127,10 +153,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	const char *clk_name, *parent_name;
 	struct clk_init_data init;
 	struct sun4i_dclk *dclk;
+	int ret;
 
 	parent_name = __clk_get_name(tcon->sclk0);
-	of_property_read_string_index(dev->of_node, "clock-output-names", 0,
-				      &clk_name);
+	ret = of_property_read_string_index(dev->of_node,
+					    "clock-output-names", 0,
+					    &clk_name);
+	if (ret)
+		return ret;
 
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
@@ -140,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	init.ops = &sun4i_dclk_ops;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
 
 	dclk->regmap = tcon->regs;
 	dclk->hw.init = &init;
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 9a67f927a53e..7092daaf6c43 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -92,11 +92,27 @@ static struct drm_driver sun4i_drv_driver = {
 	/* Frame Buffer Operations */
 
 	/* VBlank Operations */
-	.get_vblank_counter	= drm_vblank_count,
+	.get_vblank_counter	= drm_vblank_no_hw_counter,
 	.enable_vblank		= sun4i_drv_enable_vblank,
 	.disable_vblank		= sun4i_drv_disable_vblank,
 };
 
+static void sun4i_remove_framebuffers(void)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	/* The framebuffer can be located anywhere in RAM */
+	ap->ranges[0].base = 0;
+	ap->ranges[0].size = ~0;
+
+	remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false);
+	kfree(ap);
+}
+
 static int sun4i_drv_bind(struct device *dev)
 {
 	struct drm_device *drm;
@@ -140,6 +156,9 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->irq_enabled = true;
 
+	/* Remove early framebuffers (ie. simplefb) */
+	sun4i_remove_framebuffers();
+
 	/* Create our framebuffer */
 	drv->fbdev = sun4i_framebuffer_init(drm);
 	if (IS_ERR(drv->fbdev)) {
@@ -280,6 +299,7 @@ static int sun4i_drv_probe(struct platform_device *pdev)
 
 		count += sun4i_drv_add_endpoints(&pdev->dev, &match,
 						pipeline);
+		of_node_put(pipeline);
 
 		DRM_DEBUG_DRIVER("Queued %d outputs on pipeline %d\n",
 				 count, i);
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
index a0b30c216a5b..70688febd7ac 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
@@ -20,8 +20,7 @@ static void sun4i_de_output_poll_changed(struct drm_device *drm)
 {
 	struct sun4i_drv *drv = drm->dev_private;
 
-	if (drv->fbdev)
-		drm_fbdev_cma_hotplug_event(drv->fbdev);
+	drm_fbdev_cma_hotplug_event(drv->fbdev);
 }
 
 static const struct drm_mode_config_funcs sun4i_de_mode_config_funcs = {
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 442cfe271688..f5bbac6efb4c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
+	struct sun4i_drv *drv = rgb->drv;
+	struct sun4i_tcon *tcon = drv->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
+	unsigned long rate = mode->clock * 1000;
+	long rounded_rate;
 
 	DRM_DEBUG_DRIVER("Validating modes...\n");
 
@@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+	rounded_rate = clk_round_rate(tcon->dclk, rate);
+	if (rounded_rate < rate)
+		return MODE_CLOCK_LOW;
+
+	if (rounded_rate > rate)
+		return MODE_CLOCK_HIGH;
+
+	DRM_DEBUG_DRIVER("Clock rate OK\n");
+
 	return MODE_OK;
 }
 
@@ -193,7 +207,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 	int ret;
 
 	/* If we don't have a panel, there's no point in going on */
-	if (!tcon->panel)
+	if (IS_ERR(tcon->panel))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 9f19b0e08560..652385f09735 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
 
 	remote = of_graph_get_remote_port_parent(end_node);
 	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
+		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return of_drm_find_panel(remote);
+	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
 }
 
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
@@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return 0;
 	}
 
-	return sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm);
+	if (ret < 0)
+		goto err_free_clocks;
+
+	return 0;
 
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
@@ -522,12 +526,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	 * Defer the probe.
 	 */
 	panel = sun4i_tcon_find_panel(node);
-	if (IS_ERR(panel)) {
-		/*
-		 * If we don't have a panel endpoint, just go on
-		 */
-		if (PTR_ERR(panel) != -ENODEV)
-			return -EPROBE_DEFER;
+
+	/*
+	 * If we don't have a panel endpoint, just go on
+	 */
+	if (PTR_ERR(panel) == -EPROBE_DEFER) {
+		DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n");
+		return -EPROBE_DEFER;
 	}
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 39940f5b7c91..8495bd01b544 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -10,6 +10,7 @@
 #include <linux/clk.h>
 #include <linux/debugfs.h>
 #include <linux/iommu.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
 #include <soc/tegra/pmc.h>
@@ -1216,6 +1217,8 @@ static void tegra_crtc_disable(struct drm_crtc *crtc)
 
 	tegra_dc_stats_reset(&dc->stats);
 	drm_crtc_vblank_off(crtc);
+
+	pm_runtime_put_sync(dc->dev);
 }
 
 static void tegra_crtc_enable(struct drm_crtc *crtc)
@@ -1225,6 +1228,48 @@ static void tegra_crtc_enable(struct drm_crtc *crtc)
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
 
+	pm_runtime_get_sync(dc->dev);
+
+	/* initialize display controller */
+	if (dc->syncpt) {
+		u32 syncpt = host1x_syncpt_id(dc->syncpt);
+
+		value = SYNCPT_CNTRL_NO_STALL;
+		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
+
+		value = SYNCPT_VSYNC_ENABLE | syncpt;
+		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
+	}
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
+
+	/* initialize timer */
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
+		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
+
+	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
+		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
+
+	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
+
+	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
+		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
+	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
+
+	if (dc->soc->supports_border_color)
+		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
+
+	/* apply PLL and pixel clock changes */
 	tegra_dc_commit_state(dc, state);
 
 	/* program display mode */
@@ -1685,7 +1730,6 @@ static int tegra_dc_init(struct host1x_client *client)
 	struct tegra_drm *tegra = drm->dev_private;
 	struct drm_plane *primary = NULL;
 	struct drm_plane *cursor = NULL;
-	u32 value;
 	int err;
 
 	dc->syncpt = host1x_syncpt_request(dc->dev, flags);
@@ -1755,47 +1799,6 @@ static int tegra_dc_init(struct host1x_client *client)
 		goto cleanup;
 	}
 
-	/* initialize display controller */
-	if (dc->syncpt) {
-		u32 syncpt = host1x_syncpt_id(dc->syncpt);
-
-		value = SYNCPT_CNTRL_NO_STALL;
-		tegra_dc_writel(dc, value, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
-
-		value = SYNCPT_VSYNC_ENABLE | syncpt;
-		tegra_dc_writel(dc, value, DC_CMD_CONT_SYNCPT_VSYNC);
-	}
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
-
-	/* initialize timer */
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
-		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
-
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
-		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
-
-	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	if (dc->soc->supports_border_color)
-		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
-
-	tegra_dc_stats_reset(&dc->stats);
-
 	return 0;
 
 cleanup:
@@ -1987,33 +1990,15 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return PTR_ERR(dc->rst);
 	}
 
+	reset_control_assert(dc->rst);
+
 	if (dc->soc->has_powergate) {
 		if (dc->pipe == 0)
 			dc->powergate = TEGRA_POWERGATE_DIS;
 		else
 			dc->powergate = TEGRA_POWERGATE_DISB;
 
-		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
-							dc->rst);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to power partition: %d\n",
-				err);
-			return err;
-		}
-	} else {
-		err = clk_prepare_enable(dc->clk);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to enable clock: %d\n",
-				err);
-			return err;
-		}
-
-		err = reset_control_deassert(dc->rst);
-		if (err < 0) {
-			dev_err(&pdev->dev, "failed to deassert reset: %d\n",
-				err);
-			return err;
-		}
+		tegra_powergate_power_off(dc->powergate);
 	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2027,16 +2012,19 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	INIT_LIST_HEAD(&dc->client.list);
-	dc->client.ops = &dc_client_ops;
-	dc->client.dev = &pdev->dev;
-
 	err = tegra_dc_rgb_probe(dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(&pdev->dev, "failed to probe RGB output: %d\n", err);
 		return err;
 	}
 
+	platform_set_drvdata(pdev, dc);
+	pm_runtime_enable(&pdev->dev);
+
+	INIT_LIST_HEAD(&dc->client.list);
+	dc->client.ops = &dc_client_ops;
+	dc->client.dev = &pdev->dev;
+
 	err = host1x_client_register(&dc->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
@@ -2044,8 +2032,6 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	platform_set_drvdata(pdev, dc);
-
 	return 0;
 }
 
@@ -2067,7 +2053,22 @@ static int tegra_dc_remove(struct platform_device *pdev)
 		return err;
 	}
 
-	reset_control_assert(dc->rst);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_dc_suspend(struct device *dev)
+{
+	struct tegra_dc *dc = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
 
 	if (dc->soc->has_powergate)
 		tegra_powergate_power_off(dc->powergate);
@@ -2077,10 +2078,45 @@ static int tegra_dc_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int tegra_dc_resume(struct device *dev)
+{
+	struct tegra_dc *dc = dev_get_drvdata(dev);
+	int err;
+
+	if (dc->soc->has_powergate) {
+		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
+							dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to power partition: %d\n", err);
+			return err;
+		}
+	} else {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0) {
+			dev_err(dev, "failed to enable clock: %d\n", err);
+			return err;
+		}
+
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_dc_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dc_suspend, tegra_dc_resume, NULL)
+};
+
 struct platform_driver tegra_dc_driver = {
 	.driver = {
 		.name = "tegra-dc",
 		.of_match_table = tegra_dc_of_match,
+		.pm = &tegra_dc_pm_ops,
 	},
 	.probe = tegra_dc_probe,
 	.remove = tegra_dc_remove,
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index b24a0f14821a..059f409556d5 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -12,6 +12,9 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/of_gpio.h>
+#include <linux/pinctrl/pinconf-generic.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
 #include <linux/platform_device.h>
 #include <linux/reset.h>
 #include <linux/regulator/consumer.h>
@@ -44,6 +47,11 @@ struct tegra_dpaux {
 	struct completion complete;
 	struct work_struct work;
 	struct list_head list;
+
+#ifdef CONFIG_GENERIC_PINCONF
+	struct pinctrl_dev *pinctrl;
+	struct pinctrl_desc desc;
+#endif
 };
 
 static inline struct tegra_dpaux *to_dpaux(struct drm_dp_aux *aux)
@@ -267,6 +275,148 @@ static irqreturn_t tegra_dpaux_irq(int irq, void *data)
 	return ret;
 }
 
+enum tegra_dpaux_functions {
+	DPAUX_PADCTL_FUNC_AUX,
+	DPAUX_PADCTL_FUNC_I2C,
+	DPAUX_PADCTL_FUNC_OFF,
+};
+
+static void tegra_dpaux_pad_power_down(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static void tegra_dpaux_pad_power_up(struct tegra_dpaux *dpaux)
+{
+	u32 value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
+
+	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+}
+
+static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
+{
+	u32 value;
+
+	switch (function) {
+	case DPAUX_PADCTL_FUNC_AUX:
+		value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_MODE_AUX;
+		break;
+
+	case DPAUX_PADCTL_FUNC_I2C:
+		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
+			DPAUX_HYBRID_PADCTL_MODE_I2C;
+		break;
+
+	case DPAUX_PADCTL_FUNC_OFF:
+		tegra_dpaux_pad_power_down(dpaux);
+		return 0;
+
+	default:
+		return -ENOTSUPP;
+	}
+
+	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
+	tegra_dpaux_pad_power_up(dpaux);
+
+	return 0;
+}
+
+#ifdef CONFIG_GENERIC_PINCONF
+static const struct pinctrl_pin_desc tegra_dpaux_pins[] = {
+	PINCTRL_PIN(0, "DP_AUX_CHx_P"),
+	PINCTRL_PIN(1, "DP_AUX_CHx_N"),
+};
+
+static const unsigned tegra_dpaux_pin_numbers[] = { 0, 1 };
+
+static const char * const tegra_dpaux_groups[] = {
+	"dpaux-io",
+};
+
+static const char * const tegra_dpaux_functions[] = {
+	"aux",
+	"i2c",
+	"off",
+};
+
+static int tegra_dpaux_get_groups_count(struct pinctrl_dev *pinctrl)
+{
+	return ARRAY_SIZE(tegra_dpaux_groups);
+}
+
+static const char *tegra_dpaux_get_group_name(struct pinctrl_dev *pinctrl,
+					      unsigned int group)
+{
+	return tegra_dpaux_groups[group];
+}
+
+static int tegra_dpaux_get_group_pins(struct pinctrl_dev *pinctrl,
+				      unsigned group, const unsigned **pins,
+				      unsigned *num_pins)
+{
+	*pins = tegra_dpaux_pin_numbers;
+	*num_pins = ARRAY_SIZE(tegra_dpaux_pin_numbers);
+
+	return 0;
+}
+
+static const struct pinctrl_ops tegra_dpaux_pinctrl_ops = {
+	.get_groups_count = tegra_dpaux_get_groups_count,
+	.get_group_name = tegra_dpaux_get_group_name,
+	.get_group_pins = tegra_dpaux_get_group_pins,
+	.dt_node_to_map = pinconf_generic_dt_node_to_map_group,
+	.dt_free_map = pinconf_generic_dt_free_map,
+};
+
+static int tegra_dpaux_get_functions_count(struct pinctrl_dev *pinctrl)
+{
+	return ARRAY_SIZE(tegra_dpaux_functions);
+}
+
+static const char *tegra_dpaux_get_function_name(struct pinctrl_dev *pinctrl,
+						 unsigned int function)
+{
+	return tegra_dpaux_functions[function];
+}
+
+static int tegra_dpaux_get_function_groups(struct pinctrl_dev *pinctrl,
+					   unsigned int function,
+					   const char * const **groups,
+					   unsigned * const num_groups)
+{
+	*num_groups = ARRAY_SIZE(tegra_dpaux_groups);
+	*groups = tegra_dpaux_groups;
+
+	return 0;
+}
+
+static int tegra_dpaux_set_mux(struct pinctrl_dev *pinctrl,
+			       unsigned int function, unsigned int group)
+{
+	struct tegra_dpaux *dpaux = pinctrl_dev_get_drvdata(pinctrl);
+
+	return tegra_dpaux_pad_config(dpaux, function);
+}
+
+static const struct pinmux_ops tegra_dpaux_pinmux_ops = {
+	.get_functions_count = tegra_dpaux_get_functions_count,
+	.get_function_name = tegra_dpaux_get_function_name,
+	.get_function_groups = tegra_dpaux_get_function_groups,
+	.set_mux = tegra_dpaux_set_mux,
+};
+#endif
+
 static int tegra_dpaux_probe(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux;
@@ -294,11 +444,14 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return -ENXIO;
 	}
 
-	dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
-	if (IS_ERR(dpaux->rst)) {
-		dev_err(&pdev->dev, "failed to get reset control: %ld\n",
-			PTR_ERR(dpaux->rst));
-		return PTR_ERR(dpaux->rst);
+	if (!pdev->dev.pm_domain) {
+		dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
+		if (IS_ERR(dpaux->rst)) {
+			dev_err(&pdev->dev,
+				"failed to get reset control: %ld\n",
+				PTR_ERR(dpaux->rst));
+			return PTR_ERR(dpaux->rst);
+		}
 	}
 
 	dpaux->clk = devm_clk_get(&pdev->dev, NULL);
@@ -315,34 +468,37 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	reset_control_deassert(dpaux->rst);
+	if (dpaux->rst)
+		reset_control_deassert(dpaux->rst);
 
 	dpaux->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dpaux->clk_parent)) {
 		dev_err(&pdev->dev, "failed to get parent clock: %ld\n",
 			PTR_ERR(dpaux->clk_parent));
-		return PTR_ERR(dpaux->clk_parent);
+		err = PTR_ERR(dpaux->clk_parent);
+		goto assert_reset;
 	}
 
 	err = clk_prepare_enable(dpaux->clk_parent);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable parent clock: %d\n",
 			err);
-		return err;
+		goto assert_reset;
 	}
 
 	err = clk_set_rate(dpaux->clk_parent, 270000000);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to set clock to 270 MHz: %d\n",
 			err);
-		return err;
+		goto disable_parent_clk;
 	}
 
 	dpaux->vdd = devm_regulator_get(&pdev->dev, "vdd");
 	if (IS_ERR(dpaux->vdd)) {
 		dev_err(&pdev->dev, "failed to get VDD supply: %ld\n",
 			PTR_ERR(dpaux->vdd));
-		return PTR_ERR(dpaux->vdd);
+		err = PTR_ERR(dpaux->vdd);
+		goto disable_parent_clk;
 	}
 
 	err = devm_request_irq(dpaux->dev, dpaux->irq, tegra_dpaux_irq, 0,
@@ -350,7 +506,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n",
 			dpaux->irq, err);
-		return err;
+		goto disable_parent_clk;
 	}
 
 	disable_irq(dpaux->irq);
@@ -360,7 +516,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 
 	err = drm_dp_aux_register(&dpaux->aux);
 	if (err < 0)
-		return err;
+		goto disable_parent_clk;
 
 	/*
 	 * Assume that by default the DPAUX/I2C pads will be used for HDMI,
@@ -370,16 +526,24 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	 * is no possibility to perform the I2C mode configuration in the
 	 * HDMI path.
 	 */
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
-
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_PADCTL);
-	value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_MODE_I2C;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
+	err = tegra_dpaux_pad_config(dpaux, DPAUX_HYBRID_PADCTL_MODE_I2C);
+	if (err < 0)
+		return err;
 
+#ifdef CONFIG_GENERIC_PINCONF
+	dpaux->desc.name = dev_name(&pdev->dev);
+	dpaux->desc.pins = tegra_dpaux_pins;
+	dpaux->desc.npins = ARRAY_SIZE(tegra_dpaux_pins);
+	dpaux->desc.pctlops = &tegra_dpaux_pinctrl_ops;
+	dpaux->desc.pmxops = &tegra_dpaux_pinmux_ops;
+	dpaux->desc.owner = THIS_MODULE;
+
+	dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux);
+	if (!dpaux->pinctrl) {
+		dev_err(&pdev->dev, "failed to register pincontrol\n");
+		return -ENODEV;
+	}
+#endif
 	/* enable and clear all interrupts */
 	value = DPAUX_INTR_AUX_DONE | DPAUX_INTR_IRQ_EVENT |
 		DPAUX_INTR_UNPLUG_EVENT | DPAUX_INTR_PLUG_EVENT;
@@ -393,17 +557,24 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, dpaux);
 
 	return 0;
+
+disable_parent_clk:
+	clk_disable_unprepare(dpaux->clk_parent);
+assert_reset:
+	if (dpaux->rst)
+		reset_control_assert(dpaux->rst);
+
+	clk_disable_unprepare(dpaux->clk);
+
+	return err;
 }
 
 static int tegra_dpaux_remove(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
-	u32 value;
 
 	/* make sure pads are powered down when not in use */
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+	tegra_dpaux_pad_power_down(dpaux);
 
 	drm_dp_aux_unregister(&dpaux->aux);
 
@@ -414,7 +585,10 @@ static int tegra_dpaux_remove(struct platform_device *pdev)
 	cancel_work_sync(&dpaux->work);
 
 	clk_disable_unprepare(dpaux->clk_parent);
-	reset_control_assert(dpaux->rst);
+
+	if (dpaux->rst)
+		reset_control_assert(dpaux->rst);
+
 	clk_disable_unprepare(dpaux->clk);
 
 	return 0;
@@ -528,30 +702,15 @@ enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux)
 int drm_dp_aux_enable(struct drm_dp_aux *aux)
 {
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
-	u32 value;
-
-	value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-		DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-		DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
-		DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
-		DPAUX_HYBRID_PADCTL_MODE_AUX;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_PADCTL);
-
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value &= ~DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
 
-	return 0;
+	return tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_AUX);
 }
 
 int drm_dp_aux_disable(struct drm_dp_aux *aux)
 {
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
-	u32 value;
 
-	value = tegra_dpaux_readl(dpaux, DPAUX_HYBRID_SPARE);
-	value |= DPAUX_HYBRID_SPARE_PAD_POWER_DOWN;
-	tegra_dpaux_writel(dpaux, value, DPAUX_HYBRID_SPARE);
+	tegra_dpaux_pad_power_down(dpaux);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index a177a42a9849..755264d9db22 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -56,8 +56,8 @@ static void tegra_atomic_complete(struct tegra_drm *tegra,
 	 */
 
 	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_planes(drm, state, false);
 	drm_atomic_helper_commit_modeset_enables(drm, state);
+	drm_atomic_helper_commit_planes(drm, state, true);
 
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index 099cccb2fbcb..3d228ad90e0f 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -13,6 +13,7 @@
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
 #include <linux/regulator/consumer.h>
@@ -677,6 +678,45 @@ static void tegra_dsi_ganged_disable(struct tegra_dsi *dsi)
 	tegra_dsi_writel(dsi, 0, DSI_GANGED_MODE_CONTROL);
 }
 
+static int tegra_dsi_pad_enable(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	value = DSI_PAD_CONTROL_VS1_PULLDN(0) | DSI_PAD_CONTROL_VS1_PDIO(0);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_0);
+
+	return 0;
+}
+
+static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
+{
+	u32 value;
+
+	/*
+	 * XXX Is this still needed? The module reset is deasserted right
+	 * before this function is called.
+	 */
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_2);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_3);
+	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_4);
+
+	/* start calibration */
+	tegra_dsi_pad_enable(dsi);
+
+	value = DSI_PAD_SLEW_UP(0x7) | DSI_PAD_SLEW_DN(0x7) |
+		DSI_PAD_LP_UP(0x1) | DSI_PAD_LP_DN(0x1) |
+		DSI_PAD_OUT_CLK(0x0);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_2);
+
+	value = DSI_PAD_PREEMP_PD_CLK(0x3) | DSI_PAD_PREEMP_PU_CLK(0x3) |
+		DSI_PAD_PREEMP_PD(0x03) | DSI_PAD_PREEMP_PU(0x3);
+	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_3);
+
+	return tegra_mipi_calibrate(dsi->mipi);
+}
+
 static void tegra_dsi_set_timeout(struct tegra_dsi *dsi, unsigned long bclk,
 				  unsigned int vrefresh)
 {
@@ -836,7 +876,7 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 
 	tegra_dsi_disable(dsi);
 
-	return;
+	pm_runtime_put(dsi->dev);
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -847,6 +887,13 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 	struct tegra_dsi *dsi = to_dsi(output);
 	struct tegra_dsi_state *state;
 	u32 value;
+	int err;
+
+	pm_runtime_get_sync(dsi->dev);
+
+	err = tegra_dsi_pad_calibrate(dsi);
+	if (err < 0)
+		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
 
 	state = tegra_dsi_get_state(dsi);
 
@@ -875,8 +922,6 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 
 	if (output->panel)
 		drm_panel_enable(output->panel);
-
-	return;
 }
 
 static int
@@ -966,55 +1011,12 @@ static const struct drm_encoder_helper_funcs tegra_dsi_encoder_helper_funcs = {
 	.atomic_check = tegra_dsi_encoder_atomic_check,
 };
 
-static int tegra_dsi_pad_enable(struct tegra_dsi *dsi)
-{
-	u32 value;
-
-	value = DSI_PAD_CONTROL_VS1_PULLDN(0) | DSI_PAD_CONTROL_VS1_PDIO(0);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_0);
-
-	return 0;
-}
-
-static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
-{
-	u32 value;
-
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_0);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_1);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_2);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_3);
-	tegra_dsi_writel(dsi, 0, DSI_PAD_CONTROL_4);
-
-	/* start calibration */
-	tegra_dsi_pad_enable(dsi);
-
-	value = DSI_PAD_SLEW_UP(0x7) | DSI_PAD_SLEW_DN(0x7) |
-		DSI_PAD_LP_UP(0x1) | DSI_PAD_LP_DN(0x1) |
-		DSI_PAD_OUT_CLK(0x0);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_2);
-
-	value = DSI_PAD_PREEMP_PD_CLK(0x3) | DSI_PAD_PREEMP_PU_CLK(0x3) |
-		DSI_PAD_PREEMP_PD(0x03) | DSI_PAD_PREEMP_PU(0x3);
-	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_3);
-
-	return tegra_mipi_calibrate(dsi->mipi);
-}
-
 static int tegra_dsi_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 	int err;
 
-	reset_control_deassert(dsi->rst);
-
-	err = tegra_dsi_pad_calibrate(dsi);
-	if (err < 0) {
-		dev_err(dsi->dev, "MIPI calibration failed: %d\n", err);
-		goto reset;
-	}
-
 	/* Gangsters must not register their own outputs. */
 	if (!dsi->master) {
 		dsi->output.dev = client->dev;
@@ -1037,12 +1039,9 @@ static int tegra_dsi_init(struct host1x_client *client)
 		drm_connector_register(&dsi->output.connector);
 
 		err = tegra_output_init(drm, &dsi->output);
-		if (err < 0) {
-			dev_err(client->dev,
-				"failed to initialize output: %d\n",
+		if (err < 0)
+			dev_err(dsi->dev, "failed to initialize output: %d\n",
 				err);
-			goto reset;
-		}
 
 		dsi->output.encoder.possible_crtcs = 0x3;
 	}
@@ -1054,10 +1053,6 @@ static int tegra_dsi_init(struct host1x_client *client)
 	}
 
 	return 0;
-
-reset:
-	reset_control_assert(dsi->rst);
-	return err;
 }
 
 static int tegra_dsi_exit(struct host1x_client *client)
@@ -1069,7 +1064,7 @@ static int tegra_dsi_exit(struct host1x_client *client)
 	if (IS_ENABLED(CONFIG_DEBUG_FS))
 		tegra_dsi_debugfs_exit(dsi);
 
-	reset_control_assert(dsi->rst);
+	regulator_disable(dsi->vdd);
 
 	return 0;
 }
@@ -1493,74 +1488,50 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 	dsi->format = MIPI_DSI_FMT_RGB888;
 	dsi->lanes = 4;
 
-	dsi->rst = devm_reset_control_get(&pdev->dev, "dsi");
-	if (IS_ERR(dsi->rst))
-		return PTR_ERR(dsi->rst);
+	if (!pdev->dev.pm_domain) {
+		dsi->rst = devm_reset_control_get(&pdev->dev, "dsi");
+		if (IS_ERR(dsi->rst))
+			return PTR_ERR(dsi->rst);
+	}
 
 	dsi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dsi->clk)) {
 		dev_err(&pdev->dev, "cannot get DSI clock\n");
-		err = PTR_ERR(dsi->clk);
-		goto reset;
-	}
-
-	err = clk_prepare_enable(dsi->clk);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable DSI clock\n");
-		goto reset;
+		return PTR_ERR(dsi->clk);
 	}
 
 	dsi->clk_lp = devm_clk_get(&pdev->dev, "lp");
 	if (IS_ERR(dsi->clk_lp)) {
 		dev_err(&pdev->dev, "cannot get low-power clock\n");
-		err = PTR_ERR(dsi->clk_lp);
-		goto disable_clk;
-	}
-
-	err = clk_prepare_enable(dsi->clk_lp);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable low-power clock\n");
-		goto disable_clk;
+		return PTR_ERR(dsi->clk_lp);
 	}
 
 	dsi->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dsi->clk_parent)) {
 		dev_err(&pdev->dev, "cannot get parent clock\n");
-		err = PTR_ERR(dsi->clk_parent);
-		goto disable_clk_lp;
+		return PTR_ERR(dsi->clk_parent);
 	}
 
 	dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi");
 	if (IS_ERR(dsi->vdd)) {
 		dev_err(&pdev->dev, "cannot get VDD supply\n");
-		err = PTR_ERR(dsi->vdd);
-		goto disable_clk_lp;
-	}
-
-	err = regulator_enable(dsi->vdd);
-	if (err < 0) {
-		dev_err(&pdev->dev, "cannot enable VDD supply\n");
-		goto disable_clk_lp;
+		return PTR_ERR(dsi->vdd);
 	}
 
 	err = tegra_dsi_setup_clocks(dsi);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot setup clocks\n");
-		goto disable_vdd;
+		return err;
 	}
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	dsi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(dsi->regs)) {
-		err = PTR_ERR(dsi->regs);
-		goto disable_vdd;
-	}
+	if (IS_ERR(dsi->regs))
+		return PTR_ERR(dsi->regs);
 
 	dsi->mipi = tegra_mipi_request(&pdev->dev);
-	if (IS_ERR(dsi->mipi)) {
-		err = PTR_ERR(dsi->mipi);
-		goto disable_vdd;
-	}
+	if (IS_ERR(dsi->mipi))
+		return PTR_ERR(dsi->mipi);
 
 	dsi->host.ops = &tegra_dsi_host_ops;
 	dsi->host.dev = &pdev->dev;
@@ -1571,6 +1542,9 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 		goto mipi_free;
 	}
 
+	platform_set_drvdata(pdev, dsi);
+	pm_runtime_enable(&pdev->dev);
+
 	INIT_LIST_HEAD(&dsi->client.list);
 	dsi->client.ops = &dsi_client_ops;
 	dsi->client.dev = &pdev->dev;
@@ -1582,22 +1556,12 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 		goto unregister;
 	}
 
-	platform_set_drvdata(pdev, dsi);
-
 	return 0;
 
 unregister:
 	mipi_dsi_host_unregister(&dsi->host);
 mipi_free:
 	tegra_mipi_free(dsi->mipi);
-disable_vdd:
-	regulator_disable(dsi->vdd);
-disable_clk_lp:
-	clk_disable_unprepare(dsi->clk_lp);
-disable_clk:
-	clk_disable_unprepare(dsi->clk);
-reset:
-	reset_control_assert(dsi->rst);
 	return err;
 }
 
@@ -1606,6 +1570,8 @@ static int tegra_dsi_remove(struct platform_device *pdev)
 	struct tegra_dsi *dsi = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&dsi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -1618,14 +1584,82 @@ static int tegra_dsi_remove(struct platform_device *pdev)
 	mipi_dsi_host_unregister(&dsi->host);
 	tegra_mipi_free(dsi->mipi);
 
-	regulator_disable(dsi->vdd);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_dsi_suspend(struct device *dev)
+{
+	struct tegra_dsi *dsi = dev_get_drvdata(dev);
+	int err;
+
+	if (dsi->rst) {
+		err = reset_control_assert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
 	clk_disable_unprepare(dsi->clk_lp);
 	clk_disable_unprepare(dsi->clk);
-	reset_control_assert(dsi->rst);
+
+	regulator_disable(dsi->vdd);
 
 	return 0;
 }
 
+static int tegra_dsi_resume(struct device *dev)
+{
+	struct tegra_dsi *dsi = dev_get_drvdata(dev);
+	int err;
+
+	err = regulator_enable(dsi->vdd);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to enable VDD supply: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(dsi->clk);
+	if (err < 0) {
+		dev_err(dev, "cannot enable DSI clock: %d\n", err);
+		goto disable_vdd;
+	}
+
+	err = clk_prepare_enable(dsi->clk_lp);
+	if (err < 0) {
+		dev_err(dev, "cannot enable low-power clock: %d\n", err);
+		goto disable_clk;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (dsi->rst) {
+		err = reset_control_deassert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "cannot assert reset: %d\n", err);
+			goto disable_clk_lp;
+		}
+	}
+
+	return 0;
+
+disable_clk_lp:
+	clk_disable_unprepare(dsi->clk_lp);
+disable_clk:
+	clk_disable_unprepare(dsi->clk);
+disable_vdd:
+	regulator_disable(dsi->vdd);
+	return err;
+}
+#endif
+
+static const struct dev_pm_ops tegra_dsi_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_dsi_suspend, tegra_dsi_resume, NULL)
+};
+
 static const struct of_device_id tegra_dsi_of_match[] = {
 	{ .compatible = "nvidia,tegra210-dsi", },
 	{ .compatible = "nvidia,tegra132-dsi", },
@@ -1639,6 +1673,7 @@ struct platform_driver tegra_dsi_driver = {
 	.driver = {
 		.name = "tegra-dsi",
 		.of_match_table = tegra_dsi_of_match,
+		.pm = &tegra_dsi_pm_ops,
 	},
 	.probe = tegra_dsi_probe,
 	.remove = tegra_dsi_remove,
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index 1b12aa7a715e..e6d71fa4028e 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -68,7 +68,7 @@ static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
 		struct tegra_bo *bo = fb->planes[i];
 
 		if (bo) {
-			if (bo->pages && bo->vaddr)
+			if (bo->pages)
 				vunmap(bo->vaddr);
 
 			drm_gem_object_unreference_unlocked(&bo->gem);
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 2fdb8796443e..cda0491ed6bf 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -11,6 +11,7 @@
 #include <linux/debugfs.h>
 #include <linux/gpio.h>
 #include <linux/hdmi.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
@@ -18,10 +19,14 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 
+#include <sound/hda_verbs.h>
+
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
 
+#define HDMI_ELD_BUFFER_SIZE 96
+
 struct tmds_config {
 	unsigned int pclk;
 	u32 pll0;
@@ -39,6 +44,8 @@ struct tegra_hdmi_config {
 	u32 fuse_override_value;
 
 	bool has_sor_io_peak_current;
+	bool has_hda;
+	bool has_hbr;
 };
 
 struct tegra_hdmi {
@@ -60,7 +67,10 @@ struct tegra_hdmi {
 	const struct tegra_hdmi_config *config;
 
 	unsigned int audio_source;
-	unsigned int audio_freq;
+	unsigned int audio_sample_rate;
+	unsigned int audio_channels;
+
+	unsigned int pixel_clock;
 	bool stereo;
 	bool dvi;
 
@@ -402,11 +412,11 @@ static const struct tmds_config tegra124_tmds_config[] = {
 };
 
 static const struct tegra_hdmi_audio_config *
-tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pclk)
+tegra_hdmi_get_audio_config(unsigned int sample_rate, unsigned int pclk)
 {
 	const struct tegra_hdmi_audio_config *table;
 
-	switch (audio_freq) {
+	switch (sample_rate) {
 	case 32000:
 		table = tegra_hdmi_audio_32k;
 		break;
@@ -476,44 +486,114 @@ static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi)
 	}
 }
 
-static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
+static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value)
+{
+	static const struct {
+		unsigned int sample_rate;
+		unsigned int offset;
+	} regs[] = {
+		{  32000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320 },
+		{  44100, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441 },
+		{  48000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480 },
+		{  88200, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882 },
+		{  96000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960 },
+		{ 176400, HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764 },
+		{ 192000, HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920 },
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(regs); i++) {
+		if (regs[i].sample_rate == hdmi->audio_sample_rate) {
+			tegra_hdmi_writel(hdmi, value, regs[i].offset);
+			break;
+		}
+	}
+}
+
+static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 {
-	struct device_node *node = hdmi->dev->of_node;
 	const struct tegra_hdmi_audio_config *config;
-	unsigned int offset = 0;
-	u32 value;
+	u32 source, value;
 
 	switch (hdmi->audio_source) {
 	case HDA:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
+		else
+			return -EINVAL;
+
 		break;
 
 	case SPDIF:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
+		else
+			source = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
 		break;
 
 	default:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
+		if (hdmi->config->has_hda)
+			source = SOR_AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
+		else
+			source = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
 		break;
 	}
 
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		value |= AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			 AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
-	} else {
-		value |= AUDIO_CNTRL0_INJECT_NULLSMPL;
+	/*
+	 * Tegra30 and later use a slightly modified version of the register
+	 * layout to accomodate for changes related to supporting HDA as the
+	 * audio input source for HDMI. The source select field has moved to
+	 * the SOR_AUDIO_CNTRL0 register, but the error tolerance and frames
+	 * per block fields remain in the AUDIO_CNTRL0 register.
+	 */
+	if (hdmi->config->has_hda) {
+		/*
+		 * Inject null samples into the audio FIFO for every frame in
+		 * which the codec did not receive any samples. This applies
+		 * to stereo LPCM only.
+		 *
+		 * XXX: This seems to be a remnant of MCP days when this was
+		 * used to work around issues with monitors not being able to
+		 * play back system startup sounds early. It is possibly not
+		 * needed on Linux at all.
+		 */
+		if (hdmi->audio_channels == 2)
+			value = SOR_AUDIO_CNTRL0_INJECT_NULLSMPL;
+		else
+			value = 0;
+
+		value |= source;
+
 		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+	}
 
-		value = AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+	/*
+	 * On Tegra20, HDA is not a supported audio source and the source
+	 * select field is part of the AUDIO_CNTRL0 register.
+	 */
+	value = AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0) |
+		AUDIO_CNTRL0_ERROR_TOLERANCE(6);
+
+	if (!hdmi->config->has_hda)
+		value |= source;
+
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
+
+	/*
+	 * Advertise support for High Bit-Rate on Tegra114 and later.
+	 */
+	if (hdmi->config->has_hbr) {
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
+		value |= SOR_AUDIO_SPARE0_HBR_ENABLE;
+		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
 	}
 
-	config = tegra_hdmi_get_audio_config(hdmi->audio_freq, pclk);
+	config = tegra_hdmi_get_audio_config(hdmi->audio_sample_rate,
+					     hdmi->pixel_clock);
 	if (!config) {
-		dev_err(hdmi->dev, "cannot set audio to %u at %u pclk\n",
-			hdmi->audio_freq, pclk);
+		dev_err(hdmi->dev,
+			"cannot set audio to %u Hz at %u Hz pixel clock\n",
+			hdmi->audio_sample_rate, hdmi->pixel_clock);
 		return -EINVAL;
 	}
 
@@ -526,8 +606,8 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
 	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE,
 			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
 
-	value = ACR_SUBPACK_CTS(config->cts);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
+	tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config->cts),
+			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
 
 	value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_SPARE);
@@ -536,43 +616,53 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
 	value &= ~AUDIO_N_RESETF;
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
 
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		switch (hdmi->audio_freq) {
-		case 32000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320;
-			break;
+	if (hdmi->config->has_hda)
+		tegra_hdmi_write_aval(hdmi, config->aval);
 
-		case 44100:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441;
-			break;
+	tegra_hdmi_setup_audio_fs_tables(hdmi);
 
-		case 48000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480;
-			break;
+	return 0;
+}
 
-		case 88200:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882;
-			break;
+static void tegra_hdmi_disable_audio(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-		case 96000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960;
-			break;
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value &= ~GENERIC_CTRL_AUDIO;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
 
-		case 176400:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764;
-			break;
+static void tegra_hdmi_enable_audio(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-		case 192000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920;
-			break;
-		}
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value |= GENERIC_CTRL_AUDIO;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
 
-		tegra_hdmi_writel(hdmi, config->aval, offset);
-	}
+static void tegra_hdmi_write_eld(struct tegra_hdmi *hdmi)
+{
+	size_t length = drm_eld_size(hdmi->output.connector.eld), i;
+	u32 value;
 
-	tegra_hdmi_setup_audio_fs_tables(hdmi);
+	for (i = 0; i < length; i++)
+		tegra_hdmi_writel(hdmi, i << 8 | hdmi->output.connector.eld[i],
+				  HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
 
-	return 0;
+	/*
+	 * The HDA codec will always report an ELD buffer size of 96 bytes and
+	 * the HDA codec driver will check that each byte read from the buffer
+	 * is valid. Therefore every byte must be written, even if no 96 bytes
+	 * were parsed from EDID.
+	 */
+	for (i = length; i < HDMI_ELD_BUFFER_SIZE; i++)
+		tegra_hdmi_writel(hdmi, i << 8 | 0,
+				  HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
+
+	value = SOR_AUDIO_HDA_PRESENSE_VALID | SOR_AUDIO_HDA_PRESENSE_PRESENT;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
 }
 
 static inline u32 tegra_hdmi_subpack(const u8 *ptr, size_t size)
@@ -644,12 +734,6 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 	u8 buffer[17];
 	ssize_t err;
 
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-		return;
-	}
-
 	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to setup AVI infoframe: %zd\n", err);
@@ -663,9 +747,24 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 	}
 
 	tegra_hdmi_write_infopack(hdmi, buffer, err);
+}
+
+static void tegra_hdmi_disable_avi_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value &= ~INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_enable_avi_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
+	value |= INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
 }
 
 static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
@@ -674,12 +773,6 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 	u8 buffer[14];
 	ssize_t err;
 
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-		return;
-	}
-
 	err = hdmi_audio_infoframe_init(&frame);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to setup audio infoframe: %zd\n",
@@ -687,7 +780,7 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 		return;
 	}
 
-	frame.channels = 2;
+	frame.channels = hdmi->audio_channels;
 
 	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
 	if (err < 0) {
@@ -703,9 +796,24 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 	 * bytes can be programmed.
 	 */
 	tegra_hdmi_write_infopack(hdmi, buffer, min_t(size_t, 10, err));
+}
 
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+static void tegra_hdmi_disable_audio_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+	value &= ~INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+}
+
+static void tegra_hdmi_enable_audio_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
+	value |= INFOFRAME_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
 }
 
 static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
@@ -713,14 +821,6 @@ static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
 	struct hdmi_vendor_infoframe frame;
 	u8 buffer[10];
 	ssize_t err;
-	u32 value;
-
-	if (!hdmi->stereo) {
-		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		value &= ~GENERIC_CTRL_ENABLE;
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		return;
-	}
 
 	hdmi_vendor_infoframe_init(&frame);
 	frame.s3d_struct = HDMI_3D_STRUCTURE_FRAME_PACKING;
@@ -733,6 +833,20 @@ static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
 	}
 
 	tegra_hdmi_write_infopack(hdmi, buffer, err);
+}
+
+static void tegra_hdmi_disable_stereo_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	value &= ~GENERIC_CTRL_ENABLE;
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+}
+
+static void tegra_hdmi_enable_stereo_infoframe(struct tegra_hdmi *hdmi)
+{
+	u32 value;
 
 	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
 	value |= GENERIC_CTRL_ENABLE;
@@ -772,10 +886,25 @@ static bool tegra_output_is_hdmi(struct tegra_output *output)
 	return drm_detect_hdmi_monitor(edid);
 }
 
+static enum drm_connector_status
+tegra_hdmi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct tegra_output *output = connector_to_output(connector);
+	struct tegra_hdmi *hdmi = to_hdmi(output);
+	enum drm_connector_status status;
+
+	status = tegra_output_connector_detect(connector, force);
+	if (status == connector_status_connected)
+		return status;
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
+	return status;
+}
+
 static const struct drm_connector_funcs tegra_hdmi_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
 	.reset = drm_atomic_helper_connector_reset,
-	.detect = tegra_output_connector_detect,
+	.detect = tegra_hdmi_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = tegra_output_connector_destroy,
 	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
@@ -814,7 +943,9 @@ static const struct drm_encoder_funcs tegra_hdmi_encoder_funcs = {
 
 static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 {
+	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_hdmi *hdmi = to_hdmi(output);
 	u32 value;
 
 	/*
@@ -828,6 +959,20 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 
 		tegra_dc_commit(dc);
 	}
+
+	if (!hdmi->dvi) {
+		if (hdmi->stereo)
+			tegra_hdmi_disable_stereo_infoframe(hdmi);
+
+		tegra_hdmi_disable_audio_infoframe(hdmi);
+		tegra_hdmi_disable_avi_infoframe(hdmi);
+		tegra_hdmi_disable_audio(hdmi);
+	}
+
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE);
+	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK);
+
+	pm_runtime_put(hdmi->dev);
 }
 
 static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
@@ -836,21 +981,28 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	unsigned int h_sync_width, h_front_porch, h_back_porch, i, rekey;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct device_node *node = output->dev->of_node;
 	struct tegra_hdmi *hdmi = to_hdmi(output);
-	unsigned int pulse_start, div82, pclk;
+	unsigned int pulse_start, div82;
 	int retries = 1000;
 	u32 value;
 	int err;
 
-	hdmi->dvi = !tegra_output_is_hdmi(output);
+	pm_runtime_get_sync(hdmi->dev);
 
-	pclk = mode->clock * 1000;
+	/*
+	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
+	 * is used for interoperability between the HDA codec driver and the
+	 * HDMI driver.
+	 */
+	tegra_hdmi_writel(hdmi, INT_CODEC_SCRATCH0, HDMI_NV_PDISP_INT_ENABLE);
+	tegra_hdmi_writel(hdmi, INT_CODEC_SCRATCH0, HDMI_NV_PDISP_INT_MASK);
+
+	hdmi->pixel_clock = mode->clock * 1000;
 	h_sync_width = mode->hsync_end - mode->hsync_start;
 	h_back_porch = mode->htotal - mode->hsync_end;
 	h_front_porch = mode->hsync_start - mode->hdisplay;
 
-	err = clk_set_rate(hdmi->clk, pclk);
+	err = clk_set_rate(hdmi->clk, hdmi->pixel_clock);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n",
 			err);
@@ -909,17 +1061,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	value = SOR_REFCLK_DIV_INT(div82 >> 2) | SOR_REFCLK_DIV_FRAC(div82);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_REFCLK);
 
+	hdmi->dvi = !tegra_output_is_hdmi(output);
 	if (!hdmi->dvi) {
-		err = tegra_hdmi_setup_audio(hdmi, pclk);
+		err = tegra_hdmi_setup_audio(hdmi);
 		if (err < 0)
 			hdmi->dvi = true;
 	}
 
-	if (of_device_is_compatible(node, "nvidia,tegra20-hdmi")) {
-		/*
-		 * TODO: add ELD support
-		 */
-	}
+	if (hdmi->config->has_hda)
+		tegra_hdmi_write_eld(hdmi);
 
 	rekey = HDMI_REKEY_DEFAULT;
 	value = HDMI_CTRL_REKEY(rekey);
@@ -931,20 +1081,17 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_CTRL);
 
-	if (hdmi->dvi)
-		tegra_hdmi_writel(hdmi, 0x0,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	else
-		tegra_hdmi_writel(hdmi, GENERIC_CTRL_AUDIO,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
+	if (!hdmi->dvi) {
+		tegra_hdmi_setup_avi_infoframe(hdmi, mode);
+		tegra_hdmi_setup_audio_infoframe(hdmi);
 
-	tegra_hdmi_setup_avi_infoframe(hdmi, mode);
-	tegra_hdmi_setup_audio_infoframe(hdmi);
-	tegra_hdmi_setup_stereo_infoframe(hdmi);
+		if (hdmi->stereo)
+			tegra_hdmi_setup_stereo_infoframe(hdmi);
+	}
 
 	/* TMDS CONFIG */
 	for (i = 0; i < hdmi->config->num_tmds; i++) {
-		if (pclk <= hdmi->config->tmds[i].pclk) {
+		if (hdmi->pixel_clock <= hdmi->config->tmds[i].pclk) {
 			tegra_hdmi_setup_tmds(hdmi, &hdmi->config->tmds[i]);
 			break;
 		}
@@ -1031,6 +1178,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 
 	tegra_dc_commit(dc);
 
+	if (!hdmi->dvi) {
+		tegra_hdmi_enable_avi_infoframe(hdmi);
+		tegra_hdmi_enable_audio_infoframe(hdmi);
+		tegra_hdmi_enable_audio(hdmi);
+
+		if (hdmi->stereo)
+			tegra_hdmi_enable_stereo_infoframe(hdmi);
+	}
+
 	/* TODO: add HDCP support */
 }
 
@@ -1235,8 +1391,14 @@ static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
 	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG);
 	DUMP_REG(HDMI_NV_PDISP_KEY_SKEY_INDEX);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
+	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
 	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
+	DUMP_REG(HDMI_NV_PDISP_INT_STATUS);
+	DUMP_REG(HDMI_NV_PDISP_INT_MASK);
+	DUMP_REG(HDMI_NV_PDISP_INT_ENABLE);
 	DUMP_REG(HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT);
 
 #undef DUMP_REG
@@ -1360,14 +1522,6 @@ static int tegra_hdmi_init(struct host1x_client *client)
 		return err;
 	}
 
-	err = clk_prepare_enable(hdmi->clk);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to enable clock: %d\n", err);
-		return err;
-	}
-
-	reset_control_deassert(hdmi->rst);
-
 	return 0;
 }
 
@@ -1377,9 +1531,6 @@ static int tegra_hdmi_exit(struct host1x_client *client)
 
 	tegra_output_exit(&hdmi->output);
 
-	reset_control_assert(hdmi->rst);
-	clk_disable_unprepare(hdmi->clk);
-
 	regulator_disable(hdmi->vdd);
 	regulator_disable(hdmi->pll);
 	regulator_disable(hdmi->hdmi);
@@ -1401,6 +1552,8 @@ static const struct tegra_hdmi_config tegra20_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = false,
+	.has_hda = false,
+	.has_hbr = false,
 };
 
 static const struct tegra_hdmi_config tegra30_hdmi_config = {
@@ -1409,6 +1562,8 @@ static const struct tegra_hdmi_config tegra30_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = false,
+	.has_hda = true,
+	.has_hbr = false,
 };
 
 static const struct tegra_hdmi_config tegra114_hdmi_config = {
@@ -1417,6 +1572,8 @@ static const struct tegra_hdmi_config tegra114_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_PAD_CTLS0,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = true,
+	.has_hda = true,
+	.has_hbr = true,
 };
 
 static const struct tegra_hdmi_config tegra124_hdmi_config = {
@@ -1425,6 +1582,8 @@ static const struct tegra_hdmi_config tegra124_hdmi_config = {
 	.fuse_override_offset = HDMI_NV_PDISP_SOR_PAD_CTLS0,
 	.fuse_override_value = 1 << 31,
 	.has_sor_io_peak_current = true,
+	.has_hda = true,
+	.has_hbr = true,
 };
 
 static const struct of_device_id tegra_hdmi_of_match[] = {
@@ -1436,6 +1595,67 @@ static const struct of_device_id tegra_hdmi_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_hdmi_of_match);
 
+static void hda_format_parse(unsigned int format, unsigned int *rate,
+			     unsigned int *channels)
+{
+	unsigned int mul, div;
+
+	if (format & AC_FMT_BASE_44K)
+		*rate = 44100;
+	else
+		*rate = 48000;
+
+	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
+	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
+
+	*rate = *rate * (mul + 1) / (div + 1);
+
+	*channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
+}
+
+static irqreturn_t tegra_hdmi_irq(int irq, void *data)
+{
+	struct tegra_hdmi *hdmi = data;
+	u32 value;
+	int err;
+
+	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_INT_STATUS);
+	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_INT_STATUS);
+
+	if (value & INT_CODEC_SCRATCH0) {
+		unsigned int format;
+		u32 value;
+
+		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
+
+		if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) {
+			unsigned int sample_rate, channels;
+
+			format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
+
+			hda_format_parse(format, &sample_rate, &channels);
+
+			hdmi->audio_sample_rate = sample_rate;
+			hdmi->audio_channels = channels;
+
+			err = tegra_hdmi_setup_audio(hdmi);
+			if (err < 0) {
+				tegra_hdmi_disable_audio_infoframe(hdmi);
+				tegra_hdmi_disable_audio(hdmi);
+			} else {
+				tegra_hdmi_setup_audio_infoframe(hdmi);
+				tegra_hdmi_enable_audio_infoframe(hdmi);
+				tegra_hdmi_enable_audio(hdmi);
+			}
+		} else {
+			tegra_hdmi_disable_audio_infoframe(hdmi);
+			tegra_hdmi_disable_audio(hdmi);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -1453,8 +1673,10 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->config = match->data;
 	hdmi->dev = &pdev->dev;
+
 	hdmi->audio_source = AUTO;
-	hdmi->audio_freq = 44100;
+	hdmi->audio_sample_rate = 48000;
+	hdmi->audio_channels = 2;
 	hdmi->stereo = false;
 	hdmi->dvi = false;
 
@@ -1515,6 +1737,17 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 
 	hdmi->irq = err;
 
+	err = devm_request_irq(hdmi->dev, hdmi->irq, tegra_hdmi_irq, 0,
+			       dev_name(hdmi->dev), hdmi);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n",
+			hdmi->irq, err);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, hdmi);
+	pm_runtime_enable(&pdev->dev);
+
 	INIT_LIST_HEAD(&hdmi->client.list);
 	hdmi->client.ops = &hdmi_client_ops;
 	hdmi->client.dev = &pdev->dev;
@@ -1526,8 +1759,6 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	platform_set_drvdata(pdev, hdmi);
-
 	return 0;
 }
 
@@ -1536,6 +1767,8 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&hdmi->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -1545,17 +1778,61 @@ static int tegra_hdmi_remove(struct platform_device *pdev)
 
 	tegra_output_remove(&hdmi->output);
 
-	clk_disable_unprepare(hdmi->clk_parent);
+	return 0;
+}
+
+#ifdef CONFIG_PM
+static int tegra_hdmi_suspend(struct device *dev)
+{
+	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
 	clk_disable_unprepare(hdmi->clk);
 
 	return 0;
 }
 
+static int tegra_hdmi_resume(struct device *dev)
+{
+	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(hdmi->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
+	err = reset_control_deassert(hdmi->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		clk_disable_unprepare(hdmi->clk);
+		return err;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_hdmi_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_hdmi_suspend, tegra_hdmi_resume, NULL)
+};
+
 struct platform_driver tegra_hdmi_driver = {
 	.driver = {
 		.name = "tegra-hdmi",
-		.owner = THIS_MODULE,
 		.of_match_table = tegra_hdmi_of_match,
+		.pm = &tegra_hdmi_pm_ops,
 	},
 	.probe = tegra_hdmi_probe,
 	.remove = tegra_hdmi_remove,
diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/drm/tegra/hdmi.h
index a882514389cd..2339f134a09a 100644
--- a/drivers/gpu/drm/tegra/hdmi.h
+++ b/drivers/gpu/drm/tegra/hdmi.h
@@ -468,9 +468,20 @@
 #define HDMI_NV_PDISP_KEY_SKEY_INDEX				0xa3
 
 #define HDMI_NV_PDISP_SOR_AUDIO_CNTRL0				0xac
-#define AUDIO_CNTRL0_INJECT_NULLSMPL (1 << 29)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_AUTO	(0 << 20)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_SPDIF	(1 << 20)
+#define  SOR_AUDIO_CNTRL0_SOURCE_SELECT_HDAL	(2 << 20)
+#define  SOR_AUDIO_CNTRL0_INJECT_NULLSMPL	(1 << 29)
+#define HDMI_NV_PDISP_SOR_AUDIO_SPARE0				0xae
+#define  SOR_AUDIO_SPARE0_HBR_ENABLE		(1 << 27)
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0		0xba
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID	(1 << 30)
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK	0xffff
+#define HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH1		0xbb
 #define HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR			0xbc
 #define HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE			0xbd
+#define  SOR_AUDIO_HDA_PRESENSE_VALID		(1 << 1)
+#define  SOR_AUDIO_HDA_PRESENSE_PRESENT		(1 << 0)
 
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320    0xbf
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441    0xc0
@@ -481,6 +492,14 @@
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920    0xc5
 #define HDMI_NV_PDISP_SOR_AUDIO_AVAL_DEFAULT 0xc5
 
+#define HDMI_NV_PDISP_INT_STATUS			0xcc
+#define  INT_SCRATCH		(1 << 3)
+#define  INT_CP_REQUEST		(1 << 2)
+#define  INT_CODEC_SCRATCH1	(1 << 1)
+#define  INT_CODEC_SCRATCH0	(1 << 0)
+#define HDMI_NV_PDISP_INT_MASK				0xcd
+#define HDMI_NV_PDISP_INT_ENABLE			0xce
+
 #define HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT		0xd1
 #define PEAK_CURRENT_LANE0(x) (((x) & 0x7f) <<  0)
 #define PEAK_CURRENT_LANE1(x) (((x) & 0x7f) <<  8)
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 1480f6aaffe4..595d1ec3e02e 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -36,6 +36,7 @@ int tegra_output_connector_get_modes(struct drm_connector *connector)
 
 	if (edid) {
 		err = drm_add_edid_modes(connector, edid);
+		drm_edid_to_eld(connector, edid);
 		kfree(edid);
 	}
 
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index 34958d71284b..74d0540b8d4c 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -7,11 +7,13 @@
  */
 
 #include <linux/clk.h>
+#include <linux/clk-provider.h>
 #include <linux/debugfs.h>
 #include <linux/gpio.h>
 #include <linux/io.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
@@ -149,6 +151,8 @@ struct tegra_sor_soc {
 
 	const struct tegra_sor_hdmi_settings *settings;
 	unsigned int num_settings;
+
+	const u8 *xbar_cfg;
 };
 
 struct tegra_sor;
@@ -169,7 +173,9 @@ struct tegra_sor {
 
 	struct reset_control *rst;
 	struct clk *clk_parent;
+	struct clk *clk_brick;
 	struct clk *clk_safe;
+	struct clk *clk_src;
 	struct clk *clk_dp;
 	struct clk *clk;
 
@@ -190,6 +196,18 @@ struct tegra_sor {
 	struct regulator *hdmi_supply;
 };
 
+struct tegra_sor_state {
+	struct drm_connector_state base;
+
+	unsigned int bpc;
+};
+
+static inline struct tegra_sor_state *
+to_sor_state(struct drm_connector_state *state)
+{
+	return container_of(state, struct tegra_sor_state, base);
+}
+
 struct tegra_sor_config {
 	u32 bits_per_pixel;
 
@@ -225,6 +243,118 @@ static inline void tegra_sor_writel(struct tegra_sor *sor, u32 value,
 	writel(value, sor->regs + (offset << 2));
 }
 
+static int tegra_sor_set_parent_clock(struct tegra_sor *sor, struct clk *parent)
+{
+	int err;
+
+	clk_disable_unprepare(sor->clk);
+
+	err = clk_set_parent(sor->clk, parent);
+	if (err < 0)
+		return err;
+
+	err = clk_prepare_enable(sor->clk);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+struct tegra_clk_sor_brick {
+	struct clk_hw hw;
+	struct tegra_sor *sor;
+};
+
+static inline struct tegra_clk_sor_brick *to_brick(struct clk_hw *hw)
+{
+	return container_of(hw, struct tegra_clk_sor_brick, hw);
+}
+
+static const char * const tegra_clk_sor_brick_parents[] = {
+	"pll_d2_out0", "pll_dp"
+};
+
+static int tegra_clk_sor_brick_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct tegra_clk_sor_brick *brick = to_brick(hw);
+	struct tegra_sor *sor = brick->sor;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
+
+	switch (index) {
+	case 0:
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_PCLK;
+		break;
+
+	case 1:
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
+		break;
+	}
+
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	return 0;
+}
+
+static u8 tegra_clk_sor_brick_get_parent(struct clk_hw *hw)
+{
+	struct tegra_clk_sor_brick *brick = to_brick(hw);
+	struct tegra_sor *sor = brick->sor;
+	u8 parent = U8_MAX;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+
+	switch (value & SOR_CLK_CNTRL_DP_CLK_SEL_MASK) {
+	case SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_PCLK:
+	case SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_PCLK:
+		parent = 0;
+		break;
+
+	case SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK:
+	case SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK:
+		parent = 1;
+		break;
+	}
+
+	return parent;
+}
+
+static const struct clk_ops tegra_clk_sor_brick_ops = {
+	.set_parent = tegra_clk_sor_brick_set_parent,
+	.get_parent = tegra_clk_sor_brick_get_parent,
+};
+
+static struct clk *tegra_clk_sor_brick_register(struct tegra_sor *sor,
+						const char *name)
+{
+	struct tegra_clk_sor_brick *brick;
+	struct clk_init_data init;
+	struct clk *clk;
+
+	brick = devm_kzalloc(sor->dev, sizeof(*brick), GFP_KERNEL);
+	if (!brick)
+		return ERR_PTR(-ENOMEM);
+
+	brick->sor = sor;
+
+	init.name = name;
+	init.flags = 0;
+	init.parent_names = tegra_clk_sor_brick_parents;
+	init.num_parents = ARRAY_SIZE(tegra_clk_sor_brick_parents);
+	init.ops = &tegra_clk_sor_brick_ops;
+
+	brick->hw.init = &init;
+
+	clk = devm_clk_register(sor->dev, &brick->hw);
+	if (IS_ERR(clk))
+		kfree(brick);
+
+	return clk;
+}
+
 static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
 				   struct drm_dp_link *link)
 {
@@ -569,10 +699,10 @@ static int tegra_sor_compute_params(struct tegra_sor *sor,
 	return false;
 }
 
-static int tegra_sor_calc_config(struct tegra_sor *sor,
-				 const struct drm_display_mode *mode,
-				 struct tegra_sor_config *config,
-				 struct drm_dp_link *link)
+static int tegra_sor_compute_config(struct tegra_sor *sor,
+				    const struct drm_display_mode *mode,
+				    struct tegra_sor_config *config,
+				    struct drm_dp_link *link)
 {
 	const u64 f = 100000, link_rate = link->rate * 1000;
 	const u64 pclk = mode->clock * 1000;
@@ -661,6 +791,135 @@ static int tegra_sor_calc_config(struct tegra_sor *sor,
 	return 0;
 }
 
+static void tegra_sor_apply_config(struct tegra_sor *sor,
+				   const struct tegra_sor_config *config)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value &= ~SOR_DP_LINKCTL_TU_SIZE_MASK;
+	value |= SOR_DP_LINKCTL_TU_SIZE(config->tu_size);
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	value = tegra_sor_readl(sor, SOR_DP_CONFIG0);
+	value &= ~SOR_DP_CONFIG_WATERMARK_MASK;
+	value |= SOR_DP_CONFIG_WATERMARK(config->watermark);
+
+	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_COUNT_MASK;
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(config->active_count);
+
+	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_FRAC_MASK;
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(config->active_frac);
+
+	if (config->active_polarity)
+		value |= SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
+	else
+		value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
+
+	value |= SOR_DP_CONFIG_ACTIVE_SYM_ENABLE;
+	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE;
+	tegra_sor_writel(sor, value, SOR_DP_CONFIG0);
+
+	value = tegra_sor_readl(sor, SOR_DP_AUDIO_HBLANK_SYMBOLS);
+	value &= ~SOR_DP_AUDIO_HBLANK_SYMBOLS_MASK;
+	value |= config->hblank_symbols & 0xffff;
+	tegra_sor_writel(sor, value, SOR_DP_AUDIO_HBLANK_SYMBOLS);
+
+	value = tegra_sor_readl(sor, SOR_DP_AUDIO_VBLANK_SYMBOLS);
+	value &= ~SOR_DP_AUDIO_VBLANK_SYMBOLS_MASK;
+	value |= config->vblank_symbols & 0xffff;
+	tegra_sor_writel(sor, value, SOR_DP_AUDIO_VBLANK_SYMBOLS);
+}
+
+static void tegra_sor_mode_set(struct tegra_sor *sor,
+			       const struct drm_display_mode *mode,
+			       struct tegra_sor_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(sor->output.encoder.crtc);
+	unsigned int vbe, vse, hbe, hse, vbs, hbs;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PIXELDEPTH_MASK;
+	value &= ~SOR_STATE_ASY_CRC_MODE_MASK;
+	value &= ~SOR_STATE_ASY_OWNER_MASK;
+
+	value |= SOR_STATE_ASY_CRC_MODE_COMPLETE |
+		 SOR_STATE_ASY_OWNER(dc->pipe + 1);
+
+	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+		value &= ~SOR_STATE_ASY_HSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		value |= SOR_STATE_ASY_HSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+		value &= ~SOR_STATE_ASY_VSYNCPOL;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		value |= SOR_STATE_ASY_VSYNCPOL;
+
+	switch (state->bpc) {
+	case 16:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_48_444;
+		break;
+
+	case 12:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_36_444;
+		break;
+
+	case 10:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_30_444;
+		break;
+
+	case 8:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
+		break;
+
+	case 6:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
+		break;
+
+	default:
+		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
+		break;
+	}
+
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	/*
+	 * TODO: The video timing programming below doesn't seem to match the
+	 * register definitions.
+	 */
+
+	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
+
+	/* sync end = sync width - 1 */
+	vse = mode->vsync_end - mode->vsync_start - 1;
+	hse = mode->hsync_end - mode->hsync_start - 1;
+
+	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
+
+	/* blank end = sync end + back porch */
+	vbe = vse + (mode->vtotal - mode->vsync_end);
+	hbe = hse + (mode->htotal - mode->hsync_end);
+
+	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
+
+	/* blank start = blank end + active */
+	vbs = vbe + mode->vdisplay;
+	hbs = hbe + mode->hdisplay;
+
+	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
+	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
+
+	/* XXX interlacing support */
+	tegra_sor_writel(sor, 0x001, SOR_HEAD_STATE5(dc->pipe));
+}
+
 static int tegra_sor_detach(struct tegra_sor *sor)
 {
 	unsigned long value, timeout;
@@ -733,7 +992,8 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 	if ((value & SOR_PWR_TRIGGER) != 0)
 		return -ETIMEDOUT;
 
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
@@ -1038,6 +1298,22 @@ static void tegra_sor_debugfs_exit(struct tegra_sor *sor)
 	sor->debugfs = NULL;
 }
 
+static void tegra_sor_connector_reset(struct drm_connector *connector)
+{
+	struct tegra_sor_state *state;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return;
+
+	if (connector->state) {
+		__drm_atomic_helper_connector_destroy_state(connector->state);
+		kfree(connector->state);
+	}
+
+	__drm_atomic_helper_connector_reset(connector, &state->base);
+}
+
 static enum drm_connector_status
 tegra_sor_connector_detect(struct drm_connector *connector, bool force)
 {
@@ -1050,13 +1326,28 @@ tegra_sor_connector_detect(struct drm_connector *connector, bool force)
 	return tegra_output_connector_detect(connector, force);
 }
 
+static struct drm_connector_state *
+tegra_sor_connector_duplicate_state(struct drm_connector *connector)
+{
+	struct tegra_sor_state *state = to_sor_state(connector->state);
+	struct tegra_sor_state *copy;
+
+	copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
+	if (!copy)
+		return NULL;
+
+	__drm_atomic_helper_connector_duplicate_state(connector, &copy->base);
+
+	return &copy->base;
+}
+
 static const struct drm_connector_funcs tegra_sor_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.reset = drm_atomic_helper_connector_reset,
+	.reset = tegra_sor_connector_reset,
 	.detect = tegra_sor_connector_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.destroy = tegra_output_connector_destroy,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_duplicate_state = tegra_sor_connector_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
@@ -1081,6 +1372,10 @@ static enum drm_mode_status
 tegra_sor_connector_mode_valid(struct drm_connector *connector,
 			       struct drm_display_mode *mode)
 {
+	/* HDMI 2.0 modes are not yet supported */
+	if (mode->clock > 340000)
+		return MODE_NOCLOCK;
+
 	return MODE_OK;
 }
 
@@ -1140,8 +1435,7 @@ static void tegra_sor_edp_disable(struct drm_encoder *encoder)
 	if (output->panel)
 		drm_panel_unprepare(output->panel);
 
-	reset_control_assert(sor->rst);
-	clk_disable_unprepare(sor->clk);
+	pm_runtime_put(sor->dev);
 }
 
 #if 0
@@ -1191,19 +1485,18 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	unsigned int vbe, vse, hbe, hse, vbs, hbs, i;
 	struct tegra_sor *sor = to_sor(output);
 	struct tegra_sor_config config;
+	struct tegra_sor_state *state;
 	struct drm_dp_link link;
 	u8 rate, lanes;
+	unsigned int i;
 	int err = 0;
 	u32 value;
 
-	err = clk_prepare_enable(sor->clk);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable clock: %d\n", err);
+	state = to_sor_state(output->connector.state);
 
-	reset_control_deassert(sor->rst);
+	pm_runtime_get_sync(sor->dev);
 
 	if (output->panel)
 		drm_panel_prepare(output->panel);
@@ -1218,17 +1511,17 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 		return;
 	}
 
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
 	memset(&config, 0, sizeof(config));
-	config.bits_per_pixel = output->connector.display_info.bpc * 3;
+	config.bits_per_pixel = state->bpc * 3;
 
-	err = tegra_sor_calc_config(sor, mode, &config, &link);
+	err = tegra_sor_compute_config(sor, mode, &config, &link);
 	if (err < 0)
-		dev_err(sor->dev, "failed to compute link configuration: %d\n",
-			err);
+		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
 
 	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
 	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
@@ -1325,10 +1618,18 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value &= ~SOR_PLL2_PORT_POWERDOWN;
 	tegra_sor_writel(sor, value, SOR_PLL2);
 
-	/* switch to DP clock */
-	err = clk_set_parent(sor->clk, sor->clk_dp);
+	/* XXX not in TRM */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
+
+	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+
+	/* switch to DP parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_dp);
 	if (err < 0)
-		dev_err(sor->dev, "failed to set DP parent clock: %d\n", err);
+		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
 	/* power DP lanes */
 	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
@@ -1374,13 +1675,11 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	value |= drm_dp_link_rate_to_bw_code(link.rate) << 2;
 	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
 
-	/* set linkctl */
+	tegra_sor_apply_config(sor, &config);
+
+	/* enable link */
 	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
 	value |= SOR_DP_LINKCTL_ENABLE;
-
-	value &= ~SOR_DP_LINKCTL_TU_SIZE_MASK;
-	value |= SOR_DP_LINKCTL_TU_SIZE(config.tu_size);
-
 	value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
 	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
 
@@ -1393,35 +1692,6 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 
 	tegra_sor_writel(sor, value, SOR_DP_TPG);
 
-	value = tegra_sor_readl(sor, SOR_DP_CONFIG0);
-	value &= ~SOR_DP_CONFIG_WATERMARK_MASK;
-	value |= SOR_DP_CONFIG_WATERMARK(config.watermark);
-
-	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_COUNT_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_COUNT(config.active_count);
-
-	value &= ~SOR_DP_CONFIG_ACTIVE_SYM_FRAC_MASK;
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_FRAC(config.active_frac);
-
-	if (config.active_polarity)
-		value |= SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
-	else
-		value &= ~SOR_DP_CONFIG_ACTIVE_SYM_POLARITY;
-
-	value |= SOR_DP_CONFIG_ACTIVE_SYM_ENABLE;
-	value |= SOR_DP_CONFIG_DISPARITY_NEGATIVE;
-	tegra_sor_writel(sor, value, SOR_DP_CONFIG0);
-
-	value = tegra_sor_readl(sor, SOR_DP_AUDIO_HBLANK_SYMBOLS);
-	value &= ~SOR_DP_AUDIO_HBLANK_SYMBOLS_MASK;
-	value |= config.hblank_symbols & 0xffff;
-	tegra_sor_writel(sor, value, SOR_DP_AUDIO_HBLANK_SYMBOLS);
-
-	value = tegra_sor_readl(sor, SOR_DP_AUDIO_VBLANK_SYMBOLS);
-	value &= ~SOR_DP_AUDIO_VBLANK_SYMBOLS_MASK;
-	value |= config.vblank_symbols & 0xffff;
-	tegra_sor_writel(sor, value, SOR_DP_AUDIO_VBLANK_SYMBOLS);
-
 	/* enable pad calibration logic */
 	value = tegra_sor_readl(sor, SOR_DP_PADCTL0);
 	value |= SOR_DP_PADCTL_PAD_CAL_PD;
@@ -1477,75 +1747,19 @@ static void tegra_sor_edp_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
 
-	/*
-	 * configure panel (24bpp, vsync-, hsync-, DP-A protocol, complete
-	 * raster, associate with display controller)
-	 */
-	value = SOR_STATE_ASY_PROTOCOL_DP_A |
-		SOR_STATE_ASY_CRC_MODE_COMPLETE |
-		SOR_STATE_ASY_OWNER(dc->pipe + 1);
-
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
-		value &= ~SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		value &= ~SOR_STATE_ASY_VSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL;
-
-	switch (config.bits_per_pixel) {
-	case 24:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
-		break;
-
-	case 18:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
-		break;
-
-	default:
-		BUG();
-		break;
-	}
-
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
-	/*
-	 * TODO: The video timing programming below doesn't seem to match the
-	 * register definitions.
-	 */
-
-	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
-
-	vse = mode->vsync_end - mode->vsync_start - 1;
-	hse = mode->hsync_end - mode->hsync_start - 1;
-
-	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
-
-	vbe = vse + (mode->vsync_start - mode->vdisplay);
-	hbe = hse + (mode->hsync_start - mode->hdisplay);
-
-	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
-
-	vbs = vbe + mode->vdisplay;
-	hbs = hbe + mode->hdisplay;
-
-	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
-
-	tegra_sor_writel(sor, 0x1, SOR_HEAD_STATE5(dc->pipe));
-
 	/* CSTM (LVDS, link A/B, upper) */
 	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
 		SOR_CSTM_UPPER;
 	tegra_sor_writel(sor, value, SOR_CSTM);
 
+	/* use DP-A protocol */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	tegra_sor_mode_set(sor, mode, state);
+
 	/* PWM setup */
 	err = tegra_sor_setup_pwm(sor, 250);
 	if (err < 0)
@@ -1577,11 +1791,15 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 			       struct drm_connector_state *conn_state)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_sor_state *state = to_sor_state(conn_state);
 	struct tegra_dc *dc = to_tegra_dc(conn_state->crtc);
 	unsigned long pclk = crtc_state->mode.clock * 1000;
 	struct tegra_sor *sor = to_sor(output);
+	struct drm_display_info *info;
 	int err;
 
+	info = &output->connector.display_info;
+
 	err = tegra_dc_state_setup_clock(dc, crtc_state, sor->clk_parent,
 					 pclk, 0);
 	if (err < 0) {
@@ -1589,6 +1807,18 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 		return err;
 	}
 
+	switch (info->bpc) {
+	case 8:
+	case 6:
+		state->bpc = info->bpc;
+		break;
+
+	default:
+		DRM_DEBUG_KMS("%u bits-per-color not supported\n", info->bpc);
+		state->bpc = 8;
+		break;
+	}
+
 	return 0;
 }
 
@@ -1751,9 +1981,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power off HDMI rail: %d\n", err);
 
-	reset_control_assert(sor->rst);
-	usleep_range(1000, 2000);
-	clk_disable_unprepare(sor->clk);
+	pm_runtime_put(sor->dev);
 }
 
 static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
@@ -1761,26 +1989,21 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	struct tegra_output *output = encoder_to_output(encoder);
 	unsigned int h_ref_to_sync = 1, pulse_start, max_ac;
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	unsigned int vbe, vse, hbe, hse, vbs, hbs, div;
 	struct tegra_sor_hdmi_settings *settings;
 	struct tegra_sor *sor = to_sor(output);
+	struct tegra_sor_state *state;
 	struct drm_display_mode *mode;
-	struct drm_display_info *info;
+	unsigned int div, i;
 	u32 value;
 	int err;
 
+	state = to_sor_state(output->connector.state);
 	mode = &encoder->crtc->state->adjusted_mode;
-	info = &output->connector.display_info;
 
-	err = clk_prepare_enable(sor->clk);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable clock: %d\n", err);
+	pm_runtime_get_sync(sor->dev);
 
-	usleep_range(1000, 2000);
-
-	reset_control_deassert(sor->rst);
-
-	err = clk_set_parent(sor->clk, sor->clk_safe);
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
 
@@ -1876,22 +2099,20 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value = SOR_REFCLK_DIV_INT(div) | SOR_REFCLK_DIV_FRAC(div);
 	tegra_sor_writel(sor, value, SOR_REFCLK);
 
-	/* XXX don't hardcode */
-	value = SOR_XBAR_CTRL_LINK1_XSEL(4, 4) |
-		SOR_XBAR_CTRL_LINK1_XSEL(3, 3) |
-		SOR_XBAR_CTRL_LINK1_XSEL(2, 2) |
-		SOR_XBAR_CTRL_LINK1_XSEL(1, 1) |
-		SOR_XBAR_CTRL_LINK1_XSEL(0, 0) |
-		SOR_XBAR_CTRL_LINK0_XSEL(4, 4) |
-		SOR_XBAR_CTRL_LINK0_XSEL(3, 3) |
-		SOR_XBAR_CTRL_LINK0_XSEL(2, 0) |
-		SOR_XBAR_CTRL_LINK0_XSEL(1, 1) |
-		SOR_XBAR_CTRL_LINK0_XSEL(0, 2);
-	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+	/* XXX not in TRM */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
 
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	err = clk_set_parent(sor->clk, sor->clk_parent);
+	/* switch to parent clock */
+	err = clk_set_parent(sor->clk_src, sor->clk_parent);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set source clock: %d\n", err);
+
+	err = tegra_sor_set_parent_clock(sor, sor->clk_src);
 	if (err < 0)
 		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
 
@@ -2001,7 +2222,7 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value &= ~DITHER_CONTROL_MASK;
 	value &= ~BASE_COLOR_SIZE_MASK;
 
-	switch (info->bpc) {
+	switch (state->bpc) {
 	case 6:
 		value |= BASE_COLOR_SIZE_666;
 		break;
@@ -2011,7 +2232,8 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		break;
 
 	default:
-		WARN(1, "%u bits-per-color not supported\n", info->bpc);
+		WARN(1, "%u bits-per-color not supported\n", state->bpc);
+		value |= BASE_COLOR_SIZE_888;
 		break;
 	}
 
@@ -2021,83 +2243,19 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
 
-	/* configure mode */
-	value = tegra_sor_readl(sor, SOR_STATE1);
-	value &= ~SOR_STATE_ASY_PIXELDEPTH_MASK;
-	value &= ~SOR_STATE_ASY_CRC_MODE_MASK;
-	value &= ~SOR_STATE_ASY_OWNER_MASK;
-
-	value |= SOR_STATE_ASY_CRC_MODE_COMPLETE |
-		 SOR_STATE_ASY_OWNER(dc->pipe + 1);
-
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
-		value &= ~SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		value &= ~SOR_STATE_ASY_VSYNCPOL;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL;
-
-	switch (info->bpc) {
-	case 8:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_24_444;
-		break;
-
-	case 6:
-		value |= SOR_STATE_ASY_PIXELDEPTH_BPP_18_444;
-		break;
-
-	default:
-		BUG();
-		break;
-	}
-
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
+	/* configure dynamic range of output */
 	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
 	value &= ~SOR_HEAD_STATE_RANGECOMPRESS_MASK;
 	value &= ~SOR_HEAD_STATE_DYNRANGE_MASK;
 	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
 
+	/* configure colorspace */
 	value = tegra_sor_readl(sor, SOR_HEAD_STATE0(dc->pipe));
 	value &= ~SOR_HEAD_STATE_COLORSPACE_MASK;
 	value |= SOR_HEAD_STATE_COLORSPACE_RGB;
 	tegra_sor_writel(sor, value, SOR_HEAD_STATE0(dc->pipe));
 
-	/*
-	 * TODO: The video timing programming below doesn't seem to match the
-	 * register definitions.
-	 */
-
-	value = ((mode->vtotal & 0x7fff) << 16) | (mode->htotal & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE1(dc->pipe));
-
-	/* sync end = sync width - 1 */
-	vse = mode->vsync_end - mode->vsync_start - 1;
-	hse = mode->hsync_end - mode->hsync_start - 1;
-
-	value = ((vse & 0x7fff) << 16) | (hse & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE2(dc->pipe));
-
-	/* blank end = sync end + back porch */
-	vbe = vse + (mode->vtotal - mode->vsync_end);
-	hbe = hse + (mode->htotal - mode->hsync_end);
-
-	value = ((vbe & 0x7fff) << 16) | (hbe & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE3(dc->pipe));
-
-	/* blank start = blank end + active */
-	vbs = vbe + mode->vdisplay;
-	hbs = hbe + mode->hdisplay;
-
-	value = ((vbs & 0x7fff) << 16) | (hbs & 0x7fff);
-	tegra_sor_writel(sor, value, SOR_HEAD_STATE4(dc->pipe));
-
-	tegra_sor_writel(sor, 0x1, SOR_HEAD_STATE5(dc->pipe));
+	tegra_sor_mode_set(sor, mode, state);
 
 	tegra_sor_update(sor);
 
@@ -2195,10 +2353,13 @@ static int tegra_sor_init(struct host1x_client *client)
 	 * XXX: Remove this reset once proper hand-over from firmware to
 	 * kernel is possible.
 	 */
-	err = reset_control_assert(sor->rst);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to assert SOR reset: %d\n", err);
-		return err;
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to assert SOR reset: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	err = clk_prepare_enable(sor->clk);
@@ -2209,10 +2370,13 @@ static int tegra_sor_init(struct host1x_client *client)
 
 	usleep_range(1000, 3000);
 
-	err = reset_control_deassert(sor->rst);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to deassert SOR reset: %d\n", err);
-		return err;
+	if (sor->rst) {
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
+				err);
+			return err;
+		}
 	}
 
 	err = clk_prepare_enable(sor->clk_safe);
@@ -2323,11 +2487,16 @@ static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
 	.remove = tegra_sor_hdmi_remove,
 };
 
+static const u8 tegra124_sor_xbar_cfg[5] = {
+	0, 1, 2, 3, 4
+};
+
 static const struct tegra_sor_soc tegra124_sor = {
 	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
 };
 
 static const struct tegra_sor_soc tegra210_sor = {
@@ -2335,6 +2504,11 @@ static const struct tegra_sor_soc tegra210_sor = {
 	.supports_lvds = false,
 	.supports_hdmi = false,
 	.supports_dp = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+};
+
+static const u8 tegra210_sor_xbar_cfg[5] = {
+	2, 1, 0, 3, 4
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
@@ -2345,6 +2519,8 @@ static const struct tegra_sor_soc tegra210_sor1 = {
 
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
+
+	.xbar_cfg = tegra210_sor_xbar_cfg,
 };
 
 static const struct of_device_id tegra_sor_of_match[] = {
@@ -2434,11 +2610,14 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	sor->rst = devm_reset_control_get(&pdev->dev, "sor");
-	if (IS_ERR(sor->rst)) {
-		err = PTR_ERR(sor->rst);
-		dev_err(&pdev->dev, "failed to get reset control: %d\n", err);
-		goto remove;
+	if (!pdev->dev.pm_domain) {
+		sor->rst = devm_reset_control_get(&pdev->dev, "sor");
+		if (IS_ERR(sor->rst)) {
+			err = PTR_ERR(sor->rst);
+			dev_err(&pdev->dev, "failed to get reset control: %d\n",
+				err);
+			goto remove;
+		}
 	}
 
 	sor->clk = devm_clk_get(&pdev->dev, NULL);
@@ -2448,6 +2627,16 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
+	if (sor->soc->supports_hdmi || sor->soc->supports_dp) {
+		sor->clk_src = devm_clk_get(&pdev->dev, "source");
+		if (IS_ERR(sor->clk_src)) {
+			err = PTR_ERR(sor->clk_src);
+			dev_err(sor->dev, "failed to get source clock: %d\n",
+				err);
+			goto remove;
+		}
+	}
+
 	sor->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(sor->clk_parent)) {
 		err = PTR_ERR(sor->clk_parent);
@@ -2469,6 +2658,19 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
+	platform_set_drvdata(pdev, sor);
+	pm_runtime_enable(&pdev->dev);
+
+	pm_runtime_get_sync(&pdev->dev);
+	sor->clk_brick = tegra_clk_sor_brick_register(sor, "sor1_brick");
+	pm_runtime_put(&pdev->dev);
+
+	if (IS_ERR(sor->clk_brick)) {
+		err = PTR_ERR(sor->clk_brick);
+		dev_err(&pdev->dev, "failed to register SOR clock: %d\n", err);
+		goto remove;
+	}
+
 	INIT_LIST_HEAD(&sor->client.list);
 	sor->client.ops = &sor_client_ops;
 	sor->client.dev = &pdev->dev;
@@ -2480,8 +2682,6 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	platform_set_drvdata(pdev, sor);
-
 	return 0;
 
 remove:
@@ -2497,6 +2697,8 @@ static int tegra_sor_remove(struct platform_device *pdev)
 	struct tegra_sor *sor = platform_get_drvdata(pdev);
 	int err;
 
+	pm_runtime_disable(&pdev->dev);
+
 	err = host1x_client_unregister(&sor->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
@@ -2515,10 +2717,62 @@ static int tegra_sor_remove(struct platform_device *pdev)
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int tegra_sor_suspend(struct device *dev)
+{
+	struct tegra_sor *sor = dev_get_drvdata(dev);
+	int err;
+
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(sor->clk);
+
+	return 0;
+}
+
+static int tegra_sor_resume(struct device *dev)
+{
+	struct tegra_sor *sor = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(sor->clk);
+	if (err < 0) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		return err;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (sor->rst) {
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			clk_disable_unprepare(sor->clk);
+			return err;
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops tegra_sor_pm_ops = {
+	SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL)
+};
+
 struct platform_driver tegra_sor_driver = {
 	.driver = {
 		.name = "tegra-sor",
 		.of_match_table = tegra_sor_of_match,
+		.pm = &tegra_sor_pm_ops,
 	},
 	.probe = tegra_sor_probe,
 	.remove = tegra_sor_remove,
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index 2d31d027e3f6..865c73b48968 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -27,6 +27,9 @@
 #define  SOR_STATE_ASY_PIXELDEPTH_MASK		(0xf << 17)
 #define  SOR_STATE_ASY_PIXELDEPTH_BPP_18_444	(0x2 << 17)
 #define  SOR_STATE_ASY_PIXELDEPTH_BPP_24_444	(0x5 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_30_444	(0x6 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_36_444	(0x8 << 17)
+#define  SOR_STATE_ASY_PIXELDEPTH_BPP_48_444	(0x9 << 17)
 #define  SOR_STATE_ASY_VSYNCPOL			(1 << 13)
 #define  SOR_STATE_ASY_HSYNCPOL			(1 << 12)
 #define  SOR_STATE_ASY_PROTOCOL_MASK		(0xf << 8)
diff --git a/drivers/gpu/drm/tilcdc/Kconfig b/drivers/gpu/drm/tilcdc/Kconfig
index f60a1ec84fa4..28fed7e206d0 100644
--- a/drivers/gpu/drm/tilcdc/Kconfig
+++ b/drivers/gpu/drm/tilcdc/Kconfig
@@ -2,7 +2,6 @@ config DRM_TILCDC
 	tristate "DRM Support for TI LCDC Display Controller"
 	depends on DRM && OF && ARM
 	select DRM_KMS_HELPER
-	select DRM_KMS_FB_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	select VIDEOMODE_HELPERS
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 79027b1c64d3..107c8bd04f6d 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -697,7 +697,7 @@ irqreturn_t tilcdc_crtc_irq(struct drm_crtc *crtc)
 
 		spin_unlock_irqrestore(&tilcdc_crtc->irq_lock, flags);
 
-		drm_handle_vblank(dev, 0);
+		drm_crtc_handle_vblank(crtc);
 
 		if (!skip_event) {
 			struct drm_pending_vblank_event *event;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 39386f50af87..4054d804fe06 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -146,10 +146,9 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(bo->mem.mm_node != NULL);
 	BUG_ON(!list_empty(&bo->lru));
 	BUG_ON(!list_empty(&bo->ddestroy));
-
-	if (bo->ttm)
-		ttm_tt_destroy(bo->ttm);
+	ttm_tt_destroy(bo->ttm);
 	atomic_dec(&bo->glob->bo_count);
+	fence_put(bo->moving);
 	if (bo->resv == &bo->ttm_resv)
 		reservation_object_fini(&bo->ttm_resv);
 	mutex_destroy(&bo->wu_mutex);
@@ -360,7 +359,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		ret = bdev->driver->move(bo, evict, interruptible,
 					 no_wait_gpu, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, evict, interruptible,
+					 no_wait_gpu, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -396,8 +396,7 @@ moved:
 
 out_err:
 	new_man = &bdev->man[bo->mem.mem_type];
-	if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
+	if (new_man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(bo->ttm);
 		bo->ttm = NULL;
 	}
@@ -418,11 +417,8 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 	if (bo->bdev->driver->move_notify)
 		bo->bdev->driver->move_notify(bo, NULL);
 
-	if (bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
-		ttm_tt_destroy(bo->ttm);
-		bo->ttm = NULL;
-	}
+	ttm_tt_destroy(bo->ttm);
+	bo->ttm = NULL;
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	ww_mutex_unlock (&bo->resv->lock);
@@ -688,15 +684,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS) {
-			pr_err("Failed to expire sync object before buffer eviction\n");
-		}
-		goto out;
-	}
-
 	lockdep_assert_held(&bo->resv->lock.base);
 
 	evict_mem = bo->mem;
@@ -720,7 +707,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 
 	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
 				     no_wait_gpu);
-	if (ret) {
+	if (unlikely(ret)) {
 		if (ret != -ERESTARTSYS)
 			pr_err("Buffer eviction failed\n");
 		ttm_bo_mem_put(bo, &evict_mem);
@@ -800,6 +787,34 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
 /**
+ * Add the last move fence to the BO and reserve a new shared slot.
+ */
+static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
+				 struct ttm_mem_type_manager *man,
+				 struct ttm_mem_reg *mem)
+{
+	struct fence *fence;
+	int ret;
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		reservation_object_add_shared_fence(bo->resv, fence);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (unlikely(ret))
+			return ret;
+
+		fence_put(bo->moving);
+		bo->moving = fence;
+	}
+
+	return 0;
+}
+
+/**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
  */
@@ -825,10 +840,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	if (mem->mm_node == NULL)
-		return -ENOMEM;
 	mem->mem_type = mem_type;
-	return 0;
+	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
 static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
@@ -898,6 +911,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	bool has_erestartsys = false;
 	int i, ret;
 
+	ret = reservation_object_reserve_shared(bo->resv);
+	if (unlikely(ret))
+		return ret;
+
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
@@ -931,9 +948,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
 			return ret;
-		
-		if (mem->mm_node)
+
+		if (mem->mm_node) {
+			ret = ttm_bo_add_move_fence(bo, man, mem);
+			if (unlikely(ret)) {
+				(*man->func->put_node)(man, mem);
+				return ret;
+			}
 			break;
+		}
 	}
 
 	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
@@ -1000,20 +1023,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 
 	lockdep_assert_held(&bo->resv->lock.base);
 
-	/*
-	 * Don't wait for the BO on initial allocation. This is important when
-	 * the BO has an imported reservation object.
-	 */
-	if (bo->mem.mem_type != TTM_PL_SYSTEM || bo->ttm != NULL) {
-		/*
-		 * FIXME: It's possible to pipeline buffer moves.
-		 * Have the driver move function wait for idle when necessary,
-		 * instead of doing it here.
-		 */
-		ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-		if (ret)
-			return ret;
-	}
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
 	mem.page_alignment = bo->mem.page_alignment;
@@ -1034,9 +1043,9 @@ out_unlock:
 	return ret;
 }
 
-static bool ttm_bo_mem_compat(struct ttm_placement *placement,
-			      struct ttm_mem_reg *mem,
-			      uint32_t *new_flags)
+bool ttm_bo_mem_compat(struct ttm_placement *placement,
+		       struct ttm_mem_reg *mem,
+		       uint32_t *new_flags)
 {
 	int i;
 
@@ -1068,6 +1077,7 @@ static bool ttm_bo_mem_compat(struct ttm_placement *placement,
 
 	return false;
 }
+EXPORT_SYMBOL(ttm_bo_mem_compat);
 
 int ttm_bo_validate(struct ttm_buffer_object *bo,
 			struct ttm_placement *placement,
@@ -1165,7 +1175,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->mem.page_alignment = page_alignment;
 	bo->mem.bus.io_reserved_vm = false;
 	bo->mem.bus.io_reserved_count = 0;
-	bo->priv_flags = 0;
+	bo->moving = NULL;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
 	bo->persistent_swap_storage = persistent_swap_storage;
 	bo->acc_size = acc_size;
@@ -1277,6 +1287,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_bo_global *glob = bdev->glob;
+	struct fence *fence;
 	int ret;
 
 	/*
@@ -1297,6 +1308,23 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 		spin_lock(&glob->lru_lock);
 	}
 	spin_unlock(&glob->lru_lock);
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		ret = fence_wait(fence, false);
+		fence_put(fence);
+		if (ret) {
+			if (allow_errors) {
+				return ret;
+			} else {
+				pr_err("Cleanup eviction failed\n");
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -1316,6 +1344,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		       mem_type);
 		return ret;
 	}
+	fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
@@ -1361,6 +1390,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->io_reserve_fastpath = true;
 	man->use_io_reserve_lru = false;
 	mutex_init(&man->io_reserve_mutex);
+	spin_lock_init(&man->move_lock);
 	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
@@ -1379,6 +1409,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->size = p_size;
 
 	INIT_LIST_HEAD(&man->lru);
+	man->move = NULL;
 
 	return 0;
 }
@@ -1572,47 +1603,17 @@ EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool interruptible, bool no_wait)
 {
-	struct reservation_object_list *fobj;
-	struct reservation_object *resv;
-	struct fence *excl;
-	long timeout = 15 * HZ;
-	int i;
-
-	resv = bo->resv;
-	fobj = reservation_object_get_list(resv);
-	excl = reservation_object_get_excl(resv);
-	if (excl) {
-		if (!fence_is_signaled(excl)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(excl,
-						     interruptible, timeout);
-		}
-	}
-
-	for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
-		struct fence *fence;
-		fence = rcu_dereference_protected(fobj->shared[i],
-						reservation_object_held(resv));
-
-		if (!fence_is_signaled(fence)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(fence,
-						     interruptible, timeout);
-		}
-	}
+	long timeout = no_wait ? 0 : 15 * HZ;
 
+	timeout = reservation_object_wait_timeout_rcu(bo->resv, true,
+						      interruptible, timeout);
 	if (timeout < 0)
 		return timeout;
 
 	if (timeout == 0)
 		return -EBUSY;
 
-	reservation_object_add_excl_fence(resv, NULL);
-	clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+	reservation_object_add_excl_fence(bo->resv, NULL);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_wait);
@@ -1682,14 +1683,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	/**
-	 * Wait for GPU, then move to system cached.
+	 * Move to system cached
 	 */
 
-	ret = ttm_bo_wait(bo, false, false);
-
-	if (unlikely(ret != 0))
-		goto out;
-
 	if ((bo->mem.placement & swap_placement) != swap_placement) {
 		struct ttm_mem_reg evict_mem;
 
@@ -1704,6 +1700,14 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 			goto out;
 	}
 
+	/**
+	 * Make sure BO is idle.
+	 */
+
+	ret = ttm_bo_wait(bo, false, false);
+	if (unlikely(ret != 0))
+		goto out;
+
 	ttm_bo_unmap_virtual(bo);
 
 	/**
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d9831559706e..2df602a35f92 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -321,7 +321,8 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool evict, bool no_wait_gpu,
+		       bool evict, bool interruptible,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -337,6 +338,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	unsigned long add = 0;
 	int dir;
 
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap);
 	if (ret)
 		return ret;
@@ -401,8 +406,7 @@ out2:
 	*old_mem = *new_mem;
 	new_mem->mm_node = NULL;
 
-	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) {
-		ttm_tt_unbind(ttm);
+	if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(ttm);
 		bo->ttm = NULL;
 	}
@@ -462,6 +466,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
 	INIT_LIST_HEAD(&fbo->io_reserve_lru);
+	fbo->moving = NULL;
 	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
 
@@ -634,7 +639,6 @@ EXPORT_SYMBOL(ttm_bo_kunmap);
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 			      struct fence *fence,
 			      bool evict,
-			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -649,9 +653,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
-		    (bo->ttm != NULL)) {
-			ttm_tt_unbind(bo->ttm);
+		if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 			ttm_tt_destroy(bo->ttm);
 			bo->ttm = NULL;
 		}
@@ -665,7 +667,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 * operation has completed.
 		 */
 
-		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
@@ -694,3 +697,95 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
+
+int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
+			 struct fence *fence, bool evict,
+			 struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	struct ttm_mem_type_manager *from = &bdev->man[old_mem->mem_type];
+	struct ttm_mem_type_manager *to = &bdev->man[new_mem->mem_type];
+
+	int ret;
+
+	reservation_object_add_excl_fence(bo->resv, fence);
+
+	if (!evict) {
+		struct ttm_buffer_object *ghost_obj;
+
+		/**
+		 * This should help pipeline ordinary buffer moves.
+		 *
+		 * Hang old buffer memory on a new buffer object,
+		 * and leave it to be released when the GPU
+		 * operation has completed.
+		 */
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
+		if (ret)
+			return ret;
+
+		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
+		/**
+		 * If we're not moving to fixed memory, the TTM object
+		 * needs to stay alive. Otherwhise hang it on the ghost
+		 * bo to be unbound and destroyed.
+		 */
+
+		if (!(to->flags & TTM_MEMTYPE_FLAG_FIXED))
+			ghost_obj->ttm = NULL;
+		else
+			bo->ttm = NULL;
+
+		ttm_bo_unreserve(ghost_obj);
+		ttm_bo_unref(&ghost_obj);
+
+	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
+
+		/**
+		 * BO doesn't have a TTM we need to bind/unbind. Just remember
+		 * this eviction and free up the allocation
+		 */
+
+		spin_lock(&from->move_lock);
+		if (!from->move || fence_is_later(fence, from->move)) {
+			fence_put(from->move);
+			from->move = fence_get(fence);
+		}
+		spin_unlock(&from->move_lock);
+
+		ttm_bo_free_old_node(bo);
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+	} else {
+		/**
+		 * Last resort, wait for the move to be completed.
+		 *
+		 * Should never happen in pratice.
+		 */
+
+		ret = ttm_bo_wait(bo, false, false);
+		if (ret)
+			return ret;
+
+		if (to->flags & TTM_MEMTYPE_FLAG_FIXED) {
+			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
+		ttm_bo_free_old_node(bo);
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_pipeline_move);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3216878bced3..a6ed9d5e5167 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -48,15 +48,14 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 
-	if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
+	if (likely(!bo->moving))
 		goto out_unlock;
 
 	/*
 	 * Quick non-stalling check for idle.
 	 */
-	ret = ttm_bo_wait(bo, false, true);
-	if (likely(ret == 0))
-		goto out_unlock;
+	if (fence_is_signaled(bo->moving))
+		goto out_clear;
 
 	/*
 	 * If possible, avoid waiting for GPU with mmap_sem
@@ -68,17 +67,23 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 			goto out_unlock;
 
 		up_read(&vma->vm_mm->mmap_sem);
-		(void) ttm_bo_wait(bo, true, false);
+		(void) fence_wait(bo->moving, true);
 		goto out_unlock;
 	}
 
 	/*
 	 * Ordinary wait.
 	 */
-	ret = ttm_bo_wait(bo, true, false);
-	if (unlikely(ret != 0))
+	ret = fence_wait(bo->moving, true);
+	if (unlikely(ret != 0)) {
 		ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
 			VM_FAULT_NOPAGE;
+		goto out_unlock;
+	}
+
+out_clear:
+	fence_put(bo->moving);
+	bo->moving = NULL;
 
 out_unlock:
 	return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 077ae9b2865d..526e5a78eea5 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -166,12 +166,10 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
 
 void ttm_tt_destroy(struct ttm_tt *ttm)
 {
-	if (unlikely(ttm == NULL))
+	if (ttm == NULL)
 		return;
 
-	if (ttm->state == tt_bound) {
-		ttm_tt_unbind(ttm);
-	}
+	ttm_tt_unbind(ttm);
 
 	if (ttm->state == tt_unbound)
 		ttm_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/udl/Kconfig b/drivers/gpu/drm/udl/Kconfig
index 613ab0622d6e..1616ec4f4d84 100644
--- a/drivers/gpu/drm/udl/Kconfig
+++ b/drivers/gpu/drm/udl/Kconfig
@@ -4,12 +4,7 @@ config DRM_UDL
 	depends on USB_SUPPORT
 	depends on USB_ARCH_HAS_HCD
 	select USB
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	select FB_DEFERRED_IO
 	select DRM_KMS_HELPER
-        select DRM_KMS_FB_HELPER
 	help
 	  This is a KMS driver for the USB displaylink video adapters.
           Say M/Y to add support for these devices via drm/kms interfaces.
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index c20408940cd0..17d34e0edbdd 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -94,7 +94,6 @@ static void udl_usb_disconnect(struct usb_interface *interface)
 	struct drm_device *dev = usb_get_intfdata(interface);
 
 	drm_kms_helper_poll_disable(dev);
-	drm_connector_unregister_all(dev);
 	udl_fbdev_unplug(dev);
 	udl_drop_usb(dev);
 	drm_unplug_dev(dev);
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 4c0f26a644a3..8fc2b731b59a 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -46,12 +46,17 @@ struct vc4_crtc {
 	const struct vc4_crtc_data *data;
 	void __iomem *regs;
 
+	/* Timestamp at start of vblank irq - unaffected by lock delays. */
+	ktime_t t_vblank;
+
 	/* Which HVS channel we're using for our CRTC. */
 	int channel;
 
 	u8 lut_r[256];
 	u8 lut_g[256];
 	u8 lut_b[256];
+	/* Size in pixels of the COB memory allocated to this CRTC. */
+	u32 cob_size;
 
 	struct drm_pending_vblank_event *event;
 };
@@ -146,6 +151,144 @@ int vc4_crtc_debugfs_regs(struct seq_file *m, void *unused)
 }
 #endif
 
+int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+			    unsigned int flags, int *vpos, int *hpos,
+			    ktime_t *stime, ktime_t *etime,
+			    const struct drm_display_mode *mode)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
+	u32 val;
+	int fifo_lines;
+	int vblank_lines;
+	int ret = 0;
+
+	/*
+	 * XXX Doesn't work well in interlaced mode yet, partially due
+	 * to problems in vc4 kms or drm core interlaced mode handling,
+	 * so disable for now in interlaced mode.
+	 */
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		return ret;
+
+	/* preempt_disable_rt() should go right here in PREEMPT_RT patchset. */
+
+	/* Get optional system timestamp before query. */
+	if (stime)
+		*stime = ktime_get();
+
+	/*
+	 * Read vertical scanline which is currently composed for our
+	 * pixelvalve by the HVS, and also the scaler status.
+	 */
+	val = HVS_READ(SCALER_DISPSTATX(vc4_crtc->channel));
+
+	/* Get optional system timestamp after query. */
+	if (etime)
+		*etime = ktime_get();
+
+	/* preempt_enable_rt() should go right here in PREEMPT_RT patchset. */
+
+	/* Vertical position of hvs composed scanline. */
+	*vpos = VC4_GET_FIELD(val, SCALER_DISPSTATX_LINE);
+
+	/* No hpos info available. */
+	if (hpos)
+		*hpos = 0;
+
+	/* This is the offset we need for translating hvs -> pv scanout pos. */
+	fifo_lines = vc4_crtc->cob_size / mode->crtc_hdisplay;
+
+	if (fifo_lines > 0)
+		ret |= DRM_SCANOUTPOS_VALID;
+
+	/* HVS more than fifo_lines into frame for compositing? */
+	if (*vpos > fifo_lines) {
+		/*
+		 * We are in active scanout and can get some meaningful results
+		 * from HVS. The actual PV scanout can not trail behind more
+		 * than fifo_lines as that is the fifo's capacity. Assume that
+		 * in active scanout the HVS and PV work in lockstep wrt. HVS
+		 * refilling the fifo and PV consuming from the fifo, ie.
+		 * whenever the PV consumes and frees up a scanline in the
+		 * fifo, the HVS will immediately refill it, therefore
+		 * incrementing vpos. Therefore we choose HVS read position -
+		 * fifo size in scanlines as a estimate of the real scanout
+		 * position of the PV.
+		 */
+		*vpos -= fifo_lines + 1;
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			*vpos /= 2;
+
+		ret |= DRM_SCANOUTPOS_ACCURATE;
+		return ret;
+	}
+
+	/*
+	 * Less: This happens when we are in vblank and the HVS, after getting
+	 * the VSTART restart signal from the PV, just started refilling its
+	 * fifo with new lines from the top-most lines of the new framebuffers.
+	 * The PV does not scan out in vblank, so does not remove lines from
+	 * the fifo, so the fifo will be full quickly and the HVS has to pause.
+	 * We can't get meaningful readings wrt. scanline position of the PV
+	 * and need to make things up in a approximative but consistent way.
+	 */
+	ret |= DRM_SCANOUTPOS_IN_VBLANK;
+	vblank_lines = mode->crtc_vtotal - mode->crtc_vdisplay;
+
+	if (flags & DRM_CALLED_FROM_VBLIRQ) {
+		/*
+		 * Assume the irq handler got called close to first
+		 * line of vblank, so PV has about a full vblank
+		 * scanlines to go, and as a base timestamp use the
+		 * one taken at entry into vblank irq handler, so it
+		 * is not affected by random delays due to lock
+		 * contention on event_lock or vblank_time lock in
+		 * the core.
+		 */
+		*vpos = -vblank_lines;
+
+		if (stime)
+			*stime = vc4_crtc->t_vblank;
+		if (etime)
+			*etime = vc4_crtc->t_vblank;
+
+		/*
+		 * If the HVS fifo is not yet full then we know for certain
+		 * we are at the very beginning of vblank, as the hvs just
+		 * started refilling, and the stime and etime timestamps
+		 * truly correspond to start of vblank.
+		 */
+		if ((val & SCALER_DISPSTATX_FULL) != SCALER_DISPSTATX_FULL)
+			ret |= DRM_SCANOUTPOS_ACCURATE;
+	} else {
+		/*
+		 * No clue where we are inside vblank. Return a vpos of zero,
+		 * which will cause calling code to just return the etime
+		 * timestamp uncorrected. At least this is no worse than the
+		 * standard fallback.
+		 */
+		*vpos = 0;
+	}
+
+	return ret;
+}
+
+int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
+				  int *max_error, struct timeval *vblank_time,
+				  unsigned flags)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc *vc4_crtc = vc4->crtc[crtc_id];
+	struct drm_crtc *crtc = &vc4_crtc->base;
+	struct drm_crtc_state *state = crtc->state;
+
+	/* Helper routine in DRM core does all the work: */
+	return drm_calc_vbltimestamp_from_scanoutpos(dev, crtc_id, max_error,
+						     vblank_time, flags,
+						     &state->adjusted_mode);
+}
+
 static void vc4_crtc_destroy(struct drm_crtc *crtc)
 {
 	drm_crtc_cleanup(crtc);
@@ -449,14 +592,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
 
-	HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-		  vc4_state->mm.start);
-
-	if (debug_dump_regs) {
-		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
-		vc4_hvs_dump_state(dev);
-	}
-
 	if (crtc->state->event) {
 		unsigned long flags;
 
@@ -466,8 +601,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 		spin_lock_irqsave(&dev->event_lock, flags);
 		vc4_crtc->event = crtc->state->event;
-		spin_unlock_irqrestore(&dev->event_lock, flags);
 		crtc->state->event = NULL;
+
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	} else {
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+	}
+
+	if (debug_dump_regs) {
+		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
+		vc4_hvs_dump_state(dev);
 	}
 }
 
@@ -493,12 +640,17 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 {
 	struct drm_crtc *crtc = &vc4_crtc->base;
 	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	u32 chan = vc4_crtc->channel;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (vc4_crtc->event) {
+	if (vc4_crtc->event &&
+	    (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) {
 		drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
 		vc4_crtc->event = NULL;
+		drm_crtc_vblank_put(crtc);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
@@ -510,6 +662,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
 	irqreturn_t ret = IRQ_NONE;
 
 	if (stat & PV_INT_VFP_START) {
+		vc4_crtc->t_vblank = ktime_get();
 		CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
 		drm_crtc_handle_vblank(&vc4_crtc->base);
 		vc4_crtc_handle_page_flip(vc4_crtc);
@@ -549,6 +702,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
 
+	drm_crtc_vblank_put(crtc);
 	drm_framebuffer_unreference(flip_state->fb);
 	kfree(flip_state);
 
@@ -591,6 +745,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 		return ret;
 	}
 
+	WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
 	/* Immediately update the plane's legacy fb pointer, so that later
 	 * modeset prep sees the state that will be present when the semaphore
 	 * is released.
@@ -711,6 +867,22 @@ static void vc4_set_crtc_possible_masks(struct drm_device *drm,
 	}
 }
 
+static void
+vc4_crtc_get_cob_allocation(struct vc4_crtc *vc4_crtc)
+{
+	struct drm_device *drm = vc4_crtc->base.dev;
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	u32 dispbase = HVS_READ(SCALER_DISPBASEX(vc4_crtc->channel));
+	/* Top/base are supposed to be 4-pixel aligned, but the
+	 * Raspberry Pi firmware fills the low bits (which are
+	 * presumably ignored).
+	 */
+	u32 top = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_TOP) & ~3;
+	u32 base = VC4_GET_FIELD(dispbase, SCALER_DISPBASEX_BASE) & ~3;
+
+	vc4_crtc->cob_size = top - base + 4;
+}
+
 static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -787,6 +959,8 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data)
 		crtc->cursor = cursor_plane;
 	}
 
+	vc4_crtc_get_cob_allocation(vc4_crtc);
+
 	CRTC_WRITE(PV_INTEN, 0);
 	CRTC_WRITE(PV_INTSTAT, PV_INT_VFP_START);
 	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index dba1114297e4..275fedbdbd9e 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -227,14 +227,12 @@ static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev,
 {
 	struct drm_connector *connector = NULL;
 	struct vc4_dpi_connector *dpi_connector;
-	int ret = 0;
 
 	dpi_connector = devm_kzalloc(dev->dev, sizeof(*dpi_connector),
 				     GFP_KERNEL);
-	if (!dpi_connector) {
-		ret = -ENOMEM;
-		goto fail;
-	}
+	if (!dpi_connector)
+		return ERR_PTR(-ENOMEM);
+
 	connector = &dpi_connector->base;
 
 	dpi_connector->encoder = dpi->encoder;
@@ -251,12 +249,6 @@ static struct drm_connector *vc4_dpi_connector_init(struct drm_device *dev,
 	drm_mode_connector_attach_encoder(connector, dpi->encoder);
 
 	return connector;
-
- fail:
-	if (connector)
-		vc4_dpi_connector_destroy(connector);
-
-	return ERR_PTR(ret);
 }
 
 static const struct drm_encoder_funcs vc4_dpi_encoder_funcs = {
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 9e88231b8906..8b42d31a7f0e 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include "drm_fb_cma_helper.h"
 
 #include "uapi/drm/vc4_drm.h"
@@ -43,12 +44,54 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index)
 	return map;
 }
 
+static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_vc4_get_param *args = data;
+	int ret;
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	switch (args->param) {
+	case DRM_VC4_PARAM_V3D_IDENT0:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT0);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_V3D_IDENT1:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT1);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_V3D_IDENT2:
+		ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
+		if (ret)
+			return ret;
+		args->value = V3D_READ(V3D_IDENT2);
+		pm_runtime_put(&vc4->v3d->pdev->dev);
+		break;
+	case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
+		args->value = true;
+		break;
+	default:
+		DRM_DEBUG("Unknown parameter %d\n", args->param);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static void vc4_lastclose(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	if (vc4->fbdev)
-		drm_fbdev_cma_restore_mode(vc4->fbdev);
+	drm_fbdev_cma_restore_mode(vc4->fbdev);
 }
 
 static const struct file_operations vc4_drm_fops = {
@@ -66,14 +109,15 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
-	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
 			  DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF_DRV(VC4_GET_PARAM, vc4_get_param_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -91,7 +135,9 @@ static struct drm_driver vc4_drm_driver = {
 
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
-	.get_vblank_counter = drm_vblank_count,
+	.get_vblank_counter = drm_vblank_no_hw_counter,
+	.get_scanout_position = vc4_crtc_get_scanoutpos,
+	.get_vblank_timestamp = vc4_crtc_get_vblank_timestamp,
 
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = vc4_debugfs_init,
@@ -195,8 +241,6 @@ static int vc4_drm_bind(struct device *dev)
 	vc4_bo_cache_init(drm);
 
 	drm_mode_config_init(drm);
-	if (ret)
-		goto unref;
 
 	vc4_gem_init(drm);
 
@@ -218,7 +262,6 @@ unbind_all:
 	component_unbind_all(dev, drm);
 gem_destroy:
 	vc4_gem_destroy(drm);
-unref:
 	drm_dev_unref(drm);
 	vc4_bo_cache_destroy(drm);
 	return ret;
@@ -246,8 +289,8 @@ static const struct component_master_ops vc4_drm_ops = {
 static struct platform_driver *const component_drivers[] = {
 	&vc4_hdmi_driver,
 	&vc4_dpi_driver,
-	&vc4_crtc_driver,
 	&vc4_hvs_driver,
+	&vc4_crtc_driver,
 	&vc4_v3d_driver,
 };
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index c799baabc008..489e3de0c050 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -355,6 +355,9 @@ struct vc4_validated_shader_info {
 	uint32_t uniforms_src_size;
 	uint32_t num_texture_samples;
 	struct vc4_texture_sample_info *texture_samples;
+
+	uint32_t num_uniform_addr_offsets;
+	uint32_t *uniform_addr_offsets;
 };
 
 /**
@@ -415,6 +418,13 @@ extern struct platform_driver vc4_crtc_driver;
 int vc4_enable_vblank(struct drm_device *dev, unsigned int crtc_id);
 void vc4_disable_vblank(struct drm_device *dev, unsigned int crtc_id);
 int vc4_crtc_debugfs_regs(struct seq_file *m, void *arg);
+int vc4_crtc_get_scanoutpos(struct drm_device *dev, unsigned int crtc_id,
+			    unsigned int flags, int *vpos, int *hpos,
+			    ktime_t *stime, ktime_t *etime,
+			    const struct drm_display_mode *mode);
+int vc4_crtc_get_vblank_timestamp(struct drm_device *dev, unsigned int crtc_id,
+				  int *max_error, struct timeval *vblank_time,
+				  unsigned flags);
 
 /* vc4_debugfs.c */
 int vc4_debugfs_init(struct drm_minor *minor);
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 68df91c3f860..4452f3631cac 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -456,12 +456,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	if (IS_ERR(hdmi->hd_regs))
 		return PTR_ERR(hdmi->hd_regs);
 
-	ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
-	if (!ddc_node) {
-		DRM_ERROR("Failed to find ddc node in device tree\n");
-		return -ENODEV;
-	}
-
 	hdmi->pixel_clock = devm_clk_get(dev, "pixel");
 	if (IS_ERR(hdmi->pixel_clock)) {
 		DRM_ERROR("Failed to get pixel clock\n");
@@ -473,7 +467,14 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		return PTR_ERR(hdmi->hsm_clock);
 	}
 
+	ddc_node = of_parse_phandle(dev->of_node, "ddc", 0);
+	if (!ddc_node) {
+		DRM_ERROR("Failed to find ddc node in device tree\n");
+		return -ENODEV;
+	}
+
 	hdmi->ddc = of_find_i2c_adapter_by_node(ddc_node);
+	of_node_put(ddc_node);
 	if (!hdmi->ddc) {
 		DRM_DEBUG("Failed to get ddc i2c adapter by node\n");
 		return -EPROBE_DEFER;
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 8f4d5ffc32be..4ac894d993cd 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -26,8 +26,7 @@ static void vc4_output_poll_changed(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
-	if (vc4->fbdev)
-		drm_fbdev_cma_hotplug_event(vc4->fbdev);
+	drm_fbdev_cma_hotplug_event(vc4->fbdev);
 }
 
 struct vc4_commit {
@@ -119,10 +118,18 @@ static int vc4_atomic_commit(struct drm_device *dev,
 		return -ENOMEM;
 
 	/* Make sure that any outstanding modesets have finished. */
-	ret = down_interruptible(&vc4->async_modeset);
-	if (ret) {
-		kfree(c);
-		return ret;
+	if (nonblock) {
+		ret = down_trylock(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return -EBUSY;
+		}
+	} else {
+		ret = down_interruptible(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return ret;
+		}
 	}
 
 	ret = drm_atomic_helper_prepare_planes(dev, state);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 5d2c3d9fd17a..29e4b400e25e 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -94,6 +94,14 @@ static const struct hvs_format {
 		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
 	},
 	{
+		.drm = DRM_FORMAT_ABGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+		.pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = true,
+	},
+	{
+		.drm = DRM_FORMAT_XBGR8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
+		.pixel_order = HVS_PIXEL_ORDER_ARGB, .has_alpha = false,
+	},
+	{
 		.drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
 		.pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
 	},
diff --git a/drivers/gpu/drm/vc4/vc4_qpu_defines.h b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
index d5c2f3c85ebb..f4e795a0d3f6 100644
--- a/drivers/gpu/drm/vc4/vc4_qpu_defines.h
+++ b/drivers/gpu/drm/vc4/vc4_qpu_defines.h
@@ -70,7 +70,7 @@ enum qpu_raddr {
 	QPU_R_ELEM_QPU = 38,
 	QPU_R_NOP,
 	QPU_R_XY_PIXEL_COORD = 41,
-	QPU_R_MS_REV_FLAGS = 41,
+	QPU_R_MS_REV_FLAGS = 42,
 	QPU_R_VPM = 48,
 	QPU_R_VPM_LD_BUSY,
 	QPU_R_VPM_LD_WAIT,
@@ -230,6 +230,15 @@ enum qpu_unpack_r4 {
 #define QPU_COND_MUL_SHIFT              46
 #define QPU_COND_MUL_MASK               QPU_MASK(48, 46)
 
+#define QPU_BRANCH_COND_SHIFT           52
+#define QPU_BRANCH_COND_MASK            QPU_MASK(55, 52)
+
+#define QPU_BRANCH_REL                  ((uint64_t)1 << 51)
+#define QPU_BRANCH_REG                  ((uint64_t)1 << 50)
+
+#define QPU_BRANCH_RADDR_A_SHIFT        45
+#define QPU_BRANCH_RADDR_A_MASK         QPU_MASK(49, 45)
+
 #define QPU_SF                          ((uint64_t)1 << 45)
 
 #define QPU_WADDR_ADD_SHIFT             38
@@ -261,4 +270,10 @@ enum qpu_unpack_r4 {
 #define QPU_OP_ADD_SHIFT                24
 #define QPU_OP_ADD_MASK                 QPU_MASK(28, 24)
 
+#define QPU_LOAD_IMM_SHIFT              0
+#define QPU_LOAD_IMM_MASK               QPU_MASK(31, 0)
+
+#define QPU_BRANCH_TARGET_SHIFT         0
+#define QPU_BRANCH_TARGET_MASK          QPU_MASK(31, 0)
+
 #endif /* VC4_QPU_DEFINES_H */
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 6163b95c5411..160942a9180e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -341,6 +341,10 @@
 #define SCALER_DISPLACT0                        0x00000030
 #define SCALER_DISPLACT1                        0x00000034
 #define SCALER_DISPLACT2                        0x00000038
+#define SCALER_DISPLACTX(x)			(SCALER_DISPLACT0 +	\
+						 (x) * (SCALER_DISPLACT1 - \
+							SCALER_DISPLACT0))
+
 #define SCALER_DISPCTRL0                        0x00000040
 # define SCALER_DISPCTRLX_ENABLE		BIT(31)
 # define SCALER_DISPCTRLX_RESET			BIT(30)
@@ -362,7 +366,6 @@
 # define SCALER_DISPBKGND_FILL			BIT(24)
 
 #define SCALER_DISPSTAT0                        0x00000048
-#define SCALER_DISPBASE0                        0x0000004c
 # define SCALER_DISPSTATX_MODE_MASK		VC4_MASK(31, 30)
 # define SCALER_DISPSTATX_MODE_SHIFT		30
 # define SCALER_DISPSTATX_MODE_DISABLED		0
@@ -371,6 +374,24 @@
 # define SCALER_DISPSTATX_MODE_EOF		3
 # define SCALER_DISPSTATX_FULL			BIT(29)
 # define SCALER_DISPSTATX_EMPTY			BIT(28)
+# define SCALER_DISPSTATX_FRAME_COUNT_MASK	VC4_MASK(17, 12)
+# define SCALER_DISPSTATX_FRAME_COUNT_SHIFT	12
+# define SCALER_DISPSTATX_LINE_MASK		VC4_MASK(11, 0)
+# define SCALER_DISPSTATX_LINE_SHIFT		0
+
+#define SCALER_DISPBASE0                        0x0000004c
+/* Last pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel.  Must be 4-pixel aligned (and thus 4 pixels less than the
+ * next COB base).
+ */
+# define SCALER_DISPBASEX_TOP_MASK		VC4_MASK(31, 16)
+# define SCALER_DISPBASEX_TOP_SHIFT		16
+/* First pixel in the COB (display FIFO memory) allocated to this HVS
+ * channel.  Must be 4-pixel aligned.
+ */
+# define SCALER_DISPBASEX_BASE_MASK		VC4_MASK(15, 0)
+# define SCALER_DISPBASEX_BASE_SHIFT		0
+
 #define SCALER_DISPCTRL1                        0x00000050
 #define SCALER_DISPBKGND1                       0x00000054
 #define SCALER_DISPBKGNDX(x)			(SCALER_DISPBKGND0 +        \
@@ -381,6 +402,9 @@
 						 (x) * (SCALER_DISPSTAT1 - \
 							SCALER_DISPSTAT0))
 #define SCALER_DISPBASE1                        0x0000005c
+#define SCALER_DISPBASEX(x)			(SCALER_DISPBASE0 +        \
+						 (x) * (SCALER_DISPBASE1 - \
+							SCALER_DISPBASE0))
 #define SCALER_DISPCTRL2                        0x00000060
 #define SCALER_DISPCTRLX(x)			(SCALER_DISPCTRL0 +        \
 						 (x) * (SCALER_DISPCTRL1 - \
diff --git a/drivers/gpu/drm/vc4/vc4_validate.c b/drivers/gpu/drm/vc4/vc4_validate.c
index 24c2c746e8f3..9ce1d0adf882 100644
--- a/drivers/gpu/drm/vc4/vc4_validate.c
+++ b/drivers/gpu/drm/vc4/vc4_validate.c
@@ -802,7 +802,7 @@ validate_gl_shader_rec(struct drm_device *dev,
 		uint32_t src_offset = *(uint32_t *)(pkt_u + o);
 		uint32_t *texture_handles_u;
 		void *uniform_data_u;
-		uint32_t tex;
+		uint32_t tex, uni;
 
 		*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;
 
@@ -840,6 +840,17 @@ validate_gl_shader_rec(struct drm_device *dev,
 			}
 		}
 
+		/* Fill in the uniform slots that need this shader's
+		 * start-of-uniforms address (used for resetting the uniform
+		 * stream in the presence of control flow).
+		 */
+		for (uni = 0;
+		     uni < validated_shader->num_uniform_addr_offsets;
+		     uni++) {
+			uint32_t o = validated_shader->uniform_addr_offsets[uni];
+			((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;
+		}
+
 		*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;
 
 		exec->uniforms_u += validated_shader->uniforms_src_size;
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index f67124b4c534..46527e989ce3 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -39,7 +39,17 @@
 #include "vc4_drv.h"
 #include "vc4_qpu_defines.h"
 
+#define LIVE_REG_COUNT (32 + 32 + 4)
+
 struct vc4_shader_validation_state {
+	/* Current IP being validated. */
+	uint32_t ip;
+
+	/* IP at the end of the BO, do not read shader[max_ip] */
+	uint32_t max_ip;
+
+	uint64_t *shader;
+
 	struct vc4_texture_sample_info tmu_setup[2];
 	int tmu_write_count[2];
 
@@ -49,8 +59,30 @@ struct vc4_shader_validation_state {
 	 *
 	 * This is used for the validation of direct address memory reads.
 	 */
-	uint32_t live_min_clamp_offsets[32 + 32 + 4];
-	bool live_max_clamp_regs[32 + 32 + 4];
+	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
+	bool live_max_clamp_regs[LIVE_REG_COUNT];
+	uint32_t live_immediates[LIVE_REG_COUNT];
+
+	/* Bitfield of which IPs are used as branch targets.
+	 *
+	 * Used for validation that the uniform stream is updated at the right
+	 * points and clearing the texturing/clamping state.
+	 */
+	unsigned long *branch_targets;
+
+	/* Set when entering a basic block, and cleared when the uniform
+	 * address update is found.  This is used to make sure that we don't
+	 * read uniforms when the address is undefined.
+	 */
+	bool needs_uniform_address_update;
+
+	/* Set when we find a backwards branch.  If the branch is backwards,
+	 * the taraget is probably doing an address reset to read uniforms,
+	 * and so we need to be sure that a uniforms address is present in the
+	 * stream, even if the shader didn't need to read uniforms in later
+	 * basic blocks.
+	 */
+	bool needs_uniform_address_for_loop;
 };
 
 static uint32_t
@@ -129,11 +161,11 @@ record_texture_sample(struct vc4_validated_shader_info *validated_shader,
 }
 
 static bool
-check_tmu_write(uint64_t inst,
-		struct vc4_validated_shader_info *validated_shader,
+check_tmu_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
 		bool is_mul)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
@@ -162,7 +194,7 @@ check_tmu_write(uint64_t inst,
 			return false;
 		}
 
-		/* We assert that the the clamped address is the first
+		/* We assert that the clamped address is the first
 		 * argument, and the UBO base address is the second argument.
 		 * This is arbitrary, but simpler than supporting flipping the
 		 * two either way.
@@ -212,8 +244,14 @@ check_tmu_write(uint64_t inst,
 	/* Since direct uses a RADDR uniform reference, it will get counted in
 	 * check_instruction_reads()
 	 */
-	if (!is_direct)
+	if (!is_direct) {
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Texturing with undefined uniform address\n");
+			return false;
+		}
+
 		validated_shader->uniforms_size += 4;
+	}
 
 	if (submit) {
 		if (!record_texture_sample(validated_shader,
@@ -227,23 +265,138 @@ check_tmu_write(uint64_t inst,
 	return true;
 }
 
+static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
+{
+	uint32_t o = validated_shader->num_uniform_addr_offsets;
+	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
+
+	validated_shader->uniform_addr_offsets =
+		krealloc(validated_shader->uniform_addr_offsets,
+			 (o + 1) *
+			 sizeof(*validated_shader->uniform_addr_offsets),
+			 GFP_KERNEL);
+	if (!validated_shader->uniform_addr_offsets)
+		return false;
+
+	validated_shader->uniform_addr_offsets[o] = num_uniforms;
+	validated_shader->num_uniform_addr_offsets++;
+
+	return true;
+}
+
 static bool
-check_reg_write(uint64_t inst,
-		struct vc4_validated_shader_info *validated_shader,
+validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
+			       struct vc4_shader_validation_state *validation_state,
+			       bool is_mul)
+{
+	uint64_t inst = validation_state->shader[validation_state->ip];
+	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
+	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
+	/* We want our reset to be pointing at whatever uniform follows the
+	 * uniforms base address.
+	 */
+	u32 expected_offset = validated_shader->uniforms_size + 4;
+
+	/* We only support absolute uniform address changes, and we
+	 * require that they be in the current basic block before any
+	 * of its uniform reads.
+	 *
+	 * One could potentially emit more efficient QPU code, by
+	 * noticing that (say) an if statement does uniform control
+	 * flow for all threads and that the if reads the same number
+	 * of uniforms on each side.  However, this scheme is easy to
+	 * validate so it's all we allow for now.
+	 */
+
+	if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_NONE) {
+		DRM_ERROR("uniforms address change must be "
+			  "normal math\n");
+		return false;
+	}
+
+	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
+		DRM_ERROR("Uniform address reset must be an ADD.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
+		DRM_ERROR("Uniform address reset must be unconditional.\n");
+		return false;
+	}
+
+	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
+	    !(inst & QPU_PM)) {
+		DRM_ERROR("No packing allowed on uniforms reset\n");
+		return false;
+	}
+
+	if (add_lri == -1) {
+		DRM_ERROR("First argument of uniform address write must be "
+			  "an immediate value.\n");
+		return false;
+	}
+
+	if (validation_state->live_immediates[add_lri] != expected_offset) {
+		DRM_ERROR("Resetting uniforms with offset %db instead of %db\n",
+			  validation_state->live_immediates[add_lri],
+			  expected_offset);
+		return false;
+	}
+
+	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
+	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
+		DRM_ERROR("Second argument of uniform address write must be "
+			  "a uniform.\n");
+		return false;
+	}
+
+	validation_state->needs_uniform_address_update = false;
+	validation_state->needs_uniform_address_for_loop = false;
+	return require_uniform_address_uniform(validated_shader);
+}
+
+static bool
+check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		struct vc4_shader_validation_state *validation_state,
 		bool is_mul)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr = (is_mul ?
 			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
 			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
+	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+	bool ws = inst & QPU_WS;
+	bool is_b = is_mul ^ ws;
+	u32 lri = waddr_to_live_reg_index(waddr, is_b);
+
+	if (lri != -1) {
+		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
+		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
+
+		if (sig == QPU_SIG_LOAD_IMM &&
+		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
+		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
+		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
+			validation_state->live_immediates[lri] =
+				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
+		} else {
+			validation_state->live_immediates[lri] = ~0;
+		}
+	}
 
 	switch (waddr) {
 	case QPU_W_UNIFORMS_ADDRESS:
-		/* XXX: We'll probably need to support this for reladdr, but
-		 * it's definitely a security-related one.
-		 */
-		DRM_ERROR("uniforms address load unsupported\n");
-		return false;
+		if (is_b) {
+			DRM_ERROR("relative uniforms address change "
+				  "unsupported\n");
+			return false;
+		}
+
+		return validate_uniform_address_write(validated_shader,
+						      validation_state,
+						      is_mul);
 
 	case QPU_W_TLB_COLOR_MS:
 	case QPU_W_TLB_COLOR_ALL:
@@ -261,7 +414,7 @@ check_reg_write(uint64_t inst,
 	case QPU_W_TMU1_T:
 	case QPU_W_TMU1_R:
 	case QPU_W_TMU1_B:
-		return check_tmu_write(inst, validated_shader, validation_state,
+		return check_tmu_write(validated_shader, validation_state,
 				       is_mul);
 
 	case QPU_W_HOST_INT:
@@ -294,10 +447,10 @@ check_reg_write(uint64_t inst,
 }
 
 static void
-track_live_clamps(uint64_t inst,
-		  struct vc4_validated_shader_info *validated_shader,
+track_live_clamps(struct vc4_validated_shader_info *validated_shader,
 		  struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
@@ -369,10 +522,10 @@ track_live_clamps(uint64_t inst,
 }
 
 static bool
-check_instruction_writes(uint64_t inst,
-			 struct vc4_validated_shader_info *validated_shader,
+check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
 			 struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
 	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
 	bool ok;
@@ -382,20 +535,44 @@ check_instruction_writes(uint64_t inst,
 		return false;
 	}
 
-	ok = (check_reg_write(inst, validated_shader, validation_state,
-			      false) &&
-	      check_reg_write(inst, validated_shader, validation_state,
-			      true));
+	ok = (check_reg_write(validated_shader, validation_state, false) &&
+	      check_reg_write(validated_shader, validation_state, true));
 
-	track_live_clamps(inst, validated_shader, validation_state);
+	track_live_clamps(validated_shader, validation_state);
 
 	return ok;
 }
 
 static bool
-check_instruction_reads(uint64_t inst,
-			struct vc4_validated_shader_info *validated_shader)
+check_branch(uint64_t inst,
+	     struct vc4_validated_shader_info *validated_shader,
+	     struct vc4_shader_validation_state *validation_state,
+	     int ip)
+{
+	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+
+	if ((int)branch_imm < 0)
+		validation_state->needs_uniform_address_for_loop = true;
+
+	/* We don't want to have to worry about validation of this, and
+	 * there's no need for it.
+	 */
+	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
+		DRM_ERROR("branch instruction at %d wrote a register.\n",
+			  validation_state->ip);
+		return false;
+	}
+
+	return true;
+}
+
+static bool
+check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
+			struct vc4_shader_validation_state *validation_state)
 {
+	uint64_t inst = validation_state->shader[validation_state->ip];
 	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
 	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
 	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
@@ -407,40 +584,204 @@ check_instruction_reads(uint64_t inst,
 		 * already be OOM.
 		 */
 		validated_shader->uniforms_size += 4;
+
+		if (validation_state->needs_uniform_address_update) {
+			DRM_ERROR("Uniform read with undefined uniform "
+				  "address\n");
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/* Make sure that all branches are absolute and point within the shader, and
+ * note their targets for later.
+ */
+static bool
+vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t max_branch_target = 0;
+	bool found_shader_end = false;
+	int ip;
+	int shader_end_ip = 0;
+	int last_branch = -2;
+
+	for (ip = 0; ip < validation_state->max_ip; ip++) {
+		uint64_t inst = validation_state->shader[ip];
+		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
+		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+		uint32_t after_delay_ip = ip + 4;
+		uint32_t branch_target_ip;
+
+		if (sig == QPU_SIG_PROG_END) {
+			shader_end_ip = ip;
+			found_shader_end = true;
+			continue;
+		}
+
+		if (sig != QPU_SIG_BRANCH)
+			continue;
+
+		if (ip - last_branch < 4) {
+			DRM_ERROR("Branch at %d during delay slots\n", ip);
+			return false;
+		}
+		last_branch = ip;
+
+		if (inst & QPU_BRANCH_REG) {
+			DRM_ERROR("branching from register relative "
+				  "not supported\n");
+			return false;
+		}
+
+		if (!(inst & QPU_BRANCH_REL)) {
+			DRM_ERROR("relative branching required\n");
+			return false;
+		}
+
+		/* The actual branch target is the instruction after the delay
+		 * slots, plus whatever byte offset is in the low 32 bits of
+		 * the instruction.  Make sure we're not branching beyond the
+		 * end of the shader object.
+		 */
+		if (branch_imm % sizeof(inst) != 0) {
+			DRM_ERROR("branch target not aligned\n");
+			return false;
+		}
+
+		branch_target_ip = after_delay_ip + (branch_imm >> 3);
+		if (branch_target_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d outside of shader (ip %d/%d)\n",
+				  ip, branch_target_ip,
+				  validation_state->max_ip);
+			return false;
+		}
+		set_bit(branch_target_ip, validation_state->branch_targets);
+
+		/* Make sure that the non-branching path is also not outside
+		 * the shader.
+		 */
+		if (after_delay_ip >= validation_state->max_ip) {
+			DRM_ERROR("Branch at %d continues past shader end "
+				  "(%d/%d)\n",
+				  ip, after_delay_ip, validation_state->max_ip);
+			return false;
+		}
+		set_bit(after_delay_ip, validation_state->branch_targets);
+		max_branch_target = max(max_branch_target, after_delay_ip);
+
+		/* There are two delay slots after program end is signaled
+		 * that are still executed, then we're finished.
+		 */
+		if (found_shader_end && ip == shader_end_ip + 2)
+			break;
+	}
+
+	if (max_branch_target > shader_end_ip) {
+		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
+		return false;
 	}
 
 	return true;
 }
 
+/* Resets any known state for the shader, used when we may be branched to from
+ * multiple locations in the program (or at shader start).
+ */
+static void
+reset_validation_state(struct vc4_shader_validation_state *validation_state)
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
+
+	for (i = 0; i < LIVE_REG_COUNT; i++) {
+		validation_state->live_min_clamp_offsets[i] = ~0;
+		validation_state->live_max_clamp_regs[i] = false;
+		validation_state->live_immediates[i] = ~0;
+	}
+}
+
+static bool
+texturing_in_progress(struct vc4_shader_validation_state *validation_state)
+{
+	return (validation_state->tmu_write_count[0] != 0 ||
+		validation_state->tmu_write_count[1] != 0);
+}
+
+static bool
+vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
+{
+	uint32_t ip = validation_state->ip;
+
+	if (!test_bit(ip, validation_state->branch_targets))
+		return true;
+
+	if (texturing_in_progress(validation_state)) {
+		DRM_ERROR("Branch target landed during TMU setup\n");
+		return false;
+	}
+
+	/* Reset our live values tracking, since this instruction may have
+	 * multiple predecessors.
+	 *
+	 * One could potentially do analysis to determine that, for
+	 * example, all predecessors have a live max clamp in the same
+	 * register, but we don't bother with that.
+	 */
+	reset_validation_state(validation_state);
+
+	/* Since we've entered a basic block from potentially multiple
+	 * predecessors, we need the uniforms address to be updated before any
+	 * unforms are read.  We require that after any branch point, the next
+	 * uniform to be loaded is a uniform address offset.  That uniform's
+	 * offset will be marked by the uniform address register write
+	 * validation, or a one-off the end-of-program check.
+	 */
+	validation_state->needs_uniform_address_update = true;
+
+	return true;
+}
+
 struct vc4_validated_shader_info *
 vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
 	bool found_shader_end = false;
 	int shader_end_ip = 0;
-	uint32_t ip, max_ip;
-	uint64_t *shader;
-	struct vc4_validated_shader_info *validated_shader;
+	uint32_t ip;
+	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
-	int i;
 
 	memset(&validation_state, 0, sizeof(validation_state));
+	validation_state.shader = shader_obj->vaddr;
+	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
 
-	for (i = 0; i < 8; i++)
-		validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0;
-	for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++)
-		validation_state.live_min_clamp_offsets[i] = ~0;
+	reset_validation_state(&validation_state);
 
-	shader = shader_obj->vaddr;
-	max_ip = shader_obj->base.size / sizeof(uint64_t);
+	validation_state.branch_targets =
+		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
+			sizeof(unsigned long), GFP_KERNEL);
+	if (!validation_state.branch_targets)
+		goto fail;
 
 	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
 	if (!validated_shader)
-		return NULL;
+		goto fail;
+
+	if (!vc4_validate_branches(&validation_state))
+		goto fail;
 
-	for (ip = 0; ip < max_ip; ip++) {
-		uint64_t inst = shader[ip];
+	for (ip = 0; ip < validation_state.max_ip; ip++) {
+		uint64_t inst = validation_state.shader[ip];
 		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
+		validation_state.ip = ip;
+
+		if (!vc4_handle_branch_target(&validation_state))
+			goto fail;
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -450,13 +791,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LOAD_TMU1:
 		case QPU_SIG_PROG_END:
 		case QPU_SIG_SMALL_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad write at ip %d\n", ip);
 				goto fail;
 			}
 
-			if (!check_instruction_reads(inst, validated_shader))
+			if (!check_instruction_reads(validated_shader,
+						     &validation_state))
 				goto fail;
 
 			if (sig == QPU_SIG_PROG_END) {
@@ -467,13 +809,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			break;
 
 		case QPU_SIG_LOAD_IMM:
-			if (!check_instruction_writes(inst, validated_shader,
+			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip);
 				goto fail;
 			}
 			break;
 
+		case QPU_SIG_BRANCH:
+			if (!check_branch(inst, validated_shader,
+					  &validation_state, ip))
+				goto fail;
+			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
 				  "instruction %d\n", sig, ip);
@@ -487,13 +834,28 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			break;
 	}
 
-	if (ip == max_ip) {
+	if (ip == validation_state.max_ip) {
 		DRM_ERROR("shader failed to terminate before "
 			  "shader BO end at %zd\n",
 			  shader_obj->base.size);
 		goto fail;
 	}
 
+	/* If we did a backwards branch and we haven't emitted a uniforms
+	 * reset since then, we still need the uniforms stream to have the
+	 * uniforms address available so that the backwards branch can do its
+	 * uniforms reset.
+	 *
+	 * We could potentially prove that the backwards branch doesn't
+	 * contain any uses of uniforms until program exit, but that doesn't
+	 * seem to be worth the trouble.
+	 */
+	if (validation_state.needs_uniform_address_for_loop) {
+		if (!require_uniform_address_uniform(validated_shader))
+			goto fail;
+		validated_shader->uniforms_size += 4;
+	}
+
 	/* Again, no chance of integer overflow here because the worst case
 	 * scenario is 8 bytes of uniforms plus handles per 8-byte
 	 * instruction.
@@ -502,9 +864,12 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		(validated_shader->uniforms_size +
 		 4 * validated_shader->num_texture_samples);
 
+	kfree(validation_state.branch_targets);
+
 	return validated_shader;
 
 fail:
+	kfree(validation_state.branch_targets);
 	if (validated_shader) {
 		kfree(validated_shader->texture_samples);
 		kfree(validated_shader);
diff --git a/drivers/gpu/drm/vgem/Makefile b/drivers/gpu/drm/vgem/Makefile
index 3f4c7b842028..bfcdea1330e6 100644
--- a/drivers/gpu/drm/vgem/Makefile
+++ b/drivers/gpu/drm/vgem/Makefile
@@ -1,4 +1,4 @@
 ccflags-y := -Iinclude/drm
-vgem-y := vgem_drv.o
+vgem-y := vgem_drv.o vgem_fence.o
 
 obj-$(CONFIG_DRM_VGEM)	+= vgem.o
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 35ea5d02a827..c15bafb06665 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -42,81 +42,38 @@
 #define DRIVER_MAJOR	1
 #define DRIVER_MINOR	0
 
-void vgem_gem_put_pages(struct drm_vgem_gem_object *obj)
-{
-	drm_gem_put_pages(&obj->base, obj->pages, false, false);
-	obj->pages = NULL;
-}
-
 static void vgem_gem_free_object(struct drm_gem_object *obj)
 {
 	struct drm_vgem_gem_object *vgem_obj = to_vgem_bo(obj);
 
-	drm_gem_free_mmap_offset(obj);
-
-	if (vgem_obj->use_dma_buf && obj->dma_buf) {
-		dma_buf_put(obj->dma_buf);
-		obj->dma_buf = NULL;
-	}
-
 	drm_gem_object_release(obj);
-
-	if (vgem_obj->pages)
-		vgem_gem_put_pages(vgem_obj);
-
-	vgem_obj->pages = NULL;
-
 	kfree(vgem_obj);
 }
 
-int vgem_gem_get_pages(struct drm_vgem_gem_object *obj)
-{
-	struct page **pages;
-
-	if (obj->pages || obj->use_dma_buf)
-		return 0;
-
-	pages = drm_gem_get_pages(&obj->base);
-	if (IS_ERR(pages)) {
-		return PTR_ERR(pages);
-	}
-
-	obj->pages = pages;
-
-	return 0;
-}
-
 static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct drm_vgem_gem_object *obj = vma->vm_private_data;
-	loff_t num_pages;
-	pgoff_t page_offset;
-	int ret;
-
 	/* We don't use vmf->pgoff since that has the fake offset */
-	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
-		PAGE_SHIFT;
-
-	num_pages = DIV_ROUND_UP(obj->base.size, PAGE_SIZE);
-
-	if (page_offset > num_pages)
-		return VM_FAULT_SIGBUS;
-
-	ret = vm_insert_page(vma, (unsigned long)vmf->virtual_address,
-			     obj->pages[page_offset]);
-	switch (ret) {
-	case 0:
-		return VM_FAULT_NOPAGE;
-	case -ENOMEM:
-		return VM_FAULT_OOM;
-	case -EBUSY:
-		return VM_FAULT_RETRY;
-	case -EFAULT:
-	case -EINVAL:
-		return VM_FAULT_SIGBUS;
-	default:
-		WARN_ON(1);
-		return VM_FAULT_SIGBUS;
+	unsigned long vaddr = (unsigned long)vmf->virtual_address;
+	struct page *page;
+
+	page = shmem_read_mapping_page(file_inode(obj->base.filp)->i_mapping,
+				       (vaddr - vma->vm_start) >> PAGE_SHIFT);
+	if (!IS_ERR(page)) {
+		vmf->page = page;
+		return 0;
+	} else switch (PTR_ERR(page)) {
+		case -ENOSPC:
+		case -ENOMEM:
+			return VM_FAULT_OOM;
+		case -EBUSY:
+			return VM_FAULT_RETRY;
+		case -EFAULT:
+		case -EINVAL:
+			return VM_FAULT_SIGBUS;
+		default:
+			WARN_ON_ONCE(PTR_ERR(page));
+			return VM_FAULT_SIGBUS;
 	}
 }
 
@@ -126,6 +83,34 @@ static const struct vm_operations_struct vgem_gem_vm_ops = {
 	.close = drm_gem_vm_close,
 };
 
+static int vgem_open(struct drm_device *dev, struct drm_file *file)
+{
+	struct vgem_file *vfile;
+	int ret;
+
+	vfile = kzalloc(sizeof(*vfile), GFP_KERNEL);
+	if (!vfile)
+		return -ENOMEM;
+
+	file->driver_priv = vfile;
+
+	ret = vgem_fence_open(vfile);
+	if (ret) {
+		kfree(vfile);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void vgem_preclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct vgem_file *vfile = file->driver_priv;
+
+	vgem_fence_close(vfile);
+	kfree(vfile);
+}
+
 /* ioctls */
 
 static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
@@ -134,57 +119,43 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev,
 					      unsigned long size)
 {
 	struct drm_vgem_gem_object *obj;
-	struct drm_gem_object *gem_object;
-	int err;
-
-	size = roundup(size, PAGE_SIZE);
+	int ret;
 
 	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
 	if (!obj)
 		return ERR_PTR(-ENOMEM);
 
-	gem_object = &obj->base;
-
-	err = drm_gem_object_init(dev, gem_object, size);
-	if (err)
-		goto out;
-
-	err = vgem_gem_get_pages(obj);
-	if (err)
-		goto out;
-
-	err = drm_gem_handle_create(file, gem_object, handle);
-	if (err)
-		goto handle_out;
+	ret = drm_gem_object_init(dev, &obj->base, roundup(size, PAGE_SIZE));
+	if (ret)
+		goto err_free;
 
-	drm_gem_object_unreference_unlocked(gem_object);
+	ret = drm_gem_handle_create(file, &obj->base, handle);
+	drm_gem_object_unreference_unlocked(&obj->base);
+	if (ret)
+		goto err;
 
-	return gem_object;
+	return &obj->base;
 
-handle_out:
-	drm_gem_object_release(gem_object);
-out:
+err_free:
 	kfree(obj);
-	return ERR_PTR(err);
+err:
+	return ERR_PTR(ret);
 }
 
 static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 				struct drm_mode_create_dumb *args)
 {
 	struct drm_gem_object *gem_object;
-	uint64_t size;
-	uint64_t pitch = args->width * DIV_ROUND_UP(args->bpp, 8);
+	u64 pitch, size;
 
+	pitch = args->width * DIV_ROUND_UP(args->bpp, 8);
 	size = args->height * pitch;
 	if (size == 0)
 		return -EINVAL;
 
 	gem_object = vgem_gem_create(dev, file, &args->handle, size);
-
-	if (IS_ERR(gem_object)) {
-		DRM_DEBUG_DRIVER("object creation failed\n");
+	if (IS_ERR(gem_object))
 		return PTR_ERR(gem_object);
-	}
 
 	args->size = gem_object->size;
 	args->pitch = pitch;
@@ -194,26 +165,26 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 	return 0;
 }
 
-int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
-		      uint32_t handle, uint64_t *offset)
+static int vgem_gem_dumb_map(struct drm_file *file, struct drm_device *dev,
+			     uint32_t handle, uint64_t *offset)
 {
-	int ret = 0;
 	struct drm_gem_object *obj;
+	int ret;
 
 	obj = drm_gem_object_lookup(file, handle);
 	if (!obj)
 		return -ENOENT;
 
+	if (!obj->filp) {
+		ret = -EINVAL;
+		goto unref;
+	}
+
 	ret = drm_gem_create_mmap_offset(obj);
 	if (ret)
 		goto unref;
 
-	BUG_ON(!obj->filp);
-
-	obj->filp->private_data = obj;
-
 	*offset = drm_vma_node_offset_addr(&obj->vma_node);
-
 unref:
 	drm_gem_object_unreference_unlocked(obj);
 
@@ -221,26 +192,134 @@ unref:
 }
 
 static struct drm_ioctl_desc vgem_ioctls[] = {
+	DRM_IOCTL_DEF_DRV(VGEM_FENCE_ATTACH, vgem_fence_attach_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VGEM_FENCE_SIGNAL, vgem_fence_signal_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
+static int vgem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	unsigned long flags = vma->vm_flags;
+	int ret;
+
+	ret = drm_gem_mmap(filp, vma);
+	if (ret)
+		return ret;
+
+	/* Keep the WC mmaping set by drm_gem_mmap() but our pages
+	 * are ordinary and not special.
+	 */
+	vma->vm_flags = flags | VM_DONTEXPAND | VM_DONTDUMP;
+	return 0;
+}
+
 static const struct file_operations vgem_driver_fops = {
 	.owner		= THIS_MODULE,
 	.open		= drm_open,
-	.mmap		= drm_gem_mmap,
+	.mmap		= vgem_mmap,
 	.poll		= drm_poll,
 	.read		= drm_read,
 	.unlocked_ioctl = drm_ioctl,
 	.release	= drm_release,
 };
 
+static int vgem_prime_pin(struct drm_gem_object *obj)
+{
+	long n_pages = obj->size >> PAGE_SHIFT;
+	struct page **pages;
+
+	/* Flush the object from the CPU cache so that importers can rely
+	 * on coherent indirect access via the exported dma-address.
+	 */
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return PTR_ERR(pages);
+
+	drm_clflush_pages(pages, n_pages);
+	drm_gem_put_pages(obj, pages, true, false);
+
+	return 0;
+}
+
+static struct sg_table *vgem_prime_get_sg_table(struct drm_gem_object *obj)
+{
+	struct sg_table *st;
+	struct page **pages;
+
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return ERR_CAST(pages);
+
+	st = drm_prime_pages_to_sg(pages, obj->size >> PAGE_SHIFT);
+	drm_gem_put_pages(obj, pages, false, false);
+
+	return st;
+}
+
+static void *vgem_prime_vmap(struct drm_gem_object *obj)
+{
+	long n_pages = obj->size >> PAGE_SHIFT;
+	struct page **pages;
+	void *addr;
+
+	pages = drm_gem_get_pages(obj);
+	if (IS_ERR(pages))
+		return NULL;
+
+	addr = vmap(pages, n_pages, 0, pgprot_writecombine(PAGE_KERNEL));
+	drm_gem_put_pages(obj, pages, false, false);
+
+	return addr;
+}
+
+static void vgem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
+{
+	vunmap(vaddr);
+}
+
+static int vgem_prime_mmap(struct drm_gem_object *obj,
+			   struct vm_area_struct *vma)
+{
+	int ret;
+
+	if (obj->size < vma->vm_end - vma->vm_start)
+		return -EINVAL;
+
+	if (!obj->filp)
+		return -ENODEV;
+
+	ret = obj->filp->f_op->mmap(obj->filp, vma);
+	if (ret)
+		return ret;
+
+	fput(vma->vm_file);
+	vma->vm_file = get_file(obj->filp);
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+
+	return 0;
+}
+
 static struct drm_driver vgem_driver = {
-	.driver_features		= DRIVER_GEM,
+	.driver_features		= DRIVER_GEM | DRIVER_PRIME,
+	.open				= vgem_open,
+	.preclose			= vgem_preclose,
 	.gem_free_object_unlocked	= vgem_gem_free_object,
 	.gem_vm_ops			= &vgem_gem_vm_ops,
 	.ioctls				= vgem_ioctls,
+	.num_ioctls 			= ARRAY_SIZE(vgem_ioctls),
 	.fops				= &vgem_driver_fops,
+
 	.dumb_create			= vgem_gem_dumb_create,
 	.dumb_map_offset		= vgem_gem_dumb_map,
+
+	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+	.gem_prime_pin = vgem_prime_pin,
+	.gem_prime_export = drm_gem_prime_export,
+	.gem_prime_get_sg_table = vgem_prime_get_sg_table,
+	.gem_prime_vmap = vgem_prime_vmap,
+	.gem_prime_vunmap = vgem_prime_vunmap,
+	.gem_prime_mmap = vgem_prime_mmap,
+
 	.name	= DRIVER_NAME,
 	.desc	= DRIVER_DESC,
 	.date	= DRIVER_DATE,
@@ -248,7 +327,7 @@ static struct drm_driver vgem_driver = {
 	.minor	= DRIVER_MINOR,
 };
 
-struct drm_device *vgem_device;
+static struct drm_device *vgem_device;
 
 static int __init vgem_init(void)
 {
@@ -261,7 +340,6 @@ static int __init vgem_init(void)
 	}
 
 	ret  = drm_dev_register(vgem_device, 0);
-
 	if (ret)
 		goto out_unref;
 
@@ -283,5 +361,6 @@ module_init(vgem_init);
 module_exit(vgem_exit);
 
 MODULE_AUTHOR("Red Hat, Inc.");
+MODULE_AUTHOR("Intel Corporation");
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
diff --git a/drivers/gpu/drm/vgem/vgem_drv.h b/drivers/gpu/drm/vgem/vgem_drv.h
index e9f92f7ee275..1f8798ad329c 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.h
+++ b/drivers/gpu/drm/vgem/vgem_drv.h
@@ -32,15 +32,25 @@
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 
+#include <uapi/drm/vgem_drm.h>
+
+struct vgem_file {
+	struct idr fence_idr;
+	struct mutex fence_mutex;
+};
+
 #define to_vgem_bo(x) container_of(x, struct drm_vgem_gem_object, base)
 struct drm_vgem_gem_object {
 	struct drm_gem_object base;
-	struct page **pages;
-	bool use_dma_buf;
 };
 
-/* vgem_drv.c */
-extern void vgem_gem_put_pages(struct drm_vgem_gem_object *obj);
-extern int vgem_gem_get_pages(struct drm_vgem_gem_object *obj);
+int vgem_fence_open(struct vgem_file *file);
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file);
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file);
+void vgem_fence_close(struct vgem_file *file);
 
 #endif
diff --git a/drivers/gpu/drm/vgem/vgem_fence.c b/drivers/gpu/drm/vgem/vgem_fence.c
new file mode 100644
index 000000000000..5c57c1ffa1f9
--- /dev/null
+++ b/drivers/gpu/drm/vgem/vgem_fence.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software")
+ * to deal in the software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * them Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTIBILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES, OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT, OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/reservation.h>
+
+#include "vgem_drv.h"
+
+#define VGEM_FENCE_TIMEOUT (10*HZ)
+
+struct vgem_fence {
+	struct fence base;
+	struct spinlock lock;
+	struct timer_list timer;
+};
+
+static const char *vgem_fence_get_driver_name(struct fence *fence)
+{
+	return "vgem";
+}
+
+static const char *vgem_fence_get_timeline_name(struct fence *fence)
+{
+	return "unbound";
+}
+
+static bool vgem_fence_signaled(struct fence *fence)
+{
+	return false;
+}
+
+static bool vgem_fence_enable_signaling(struct fence *fence)
+{
+	return true;
+}
+
+static void vgem_fence_release(struct fence *base)
+{
+	struct vgem_fence *fence = container_of(base, typeof(*fence), base);
+
+	del_timer_sync(&fence->timer);
+	fence_free(&fence->base);
+}
+
+static void vgem_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%u", fence->seqno);
+}
+
+static void vgem_fence_timeline_value_str(struct fence *fence, char *str,
+					  int size)
+{
+	snprintf(str, size, "%u", fence_is_signaled(fence) ? fence->seqno : 0);
+}
+
+static const struct fence_ops vgem_fence_ops = {
+	.get_driver_name = vgem_fence_get_driver_name,
+	.get_timeline_name = vgem_fence_get_timeline_name,
+	.enable_signaling = vgem_fence_enable_signaling,
+	.signaled = vgem_fence_signaled,
+	.wait = fence_default_wait,
+	.release = vgem_fence_release,
+
+	.fence_value_str = vgem_fence_value_str,
+	.timeline_value_str = vgem_fence_timeline_value_str,
+};
+
+static void vgem_fence_timeout(unsigned long data)
+{
+	struct vgem_fence *fence = (struct vgem_fence *)data;
+
+	fence_signal(&fence->base);
+}
+
+static struct fence *vgem_fence_create(struct vgem_file *vfile,
+				       unsigned int flags)
+{
+	struct vgem_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return NULL;
+
+	spin_lock_init(&fence->lock);
+	fence_init(&fence->base, &vgem_fence_ops, &fence->lock,
+		   fence_context_alloc(1), 1);
+
+	setup_timer(&fence->timer, vgem_fence_timeout, (unsigned long)fence);
+
+	/* We force the fence to expire within 10s to prevent driver hangs */
+	mod_timer(&fence->timer, jiffies + VGEM_FENCE_TIMEOUT);
+
+	return &fence->base;
+}
+
+static int attach_dmabuf(struct drm_device *dev,
+			 struct drm_gem_object *obj)
+{
+	struct dma_buf *dmabuf;
+
+	if (obj->dma_buf)
+		return 0;
+
+	dmabuf = dev->driver->gem_prime_export(dev, obj, 0);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	obj->dma_buf = dmabuf;
+	drm_gem_object_reference(obj);
+	return 0;
+}
+
+/*
+ * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH):
+ *
+ * Create and attach a fence to the vGEM handle. This fence is then exposed
+ * via the dma-buf reservation object and visible to consumers of the exported
+ * dma-buf. If the flags contain VGEM_FENCE_WRITE, the fence indicates the
+ * vGEM buffer is being written to by the client and is exposed as an exclusive
+ * fence, otherwise the fence indicates the client is current reading from the
+ * buffer and all future writes should wait for the client to signal its
+ * completion. Note that if a conflicting fence is already on the dma-buf (i.e.
+ * an exclusive fence when adding a read, or any fence when adding a write),
+ * -EBUSY is reported. Serialisation between operations should be handled
+ * by waiting upon the dma-buf.
+ *
+ * This returns the handle for the new fence that must be signaled within 10
+ * seconds (or otherwise it will automatically expire). See
+ * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL).
+ *
+ * If the vGEM handle does not exist, vgem_fence_attach_ioctl returns -ENOENT.
+ */
+int vgem_fence_attach_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file)
+{
+	struct drm_vgem_fence_attach *arg = data;
+	struct vgem_file *vfile = file->driver_priv;
+	struct reservation_object *resv;
+	struct drm_gem_object *obj;
+	struct fence *fence;
+	int ret;
+
+	if (arg->flags & ~VGEM_FENCE_WRITE)
+		return -EINVAL;
+
+	if (arg->pad)
+		return -EINVAL;
+
+	obj = drm_gem_object_lookup(file, arg->handle);
+	if (!obj)
+		return -ENOENT;
+
+	ret = attach_dmabuf(dev, obj);
+	if (ret)
+		goto err;
+
+	fence = vgem_fence_create(vfile, arg->flags);
+	if (!fence) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	/* Check for a conflicting fence */
+	resv = obj->dma_buf->resv;
+	if (!reservation_object_test_signaled_rcu(resv,
+						  arg->flags & VGEM_FENCE_WRITE)) {
+		ret = -EBUSY;
+		goto err_fence;
+	}
+
+	/* Expose the fence via the dma-buf */
+	ret = 0;
+	mutex_lock(&resv->lock.base);
+	if (arg->flags & VGEM_FENCE_WRITE)
+		reservation_object_add_excl_fence(resv, fence);
+	else if ((ret = reservation_object_reserve_shared(resv)) == 0)
+		reservation_object_add_shared_fence(resv, fence);
+	mutex_unlock(&resv->lock.base);
+
+	/* Record the fence in our idr for later signaling */
+	if (ret == 0) {
+		mutex_lock(&vfile->fence_mutex);
+		ret = idr_alloc(&vfile->fence_idr, fence, 1, 0, GFP_KERNEL);
+		mutex_unlock(&vfile->fence_mutex);
+		if (ret > 0) {
+			arg->out_fence = ret;
+			ret = 0;
+		}
+	}
+err_fence:
+	if (ret) {
+		fence_signal(fence);
+		fence_put(fence);
+	}
+err:
+	drm_gem_object_unreference_unlocked(obj);
+	return ret;
+}
+
+/*
+ * vgem_fence_signal_ioctl (DRM_IOCTL_VGEM_FENCE_SIGNAL):
+ *
+ * Signal and consume a fence ealier attached to a vGEM handle using
+ * vgem_fence_attach_ioctl (DRM_IOCTL_VGEM_FENCE_ATTACH).
+ *
+ * All fences must be signaled within 10s of attachment or otherwise they
+ * will automatically expire (and a vgem_fence_signal_ioctl returns -ETIMEDOUT).
+ *
+ * Signaling a fence indicates to all consumers of the dma-buf that the
+ * client has completed the operation associated with the fence, and that the
+ * buffer is then ready for consumption.
+ *
+ * If the fence does not exist (or has already been signaled by the client),
+ * vgem_fence_signal_ioctl returns -ENOENT.
+ */
+int vgem_fence_signal_ioctl(struct drm_device *dev,
+			    void *data,
+			    struct drm_file *file)
+{
+	struct vgem_file *vfile = file->driver_priv;
+	struct drm_vgem_fence_signal *arg = data;
+	struct fence *fence;
+	int ret = 0;
+
+	if (arg->flags)
+		return -EINVAL;
+
+	mutex_lock(&vfile->fence_mutex);
+	fence = idr_replace(&vfile->fence_idr, NULL, arg->fence);
+	mutex_unlock(&vfile->fence_mutex);
+	if (!fence)
+		return -ENOENT;
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	if (fence_is_signaled(fence))
+		ret = -ETIMEDOUT;
+
+	fence_signal(fence);
+	fence_put(fence);
+	return ret;
+}
+
+int vgem_fence_open(struct vgem_file *vfile)
+{
+	mutex_init(&vfile->fence_mutex);
+	idr_init(&vfile->fence_idr);
+
+	return 0;
+}
+
+static int __vgem_fence_idr_fini(int id, void *p, void *data)
+{
+	fence_signal(p);
+	fence_put(p);
+	return 0;
+}
+
+void vgem_fence_close(struct vgem_file *vfile)
+{
+	idr_for_each(&vfile->fence_idr, __vgem_fence_idr_fini, vfile);
+	idr_destroy(&vfile->fence_idr);
+}
diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig
index 9983eadb81b6..e1afc3d3f8d9 100644
--- a/drivers/gpu/drm/virtio/Kconfig
+++ b/drivers/gpu/drm/virtio/Kconfig
@@ -1,11 +1,7 @@
 config DRM_VIRTIO_GPU
 	tristate "Virtio GPU driver"
 	depends on DRM && VIRTIO
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
         select DRM_KMS_HELPER
-        select DRM_KMS_FB_HELPER
         select DRM_TTM
 	help
 	   This is the virtual GPU driver for virtio.  It can be used with
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
index ac758cdbc1bc..4e192aa2d021 100644
--- a/drivers/gpu/drm/virtio/virtgpu_display.c
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -53,8 +53,7 @@ static void virtio_gpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 	struct virtio_gpu_framebuffer *virtio_gpu_fb
 		= to_virtio_gpu_framebuffer(fb);
 
-	if (virtio_gpu_fb->obj)
-		drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj);
+	drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(virtio_gpu_fb);
 }
@@ -326,8 +325,7 @@ virtio_gpu_user_framebuffer_create(struct drm_device *dev,
 	ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj);
 	if (ret) {
 		kfree(virtio_gpu_fb);
-		if (obj)
-			drm_gem_object_unreference_unlocked(obj);
+		drm_gem_object_unreference_unlocked(obj);
 		return NULL;
 	}
 
@@ -348,7 +346,7 @@ static void vgdev_atomic_commit_tail(struct drm_atomic_state *state)
 	drm_atomic_helper_cleanup_planes(dev, state);
 }
 
-struct drm_mode_config_helper_funcs virtio_mode_config_helpers = {
+static struct drm_mode_config_helper_funcs virtio_mode_config_helpers = {
 	.atomic_commit_tail = vgdev_atomic_commit_tail,
 };
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index a0580815629f..80482ac5f95d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -375,6 +375,12 @@ static int virtio_gpu_bo_move(struct ttm_buffer_object *bo,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	virtio_gpu_move_null(bo, new_mem);
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
index 9b078a493996..0cd889015dc5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_dmabuf.c
@@ -49,6 +49,7 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -60,7 +61,12 @@ int vmw_dmabuf_pin_in_placement(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
-	ret = ttm_bo_validate(bo, placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, placement, interruptible, false);
+
 	if (!ret)
 		vmw_bo_pin_reserved(buf, true);
 
@@ -91,6 +97,7 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 {
 	struct ttm_buffer_object *bo = &buf->base;
 	int ret;
+	uint32_t new_flags;
 
 	ret = ttm_write_lock(&dev_priv->reservation_sem, interruptible);
 	if (unlikely(ret != 0))
@@ -102,6 +109,12 @@ int vmw_dmabuf_pin_in_vram_or_gmr(struct vmw_private *dev_priv,
 	if (unlikely(ret != 0))
 		goto err;
 
+	if (buf->pin_count > 0) {
+		ret = ttm_bo_mem_compat(&vmw_vram_gmr_placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+		goto out_unreserve;
+	}
+
 	ret = ttm_bo_validate(bo, &vmw_vram_gmr_placement, interruptible,
 			      false);
 	if (likely(ret == 0) || ret == -ERESTARTSYS)
@@ -161,6 +174,7 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	struct ttm_placement placement;
 	struct ttm_place place;
 	int ret = 0;
+	uint32_t new_flags;
 
 	place = vmw_vram_placement.placement[0];
 	place.lpfn = bo->num_pages;
@@ -185,10 +199,15 @@ int vmw_dmabuf_pin_in_start_of_vram(struct vmw_private *dev_priv,
 	 */
 	if (bo->mem.mem_type == TTM_PL_VRAM &&
 	    bo->mem.start < bo->num_pages &&
-	    bo->mem.start > 0)
+	    bo->mem.start > 0 &&
+	    buf->pin_count == 0)
 		(void) ttm_bo_validate(bo, &vmw_sys_placement, false, false);
 
-	ret = ttm_bo_validate(bo, &placement, interruptible, false);
+	if (buf->pin_count > 0)
+		ret = ttm_bo_mem_compat(&placement, &bo->mem,
+					&new_flags) == true ? 0 : -EINVAL;
+	else
+		ret = ttm_bo_validate(bo, &placement, interruptible, false);
 
 	/* For some reason we didn't end up at the start of vram */
 	WARN_ON(ret == 0 && bo->offset != 0);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 60646644bef3..e8ae3dc476d1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -233,6 +233,7 @@ static int vmw_force_iommu;
 static int vmw_restrict_iommu;
 static int vmw_force_coherent;
 static int vmw_restrict_dma_mask;
+static int vmw_assume_16bpp;
 
 static int vmw_probe(struct pci_dev *, const struct pci_device_id *);
 static void vmw_master_init(struct vmw_master *);
@@ -249,6 +250,8 @@ MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
 module_param_named(force_coherent, vmw_force_coherent, int, 0600);
 MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
 module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
+MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
+module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
 
 static void vmw_print_capabilities(uint32_t capabilities)
@@ -660,6 +663,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	dev_priv->vram_start = pci_resource_start(dev->pdev, 1);
 	dev_priv->mmio_start = pci_resource_start(dev->pdev, 2);
 
+	dev_priv->assume_16bpp = !!vmw_assume_16bpp;
+
 	dev_priv->enable_fb = enable_fbdev;
 
 	vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2);
@@ -706,6 +711,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 			vmw_read(dev_priv,
 				 SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB);
 
+		/*
+		 * Workaround for low memory 2D VMs to compensate for the
+		 * allocation taken by fbdev
+		 */
+		if (!(dev_priv->capabilities & SVGA_CAP_3D))
+			mem_size *= 2;
+
 		dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE;
 		dev_priv->prim_bb_mem =
 			vmw_read(dev_priv,
@@ -1041,8 +1053,7 @@ static struct vmw_master *vmw_master_check(struct drm_device *dev,
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 	struct vmw_master *vmaster;
 
-	if (file_priv->minor->type != DRM_MINOR_LEGACY ||
-	    !(flags & DRM_AUTH))
+	if (!drm_is_primary_client(file_priv) || !(flags & DRM_AUTH))
 		return NULL;
 
 	ret = mutex_lock_interruptible(&dev->master_mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 9a90f824814e..74304b03f9d4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -387,6 +387,7 @@ struct vmw_private {
 	spinlock_t hw_lock;
 	spinlock_t cap_lock;
 	bool has_dx;
+	bool assume_16bpp;
 
 	/*
 	 * VGA registers.
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 1a1a87cbf109..dc5beff2b4aa 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3625,9 +3625,7 @@ static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
 				   (sw_context->cmd_bounce_size >> 1));
 	}
 
-	if (sw_context->cmd_bounce != NULL)
-		vfree(sw_context->cmd_bounce);
-
+	vfree(sw_context->cmd_bounce);
 	sw_context->cmd_bounce = vmalloc(sw_context->cmd_bounce_size);
 
 	if (sw_context->cmd_bounce == NULL) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 679a4cb98ee3..d2d93959b119 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -517,28 +517,6 @@ static int vmw_fb_kms_framebuffer(struct fb_info *info)
 
 	par->set_fb = &vfb->base;
 
-	if (!par->bo_ptr) {
-		/*
-		 * Pin before mapping. Since we don't know in what placement
-		 * to pin, call into KMS to do it for us.
-		 */
-		ret = vfb->pin(vfb);
-		if (ret) {
-			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
-				  par->vmw_bo->base.num_pages, &par->map);
-		if (ret) {
-			vfb->unpin(vfb);
-			DRM_ERROR("Could not map the fbdev framebuffer.\n");
-			return ret;
-		}
-
-		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
-	}
-
 	return 0;
 }
 
@@ -601,6 +579,31 @@ static int vmw_fb_set_par(struct fb_info *info)
 	if (ret)
 		goto out_unlock;
 
+	if (!par->bo_ptr) {
+		struct vmw_framebuffer *vfb = vmw_framebuffer_to_vfb(set.fb);
+
+		/*
+		 * Pin before mapping. Since we don't know in what placement
+		 * to pin, call into KMS to do it for us.
+		 */
+		ret = vfb->pin(vfb);
+		if (ret) {
+			DRM_ERROR("Could not pin the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		ret = ttm_bo_kmap(&par->vmw_bo->base, 0,
+				  par->vmw_bo->base.num_pages, &par->map);
+		if (ret) {
+			vfb->unpin(vfb);
+			DRM_ERROR("Could not map the fbdev framebuffer.\n");
+			goto out_unlock;
+		}
+
+		par->bo_ptr = ttm_kmap_obj_virtual(&par->map, &par->bo_iowrite);
+	}
+
+
 	vmw_fb_dirty_mark(par, par->fb_x, par->fb_y,
 			  par->set_fb->width, par->set_fb->height);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 8a69d4da40b5..bf28ccc150df 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1555,14 +1555,10 @@ int vmw_du_connector_fill_modes(struct drm_connector *connector,
 		DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC)
 	};
 	int i;
-	u32 assumed_bpp = 2;
+	u32 assumed_bpp = 4;
 
-	/*
-	 * If using screen objects, then assume 32-bpp because that's what the
-	 * SVGA device is assuming
-	 */
-	if (dev_priv->active_display_unit == vmw_du_screen_object)
-		assumed_bpp = 4;
+	if (dev_priv->assume_16bpp)
+		assumed_bpp = 2;
 
 	if (dev_priv->active_display_unit == vmw_du_screen_target) {
 		max_width  = min(max_width,  dev_priv->stdu_max_width);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 6de283c8fa3e..e57a0bad7a62 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/frame.h>
 #include <asm/hypervisor.h>
 #include "drmP.h"
 #include "vmwgfx_msg.h"
@@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg)
 
 	return -EINVAL;
 }
-
+STACK_FRAME_NON_STANDARD(vmw_send_msg);
 
 
 /**
@@ -299,11 +300,15 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
 		break;
 	}
 
+	if (retries == RETRIES)
+		return -EINVAL;
+
 	*msg_len = reply_len;
 	*msg     = reply;
 
 	return 0;
 }
+STACK_FRAME_NON_STANDARD(vmw_recv_msg);
 
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
index 9ca818fb034c..41932a7c4f79 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c
@@ -399,8 +399,10 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 
 	WARN_ON_ONCE(!stdu->defined);
 
-	if (!vfb->dmabuf && new_fb->width == mode->hdisplay &&
-	    new_fb->height == mode->vdisplay)
+	new_vfbs = (vfb->dmabuf) ? NULL : vmw_framebuffer_to_vfbs(new_fb);
+
+	if (new_vfbs && new_vfbs->surface->base_size.width == mode->hdisplay &&
+	    new_vfbs->surface->base_size.height == mode->vdisplay)
 		new_content_type = SAME_AS_DISPLAY;
 	else if (vfb->dmabuf)
 		new_content_type = SEPARATE_DMA;
@@ -444,7 +446,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			content_srf.mip_levels[0]     = 1;
 			content_srf.multisample_count = 0;
 		} else {
-			new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 			content_srf = *new_vfbs->surface;
 		}
 
@@ -464,7 +465,6 @@ static int vmw_stdu_bind_fb(struct vmw_private *dev_priv,
 			return ret;
 		}
 	} else if (new_content_type == SAME_AS_DISPLAY) {
-		new_vfbs = vmw_framebuffer_to_vfbs(new_fb);
 		new_display_srf = vmw_surface_reference(new_vfbs->surface);
 	}
 
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index a18db4d5347c..c5d82a8a2ec9 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -96,12 +96,12 @@ fail:
  */
 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 {
-	u32 pos = pb->pos;
-	u32 *p = (u32 *)((void *)pb->mapped + pos);
-	WARN_ON(pos == pb->fence);
+	u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
+
+	WARN_ON(pb->pos == pb->fence);
 	*(p++) = op1;
 	*(p++) = op2;
-	pb->pos = (pos + 8) & (pb->size_bytes - 1);
+	pb->pos = (pb->pos + 8) & (pb->size_bytes - 1);
 }
 
 /*
@@ -134,14 +134,19 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 				     enum cdma_event event)
 {
 	for (;;) {
+		struct push_buffer *pb = &cdma->push_buffer;
 		unsigned int space;
 
-		if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
+		switch (event) {
+		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
 			space = list_empty(&cdma->sync_queue) ? 1 : 0;
-		else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
-			struct push_buffer *pb = &cdma->push_buffer;
+			break;
+
+		case CDMA_EVENT_PUSH_BUFFER_SPACE:
 			space = host1x_pushbuffer_space(pb);
-		} else {
+			break;
+
+		default:
 			WARN_ON(1);
 			return -EINVAL;
 		}
@@ -159,12 +164,14 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 			mutex_lock(&cdma->lock);
 			continue;
 		}
+
 		cdma->event = event;
 
 		mutex_unlock(&cdma->lock);
 		down(&cdma->sem);
 		mutex_lock(&cdma->lock);
 	}
+
 	return 0;
 }
 
@@ -234,6 +241,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 			/* Start timer on next pending syncpt */
 			if (job->timeout)
 				cdma_start_timer_locked(cdma, job);
+
 			break;
 		}
 
@@ -247,7 +255,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 		/* Pop push buffer slots */
 		if (job->num_slots) {
 			struct push_buffer *pb = &cdma->push_buffer;
+
 			host1x_pushbuffer_pop(pb, job->num_slots);
+
 			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
 				signal = true;
 		}
@@ -269,11 +279,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 				   struct device *dev)
 {
-	u32 restart_addr;
-	u32 syncpt_incrs;
-	struct host1x_job *job = NULL;
-	u32 syncpt_val;
 	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 restart_addr, syncpt_incrs, syncpt_val;
+	struct host1x_job *job = NULL;
 
 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
 
@@ -342,9 +350,11 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 		syncpt_val += syncpt_incrs;
 	}
 
-	/* The following sumbits from the same client may be dependent on the
+	/*
+	 * The following sumbits from the same client may be dependent on the
 	 * failed submit and therefore they may fail. Force a small timeout
-	 * to make the queue cleanup faster */
+	 * to make the queue cleanup faster.
+	 */
 
 	list_for_each_entry_from(job, &cdma->sync_queue, list)
 		if (job->client == cdma->timeout.client)
@@ -375,6 +385,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 	err = host1x_pushbuffer_init(&cdma->push_buffer);
 	if (err)
 		return err;
+
 	return 0;
 }
 
@@ -410,6 +421,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 		/* init state on first submit with timeout value */
 		if (!cdma->timeout.initialized) {
 			int err;
+
 			err = host1x_hw_cdma_timeout_init(host1x, cdma,
 							  job->syncpt_id);
 			if (err) {
@@ -418,6 +430,7 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 			}
 		}
 	}
+
 	if (!cdma->running)
 		host1x_hw_cdma_start(host1x, cdma);
 
@@ -448,6 +461,7 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
 		slots_free = host1x_cdma_wait_locked(cdma,
 						CDMA_EVENT_PUSH_BUFFER_SPACE);
 	}
+
 	cdma->slots_free = slots_free - 1;
 	cdma->slots_used++;
 	host1x_pushbuffer_push(pb, op1, op2);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index b4ae3affb987..8f437d924c10 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -83,9 +83,10 @@ EXPORT_SYMBOL(host1x_channel_put);
 struct host1x_channel *host1x_channel_request(struct device *dev)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	int max_channels = host->info->nb_channels;
+	unsigned int max_channels = host->info->nb_channels;
 	struct host1x_channel *channel = NULL;
-	int index, err;
+	unsigned long index;
+	int err;
 
 	mutex_lock(&host->chlist_mutex);
 
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index ee3d12b51c50..d9330fcc62ad 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -39,6 +39,7 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	va_start(args, fmt);
 	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
 	va_end(args);
+
 	o->fn(o->ctx, o->buf, len);
 }
 
@@ -48,13 +49,17 @@ static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
 	struct output *o = data;
 
 	mutex_lock(&ch->reflock);
+
 	if (ch->refcount) {
 		mutex_lock(&ch->cdma.lock);
+
 		if (show_fifo)
 			host1x_hw_show_channel_fifo(m, ch, o);
+
 		host1x_hw_show_channel_cdma(m, ch, o);
 		mutex_unlock(&ch->cdma.lock);
 	}
+
 	mutex_unlock(&ch->reflock);
 
 	return 0;
@@ -62,22 +67,27 @@ static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
 
 static void show_syncpts(struct host1x *m, struct output *o)
 {
-	int i;
+	unsigned int i;
+
 	host1x_debug_output(o, "---- syncpts ----\n");
+
 	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
 		u32 max = host1x_syncpt_read_max(m->syncpt + i);
 		u32 min = host1x_syncpt_load(m->syncpt + i);
+
 		if (!min && !max)
 			continue;
-		host1x_debug_output(o, "id %d (%s) min %d max %d\n",
+
+		host1x_debug_output(o, "id %u (%s) min %d max %d\n",
 				    i, m->syncpt[i].name, min, max);
 	}
 
 	for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
 		u32 base_val;
+
 		base_val = host1x_syncpt_load_wait_base(m->syncpt + i);
 		if (base_val)
-			host1x_debug_output(o, "waitbase id %d val %d\n", i,
+			host1x_debug_output(o, "waitbase id %u val %d\n", i,
 					    base_val);
 	}
 
@@ -114,7 +124,9 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
+
 	show_all(s->private, &o);
+
 	return 0;
 }
 
@@ -124,7 +136,9 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
+
 	show_all_no_fifo(s->private, &o);
+
 	return 0;
 }
 
@@ -134,10 +148,10 @@ static int host1x_debug_open_all(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations host1x_debug_all_fops = {
-	.open		= host1x_debug_open_all,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+	.open = host1x_debug_open_all,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
 };
 
 static int host1x_debug_open(struct inode *inode, struct file *file)
@@ -146,10 +160,10 @@ static int host1x_debug_open(struct inode *inode, struct file *file)
 }
 
 static const struct file_operations host1x_debug_fops = {
-	.open		= host1x_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
+	.open = host1x_debug_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
 };
 
 static void host1x_debugfs_init(struct host1x *host1x)
@@ -201,6 +215,7 @@ void host1x_debug_dump(struct host1x *host1x)
 	struct output o = {
 		.fn = write_to_printk
 	};
+
 	show_all(host1x, &o);
 }
 
@@ -209,5 +224,6 @@ void host1x_debug_dump_syncpts(struct host1x *host1x)
 	struct output o = {
 		.fn = write_to_printk
 	};
+
 	show_syncpts(host1x, &o);
 }
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index ff348690df94..a62317af76ad 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -63,13 +63,13 @@ u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
 }
 
 static const struct host1x_info host1x01_info = {
-	.nb_channels	= 8,
-	.nb_pts		= 32,
-	.nb_mlocks	= 16,
-	.nb_bases	= 8,
-	.init		= host1x01_init,
-	.sync_offset	= 0x3000,
-	.dma_mask	= DMA_BIT_MASK(32),
+	.nb_channels = 8,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 8,
+	.init = host1x01_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
 };
 
 static const struct host1x_info host1x02_info = {
@@ -102,7 +102,7 @@ static const struct host1x_info host1x05_info = {
 	.dma_mask = DMA_BIT_MASK(34),
 };
 
-static struct of_device_id host1x_of_match[] = {
+static const struct of_device_id host1x_of_match[] = {
 	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
 	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
 	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index dace124994bb..5220510f39da 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -45,7 +45,7 @@ struct host1x_cdma_ops {
 	void (*start)(struct host1x_cdma *cdma);
 	void (*stop)(struct host1x_cdma *cdma);
 	void (*flush)(struct  host1x_cdma *cdma);
-	int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id);
+	int (*timeout_init)(struct host1x_cdma *cdma, unsigned int syncpt);
 	void (*timeout_destroy)(struct host1x_cdma *cdma);
 	void (*freeze)(struct host1x_cdma *cdma);
 	void (*resume)(struct host1x_cdma *cdma, u32 getptr);
@@ -82,21 +82,21 @@ struct host1x_intr_ops {
 	int (*init_host_sync)(struct host1x *host, u32 cpm,
 		void (*syncpt_thresh_work)(struct work_struct *work));
 	void (*set_syncpt_threshold)(
-		struct host1x *host, u32 id, u32 thresh);
-	void (*enable_syncpt_intr)(struct host1x *host, u32 id);
-	void (*disable_syncpt_intr)(struct host1x *host, u32 id);
+		struct host1x *host, unsigned int id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
+	void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
 	void (*disable_all_syncpt_intrs)(struct host1x *host);
 	int (*free_syncpt_irq)(struct host1x *host);
 };
 
 struct host1x_info {
-	int	nb_channels;		/* host1x: num channels supported */
-	int	nb_pts;			/* host1x: num syncpoints supported */
-	int	nb_bases;		/* host1x: num syncpoints supported */
-	int	nb_mlocks;		/* host1x: number of mlocks */
-	int	(*init)(struct host1x *); /* initialize per SoC ops */
-	int	sync_offset;
-	u64	dma_mask;		/* mask of addressable memory */
+	unsigned int nb_channels; /* host1x: number of channels supported */
+	unsigned int nb_pts; /* host1x: number of syncpoints supported */
+	unsigned int nb_bases; /* host1x: number of syncpoint bases supported */
+	unsigned int nb_mlocks; /* host1x: number of mlocks supported */
+	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
+	unsigned int sync_offset; /* offset of syncpoint registers */
+	u64 dma_mask; /* mask of addressable memory */
 };
 
 struct host1x {
@@ -109,7 +109,6 @@ struct host1x {
 	struct clk *clk;
 
 	struct mutex intr_mutex;
-	struct workqueue_struct *intr_wq;
 	int intr_syncpt_irq;
 
 	const struct host1x_syncpt_ops *syncpt_op;
@@ -183,19 +182,20 @@ static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
 }
 
 static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
-						       u32 id, u32 thresh)
+						       unsigned int id,
+						       u32 thresh)
 {
 	host->intr_op->set_syncpt_threshold(host, id, thresh);
 }
 
 static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host,
-						     u32 id)
+						     unsigned int id)
 {
 	host->intr_op->enable_syncpt_intr(host, id);
 }
 
 static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host,
-						      u32 id)
+						      unsigned int id)
 {
 	host->intr_op->disable_syncpt_intr(host, id);
 }
@@ -212,9 +212,9 @@ static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host)
 
 static inline int host1x_hw_channel_init(struct host1x *host,
 					 struct host1x_channel *channel,
-					 int chid)
+					 unsigned int id)
 {
-	return host->channel_op->init(channel, host, chid);
+	return host->channel_op->init(channel, host, id);
 }
 
 static inline int host1x_hw_channel_submit(struct host1x *host,
@@ -243,9 +243,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host,
 
 static inline int host1x_hw_cdma_timeout_init(struct host1x *host,
 					      struct host1x_cdma *cdma,
-					      u32 syncpt_id)
+					      unsigned int syncpt)
 {
-	return host->cdma_op->timeout_init(cdma, syncpt_id);
+	return host->cdma_op->timeout_init(cdma, syncpt);
 }
 
 static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host,
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 305ea8f3382d..659c1bbfeeba 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -41,7 +41,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 {
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct push_buffer *pb = &cdma->push_buffer;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < syncpt_incrs; i++)
 		host1x_syncpt_incr(cdma->timeout.syncpt);
@@ -58,6 +58,7 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 			&pb->phys, getptr);
 		getptr = (getptr + 8) & (pb->size_bytes - 1);
 	}
+
 	wmb();
 }
 
@@ -162,12 +163,14 @@ static void cdma_stop(struct host1x_cdma *cdma)
 	struct host1x_channel *ch = cdma_to_channel(cdma);
 
 	mutex_lock(&cdma->lock);
+
 	if (cdma->running) {
 		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
 		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 				 HOST1X_CHANNEL_DMACTRL);
 		cdma->running = false;
 	}
+
 	mutex_unlock(&cdma->lock);
 }
 
@@ -213,11 +216,11 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
 	u32 cmdproc_stop;
 
 	dev_dbg(host1x->dev,
-		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
+		"resuming channel (id %u, DMAGET restart = 0x%x)\n",
 		ch->id, getptr);
 
 	cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop &= ~(BIT(ch->id));
+	cmdproc_stop &= ~BIT(ch->id);
 	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
 
 	cdma->torndown = false;
@@ -231,14 +234,11 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
  */
 static void cdma_timeout_handler(struct work_struct *work)
 {
+	u32 prev_cmdproc, cmdproc_stop, syncpt_val;
 	struct host1x_cdma *cdma;
 	struct host1x *host1x;
 	struct host1x_channel *ch;
 
-	u32 syncpt_val;
-
-	u32 prev_cmdproc, cmdproc_stop;
-
 	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
 			    timeout.wq);
 	host1x = cdma_to_host1x(cdma);
@@ -277,9 +277,9 @@ static void cdma_timeout_handler(struct work_struct *work)
 		return;
 	}
 
-	dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
-		__func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
-		syncpt_val, cdma->timeout.syncpt_val);
+	dev_warn(host1x->dev, "%s: timeout: %u (%s), HW thresh %d, done %d\n",
+		 __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
+		 syncpt_val, cdma->timeout.syncpt_val);
 
 	/* stop HW, resetting channel/module */
 	host1x_hw_cdma_freeze(host1x, cdma);
@@ -291,7 +291,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 /*
  * Init timeout resources
  */
-static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id)
+static int cdma_timeout_init(struct host1x_cdma *cdma, unsigned int syncpt)
 {
 	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
 	cdma->timeout.initialized = true;
@@ -306,6 +306,7 @@ static void cdma_timeout_destroy(struct host1x_cdma *cdma)
 {
 	if (cdma->timeout.initialized)
 		cancel_delayed_work(&cdma->timeout.wq);
+
 	cdma->timeout.initialized = false;
 }
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 946c332c3906..5e8df78b7acd 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -46,6 +46,7 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 		 */
 		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
 			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+
 			offset += i * sizeof(u32);
 
 			trace_host1x_cdma_push_gather(dev_name(dev), bo,
@@ -66,6 +67,7 @@ static void submit_gathers(struct host1x_job *job)
 		struct host1x_job_gather *g = &job->gathers[i];
 		u32 op1 = host1x_opcode_gather(g->words);
 		u32 op2 = g->base + g->offset;
+
 		trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
 		host1x_cdma_push(cdma, op1, op2);
 	}
@@ -75,7 +77,8 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
 {
 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 	struct host1x_syncpt *sp = host->syncpt + job->syncpt_id;
-	u32 id, value;
+	unsigned int id;
+	u32 value;
 
 	value = host1x_syncpt_read_max(sp);
 	id = sp->base->id;
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index cc3f1825c735..7a4a3286e4a7 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -40,8 +40,7 @@ enum {
 
 static unsigned int show_channel_command(struct output *o, u32 val)
 {
-	unsigned mask;
-	unsigned subop;
+	unsigned int mask, subop;
 
 	switch (val >> 28) {
 	case HOST1X_OPCODE_SETCLASS:
@@ -51,12 +50,11 @@ static unsigned int show_channel_command(struct output *o, u32 val)
 					    val >> 6 & 0x3ff,
 					    val >> 16 & 0xfff, mask);
 			return hweight8(mask);
-		} else {
-			host1x_debug_output(o, "SETCL(class=%03x)\n",
-					    val >> 6 & 0x3ff);
-			return 0;
 		}
 
+		host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+		return 0;
+
 	case HOST1X_OPCODE_INCR:
 		host1x_debug_output(o, "INCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
@@ -143,7 +141,8 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 	struct host1x_job *job;
 
 	list_for_each_entry(job, &cdma->sync_queue, list) {
-		int i;
+		unsigned int i;
+
 		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
 				    job, job->syncpt_id, job->syncpt_end,
 				    job->first_get, job->timeout,
@@ -190,7 +189,7 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
 	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
 
-	host1x_debug_output(o, "%d-%s: ", ch->id, dev_name(ch->dev));
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
 
 	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
 	    !ch->cdma.push_buffer.mapped) {
@@ -200,14 +199,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
 
 	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
 	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	    HOST1X_UCLASS_WAIT_SYNCPT)
+			HOST1X_UCLASS_WAIT_SYNCPT)
 		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
 				    cbread >> 24, cbread & 0xffffff);
 	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
-	   HOST1X_CLASS_HOST1X &&
-	   HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	   HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
-
+				HOST1X_CLASS_HOST1X &&
+		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
 		base = (cbread >> 16) & 0xff;
 		baseval =
 			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
@@ -236,7 +234,7 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
 	u32 val, rd_ptr, wr_ptr, start, end;
 	unsigned int data_count = 0;
 
-	host1x_debug_output(o, "%d: fifo:\n", ch->id);
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
 
 	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
 	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
@@ -290,20 +288,22 @@ static void host1x_debug_show_channel_fifo(struct host1x *host,
 
 static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
 {
-	int i;
+	unsigned int i;
 
 	host1x_debug_output(o, "---- mlocks ----\n");
+
 	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
 		u32 owner =
 			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
 		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by channel %d\n",
+			host1x_debug_output(o, "%u: locked by channel %u\n",
 				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
 		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by cpu\n", i);
+			host1x_debug_output(o, "%u: locked by cpu\n", i);
 		else
-			host1x_debug_output(o, "%d: unlocked\n", i);
+			host1x_debug_output(o, "%u: unlocked\n", i);
 	}
+
 	host1x_debug_output(o, "\n");
 }
 
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index e1e31e9e67cd..dacb8009a605 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -38,14 +38,14 @@ static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id)));
 
-	queue_work(host->intr_wq, &syncpt->intr.work);
+	schedule_work(&syncpt->intr.work);
 }
 
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
 	struct host1x *host = dev_id;
 	unsigned long reg;
-	int i, id;
+	unsigned int i, id;
 
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
 		reg = host1x_sync_readl(host,
@@ -62,7 +62,7 @@ static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 
 static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 {
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
 		host1x_sync_writel(host, 0xffffffffu,
@@ -72,10 +72,12 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 	}
 }
 
-static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
-	void (*syncpt_thresh_work)(struct work_struct *))
+static int
+_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
+			    void (*syncpt_thresh_work)(struct work_struct *))
 {
-	int i, err;
+	unsigned int i;
+	int err;
 
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
@@ -106,18 +108,21 @@ static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
 }
 
 static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
-	u32 id, u32 thresh)
+					      unsigned int id,
+					      u32 thresh)
 {
 	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
 }
 
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
+static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+					    unsigned int id)
 {
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(BIT_WORD(id)));
 }
 
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
+static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+					     unsigned int id)
 {
 	host1x_sync_writel(host, BIT_MASK(id),
 		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id)));
@@ -127,8 +132,13 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
 
 static int _host1x_free_syncpt_irq(struct host1x *host)
 {
+	unsigned int i;
+
 	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-	flush_workqueue(host->intr_wq);
+
+	for (i = 0; i < host->info->nb_pts; i++)
+		cancel_work_sync(&host->syncpt[i].intr.work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 56e85395ac24..c93f74fcce72 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -26,8 +26,9 @@
  */
 static void syncpt_restore(struct host1x_syncpt *sp)
 {
+	u32 min = host1x_syncpt_read_min(sp);
 	struct host1x *host = sp->host;
-	int min = host1x_syncpt_read_min(sp);
+
 	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
 }
 
@@ -37,6 +38,7 @@ static void syncpt_restore(struct host1x_syncpt *sp)
 static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
 {
 	struct host1x *host = sp->host;
+
 	host1x_sync_writel(host, sp->base_val,
 			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
 }
@@ -47,6 +49,7 @@ static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
 static void syncpt_read_wait_base(struct host1x_syncpt *sp)
 {
 	struct host1x *host = sp->host;
+
 	sp->base_val =
 		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
 }
@@ -85,6 +88,7 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 	if (!host1x_syncpt_client_managed(sp) &&
 	    host1x_syncpt_idle(sp))
 		return -EINVAL;
+
 	host1x_sync_writel(host, BIT_MASK(sp->id),
 			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
 	wmb();
@@ -95,10 +99,10 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 /* remove a wait pointed to by patch_addr */
 static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
 {
-	u32 override = host1x_class_host_wait_syncpt(
-		HOST1X_SYNCPT_RESERVED, 0);
+	u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0);
 
 	*((u32 *)patch_addr) = override;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 2491bf82e30c..8b4fad0ab35d 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -122,18 +122,20 @@ static void action_submit_complete(struct host1x_waitlist *waiter)
 static void action_wakeup(struct host1x_waitlist *waiter)
 {
 	wait_queue_head_t *wq = waiter->data;
+
 	wake_up(wq);
 }
 
 static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
 {
 	wait_queue_head_t *wq = waiter->data;
+
 	wake_up_interruptible(wq);
 }
 
 typedef void (*action_handler)(struct host1x_waitlist *waiter);
 
-static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
+static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
 	action_submit_complete,
 	action_wakeup,
 	action_wakeup_interruptible,
@@ -209,7 +211,7 @@ static void syncpt_thresh_work(struct work_struct *work)
 				host1x_syncpt_load(host->syncpt + id));
 }
 
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
 			   enum host1x_intr_action action, void *data,
 			   struct host1x_waitlist *waiter, void **ref)
 {
@@ -254,7 +256,7 @@ int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
 	return 0;
 }
 
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref)
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref)
 {
 	struct host1x_waitlist *waiter = ref;
 	struct host1x_syncpt *syncpt;
@@ -277,9 +279,6 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 
 	mutex_init(&host->intr_mutex);
 	host->intr_syncpt_irq = irq_sync;
-	host->intr_wq = create_workqueue("host_syncpt");
-	if (!host->intr_wq)
-		return -ENOMEM;
 
 	for (id = 0; id < nb_pts; ++id) {
 		struct host1x_syncpt *syncpt = host->syncpt + id;
@@ -288,7 +287,7 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 		INIT_LIST_HEAD(&syncpt->intr.wait_head);
 		snprintf(syncpt->intr.thresh_irq_name,
 			 sizeof(syncpt->intr.thresh_irq_name),
-			 "host1x_sp_%02d", id);
+			 "host1x_sp_%02u", id);
 	}
 
 	host1x_intr_start(host);
@@ -299,7 +298,6 @@ int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
 void host1x_intr_deinit(struct host1x *host)
 {
 	host1x_intr_stop(host);
-	destroy_workqueue(host->intr_wq);
 }
 
 void host1x_intr_start(struct host1x *host)
@@ -342,7 +340,7 @@ void host1x_intr_stop(struct host1x *host)
 		if (!list_empty(&syncpt[id].intr.wait_head)) {
 			/* output diagnostics */
 			mutex_unlock(&host->intr_mutex);
-			pr_warn("%s cannot stop syncpt intr id=%d\n",
+			pr_warn("%s cannot stop syncpt intr id=%u\n",
 				__func__, id);
 			return;
 		}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 2b8adf016a05..1370c2bb75b8 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -75,7 +75,7 @@ struct host1x_waitlist {
  *
  * This is a non-blocking api.
  */
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
+int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh,
 	enum host1x_intr_action action, void *data,
 	struct host1x_waitlist *waiter, void **ref);
 
@@ -84,7 +84,7 @@ int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
  * You must call this if you passed non-NULL as ref.
  * @ref the ref returned from host1x_intr_add_action()
  */
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref);
+void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref);
 
 /* Initialize host1x sync point interrupt */
 int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index b4515d544039..a91b7c4a6110 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -161,7 +161,7 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
 
 		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
 			dev_dbg(host->dev,
-				"drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n",
+				"drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n",
 				wait->syncpt_id, sp->name, wait->thresh,
 				host1x_syncpt_read_min(sp));
 
@@ -464,6 +464,7 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
+
 		size += g->words * sizeof(u32);
 	}
 
@@ -514,6 +515,7 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
 	for (i = 0; i < job->num_waitchk; i++) {
 		u32 syncpt_id = job->waitchk[i].syncpt_id;
+
 		if (syncpt_id < host1x_syncpt_nb_pts(host))
 			set_bit(syncpt_id, waitchk_mask);
 	}
@@ -571,14 +573,16 @@ void host1x_job_unpin(struct host1x_job *job)
 
 	for (i = 0; i < job->num_unpins; i++) {
 		struct host1x_job_unpin_data *unpin = &job->unpins[i];
+
 		host1x_bo_unpin(unpin->bo, unpin->sgt);
 		host1x_bo_put(unpin->bo);
 	}
+
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
 		dma_free_wc(job->channel->dev, job->gather_copy_size,
-		            job->gather_copy_mapped, job->gather_copy);
+			    job->gather_copy_mapped, job->gather_copy);
 }
 EXPORT_SYMBOL(host1x_job_unpin);
 
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 6b7fdc1e2ed0..95589328ad52 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -73,7 +73,7 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
 			return NULL;
 	}
 
-	name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
+	name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id,
 			dev ? dev_name(dev) : NULL);
 	if (!name)
 		return NULL;
@@ -110,12 +110,14 @@ EXPORT_SYMBOL(host1x_syncpt_incr_max);
 void host1x_syncpt_restore(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
 		host1x_hw_syncpt_restore(host, sp_base + i);
+
 	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
 		host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+
 	wmb();
 }
 
@@ -126,7 +128,7 @@ void host1x_syncpt_restore(struct host1x *host)
 void host1x_syncpt_save(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
 		if (host1x_syncpt_client_managed(sp_base + i))
@@ -146,6 +148,7 @@ void host1x_syncpt_save(struct host1x *host)
 u32 host1x_syncpt_load(struct host1x_syncpt *sp)
 {
 	u32 val;
+
 	val = host1x_hw_syncpt_load(sp->host, sp);
 	trace_host1x_syncpt_load_min(sp->id, val);
 
@@ -157,10 +160,9 @@ u32 host1x_syncpt_load(struct host1x_syncpt *sp)
  */
 u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 {
-	u32 val;
 	host1x_hw_syncpt_load_wait_base(sp->host, sp);
-	val = sp->base_val;
-	return val;
+
+	return sp->base_val;
 }
 
 /*
@@ -179,6 +181,7 @@ EXPORT_SYMBOL(host1x_syncpt_incr);
 static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
 	host1x_hw_syncpt_load(sp->host, sp);
+
 	return host1x_syncpt_is_expired(sp, thresh);
 }
 
@@ -186,7 +189,7 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
  * Main entrypoint for syncpoint value waits.
  */
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
-			u32 *value)
+		       u32 *value)
 {
 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
 	void *ref;
@@ -201,6 +204,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	if (host1x_syncpt_is_expired(sp, thresh)) {
 		if (value)
 			*value = host1x_syncpt_load(sp);
+
 		return 0;
 	}
 
@@ -209,6 +213,7 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	if (host1x_syncpt_is_expired(sp, thresh)) {
 		if (value)
 			*value = val;
+
 		goto done;
 	}
 
@@ -239,32 +244,42 @@ int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 	/* wait for the syncpoint, or timeout, or signal */
 	while (timeout) {
 		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
-		int remain = wait_event_interruptible_timeout(wq,
+		int remain;
+
+		remain = wait_event_interruptible_timeout(wq,
 				syncpt_load_min_is_expired(sp, thresh),
 				check);
 		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
 			if (value)
 				*value = host1x_syncpt_load(sp);
+
 			err = 0;
+
 			break;
 		}
+
 		if (remain < 0) {
 			err = remain;
 			break;
 		}
+
 		timeout -= check;
+
 		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
 			dev_warn(sp->host->dev,
-				"%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n",
+				"%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
 				 current->comm, sp->id, sp->name,
 				 thresh, timeout);
 
 			host1x_debug_dump_syncpts(sp->host);
+
 			if (check_count == MAX_STUCK_CHECK_COUNT)
 				host1x_debug_dump(sp->host);
+
 			check_count++;
 		}
 	}
+
 	host1x_intr_put_ref(sp->host, sp->id, ref);
 
 done:
@@ -279,7 +294,9 @@ bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
 	u32 current_val;
 	u32 future_val;
+
 	smp_rmb();
+
 	current_val = (u32)atomic_read(&sp->min_val);
 	future_val = (u32)atomic_read(&sp->max_val);
 
@@ -341,14 +358,14 @@ int host1x_syncpt_init(struct host1x *host)
 {
 	struct host1x_syncpt_base *bases;
 	struct host1x_syncpt *syncpt;
-	int i;
+	unsigned int i;
 
-	syncpt = devm_kzalloc(host->dev, sizeof(*syncpt) * host->info->nb_pts,
+	syncpt = devm_kcalloc(host->dev, host->info->nb_pts, sizeof(*syncpt),
 			      GFP_KERNEL);
 	if (!syncpt)
 		return -ENOMEM;
 
-	bases = devm_kzalloc(host->dev, sizeof(*bases) * host->info->nb_bases,
+	bases = devm_kcalloc(host->dev, host->info->nb_bases, sizeof(*bases),
 			     GFP_KERNEL);
 	if (!bases)
 		return -ENOMEM;
@@ -378,6 +395,7 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 					    unsigned long flags)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
+
 	return host1x_syncpt_alloc(host, dev, flags);
 }
 EXPORT_SYMBOL(host1x_syncpt_request);
@@ -398,8 +416,9 @@ EXPORT_SYMBOL(host1x_syncpt_free);
 
 void host1x_syncpt_deinit(struct host1x *host)
 {
-	int i;
 	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
+
 	for (i = 0; i < host->info->nb_pts; i++, sp++)
 		kfree(sp->name);
 }
@@ -407,10 +426,11 @@ void host1x_syncpt_deinit(struct host1x *host)
 /*
  * Read max. It indicates how many operations there are in queue, either in
  * channel or in a software thread.
- * */
+ */
 u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 {
 	smp_rmb();
+
 	return (u32)atomic_read(&sp->max_val);
 }
 EXPORT_SYMBOL(host1x_syncpt_read_max);
@@ -421,6 +441,7 @@ EXPORT_SYMBOL(host1x_syncpt_read_max);
 u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 {
 	smp_rmb();
+
 	return (u32)atomic_read(&sp->min_val);
 }
 EXPORT_SYMBOL(host1x_syncpt_read_min);
@@ -431,25 +452,26 @@ u32 host1x_syncpt_read(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read);
 
-int host1x_syncpt_nb_pts(struct host1x *host)
+unsigned int host1x_syncpt_nb_pts(struct host1x *host)
 {
 	return host->info->nb_pts;
 }
 
-int host1x_syncpt_nb_bases(struct host1x *host)
+unsigned int host1x_syncpt_nb_bases(struct host1x *host)
 {
 	return host->info->nb_bases;
 }
 
-int host1x_syncpt_nb_mlocks(struct host1x *host)
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 {
 	return host->info->nb_mlocks;
 }
 
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id)
+struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 {
 	if (host->info->nb_pts < id)
 		return NULL;
+
 	return host->syncpt + id;
 }
 EXPORT_SYMBOL(host1x_syncpt_get);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 9056465ecd3f..f719205105ac 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -37,7 +37,7 @@ struct host1x_syncpt_base {
 };
 
 struct host1x_syncpt {
-	int id;
+	unsigned int id;
 	atomic_t min_val;
 	atomic_t max_val;
 	u32 base_val;
@@ -58,13 +58,13 @@ int host1x_syncpt_init(struct host1x *host);
 void host1x_syncpt_deinit(struct host1x *host);
 
 /* Return number of sync point supported. */
-int host1x_syncpt_nb_pts(struct host1x *host);
+unsigned int host1x_syncpt_nb_pts(struct host1x *host);
 
 /* Return number of wait bases supported. */
-int host1x_syncpt_nb_bases(struct host1x *host);
+unsigned int host1x_syncpt_nb_bases(struct host1x *host);
 
 /* Return number of mlocks supported. */
-int host1x_syncpt_nb_mlocks(struct host1x *host);
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host);
 
 /*
  * Check sync point sanity. If max is larger than min, there have too many
diff --git a/drivers/gpu/ipu-v3/ipu-dc.c b/drivers/gpu/ipu-v3/ipu-dc.c
index 2f29780e7c68..659475c1e44a 100644
--- a/drivers/gpu/ipu-v3/ipu-dc.c
+++ b/drivers/gpu/ipu-v3/ipu-dc.c
@@ -150,6 +150,9 @@ static void dc_write_tmpl(struct ipu_dc *dc, int word, u32 opcode, u32 operand,
 static int ipu_bus_format_to_map(u32 fmt)
 {
 	switch (fmt) {
+	default:
+		WARN_ON(1);
+		/* fall-through */
 	case MEDIA_BUS_FMT_RGB888_1X24:
 		return IPU_DC_MAP_RGB24;
 	case MEDIA_BUS_FMT_RGB565_1X16:
@@ -162,8 +165,6 @@ static int ipu_bus_format_to_map(u32 fmt)
 		return IPU_DC_MAP_LVDS666;
 	case MEDIA_BUS_FMT_BGR888_1X24:
 		return IPU_DC_MAP_BGR24;
-	default:
-		return -EINVAL;
 	}
 }
 
@@ -178,10 +179,6 @@ int ipu_dc_init_sync(struct ipu_dc *dc, struct ipu_di *di, bool interlaced,
 	dc->di = ipu_di_get_num(di);
 
 	map = ipu_bus_format_to_map(bus_format);
-	if (map < 0) {
-		dev_dbg(priv->dev, "IPU_DISP: No MAP\n");
-		return map;
-	}
 
 	/*
 	 * In interlaced mode we need more counters to create the asymmetric
diff --git a/drivers/gpu/ipu-v3/ipu-di.c b/drivers/gpu/ipu-v3/ipu-di.c
index 359268e3a166..a8d87ddd8a17 100644
--- a/drivers/gpu/ipu-v3/ipu-di.c
+++ b/drivers/gpu/ipu-v3/ipu-di.c
@@ -572,9 +572,6 @@ int ipu_di_init_sync_panel(struct ipu_di *di, struct ipu_di_signal_cfg *sig)
 	dev_dbg(di->ipu->dev, "disp %d: panel size = %d x %d\n",
 		di->id, sig->mode.hactive, sig->mode.vactive);
 
-	if ((sig->mode.vsync_len == 0) || (sig->mode.hsync_len == 0))
-		return -EINVAL;
-
 	dev_dbg(di->ipu->dev, "Clocks: IPU %luHz DI %luHz Needed %luHz\n",
 		clk_get_rate(di->clk_ipu),
 		clk_get_rate(di->clk_di),
diff --git a/drivers/gpu/ipu-v3/ipu-dmfc.c b/drivers/gpu/ipu-v3/ipu-dmfc.c
index 837b1ec22800..42705bb5aaa3 100644
--- a/drivers/gpu/ipu-v3/ipu-dmfc.c
+++ b/drivers/gpu/ipu-v3/ipu-dmfc.c
@@ -45,17 +45,6 @@
 #define DMFC_DP_CHAN_6B_24		16
 #define DMFC_DP_CHAN_6F_29		24
 
-#define DMFC_FIFO_SIZE_64		(3 << 3)
-#define DMFC_FIFO_SIZE_128		(2 << 3)
-#define DMFC_FIFO_SIZE_256		(1 << 3)
-#define DMFC_FIFO_SIZE_512		(0 << 3)
-
-#define DMFC_SEGMENT(x)			((x & 0x7) << 0)
-#define DMFC_BURSTSIZE_128		(0 << 6)
-#define DMFC_BURSTSIZE_64		(1 << 6)
-#define DMFC_BURSTSIZE_32		(2 << 6)
-#define DMFC_BURSTSIZE_16		(3 << 6)
-
 struct dmfc_channel_data {
 	int		ipu_channel;
 	unsigned long	channel_reg;
@@ -104,9 +93,6 @@ struct ipu_dmfc_priv;
 
 struct dmfc_channel {
 	unsigned			slots;
-	unsigned			slotmask;
-	unsigned			segment;
-	int				burstsize;
 	struct ipu_soc			*ipu;
 	struct ipu_dmfc_priv		*priv;
 	const struct dmfc_channel_data	*data;
@@ -117,7 +103,6 @@ struct ipu_dmfc_priv {
 	struct device *dev;
 	struct dmfc_channel channels[DMFC_NUM_CHANNELS];
 	struct mutex mutex;
-	unsigned long bandwidth_per_slot;
 	void __iomem *base;
 	int use_count;
 };
@@ -172,184 +157,6 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc)
 }
 EXPORT_SYMBOL_GPL(ipu_dmfc_disable_channel);
 
-static int ipu_dmfc_setup_channel(struct dmfc_channel *dmfc, int slots,
-		int segment, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	u32 val, field;
-
-	dev_dbg(priv->dev,
-			"dmfc: using %d slots starting from segment %d for IPU channel %d\n",
-			slots, segment, dmfc->data->ipu_channel);
-
-	switch (slots) {
-	case 1:
-		field = DMFC_FIFO_SIZE_64;
-		break;
-	case 2:
-		field = DMFC_FIFO_SIZE_128;
-		break;
-	case 4:
-		field = DMFC_FIFO_SIZE_256;
-		break;
-	case 8:
-		field = DMFC_FIFO_SIZE_512;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	switch (burstsize) {
-	case 16:
-		field |= DMFC_BURSTSIZE_16;
-		break;
-	case 32:
-		field |= DMFC_BURSTSIZE_32;
-		break;
-	case 64:
-		field |= DMFC_BURSTSIZE_64;
-		break;
-	case 128:
-		field |= DMFC_BURSTSIZE_128;
-		break;
-	}
-
-	field |= DMFC_SEGMENT(segment);
-
-	val = readl(priv->base + dmfc->data->channel_reg);
-
-	val &= ~(0xff << dmfc->data->shift);
-	val |= field << dmfc->data->shift;
-
-	writel(val, priv->base + dmfc->data->channel_reg);
-
-	dmfc->slots = slots;
-	dmfc->segment = segment;
-	dmfc->burstsize = burstsize;
-	dmfc->slotmask = ((1 << slots) - 1) << segment;
-
-	return 0;
-}
-
-static int dmfc_bandwidth_to_slots(struct ipu_dmfc_priv *priv,
-		unsigned long bandwidth)
-{
-	int slots = 1;
-
-	while (slots * priv->bandwidth_per_slot < bandwidth)
-		slots *= 2;
-
-	return slots;
-}
-
-static int dmfc_find_slots(struct ipu_dmfc_priv *priv, int slots)
-{
-	unsigned slotmask_need, slotmask_used = 0;
-	int i, segment = 0;
-
-	slotmask_need = (1 << slots) - 1;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		slotmask_used |= priv->channels[i].slotmask;
-
-	while (slotmask_need <= 0xff) {
-		if (!(slotmask_used & slotmask_need))
-			return segment;
-
-		slotmask_need <<= 1;
-		segment++;
-	}
-
-	return -EBUSY;
-}
-
-void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int i;
-
-	dev_dbg(priv->dev, "dmfc: freeing %d slots starting from segment %d\n",
-			dmfc->slots, dmfc->segment);
-
-	mutex_lock(&priv->mutex);
-
-	if (!dmfc->slots)
-		goto out;
-
-	dmfc->slotmask = 0;
-	dmfc->slots = 0;
-	dmfc->segment = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++)
-		priv->channels[i].slotmask = 0;
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0) {
-			priv->channels[i].segment =
-				dmfc_find_slots(priv, priv->channels[i].slots);
-			priv->channels[i].slotmask =
-				((1 << priv->channels[i].slots) - 1) <<
-				priv->channels[i].segment;
-		}
-	}
-
-	for (i = 0; i < DMFC_NUM_CHANNELS; i++) {
-		if (priv->channels[i].slots > 0)
-			ipu_dmfc_setup_channel(&priv->channels[i],
-					priv->channels[i].slots,
-					priv->channels[i].segment,
-					priv->channels[i].burstsize);
-	}
-out:
-	mutex_unlock(&priv->mutex);
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_free_bandwidth);
-
-int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc,
-		unsigned long bandwidth_pixel_per_second, int burstsize)
-{
-	struct ipu_dmfc_priv *priv = dmfc->priv;
-	int slots = dmfc_bandwidth_to_slots(priv, bandwidth_pixel_per_second);
-	int segment = -1, ret = 0;
-
-	dev_dbg(priv->dev, "dmfc: trying to allocate %ldMpixel/s for IPU channel %d\n",
-			bandwidth_pixel_per_second / 1000000,
-			dmfc->data->ipu_channel);
-
-	ipu_dmfc_free_bandwidth(dmfc);
-
-	mutex_lock(&priv->mutex);
-
-	if (slots > 8) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	/* For the MEM_BG channel, first try to allocate twice the slots */
-	if (dmfc->data->ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC)
-		segment = dmfc_find_slots(priv, slots * 2);
-	else if (slots < 2)
-		/* Always allocate at least 128*4 bytes (2 slots) */
-		slots = 2;
-
-	if (segment >= 0)
-		slots *= 2;
-	else
-		segment = dmfc_find_slots(priv, slots);
-	if (segment < 0) {
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ipu_dmfc_setup_channel(dmfc, slots, segment, burstsize);
-
-out:
-	mutex_unlock(&priv->mutex);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(ipu_dmfc_alloc_bandwidth);
-
 void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width)
 {
 	struct ipu_dmfc_priv *priv = dmfc->priv;
@@ -384,7 +191,6 @@ EXPORT_SYMBOL_GPL(ipu_dmfc_get);
 
 void ipu_dmfc_put(struct dmfc_channel *dmfc)
 {
-	ipu_dmfc_free_bandwidth(dmfc);
 }
 EXPORT_SYMBOL_GPL(ipu_dmfc_put);
 
@@ -412,20 +218,15 @@ int ipu_dmfc_init(struct ipu_soc *ipu, struct device *dev, unsigned long base,
 		priv->channels[i].priv = priv;
 		priv->channels[i].ipu = ipu;
 		priv->channels[i].data = &dmfcdata[i];
-	}
-
-	writel(0x0, priv->base + DMFC_WR_CHAN);
-	writel(0x0, priv->base + DMFC_DP_CHAN);
 
-	/*
-	 * We have a total bandwidth of clkrate * 4pixel divided
-	 * into 8 slots.
-	 */
-	priv->bandwidth_per_slot = clk_get_rate(ipu_clk) * 4 / 8;
-
-	dev_dbg(dev, "dmfc: 8 slots with %ldMpixel/s bandwidth each\n",
-			priv->bandwidth_per_slot / 1000000);
+		if (dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_BG_SYNC ||
+		    dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_FG_SYNC ||
+		    dmfcdata[i].ipu_channel == IPUV3_CHANNEL_MEM_DC_SYNC)
+			priv->channels[i].slots = 2;
+	}
 
+	writel(0x00000050, priv->base + DMFC_WR_CHAN);
+	writel(0x00005654, priv->base + DMFC_DP_CHAN);
 	writel(0x202020f6, priv->base + DMFC_WR_CHAN_DEF);
 	writel(0x2020f6f6, priv->base + DMFC_DP_CHAN_DEF);
 	writel(0x00000003, priv->base + DMFC_GENERAL1);
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 2df216b39cc5..5f962bfcb43c 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -52,9 +52,9 @@
  *
  * * muxed: Dual GPUs with a multiplexer chip to switch outputs between GPUs.
  * * muxless: Dual GPUs but only one of them is connected to outputs.
- * 	The other one is merely used to offload rendering, its results
- * 	are copied over PCIe into the framebuffer. On Linux this is
- * 	supported with DRI PRIME.
+ *   The other one is merely used to offload rendering, its results
+ *   are copied over PCIe into the framebuffer. On Linux this is
+ *   supported with DRI PRIME.
  *
  * Hybrid graphics started to appear in the late Naughties and were initially
  * all muxed. Newer laptops moved to a muxless architecture for cost reasons.
@@ -560,21 +560,21 @@ EXPORT_SYMBOL(vga_switcheroo_unlock_ddc);
  * * OFF: Power off the device not in use.
  * * ON: Power on the device not in use.
  * * IGD: Switch to the integrated graphics device.
- * 	Power on the integrated GPU if necessary, power off the discrete GPU.
- * 	Prerequisite is that no user space processes (e.g. Xorg, alsactl)
- * 	have opened device files of the GPUs or the audio client. If the
- * 	switch fails, the user may invoke lsof(8) or fuser(1) on /dev/dri/
- * 	and /dev/snd/controlC1 to identify processes blocking the switch.
+ *   Power on the integrated GPU if necessary, power off the discrete GPU.
+ *   Prerequisite is that no user space processes (e.g. Xorg, alsactl)
+ *   have opened device files of the GPUs or the audio client. If the
+ *   switch fails, the user may invoke lsof(8) or fuser(1) on /dev/dri/
+ *   and /dev/snd/controlC1 to identify processes blocking the switch.
  * * DIS: Switch to the discrete graphics device.
  * * DIGD: Delayed switch to the integrated graphics device.
- * 	This will perform the switch once the last user space process has
- * 	closed the device files of the GPUs and the audio client.
+ *   This will perform the switch once the last user space process has
+ *   closed the device files of the GPUs and the audio client.
  * * DDIS: Delayed switch to the discrete graphics device.
  * * MIGD: Mux-only switch to the integrated graphics device.
- * 	Does not remap console or change the power state of either gpu.
- * 	If the integrated GPU is currently off, the screen will turn black.
- * 	If it is on, the screen will show whatever happens to be in VRAM.
- * 	Either way, the user has to blindly enter the command to switch back.
+ *   Does not remap console or change the power state of either gpu.
+ *   If the integrated GPU is currently off, the screen will turn black.
+ *   If it is on, the screen will show whatever happens to be in VRAM.
+ *   Either way, the user has to blindly enter the command to switch back.
  * * MDIS: Mux-only switch to the discrete graphics device.
  *
  * For GPUs whose power state is controlled by the driver's runtime pm,