48 files changed, 2346 insertions, 678 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 9ec6356d3f0b..b9371ec5e04f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3340,8 +3340,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		drm_sched_stop(&ring->sched);
-
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
@@ -3349,8 +3347,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	if(job)
 		drm_sched_increase_karma(&job->base);
 
-
-
+	/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
 	if (!amdgpu_sriov_vf(adev)) {
 
 		if (!need_full_reset)
@@ -3488,38 +3485,21 @@ end:
 	return r;
 }
 
-static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
-					  struct amdgpu_job *job)
+static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
 {
-	int i;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
-
-		if (!ring || !ring->sched.thread)
-			continue;
+	if (trylock) {
+		if (!mutex_trylock(&adev->lock_reset))
+			return false;
+	} else
+		mutex_lock(&adev->lock_reset);
 
-		if (!adev->asic_reset_res)
-			drm_sched_resubmit_jobs(&ring->sched);
-
-		drm_sched_start(&ring->sched, !adev->asic_reset_res);
-	}
-
-	if (!amdgpu_device_has_dc_support(adev)) {
-		drm_helper_resume_force_mode(adev->ddev);
-	}
-
-	adev->asic_reset_res = 0;
-}
-
-static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
-{
-	mutex_lock(&adev->lock_reset);
 	atomic_inc(&adev->gpu_reset_counter);
 	adev->in_gpu_reset = 1;
 	/* Block kfd: SRIOV would do it separately */
 	if (!amdgpu_sriov_vf(adev))
                 amdgpu_amdkfd_pre_reset(adev);
+
+	return true;
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@@ -3547,40 +3527,42 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 			      struct amdgpu_job *job)
 {
-	int r;
+	struct list_head device_list, *device_list_handle =  NULL;
+	bool need_full_reset, job_signaled;
 	struct amdgpu_hive_info *hive = NULL;
-	bool need_full_reset = false;
 	struct amdgpu_device *tmp_adev = NULL;
-	struct list_head device_list, *device_list_handle =  NULL;
+	int i, r = 0;
 
+	need_full_reset = job_signaled = false;
 	INIT_LIST_HEAD(&device_list);
 
 	dev_info(adev->dev, "GPU reset begin!\n");
 
+	hive = amdgpu_get_xgmi_hive(adev, false);
+
 	/*
-	 * In case of XGMI hive disallow concurrent resets to be triggered
-	 * by different nodes. No point also since the one node already executing
-	 * reset will also reset all the other nodes in the hive.
+	 * Here we trylock to avoid chain of resets executing from
+	 * either trigger by jobs on different adevs in XGMI hive or jobs on
+	 * different schedulers for same device while this TO handler is running.
+	 * We always reset all schedulers for device and all devices for XGMI
+	 * hive so that should take care of them too.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev, 0);
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->reset_lock))
+
+	if (hive && !mutex_trylock(&hive->reset_lock)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
+			 job->base.id, hive->hive_id);
 		return 0;
+	}
 
 	/* Start with adev pre asic reset first for soft reset check.*/
-	amdgpu_device_lock_adev(adev);
-	r = amdgpu_device_pre_asic_reset(adev,
-					 job,
-					 &need_full_reset);
-	if (r) {
-		/*TODO Should we stop ?*/
-		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
-			  r, adev->ddev->unique);
-		adev->asic_reset_res = r;
+	if (!amdgpu_device_lock_adev(adev, !hive)) {
+		DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
+					 job->base.id);
+		return 0;
 	}
 
 	/* Build list of devices to reset */
-	if  (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
+	if  (adev->gmc.xgmi.num_physical_nodes > 1) {
 		if (!hive) {
 			amdgpu_device_unlock_adev(adev);
 			return -ENODEV;
@@ -3597,13 +3579,56 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 		device_list_handle = &device_list;
 	}
 
+	/* block all schedulers and reset given job's ring */
+	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			drm_sched_stop(&ring->sched, &job->base);
+		}
+	}
+
+
+	/*
+	 * Must check guilty signal here since after this point all old
+	 * HW fences are force signaled.
+	 *
+	 * job->base holds a reference to parent fence
+	 */
+	if (job && job->base.s_fence->parent &&
+	    dma_fence_is_signaled(job->base.s_fence->parent))
+		job_signaled = true;
+
+	if (!amdgpu_device_ip_need_full_reset(adev))
+		device_list_handle = &device_list;
+
+	if (job_signaled) {
+		dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
+		goto skip_hw_reset;
+	}
+
+
+	/* Guilty job will be freed after this*/
+	r = amdgpu_device_pre_asic_reset(adev,
+					 job,
+					 &need_full_reset);
+	if (r) {
+		/*TODO Should we stop ?*/
+		DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
+			  r, adev->ddev->unique);
+		adev->asic_reset_res = r;
+	}
+
 retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
 
 		if (tmp_adev == adev)
 			continue;
 
-		amdgpu_device_lock_adev(tmp_adev);
+		amdgpu_device_lock_adev(tmp_adev, false);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
 						 &need_full_reset);
@@ -3627,9 +3652,28 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 			goto retry;
 	}
 
+skip_hw_reset:
+
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-		amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
+		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+			struct amdgpu_ring *ring = tmp_adev->rings[i];
+
+			if (!ring || !ring->sched.thread)
+				continue;
+
+			/* No point to resubmit jobs if we didn't HW reset*/
+			if (!tmp_adev->asic_reset_res && !job_signaled)
+				drm_sched_resubmit_jobs(&ring->sched);
+
+			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+		}
+
+		if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
+			drm_helper_resume_force_mode(tmp_adev->ddev);
+		}
+
+		tmp_adev->asic_reset_res = 0;
 
 		if (r) {
 			/* bad news, how to tell it to userspace ? */
@@ -3642,7 +3686,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		amdgpu_device_unlock_adev(tmp_adev);
 	}
 
-	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
+	if (hive)
 		mutex_unlock(&hive->reset_lock);
 
 	if (r)
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index 56aad288666e..d6690e016f0b 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -463,23 +463,6 @@ static struct drm_crtc_state *malidp_crtc_duplicate_state(struct drm_crtc *crtc)
 	return &state->base;
 }
 
-static void malidp_crtc_reset(struct drm_crtc *crtc)
-{
-	struct malidp_crtc_state *state = NULL;
-
-	if (crtc->state) {
-		state = to_malidp_crtc_state(crtc->state);
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-	}
-
-	kfree(state);
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
-}
-
 static void malidp_crtc_destroy_state(struct drm_crtc *crtc,
 				      struct drm_crtc_state *state)
 {
@@ -493,6 +476,17 @@ static void malidp_crtc_destroy_state(struct drm_crtc *crtc,
 	kfree(mali_state);
 }
 
+static void malidp_crtc_reset(struct drm_crtc *crtc)
+{
+	struct malidp_crtc_state *state =
+		kzalloc(sizeof(*state), GFP_KERNEL);
+
+	if (crtc->state)
+		malidp_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
+}
+
 static int malidp_crtc_enable_vblank(struct drm_crtc *crtc)
 {
 	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 2e0cb4246cbd..79dbeafb9a52 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1423,7 +1423,7 @@ drm_atomic_helper_wait_for_vblanks(struct drm_device *dev,
 		ret = wait_event_timeout(dev->vblank[i].queue,
 				old_state->crtcs[i].last_vblank_count !=
 					drm_crtc_vblank_count(crtc),
-				msecs_to_jiffies(50));
+				msecs_to_jiffies(100));
 
 		WARN(!ret, "[CRTC:%d:%s] vblank wait timed out\n",
 		     crtc->base.id, crtc->name);
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 59ffb6b9c745..ac929f68ff31 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -57,6 +57,29 @@
  */
 
 /**
+ * __drm_atomic_helper_crtc_reset - reset state on CRTC
+ * @crtc: drm CRTC
+ * @crtc_state: CRTC state to assign
+ *
+ * Initializes the newly allocated @crtc_state and assigns it to
+ * the &drm_crtc->state pointer of @crtc, usually required when
+ * initializing the drivers or when called from the &drm_crtc_funcs.reset
+ * hook.
+ *
+ * This is useful for drivers that subclass the CRTC state.
+ */
+void
+__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
+			       struct drm_crtc_state *crtc_state)
+{
+	if (crtc_state)
+		crtc_state->crtc = crtc;
+
+	crtc->state = crtc_state;
+}
+EXPORT_SYMBOL(__drm_atomic_helper_crtc_reset);
+
+/**
  * drm_atomic_helper_crtc_reset - default &drm_crtc_funcs.reset hook for CRTCs
  * @crtc: drm CRTC
  *
@@ -65,14 +88,13 @@
  */
 void drm_atomic_helper_crtc_reset(struct drm_crtc *crtc)
 {
-	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-
-	kfree(crtc->state);
-	crtc->state = kzalloc(sizeof(*crtc->state), GFP_KERNEL);
+	struct drm_crtc_state *crtc_state =
+		kzalloc(sizeof(*crtc->state), GFP_KERNEL);
 
 	if (crtc->state)
-		crtc->state->crtc = crtc;
+		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, crtc_state);
 }
 EXPORT_SYMBOL(drm_atomic_helper_crtc_reset);
 
@@ -314,7 +336,7 @@ EXPORT_SYMBOL(drm_atomic_helper_plane_destroy_state);
  * @conn_state: connector state to assign
  *
  * Initializes the newly allocated @conn_state and assigns it to
- * the &drm_conector->state pointer of @connector, usually required when
+ * the &drm_connector->state pointer of @connector, usually required when
  * initializing the drivers or when called from the &drm_connector_funcs.reset
  * hook.
  *
diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c
index 0e4f25d63fd2..5be28e3295f3 100644
--- a/drivers/gpu/drm/drm_dp_aux_dev.c
+++ b/drivers/gpu/drm/drm_dp_aux_dev.c
@@ -27,15 +27,17 @@
 
 #include <linux/device.h>
 #include <linux/fs.h>
-#include <linux/slab.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/sched/signal.h>
+#include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/uio.h>
-#include <drm/drm_dp_helper.h>
+
 #include <drm/drm_crtc.h>
-#include <drm/drmP.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_print.h>
 
 #include "drm_crtc_helper_internal.h"
 
diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
index e7f4fe2848a5..1c9ea9f7fdaf 100644
--- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c
+++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c
@@ -20,13 +20,15 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
+#include <linux/delay.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/i2c.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+
 #include <drm/drm_dp_dual_mode_helper.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
 
 /**
  * DOC: dp dual mode helpers
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 54a6414c5d96..e6af758a7d22 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -20,16 +20,18 @@
  * OF THIS SOFTWARE.
  */
 
-#include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/errno.h>
-#include <linux/sched.h>
 #include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
 #include <linux/seq_file.h>
+
 #include <drm/drm_dp_helper.h>
-#include <drm/drmP.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #include "drm_crtc_helper_internal.h"
 
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index c630ed157994..da1abca1b9e9 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -20,19 +20,20 @@
  * OF THIS SOFTWARE.
  */
 
-#include <linux/kernel.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/errno.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
-#include <linux/i2c.h>
-#include <drm/drm_dp_mst_helper.h>
-#include <drm/drmP.h>
 
-#include <drm/drm_fixed.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_dp_mst_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fixed.h>
+#include <drm/drm_print.h>
 #include <drm/drm_probe_helper.h>
 
 /**
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 649cfd8b4200..852bdd87cf13 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -27,16 +27,19 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#include <linux/kernel.h>
-#include <linux/slab.h>
+
 #include <linux/hdmi.h>
 #include <linux/i2c.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/vga_switcheroo.h>
-#include <drm/drmP.h>
+
+#include <drm/drm_displayid.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_displayid.h>
+#include <drm/drm_print.h>
 #include <drm/drm_scdc_helper.h>
 
 #include "drm_crtc_internal.h"
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index a4915099aaa9..18d52dc4b21d 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -19,12 +19,15 @@
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA.
 */
 
-#include <linux/module.h>
 #include <linux/firmware.h>
-#include <drm/drmP.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_print.h>
 
 static char edid_firmware[PATH_MAX];
 module_param_string(edid_firmware, edid_firmware, sizeof(edid_firmware), 0644);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 33854c94cb85..b24ddc406bba 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -118,7 +118,6 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
-	unsigned long flags;
 
 	/* Only catch the first event, or when manually re-armed */
 	if (!etnaviv_dump_core)
@@ -135,13 +134,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Add in the active buffer objects */
 	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
@@ -183,14 +180,12 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 			      gpu->buffer.size,
 			      etnaviv_cmdbuf_get_va(&gpu->buffer));
 
-	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
 				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
 				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
 	}
-	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 6d24fea1766b..a813c824e154 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -109,7 +109,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 	}
 
 	/* block scheduler */
-	drm_sched_stop(&gpu->sched);
+	drm_sched_stop(&gpu->sched, sched_job);
 
 	if(sched_job)
 		drm_sched_increase_karma(sched_job);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3bd40a4a6739..5454930f6aa8 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -14596,9 +14596,8 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe)
 		ret = -ENOMEM;
 		goto fail;
 	}
+	__drm_atomic_helper_crtc_reset(&intel_crtc->base, &crtc_state->base);
 	intel_crtc->config = crtc_state;
-	intel_crtc->base.state = &crtc_state->base;
-	crtc_state->base.crtc = &intel_crtc->base;
 
 	primary = intel_primary_plane_create(dev_priv, pipe);
 	if (IS_ERR(primary)) {
@@ -16140,7 +16139,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev)
 
 		__drm_atomic_helper_crtc_destroy_state(&crtc_state->base);
 		memset(crtc_state, 0, sizeof(*crtc_state));
-		crtc_state->base.crtc = &crtc->base;
+		__drm_atomic_helper_crtc_reset(&crtc->base, &crtc_state->base);
 
 		crtc_state->base.active = crtc_state->base.enable =
 			dev_priv->display.get_pipe_config(crtc, crtc_state);
diff --git a/drivers/gpu/drm/lima/lima_pp.c b/drivers/gpu/drm/lima/lima_pp.c
index d29721e177bf..8fef224b93c8 100644
--- a/drivers/gpu/drm/lima/lima_pp.c
+++ b/drivers/gpu/drm/lima/lima_pp.c
@@ -64,7 +64,13 @@ static irqreturn_t lima_pp_bcast_irq_handler(int irq, void *data)
 	struct lima_ip *pp_bcast = data;
 	struct lima_device *dev = pp_bcast->dev;
 	struct lima_sched_pipe *pipe = dev->pipe + lima_pipe_pp;
-	struct drm_lima_m450_pp_frame *frame = pipe->current_task->frame;
+	struct drm_lima_m450_pp_frame *frame;
+
+	/* for shared irq case */
+	if (!pipe->current_task)
+		return IRQ_NONE;
+
+	frame = pipe->current_task->frame;
 
 	for (i = 0; i < frame->num_pp; i++) {
 		struct lima_ip *ip = pipe->processor[i];
diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
index d53bd45f8d96..58a15479d175 100644
--- a/drivers/gpu/drm/lima/lima_sched.c
+++ b/drivers/gpu/drm/lima/lima_sched.c
@@ -258,7 +258,7 @@ static struct dma_fence *lima_sched_run_job(struct drm_sched_job *job)
 static void lima_sched_handle_error_task(struct lima_sched_pipe *pipe,
 					 struct lima_sched_task *task)
 {
-	drm_sched_stop(&pipe->base);
+	drm_sched_stop(&pipe->base, &task->base);
 
 	if (task)
 		drm_sched_increase_karma(&task->base);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index dfdfa766da8f..3772f745589d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -694,14 +694,12 @@ end:
 
 static void dpu_crtc_reset(struct drm_crtc *crtc)
 {
-	struct dpu_crtc_state *cstate;
+	struct dpu_crtc_state *cstate = kzalloc(sizeof(*cstate), GFP_KERNEL);
 
 	if (crtc->state)
 		dpu_crtc_destroy_state(crtc, crtc->state);
 
-	crtc->state = kzalloc(sizeof(*cstate), GFP_KERNEL);
-	if (crtc->state)
-		crtc->state->crtc = crtc;
+	__drm_atomic_helper_crtc_reset(crtc, &cstate->base);
 }
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
index 018df2c3b7ed..45a5bc6ede5d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_lm.c
@@ -15,7 +15,6 @@
 #include "dpu_hwio.h"
 #include "dpu_hw_lm.h"
 #include "dpu_hw_mdss.h"
-#include "dpu_kms.h"
 
 #define LM_OP_MODE                        0x00
 #define LM_OUT_SIZE                       0x04
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
index b0cf63c4e3d7..f33d7007e830 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
@@ -1002,23 +1002,6 @@ mdp5_crtc_atomic_print_state(struct drm_printer *p,
 	drm_printf(p, "\tcmd_mode=%d\n", mdp5_cstate->cmd_mode);
 }
 
-static void mdp5_crtc_reset(struct drm_crtc *crtc)
-{
-	struct mdp5_crtc_state *mdp5_cstate;
-
-	if (crtc->state) {
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-		kfree(to_mdp5_crtc_state(crtc->state));
-	}
-
-	mdp5_cstate = kzalloc(sizeof(*mdp5_cstate), GFP_KERNEL);
-
-	if (mdp5_cstate) {
-		mdp5_cstate->base.crtc = crtc;
-		crtc->state = &mdp5_cstate->base;
-	}
-}
-
 static struct drm_crtc_state *
 mdp5_crtc_duplicate_state(struct drm_crtc *crtc)
 {
@@ -1046,6 +1029,17 @@ static void mdp5_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state
 	kfree(mdp5_cstate);
 }
 
+static void mdp5_crtc_reset(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc_state *mdp5_cstate =
+		kzalloc(sizeof(*mdp5_cstate), GFP_KERNEL);
+
+	if (crtc->state)
+		mdp5_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base);
+}
+
 static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = mdp5_crtc_destroy,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index 2e7a0c347ddb..93754743090f 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -420,16 +420,6 @@ nv50_head_atomic_duplicate_state(struct drm_crtc *crtc)
 }
 
 static void
-__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
-			       struct drm_crtc_state *state)
-{
-	if (crtc->state)
-		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
-	crtc->state = state;
-	crtc->state->crtc = crtc;
-}
-
-static void
 nv50_head_reset(struct drm_crtc *crtc)
 {
 	struct nv50_head_atom *asyh;
@@ -437,6 +427,9 @@ nv50_head_reset(struct drm_crtc *crtc)
 	if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL))))
 		return;
 
+	if (crtc->state)
+		nv50_head_atomic_destroy_state(crtc, crtc->state);
+
 	__drm_atomic_helper_crtc_reset(crtc, &asyh->state);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
index c80b96789c31..2b44ba5cf4b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bus/nv04.c
@@ -26,8 +26,6 @@
 
 #include <subdev/gpio.h>
 
-#include <subdev/gpio.h>
-
 static void
 nv04_bus_intr(struct nvkm_bus *bus)
 {
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index e36dbb4df867..287a35d42f0f 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -131,6 +131,15 @@ config DRM_PANEL_ORISETECH_OTM8009A
 	  Say Y here if you want to enable support for Orise Technology
 	  otm8009a 480x800 dsi 2dl panel.
 
+config DRM_PANEL_OSD_OSD101T2587_53TS
+	tristate "OSD OSD101T2587-53TS DSI 1920x1200 video mode panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for One Stop Displays
+	  OSD101T2587-53TS 10.1" 1920x1200 dsi panel.
+
 config DRM_PANEL_PANASONIC_VVX10F034N00
 	tristate "Panasonic VVX10F034N00 1920x1200 video mode panel"
 	depends on OF
@@ -200,6 +209,15 @@ config DRM_PANEL_SAMSUNG_S6E63J0X03
 	depends on BACKLIGHT_CLASS_DEVICE
 	select VIDEOMODE_HELPERS
 
+config DRM_PANEL_SAMSUNG_S6E63M0
+	tristate "Samsung S6E63M0 RGB/SPI panel"
+	depends on OF
+	depends on SPI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for Samsung S6E63M0
+	  AMOLED LCD panel.
+
 config DRM_PANEL_SAMSUNG_S6E8AA0
 	tristate "Samsung S6E8AA0 DSI video mode panel"
 	depends on OF
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 78e3dc376bdd..fb0cb3aaa9e6 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -11,6 +11,7 @@ obj-$(CONFIG_DRM_PANEL_KINGDISPLAY_KD097D04) += panel-kingdisplay-kd097d04.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
 obj-$(CONFIG_DRM_PANEL_OLIMEX_LCD_OLINUXINO) += panel-olimex-lcd-olinuxino.o
 obj-$(CONFIG_DRM_PANEL_ORISETECH_OTM8009A) += panel-orisetech-otm8009a.o
+obj-$(CONFIG_DRM_PANEL_OSD_OSD101T2587_53TS) += panel-osd-osd101t2587-53ts.o
 obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += panel-panasonic-vvx10f034n00.o
 obj-$(CONFIG_DRM_PANEL_RASPBERRYPI_TOUCHSCREEN) += panel-raspberrypi-touchscreen.o
 obj-$(CONFIG_DRM_PANEL_RAYDIUM_RM68200) += panel-raydium-rm68200.o
@@ -20,6 +21,7 @@ obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6D16D0) += panel-samsung-s6d16d0.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63J0X03) += panel-samsung-s6e63j0x03.o
+obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E63M0) += panel-samsung-s6e63m0.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o
 obj-$(CONFIG_DRM_PANEL_SEIKO_43WVF1G) += panel-seiko-43wvf1g.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o
diff --git a/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
new file mode 100644
index 000000000000..e0e20ecff916
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-osd-osd101t2587-53ts.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  Copyright (C) 2019 Texas Instruments Incorporated - http://www.ti.com
+ *  Author: Peter Ujfalusi <peter.ujfalusi@ti.com>
+ */
+
+#include <linux/backlight.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/regulator/consumer.h>
+
+#include <drm/drm_crtc.h>
+#include <drm/drm_device.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+struct osd101t2587_panel {
+	struct drm_panel base;
+	struct mipi_dsi_device *dsi;
+
+	struct backlight_device *backlight;
+	struct regulator *supply;
+
+	bool prepared;
+	bool enabled;
+
+	const struct drm_display_mode *default_mode;
+};
+
+static inline struct osd101t2587_panel *ti_osd_panel(struct drm_panel *panel)
+{
+	return container_of(panel, struct osd101t2587_panel, base);
+}
+
+static int osd101t2587_panel_disable(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (!osd101t2587->enabled)
+		return 0;
+
+	backlight_disable(osd101t2587->backlight);
+
+	ret = mipi_dsi_shutdown_peripheral(osd101t2587->dsi);
+
+	osd101t2587->enabled = false;
+
+	return ret;
+}
+
+static int osd101t2587_panel_unprepare(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+
+	if (!osd101t2587->prepared)
+		return 0;
+
+	regulator_disable(osd101t2587->supply);
+	osd101t2587->prepared = false;
+
+	return 0;
+}
+
+static int osd101t2587_panel_prepare(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (osd101t2587->prepared)
+		return 0;
+
+	ret = regulator_enable(osd101t2587->supply);
+	if (!ret)
+		osd101t2587->prepared = true;
+
+	return ret;
+}
+
+static int osd101t2587_panel_enable(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	int ret;
+
+	if (osd101t2587->enabled)
+		return 0;
+
+	ret = mipi_dsi_turn_on_peripheral(osd101t2587->dsi);
+	if (ret)
+		return ret;
+
+	backlight_enable(osd101t2587->backlight);
+
+	osd101t2587->enabled = true;
+
+	return ret;
+}
+
+static const struct drm_display_mode default_mode_osd101t2587 = {
+	.clock = 164400,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 152,
+	.hsync_end = 1920 + 152 + 52,
+	.htotal = 1920 + 152 + 52 + 20,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 24,
+	.vsync_end = 1200 + 24 + 6,
+	.vtotal = 1200 + 24 + 6 + 48,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static int osd101t2587_panel_get_modes(struct drm_panel *panel)
+{
+	struct osd101t2587_panel *osd101t2587 = ti_osd_panel(panel);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, osd101t2587->default_mode);
+	if (!mode) {
+		dev_err(panel->drm->dev, "failed to add mode %ux%ux@%u\n",
+			osd101t2587->default_mode->hdisplay,
+			osd101t2587->default_mode->vdisplay,
+			osd101t2587->default_mode->vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	drm_mode_probed_add(panel->connector, mode);
+
+	panel->connector->display_info.width_mm = 217;
+	panel->connector->display_info.height_mm = 136;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs osd101t2587_panel_funcs = {
+	.disable = osd101t2587_panel_disable,
+	.unprepare = osd101t2587_panel_unprepare,
+	.prepare = osd101t2587_panel_prepare,
+	.enable = osd101t2587_panel_enable,
+	.get_modes = osd101t2587_panel_get_modes,
+};
+
+static const struct of_device_id osd101t2587_of_match[] = {
+	{
+		.compatible = "osddisplays,osd101t2587-53ts",
+		.data = &default_mode_osd101t2587,
+	}, {
+		/* sentinel */
+	}
+};
+MODULE_DEVICE_TABLE(of, osd101t2587_of_match);
+
+static int osd101t2587_panel_add(struct osd101t2587_panel *osd101t2587)
+{
+	struct device *dev = &osd101t2587->dsi->dev;
+
+	osd101t2587->supply = devm_regulator_get(dev, "power");
+	if (IS_ERR(osd101t2587->supply))
+		return PTR_ERR(osd101t2587->supply);
+
+	osd101t2587->backlight = devm_of_find_backlight(dev);
+	if (IS_ERR(osd101t2587->backlight))
+		return PTR_ERR(osd101t2587->backlight);
+
+	drm_panel_init(&osd101t2587->base);
+	osd101t2587->base.funcs = &osd101t2587_panel_funcs;
+	osd101t2587->base.dev = &osd101t2587->dsi->dev;
+
+	return drm_panel_add(&osd101t2587->base);
+}
+
+static int osd101t2587_panel_probe(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587;
+	const struct of_device_id *id;
+	int ret;
+
+	id = of_match_node(osd101t2587_of_match, dsi->dev.of_node);
+	if (!id)
+		return -ENODEV;
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO |
+			  MIPI_DSI_MODE_VIDEO_BURST |
+			  MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+			  MIPI_DSI_MODE_EOT_PACKET;
+
+	osd101t2587 = devm_kzalloc(&dsi->dev, sizeof(*osd101t2587), GFP_KERNEL);
+	if (!osd101t2587)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, osd101t2587);
+
+	osd101t2587->dsi = dsi;
+	osd101t2587->default_mode = id->data;
+
+	ret = osd101t2587_panel_add(osd101t2587);
+	if (ret < 0)
+		return ret;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret)
+		drm_panel_remove(&osd101t2587->base);
+
+	return ret;
+}
+
+static int osd101t2587_panel_remove(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587 = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = osd101t2587_panel_disable(&osd101t2587->base);
+	if (ret < 0)
+		dev_warn(&dsi->dev, "failed to disable panel: %d\n", ret);
+
+	osd101t2587_panel_unprepare(&osd101t2587->base);
+
+	drm_panel_remove(&osd101t2587->base);
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(&dsi->dev, "failed to detach from DSI host: %d\n", ret);
+
+	return ret;
+}
+
+static void osd101t2587_panel_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct osd101t2587_panel *osd101t2587 = mipi_dsi_get_drvdata(dsi);
+
+	osd101t2587_panel_disable(&osd101t2587->base);
+	osd101t2587_panel_unprepare(&osd101t2587->base);
+}
+
+static struct mipi_dsi_driver osd101t2587_panel_driver = {
+	.driver = {
+		.name = "panel-osd-osd101t2587-53ts",
+		.of_match_table = osd101t2587_of_match,
+	},
+	.probe = osd101t2587_panel_probe,
+	.remove = osd101t2587_panel_remove,
+	.shutdown = osd101t2587_panel_shutdown,
+};
+module_mipi_dsi_driver(osd101t2587_panel_driver);
+
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@ti.com>");
+MODULE_DESCRIPTION("OSD101T2587-53TS DSI panel");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index 2c9c9722734f..1b708c85fd27 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -57,7 +57,6 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_mipi_dsi.h>
-#include <drm/drm_panel.h>
 
 #define RPI_DSI_DRIVER_NAME "rpi-ts-dsi"
 
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
new file mode 100644
index 000000000000..142d395ea512
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * S6E63M0 AMOLED LCD drm_panel driver.
+ *
+ * Copyright (C) 2019 Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>
+ * Derived from drivers/gpu/drm/panel-samsung-ld9040.c
+ *
+ * Andrzej Hajda <a.hajda@samsung.com>
+ */
+
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+#include <linux/backlight.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <video/mipi_display.h>
+
+/* Manufacturer Command Set */
+#define MCS_ELVSS_ON                0xb1
+#define MCS_MIECTL1                0xc0
+#define MCS_BCMODE                              0xc1
+#define MCS_DISCTL   0xf2
+#define MCS_SRCCTL           0xf6
+#define MCS_IFCTL                       0xf7
+#define MCS_PANELCTL         0xF8
+#define MCS_PGAMMACTL                   0xfa
+
+#define NUM_GAMMA_LEVELS             11
+#define GAMMA_TABLE_COUNT           23
+
+#define DATA_MASK                                       0x100
+
+#define MAX_BRIGHTNESS              (NUM_GAMMA_LEVELS - 1)
+
+/* array of gamma tables for gamma value 2.2 */
+static u8 const s6e63m0_gamma_22[NUM_GAMMA_LEVELS][GAMMA_TABLE_COUNT] = {
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x78, 0xEC, 0x3D, 0xC8,
+	  0xC2, 0xB6, 0xC4, 0xC7, 0xB6, 0xD5, 0xD7,
+	  0xCC, 0x00, 0x39, 0x00, 0x36, 0x00, 0x51 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x73, 0x4A, 0x3D, 0xC0,
+	  0xC2, 0xB1, 0xBB, 0xBE, 0xAC, 0xCE, 0xCF,
+	  0xC5, 0x00, 0x5D, 0x00, 0x5E, 0x00, 0x82 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x70, 0x51, 0x3E, 0xBF,
+	  0xC1, 0xAF, 0xB9, 0xBC, 0xAB, 0xCC, 0xCC,
+	  0xC2, 0x00, 0x65, 0x00, 0x67, 0x00, 0x8D },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x6C, 0x54, 0x3A, 0xBC,
+	  0xBF, 0xAC, 0xB7, 0xBB, 0xA9, 0xC9, 0xC9,
+	  0xBE, 0x00, 0x71, 0x00, 0x73, 0x00, 0x9E },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x69, 0x54, 0x37, 0xBB,
+	  0xBE, 0xAC, 0xB4, 0xB7, 0xA6, 0xC7, 0xC8,
+	  0xBC, 0x00, 0x7B, 0x00, 0x7E, 0x00, 0xAB },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x66, 0x55, 0x34, 0xBA,
+	  0xBD, 0xAB, 0xB1, 0xB5, 0xA3, 0xC5, 0xC6,
+	  0xB9, 0x00, 0x85, 0x00, 0x88, 0x00, 0xBA },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x63, 0x53, 0x31, 0xB8,
+	  0xBC, 0xA9, 0xB0, 0xB5, 0xA2, 0xC4, 0xC4,
+	  0xB8, 0x00, 0x8B, 0x00, 0x8E, 0x00, 0xC2 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x62, 0x54, 0x30, 0xB9,
+	  0xBB, 0xA9, 0xB0, 0xB3, 0xA1, 0xC1, 0xC3,
+	  0xB7, 0x00, 0x91, 0x00, 0x95, 0x00, 0xDA },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x66, 0x58, 0x34, 0xB6,
+	  0xBA, 0xA7, 0xAF, 0xB3, 0xA0, 0xC1, 0xC2,
+	  0xB7, 0x00, 0x97, 0x00, 0x9A, 0x00, 0xD1 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x64, 0x56, 0x33, 0xB6,
+	  0xBA, 0xA8, 0xAC, 0xB1, 0x9D, 0xC1, 0xC1,
+	  0xB7, 0x00, 0x9C, 0x00, 0x9F, 0x00, 0xD6 },
+	{ MCS_PGAMMACTL, 0x00,
+	  0x18, 0x08, 0x24, 0x5f, 0x50, 0x2d, 0xB6,
+	  0xB9, 0xA7, 0xAd, 0xB1, 0x9f, 0xbe, 0xC0,
+	  0xB5, 0x00, 0xa0, 0x00, 0xa4, 0x00, 0xdb },
+};
+
+struct s6e63m0 {
+	struct device *dev;
+	struct drm_panel panel;
+	struct backlight_device *bl_dev;
+
+	struct regulator_bulk_data supplies[2];
+	struct gpio_desc *reset_gpio;
+
+	bool prepared;
+	bool enabled;
+
+	/*
+	 * This field is tested by functions directly accessing bus before
+	 * transfer, transfer is skipped if it is set. In case of transfer
+	 * failure or unexpected response the field is set to error value.
+	 * Such construct allows to eliminate many checks in higher level
+	 * functions.
+	 */
+	int error;
+};
+
+static const struct drm_display_mode default_mode = {
+	.clock		= 25628,
+	.hdisplay	= 480,
+	.hsync_start	= 480 + 16,
+	.hsync_end	= 480 + 16 + 2,
+	.htotal		= 480 + 16 + 2 + 16,
+	.vdisplay	= 800,
+	.vsync_start	= 800 + 28,
+	.vsync_end	= 800 + 28 + 2,
+	.vtotal		= 800 + 28 + 2 + 1,
+	.vrefresh	= 60,
+	.width_mm	= 53,
+	.height_mm	= 89,
+	.flags		= DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static inline struct s6e63m0 *panel_to_s6e63m0(struct drm_panel *panel)
+{
+	return container_of(panel, struct s6e63m0, panel);
+}
+
+static int s6e63m0_clear_error(struct s6e63m0 *ctx)
+{
+	int ret = ctx->error;
+
+	ctx->error = 0;
+	return ret;
+}
+
+static int s6e63m0_spi_write_word(struct s6e63m0 *ctx, u16 data)
+{
+	struct spi_device *spi = to_spi_device(ctx->dev);
+	struct spi_transfer xfer = {
+		.len	= 2,
+		.tx_buf = &data,
+	};
+	struct spi_message msg;
+
+	spi_message_init(&msg);
+	spi_message_add_tail(&xfer, &msg);
+
+	return spi_sync(spi, &msg);
+}
+
+static void s6e63m0_dcs_write(struct s6e63m0 *ctx, const u8 *data, size_t len)
+{
+	int ret = 0;
+
+	if (ctx->error < 0 || len == 0)
+		return;
+
+	DRM_DEV_DEBUG(ctx->dev, "writing dcs seq: %*ph\n", (int)len, data);
+	ret = s6e63m0_spi_write_word(ctx, *data);
+
+	while (!ret && --len) {
+		++data;
+		ret = s6e63m0_spi_write_word(ctx, *data | DATA_MASK);
+	}
+
+	if (ret) {
+		DRM_DEV_ERROR(ctx->dev, "error %d writing dcs seq: %*ph\n", ret,
+			      (int)len, data);
+		ctx->error = ret;
+	}
+
+	usleep_range(300, 310);
+}
+
+#define s6e63m0_dcs_write_seq_static(ctx, seq ...) \
+	({ \
+		static const u8 d[] = { seq }; \
+		s6e63m0_dcs_write(ctx, d, ARRAY_SIZE(d)); \
+	})
+
+static void s6e63m0_init(struct s6e63m0 *ctx)
+{
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL,
+				     0x01, 0x27, 0x27, 0x07, 0x07, 0x54, 0x9f,
+				     0x63, 0x86, 0x1a, 0x33, 0x0d, 0x00, 0x00);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_DISCTL,
+				     0x02, 0x03, 0x1c, 0x10, 0x10);
+	s6e63m0_dcs_write_seq_static(ctx, MCS_IFCTL,
+				     0x03, 0x00, 0x00);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL,
+				     0x00, 0x18, 0x08, 0x24, 0x64, 0x56, 0x33,
+				     0xb6, 0xba, 0xa8, 0xac, 0xb1, 0x9d, 0xc1,
+				     0xc1, 0xb7, 0x00, 0x9c, 0x00, 0x9f, 0x00,
+				     0xd6);
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL,
+				     0x01);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_SRCCTL,
+				     0x00, 0x8c, 0x07);
+	s6e63m0_dcs_write_seq_static(ctx, 0xb3,
+				     0xc);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb5,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb6,
+				     0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44,
+				     0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66,
+				     0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb7,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e, 0x00, 0x00, 0x11,
+				     0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55,
+				     0x66, 0x66, 0x66, 0x66, 0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb9,
+				     0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17,
+				     0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b,
+				     0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a,
+				     0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23,
+				     0x21, 0x20, 0x1e, 0x1e);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xba,
+				     0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44,
+				     0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66,
+				     0x66, 0x66);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_BCMODE,
+				     0x4d, 0x96, 0x1d, 0x00, 0x00, 0x01, 0xdf,
+				     0x00, 0x00, 0x03, 0x1f, 0x00, 0x00, 0x00,
+				     0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x06,
+				     0x09, 0x0d, 0x0f, 0x12, 0x15, 0x18);
+
+	s6e63m0_dcs_write_seq_static(ctx, 0xb2,
+				     0x10, 0x10, 0x0b, 0x05);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_MIECTL1,
+				     0x01);
+
+	s6e63m0_dcs_write_seq_static(ctx, MCS_ELVSS_ON,
+				     0x0b);
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_EXIT_SLEEP_MODE);
+}
+
+static int s6e63m0_power_on(struct s6e63m0 *ctx)
+{
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+	if (ret < 0)
+		return ret;
+
+	msleep(25);
+
+	gpiod_set_value(ctx->reset_gpio, 0);
+	msleep(120);
+
+	return 0;
+}
+
+static int s6e63m0_power_off(struct s6e63m0 *ctx)
+{
+	int ret;
+
+	gpiod_set_value(ctx->reset_gpio, 1);
+	msleep(120);
+
+	ret = regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int s6e63m0_disable(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+
+	if (!ctx->enabled)
+		return 0;
+
+	backlight_disable(ctx->bl_dev);
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_ENTER_SLEEP_MODE);
+	msleep(200);
+
+	ctx->enabled = false;
+
+	return 0;
+}
+
+static int s6e63m0_unprepare(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+	int ret;
+
+	if (!ctx->prepared)
+		return 0;
+
+	s6e63m0_clear_error(ctx);
+
+	ret = s6e63m0_power_off(ctx);
+	if (ret < 0)
+		return ret;
+
+	ctx->prepared = false;
+
+	return 0;
+}
+
+static int s6e63m0_prepare(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+	int ret;
+
+	if (ctx->prepared)
+		return 0;
+
+	ret = s6e63m0_power_on(ctx);
+	if (ret < 0)
+		return ret;
+
+	s6e63m0_init(ctx);
+
+	ret = s6e63m0_clear_error(ctx);
+
+	if (ret < 0)
+		s6e63m0_unprepare(panel);
+
+	ctx->prepared = true;
+
+	return ret;
+}
+
+static int s6e63m0_enable(struct drm_panel *panel)
+{
+	struct s6e63m0 *ctx = panel_to_s6e63m0(panel);
+
+	if (ctx->enabled)
+		return 0;
+
+	s6e63m0_dcs_write_seq_static(ctx, MIPI_DCS_SET_DISPLAY_ON);
+
+	backlight_enable(ctx->bl_dev);
+
+	ctx->enabled = true;
+
+	return 0;
+}
+
+static int s6e63m0_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &default_mode);
+	if (!mode) {
+		DRM_ERROR("failed to add mode %ux%ux@%u\n",
+			  default_mode.hdisplay, default_mode.vdisplay,
+			  default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	return 1;
+}
+
+static const struct drm_panel_funcs s6e63m0_drm_funcs = {
+	.disable	= s6e63m0_disable,
+	.unprepare	= s6e63m0_unprepare,
+	.prepare	= s6e63m0_prepare,
+	.enable		= s6e63m0_enable,
+	.get_modes	= s6e63m0_get_modes,
+};
+
+static int s6e63m0_set_brightness(struct backlight_device *bd)
+{
+	struct s6e63m0 *ctx = bl_get_data(bd);
+
+	int brightness = bd->props.brightness;
+
+	/* disable and set new gamma */
+	s6e63m0_dcs_write(ctx, s6e63m0_gamma_22[brightness],
+			  ARRAY_SIZE(s6e63m0_gamma_22[brightness]));
+
+	/* update gamma table. */
+	s6e63m0_dcs_write_seq_static(ctx, MCS_PGAMMACTL, 0x01);
+
+	return s6e63m0_clear_error(ctx);
+}
+
+static const struct backlight_ops s6e63m0_backlight_ops = {
+	.update_status	= s6e63m0_set_brightness,
+};
+
+static int s6e63m0_backlight_register(struct s6e63m0 *ctx)
+{
+	struct backlight_properties props = {
+		.type		= BACKLIGHT_RAW,
+		.brightness	= MAX_BRIGHTNESS,
+		.max_brightness = MAX_BRIGHTNESS
+	};
+	struct device *dev = ctx->dev;
+	int ret = 0;
+
+	ctx->bl_dev = devm_backlight_device_register(dev, "panel", dev, ctx,
+						     &s6e63m0_backlight_ops,
+						     &props);
+	if (IS_ERR(ctx->bl_dev)) {
+		ret = PTR_ERR(ctx->bl_dev);
+		DRM_DEV_ERROR(dev, "error registering backlight device (%d)\n",
+			      ret);
+	}
+
+	return ret;
+}
+
+static int s6e63m0_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct s6e63m0 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(struct s6e63m0), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ctx);
+
+	ctx->dev = dev;
+	ctx->enabled = false;
+	ctx->prepared = false;
+
+	ctx->supplies[0].supply = "vdd3";
+	ctx->supplies[1].supply = "vci";
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies),
+				      ctx->supplies);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "failed to get regulators: %d\n", ret);
+		return ret;
+	}
+
+	ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(ctx->reset_gpio)) {
+		DRM_DEV_ERROR(dev, "cannot get reset-gpios %ld\n",
+			      PTR_ERR(ctx->reset_gpio));
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	spi->bits_per_word = 9;
+	spi->mode = SPI_MODE_3;
+	ret = spi_setup(spi);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dev, "spi setup failed.\n");
+		return ret;
+	}
+
+	drm_panel_init(&ctx->panel);
+	ctx->panel.dev = dev;
+	ctx->panel.funcs = &s6e63m0_drm_funcs;
+
+	ret = s6e63m0_backlight_register(ctx);
+	if (ret < 0)
+		return ret;
+
+	return drm_panel_add(&ctx->panel);
+}
+
+static int s6e63m0_remove(struct spi_device *spi)
+{
+	struct s6e63m0 *ctx = spi_get_drvdata(spi);
+
+	drm_panel_remove(&ctx->panel);
+
+	return 0;
+}
+
+static const struct of_device_id s6e63m0_of_match[] = {
+	{ .compatible = "samsung,s6e63m0" },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, s6e63m0_of_match);
+
+static struct spi_driver s6e63m0_driver = {
+	.probe			= s6e63m0_probe,
+	.remove			= s6e63m0_remove,
+	.driver			= {
+		.name		= "panel-samsung-s6e63m0",
+		.of_match_table = s6e63m0_of_match,
+	},
+};
+module_spi_driver(s6e63m0_driver);
+
+MODULE_AUTHOR("Paweł Chmiel <pawel.mikolaj.chmiel@gmail.com>");
+MODULE_DESCRIPTION("s6e63m0 LCD Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 569be4efd8d1..c22c4719cd2c 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1096,6 +1096,56 @@ static const struct panel_desc dlc_dlc1010gig = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
 };
 
+static const struct drm_display_mode edt_et035012dm6_mode = {
+	.clock = 6500,
+	.hdisplay = 320,
+	.hsync_start = 320 + 20,
+	.hsync_end = 320 + 20 + 30,
+	.htotal = 320 + 20 + 68,
+	.vdisplay = 240,
+	.vsync_start = 240 + 4,
+	.vsync_end = 240 + 4 + 4,
+	.vtotal = 240 + 4 + 4 + 14,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc edt_et035012dm6 = {
+	.modes = &edt_et035012dm6_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 70,
+		.height = 52,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+};
+
+static const struct drm_display_mode edt_etm0430g0dh6_mode = {
+	.clock = 9000,
+	.hdisplay = 480,
+	.hsync_start = 480 + 2,
+	.hsync_end = 480 + 2 + 41,
+	.htotal = 480 + 2 + 41 + 2,
+	.vdisplay = 272,
+	.vsync_start = 272 + 2,
+	.vsync_end = 272 + 2 + 10,
+	.vtotal = 272 + 2 + 10 + 2,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc edt_etm0430g0dh6 = {
+	.modes = &edt_etm0430g0dh6_mode,
+	.num_modes = 1,
+	.bpc = 6,
+	.size = {
+		.width = 95,
+		.height = 54,
+	},
+};
+
 static const struct drm_display_mode edt_et057090dhu_mode = {
 	.clock = 25175,
 	.hdisplay = 640,
@@ -1160,6 +1210,33 @@ static const struct panel_desc edt_etm0700g0bdh6 = {
 	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE,
 };
 
+static const struct display_timing evervision_vgg804821_timing = {
+	.pixelclock = { 27600000, 33300000, 50000000 },
+	.hactive = { 800, 800, 800 },
+	.hfront_porch = { 40, 66, 70 },
+	.hback_porch = { 40, 67, 70 },
+	.hsync_len = { 40, 67, 70 },
+	.vactive = { 480, 480, 480 },
+	.vfront_porch = { 6, 10, 10 },
+	.vback_porch = { 7, 11, 11 },
+	.vsync_len = { 7, 11, 11 },
+	.flags = DISPLAY_FLAGS_HSYNC_HIGH | DISPLAY_FLAGS_VSYNC_HIGH |
+		 DISPLAY_FLAGS_DE_HIGH | DISPLAY_FLAGS_PIXDATA_NEGEDGE |
+		 DISPLAY_FLAGS_SYNC_NEGEDGE,
+};
+
+static const struct panel_desc evervision_vgg804821 = {
+	.timings = &evervision_vgg804821_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 108,
+		.height = 64,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
+};
+
 static const struct drm_display_mode foxlink_fl500wvr00_a0t_mode = {
 	.clock = 32260,
 	.hdisplay = 800,
@@ -1184,6 +1261,29 @@ static const struct panel_desc foxlink_fl500wvr00_a0t = {
 	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
 };
 
+static const struct drm_display_mode friendlyarm_hd702e_mode = {
+	.clock		= 67185,
+	.hdisplay	= 800,
+	.hsync_start	= 800 + 20,
+	.hsync_end	= 800 + 20 + 24,
+	.htotal		= 800 + 20 + 24 + 20,
+	.vdisplay	= 1280,
+	.vsync_start	= 1280 + 4,
+	.vsync_end	= 1280 + 4 + 8,
+	.vtotal		= 1280 + 4 + 8 + 4,
+	.vrefresh	= 60,
+	.flags		= DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc friendlyarm_hd702e = {
+	.modes = &friendlyarm_hd702e_mode,
+	.num_modes = 1,
+	.size = {
+		.width	= 94,
+		.height	= 151,
+	},
+};
+
 static const struct drm_display_mode giantplus_gpg482739qs5_mode = {
 	.clock = 9000,
 	.hdisplay = 480,
@@ -2355,6 +2455,31 @@ static const struct panel_desc starry_kr122ea0sra = {
 	},
 };
 
+static const struct drm_display_mode tfc_s9700rtwv43tr_01b_mode = {
+	.clock = 30000,
+	.hdisplay = 800,
+	.hsync_start = 800 + 39,
+	.hsync_end = 800 + 39 + 47,
+	.htotal = 800 + 39 + 47 + 39,
+	.vdisplay = 480,
+	.vsync_start = 480 + 13,
+	.vsync_end = 480 + 13 + 2,
+	.vtotal = 480 + 13 + 2 + 29,
+	.vrefresh = 62,
+};
+
+static const struct panel_desc tfc_s9700rtwv43tr_01b = {
+	.modes = &tfc_s9700rtwv43tr_01b_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 155,
+		.height = 90,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+};
+
 static const struct display_timing tianma_tm070jdhg30_timing = {
 	.pixelclock = { 62600000, 68200000, 78100000 },
 	.hactive = { 1280, 1280, 1280 },
@@ -2508,6 +2633,32 @@ static const struct panel_desc urt_umsh_8596md_parallel = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct drm_display_mode vl050_8048nt_c01_mode = {
+	.clock = 33333,
+	.hdisplay = 800,
+	.hsync_start = 800 + 210,
+	.hsync_end = 800 + 210 + 20,
+	.htotal = 800 + 210 + 20 + 46,
+	.vdisplay =  480,
+	.vsync_start = 480 + 22,
+	.vsync_end = 480 + 22 + 10,
+	.vtotal = 480 + 22 + 10 + 23,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc vl050_8048nt_c01 = {
+	.modes = &vl050_8048nt_c01_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 120,
+		.height = 76,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_POSEDGE,
+};
+
 static const struct drm_display_mode winstar_wf35ltiacd_mode = {
 	.clock = 6410,
 	.hdisplay = 320,
@@ -2646,6 +2797,12 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "dlc,dlc1010gig",
 		.data = &dlc_dlc1010gig,
 	}, {
+		.compatible = "edt,et035012dm6",
+		.data = &edt_et035012dm6,
+	}, {
+		.compatible = "edt,etm0430g0dh6",
+		.data = &edt_etm0430g0dh6,
+	}, {
 		.compatible = "edt,et057090dhu",
 		.data = &edt_et057090dhu,
 	}, {
@@ -2661,9 +2818,15 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "edt,etm0700g0edh6",
 		.data = &edt_etm0700g0bdh6,
 	}, {
+		.compatible = "evervision,vgg804821",
+		.data = &evervision_vgg804821,
+	}, {
 		.compatible = "foxlink,fl500wvr00-a0t",
 		.data = &foxlink_fl500wvr00_a0t,
 	}, {
+		.compatible = "friendlyarm,hd702e",
+		.data = &friendlyarm_hd702e,
+	}, {
 		.compatible = "giantplus,gpg482739qs5",
 		.data = &giantplus_gpg482739qs5
 	}, {
@@ -2802,6 +2965,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "starry,kr122ea0sra",
 		.data = &starry_kr122ea0sra,
 	}, {
+		.compatible = "tfc,s9700rtwv43tr-01b",
+		.data = &tfc_s9700rtwv43tr_01b,
+	}, {
 		.compatible = "tianma,tm070jdhg30",
 		.data = &tianma_tm070jdhg30,
 	}, {
@@ -2835,6 +3001,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "urt,umsh-8596md-20t",
 		.data = &urt_umsh_8596md_parallel,
 	}, {
+		.compatible = "vxt,vl050-8048nt-c01",
+		.data = &vl050_8048nt_c01,
+	}, {
 		.compatible = "winstar,wf35ltiacd",
 		.data = &winstar_wf35ltiacd,
 	}, {
@@ -3053,6 +3222,37 @@ static const struct panel_desc_dsi lg_acx467akm_7 = {
 	.lanes = 4,
 };
 
+static const struct drm_display_mode osd101t2045_53ts_mode = {
+	.clock = 154500,
+	.hdisplay = 1920,
+	.hsync_start = 1920 + 112,
+	.hsync_end = 1920 + 112 + 16,
+	.htotal = 1920 + 112 + 16 + 32,
+	.vdisplay = 1200,
+	.vsync_start = 1200 + 16,
+	.vsync_end = 1200 + 16 + 2,
+	.vtotal = 1200 + 16 + 2 + 16,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+};
+
+static const struct panel_desc_dsi osd101t2045_53ts = {
+	.desc = {
+		.modes = &osd101t2045_53ts_mode,
+		.num_modes = 1,
+		.bpc = 8,
+		.size = {
+			.width = 217,
+			.height = 136,
+		},
+	},
+	.flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+		 MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
+		 MIPI_DSI_MODE_EOT_PACKET,
+	.format = MIPI_DSI_FMT_RGB888,
+	.lanes = 4,
+};
+
 static const struct of_device_id dsi_of_match[] = {
 	{
 		.compatible = "auo,b080uan01",
@@ -3073,6 +3273,9 @@ static const struct of_device_id dsi_of_match[] = {
 		.compatible = "lg,acx467akm-7",
 		.data = &lg_acx467akm_7
 	}, {
+		.compatible = "osddisplays,osd101t2045-53ts",
+		.data = &osd101t2045_53ts
+	}, {
 		/* sentinel */
 	}
 };
@@ -3098,7 +3301,14 @@ static int panel_simple_dsi_probe(struct mipi_dsi_device *dsi)
 	dsi->format = desc->format;
 	dsi->lanes = desc->lanes;
 
-	return mipi_dsi_attach(dsi);
+	err = mipi_dsi_attach(dsi);
+	if (err) {
+		struct panel_simple *panel = dev_get_drvdata(&dsi->dev);
+
+		drm_panel_remove(&panel->base);
+	}
+
+	return err;
 }
 
 static int panel_simple_dsi_remove(struct mipi_dsi_device *dsi)
diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
index a5716c8fe8b3..9bb9260d9181 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -387,7 +387,7 @@ static void panfrost_job_timedout(struct drm_sched_job *sched_job)
 	mutex_lock(&pfdev->reset_lock);
 
 	for (i = 0; i < NUM_JOB_SLOTS; i++)
-		drm_sched_stop(&pfdev->js->queue[i].sched);
+		drm_sched_stop(&pfdev->js->queue[i].sched, sched_job);
 
 	if (sched_job)
 		drm_sched_increase_karma(sched_job);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 20a9c296d027..e590fa0bb02b 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -1222,17 +1222,6 @@ static void vop_crtc_destroy(struct drm_crtc *crtc)
 	drm_crtc_cleanup(crtc);
 }
 
-static void vop_crtc_reset(struct drm_crtc *crtc)
-{
-	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-	kfree(crtc->state);
-
-	crtc->state = kzalloc(sizeof(struct rockchip_crtc_state), GFP_KERNEL);
-	if (crtc->state)
-		crtc->state->crtc = crtc;
-}
-
 static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc)
 {
 	struct rockchip_crtc_state *rockchip_state;
@@ -1254,6 +1243,17 @@ static void vop_crtc_destroy_state(struct drm_crtc *crtc,
 	kfree(s);
 }
 
+static void vop_crtc_reset(struct drm_crtc *crtc)
+{
+	struct rockchip_crtc_state *crtc_state =
+		kzalloc(sizeof(*crtc_state), GFP_KERNEL);
+
+	if (crtc->state)
+		vop_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &crtc_state->base);
+}
+
 #ifdef CONFIG_DRM_ANALOGIX_DP
 static struct drm_connector *vop_get_edp_connector(struct vop *vop)
 {
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 19fc601c9eeb..f8f0e1c19002 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -265,32 +265,6 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
 }
 EXPORT_SYMBOL(drm_sched_resume_timeout);
 
-/* job_finish is called after hw fence signaled
- */
-static void drm_sched_job_finish(struct work_struct *work)
-{
-	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
-						   finish_work);
-	struct drm_gpu_scheduler *sched = s_job->sched;
-	unsigned long flags;
-
-	/*
-	 * Canceling the timeout without removing our job from the ring mirror
-	 * list is safe, as we will only end up in this worker if our jobs
-	 * finished fence has been signaled. So even if some another worker
-	 * manages to find this job as the next job in the list, the fence
-	 * signaled check below will prevent the timeout to be restarted.
-	 */
-	cancel_delayed_work_sync(&sched->work_tdr);
-
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* queue TDR for next job */
-	drm_sched_start_timeout(sched);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	sched->ops->free_job(s_job);
-}
-
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
@@ -315,6 +289,15 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	if (job)
 		job->sched->ops->timedout_job(job);
 
+	/*
+	 * Guilty job did complete and hence needs to be manually removed
+	 * See drm_sched_stop doc.
+	 */
+	if (sched->free_guilty) {
+		job->sched->ops->free_job(job);
+		sched->free_guilty = false;
+	}
+
 	spin_lock_irqsave(&sched->job_list_lock, flags);
 	drm_sched_start_timeout(sched);
 	spin_unlock_irqrestore(&sched->job_list_lock, flags);
@@ -371,40 +354,66 @@ EXPORT_SYMBOL(drm_sched_increase_karma);
  * @sched: scheduler instance
  * @bad: bad scheduler job
  *
+ * Stop the scheduler and also removes and frees all completed jobs.
+ * Note: bad job will not be freed as it might be used later and so it's
+ * callers responsibility to release it manually if it's not part of the
+ * mirror list any more.
+ *
  */
-void drm_sched_stop(struct drm_gpu_scheduler *sched)
+void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
 {
-	struct drm_sched_job *s_job;
+	struct drm_sched_job *s_job, *tmp;
 	unsigned long flags;
-	struct dma_fence *last_fence =  NULL;
 
 	kthread_park(sched->thread);
 
 	/*
-	 * Verify all the signaled jobs in mirror list are removed from the ring
-	 * by waiting for the latest job to enter the list. This should insure that
-	 * also all the previous jobs that were in flight also already singaled
-	 * and removed from the list.
+	 * Iterate the job list from later to  earlier one and either deactive
+	 * their HW callbacks or remove them from mirror list if they already
+	 * signaled.
+	 * This iteration is thread safe as sched thread is stopped.
 	 */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+	list_for_each_entry_safe_reverse(s_job, tmp, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
 					      &s_job->cb)) {
-			dma_fence_put(s_job->s_fence->parent);
-			s_job->s_fence->parent = NULL;
 			atomic_dec(&sched->hw_rq_count);
 		} else {
-			 last_fence = dma_fence_get(&s_job->s_fence->finished);
-			 break;
+			/*
+			 * remove job from ring_mirror_list.
+			 * Locking here is for concurrent resume timeout
+			 */
+			spin_lock_irqsave(&sched->job_list_lock, flags);
+			list_del_init(&s_job->node);
+			spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+			/*
+			 * Wait for job's HW fence callback to finish using s_job
+			 * before releasing it.
+			 *
+			 * Job is still alive so fence refcount at least 1
+			 */
+			dma_fence_wait(&s_job->s_fence->finished, false);
+
+			/*
+			 * We must keep bad job alive for later use during
+			 * recovery by some of the drivers but leave a hint
+			 * that the guilty job must be released.
+			 */
+			if (bad != s_job)
+				sched->ops->free_job(s_job);
+			else
+				sched->free_guilty = true;
 		}
 	}
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
-	if (last_fence) {
-		dma_fence_wait(last_fence, false);
-		dma_fence_put(last_fence);
-	}
+	/*
+	 * Stop pending timer in flight as we rearm it in  drm_sched_start. This
+	 * avoids the pending timeout work in progress to fire right away after
+	 * this TDR finished and before the newly restarted jobs had a
+	 * chance to complete.
+	 */
+	cancel_delayed_work(&sched->work_tdr);
 }
 
 EXPORT_SYMBOL(drm_sched_stop);
@@ -418,21 +427,22 @@ EXPORT_SYMBOL(drm_sched_stop);
 void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 {
 	struct drm_sched_job *s_job, *tmp;
+	unsigned long flags;
 	int r;
 
-	if (!full_recovery)
-		goto unpark;
-
 	/*
 	 * Locking the list is not required here as the sched thread is parked
-	 * so no new jobs are being pushed in to HW and in drm_sched_stop we
-	 * flushed all the jobs who were still in mirror list but who already
-	 * signaled and removed them self from the list. Also concurrent
+	 * so no new jobs are being inserted or removed. Also concurrent
 	 * GPU recovers can't run in parallel.
 	 */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct dma_fence *fence = s_job->s_fence->parent;
 
+		atomic_inc(&sched->hw_rq_count);
+
+		if (!full_recovery)
+			continue;
+
 		if (fence) {
 			r = dma_fence_add_callback(fence, &s_job->cb,
 						   drm_sched_process_job);
@@ -445,9 +455,12 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
 			drm_sched_process_job(NULL, &s_job->cb);
 	}
 
-	drm_sched_start_timeout(sched);
+	if (full_recovery) {
+		spin_lock_irqsave(&sched->job_list_lock, flags);
+		drm_sched_start_timeout(sched);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	}
 
-unpark:
 	kthread_unpark(sched->thread);
 }
 EXPORT_SYMBOL(drm_sched_start);
@@ -464,7 +477,6 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 	uint64_t guilty_context;
 	bool found_guilty = false;
 
-	/*TODO DO we need spinlock here ? */
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
@@ -476,8 +488,8 @@ void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
+		dma_fence_put(s_job->s_fence->parent);
 		s_job->s_fence->parent = sched->ops->run_job(s_job);
-		atomic_inc(&sched->hw_rq_count);
 	}
 }
 EXPORT_SYMBOL(drm_sched_resubmit_jobs);
@@ -514,7 +526,6 @@ int drm_sched_job_init(struct drm_sched_job *job,
 		return -ENOMEM;
 	job->id = atomic64_inc_return(&sched->job_id_count);
 
-	INIT_WORK(&job->finish_work, drm_sched_job_finish);
 	INIT_LIST_HEAD(&job->node);
 
 	return 0;
@@ -597,24 +608,53 @@ static void drm_sched_process_job(struct dma_fence *f, struct dma_fence_cb *cb)
 	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
 	struct drm_sched_fence *s_fence = s_job->s_fence;
 	struct drm_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
-
-	cancel_delayed_work(&sched->work_tdr);
 
 	atomic_dec(&sched->hw_rq_count);
 	atomic_dec(&sched->num_jobs);
 
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	/* remove job from ring_mirror_list */
-	list_del_init(&s_job->node);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+	trace_drm_sched_process_job(s_fence);
 
 	drm_sched_fence_finished(s_fence);
-
-	trace_drm_sched_process_job(s_fence);
 	wake_up_interruptible(&sched->wake_up_worker);
+}
+
+/**
+ * drm_sched_cleanup_jobs - destroy finished jobs
+ *
+ * @sched: scheduler instance
+ *
+ * Remove all finished jobs from the mirror list and destroy them.
+ */
+static void drm_sched_cleanup_jobs(struct drm_gpu_scheduler *sched)
+{
+	unsigned long flags;
+
+	/* Don't destroy jobs while the timeout worker is running */
+	if (!cancel_delayed_work(&sched->work_tdr))
+		return;
+
+
+	while (!list_empty(&sched->ring_mirror_list)) {
+		struct drm_sched_job *job;
+
+		job = list_first_entry(&sched->ring_mirror_list,
+				       struct drm_sched_job, node);
+		if (!dma_fence_is_signaled(&job->s_fence->finished))
+			break;
+
+		spin_lock_irqsave(&sched->job_list_lock, flags);
+		/* remove job from ring_mirror_list */
+		list_del_init(&job->node);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
+
+		sched->ops->free_job(job);
+	}
+
+	/* queue timeout for next job */
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+	drm_sched_start_timeout(sched);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
-	schedule_work(&s_job->finish_work);
 }
 
 /**
@@ -656,9 +696,10 @@ static int drm_sched_main(void *param)
 		struct dma_fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
+					 (drm_sched_cleanup_jobs(sched),
 					 (!drm_sched_blocked(sched) &&
 					  (entity = drm_sched_select_entity(sched))) ||
-					 kthread_should_stop());
+					 kthread_should_stop()));
 
 		if (!entity)
 			continue;
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index 32fd6a3b37fb..97912e2c663d 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -232,6 +232,11 @@ static const enum ltdc_pix_fmt ltdc_pix_fmt_a1[NB_PF] = {
 	PF_ARGB4444		/* 0x07 */
 };
 
+static const u64 ltdc_format_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
+};
+
 static inline u32 reg_read(void __iomem *base, u32 reg)
 {
 	return readl_relaxed(base + reg);
@@ -426,8 +431,8 @@ static void ltdc_crtc_atomic_enable(struct drm_crtc *crtc,
 	/* Enable IRQ */
 	reg_set(ldev->regs, LTDC_IER, IER_RRIE | IER_FUIE | IER_TERRIE);
 
-	/* Immediately commit the planes */
-	reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR);
+	/* Commit shadow registers = update planes at next vblank */
+	reg_set(ldev->regs, LTDC_SRCR, SRCR_VBR);
 
 	/* Enable LTDC */
 	reg_set(ldev->regs, LTDC_GCR, GCR_LTDCEN);
@@ -555,7 +560,7 @@ static void ltdc_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	if (vm.flags & DISPLAY_FLAGS_VSYNC_HIGH)
 		val |= GCR_VSPOL;
 
-	if (vm.flags & DISPLAY_FLAGS_DE_HIGH)
+	if (vm.flags & DISPLAY_FLAGS_DE_LOW)
 		val |= GCR_DEPOL;
 
 	if (vm.flags & DISPLAY_FLAGS_PIXDATA_NEGEDGE)
@@ -822,11 +827,11 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane,
 
 	mutex_lock(&ldev->err_lock);
 	if (ldev->error_status & ISR_FUIF) {
-		DRM_DEBUG_DRIVER("Fifo underrun\n");
+		DRM_WARN("ltdc fifo underrun: please verify display mode\n");
 		ldev->error_status &= ~ISR_FUIF;
 	}
 	if (ldev->error_status & ISR_TERRIF) {
-		DRM_DEBUG_DRIVER("Transfer error\n");
+		DRM_WARN("ltdc transfer error\n");
 		ldev->error_status &= ~ISR_TERRIF;
 	}
 	mutex_unlock(&ldev->err_lock);
@@ -864,6 +869,16 @@ static void ltdc_plane_atomic_print_state(struct drm_printer *p,
 	fpsi->counter = 0;
 }
 
+static bool ltdc_plane_format_mod_supported(struct drm_plane *plane,
+					    u32 format,
+					    u64 modifier)
+{
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return true;
+
+	return false;
+}
+
 static const struct drm_plane_funcs ltdc_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
@@ -872,6 +887,7 @@ static const struct drm_plane_funcs ltdc_plane_funcs = {
 	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
 	.atomic_destroy_state = drm_atomic_helper_plane_destroy_state,
 	.atomic_print_state = ltdc_plane_atomic_print_state,
+	.format_mod_supported = ltdc_plane_format_mod_supported,
 };
 
 static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = {
@@ -890,6 +906,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev,
 	unsigned int i, nb_fmt = 0;
 	u32 formats[NB_PF * 2];
 	u32 drm_fmt, drm_fmt_no_alpha;
+	const u64 *modifiers = ltdc_format_modifiers;
 	int ret;
 
 	/* Get supported pixel formats */
@@ -918,7 +935,7 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev,
 
 	ret = drm_universal_plane_init(ddev, plane, possible_crtcs,
 				       &ltdc_plane_funcs, formats, nb_fmt,
-				       NULL, type, NULL);
+				       modifiers, type, NULL);
 	if (ret < 0)
 		return NULL;
 
@@ -1021,10 +1038,13 @@ static int ltdc_get_caps(struct drm_device *ddev)
 	struct ltdc_device *ldev = ddev->dev_private;
 	u32 bus_width_log2, lcr, gc2r;
 
-	/* at least 1 layer must be managed */
+	/*
+	 * at least 1 layer must be managed & the number of layers
+	 * must not exceed LTDC_MAX_LAYER
+	 */
 	lcr = reg_read(ldev->regs, LTDC_LCR);
 
-	ldev->caps.nb_layers = max_t(int, lcr, 1);
+	ldev->caps.nb_layers = clamp((int)lcr, 1, LTDC_MAX_LAYER);
 
 	/* set data bus width */
 	gc2r = reg_read(ldev->regs, LTDC_GC2R);
@@ -1125,8 +1145,9 @@ int ltdc_load(struct drm_device *ddev)
 
 	ldev->pixel_clk = devm_clk_get(dev, "lcd");
 	if (IS_ERR(ldev->pixel_clk)) {
-		DRM_ERROR("Unable to get lcd clock\n");
-		return -ENODEV;
+		if (PTR_ERR(ldev->pixel_clk) != -EPROBE_DEFER)
+			DRM_ERROR("Unable to get lcd clock\n");
+		return PTR_ERR(ldev->pixel_clk);
 	}
 
 	if (clk_prepare_enable(ldev->pixel_clk)) {
@@ -1134,6 +1155,12 @@ int ltdc_load(struct drm_device *ddev)
 		return -ENODEV;
 	}
 
+	if (!IS_ERR(rstc)) {
+		reset_control_assert(rstc);
+		usleep_range(10, 20);
+		reset_control_deassert(rstc);
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	ldev->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(ldev->regs)) {
@@ -1142,8 +1169,15 @@ int ltdc_load(struct drm_device *ddev)
 		goto err;
 	}
 
+	/* Disable interrupts */
+	reg_clear(ldev->regs, LTDC_IER,
+		  IER_LIE | IER_RRIE | IER_FUIE | IER_TERRIE);
+
 	for (i = 0; i < MAX_IRQ; i++) {
 		irq = platform_get_irq(pdev, i);
+		if (irq == -EPROBE_DEFER)
+			goto err;
+
 		if (irq < 0)
 			continue;
 
@@ -1156,15 +1190,6 @@ int ltdc_load(struct drm_device *ddev)
 		}
 	}
 
-	if (!IS_ERR(rstc)) {
-		reset_control_assert(rstc);
-		usleep_range(10, 20);
-		reset_control_deassert(rstc);
-	}
-
-	/* Disable interrupts */
-	reg_clear(ldev->regs, LTDC_IER,
-		  IER_LIE | IER_RRIE | IER_FUIE | IER_TERRIE);
 
 	ret = ltdc_get_caps(ddev);
 	if (ret) {
@@ -1203,6 +1228,8 @@ int ltdc_load(struct drm_device *ddev)
 		goto err;
 	}
 
+	ddev->mode_config.allow_fb_modifiers = true;
+
 	ret = ltdc_crtc_init(ddev, crtc);
 	if (ret) {
 		DRM_ERROR("Failed to init crtc\n");
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 3ebd9f5e2719..c8c0ab3da972 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -52,22 +52,8 @@ static struct drm_driver sun4i_drv_driver = {
 	.minor			= 0,
 
 	/* GEM Operations */
+	DRM_GEM_CMA_VMAP_DRIVER_OPS,
 	.dumb_create		= drm_sun4i_gem_dumb_create,
-	.gem_free_object_unlocked = drm_gem_cma_free_object,
-	.gem_vm_ops		= &drm_gem_cma_vm_ops,
-
-	/* PRIME Operations */
-	.prime_handle_to_fd	= drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle	= drm_gem_prime_fd_to_handle,
-	.gem_prime_import	= drm_gem_prime_import,
-	.gem_prime_export	= drm_gem_prime_export,
-	.gem_prime_get_sg_table	= drm_gem_cma_prime_get_sg_table,
-	.gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
-	.gem_prime_vmap		= drm_gem_cma_prime_vmap,
-	.gem_prime_vunmap	= drm_gem_cma_prime_vunmap,
-	.gem_prime_mmap		= drm_gem_cma_prime_mmap,
-
-	/* Frame Buffer Operations */
 };
 
 static int sun4i_drv_bind(struct device *dev)
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 607a6ea17ecc..079250c85733 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -26,6 +26,9 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_plane_helper.h>
 
+static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state);
+
 static void tegra_dc_stats_reset(struct tegra_dc_stats *stats)
 {
 	stats->frames = 0;
@@ -1155,20 +1158,12 @@ static void tegra_dc_destroy(struct drm_crtc *crtc)
 
 static void tegra_crtc_reset(struct drm_crtc *crtc)
 {
-	struct tegra_dc_state *state;
+	struct tegra_dc_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
 	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-
-	kfree(crtc->state);
-	crtc->state = NULL;
-
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
+		tegra_crtc_atomic_destroy_state(crtc, crtc->state);
 
+	__drm_atomic_helper_crtc_reset(crtc, &state->base);
 	drm_crtc_vblank_reset(crtc);
 }
 
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
index a24af2d2f574..a2dc4262955e 100644
--- a/drivers/gpu/drm/v3d/v3d_debugfs.c
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -58,6 +58,17 @@ static const struct v3d_reg_def v3d_core_reg_defs[] = {
 	REGDEF(V3D_GMP_VIO_ADDR),
 };
 
+static const struct v3d_reg_def v3d_csd_reg_defs[] = {
+	REGDEF(V3D_CSD_STATUS),
+	REGDEF(V3D_CSD_CURRENT_CFG0),
+	REGDEF(V3D_CSD_CURRENT_CFG1),
+	REGDEF(V3D_CSD_CURRENT_CFG2),
+	REGDEF(V3D_CSD_CURRENT_CFG3),
+	REGDEF(V3D_CSD_CURRENT_CFG4),
+	REGDEF(V3D_CSD_CURRENT_CFG5),
+	REGDEF(V3D_CSD_CURRENT_CFG6),
+};
+
 static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
@@ -89,6 +100,17 @@ static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
 				   V3D_CORE_READ(core,
 						 v3d_core_reg_defs[i].reg));
 		}
+
+		if (v3d_has_csd(v3d)) {
+			for (i = 0; i < ARRAY_SIZE(v3d_csd_reg_defs); i++) {
+				seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+					   core,
+					   v3d_csd_reg_defs[i].name,
+					   v3d_csd_reg_defs[i].reg,
+					   V3D_CORE_READ(core,
+							 v3d_csd_reg_defs[i].reg));
+			}
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index a06b05f714a5..df66c90a0102 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -7,9 +7,9 @@
  * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
  * For V3D 2.x support, see the VC4 driver.
  *
- * Currently only single-core rendering using the binner and renderer,
- * along with TFU (texture formatting unit) rendering is supported.
- * V3D 4.x's CSD (compute shader dispatch) is not yet supported.
+ * The V3D GPU includes a tiled render (composed of a bin and render
+ * pipelines), the TFU (texture formatting unit), and the CSD (compute
+ * shader dispatch).
  */
 
 #include <linux/clk.h>
@@ -120,6 +120,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
 	case DRM_V3D_PARAM_SUPPORTS_TFU:
 		args->value = 1;
 		return 0;
+	case DRM_V3D_PARAM_SUPPORTS_CSD:
+		args->value = v3d_has_csd(v3d);
+		return 0;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", args->param);
 		return -EINVAL;
@@ -179,6 +182,7 @@ static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_TFU, v3d_submit_tfu_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+	DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CSD, v3d_submit_csd_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
 };
 
 static struct drm_driver v3d_drm_driver = {
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index e9d4a2fdcf44..47b86a25629e 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -16,9 +16,11 @@ enum v3d_queue {
 	V3D_BIN,
 	V3D_RENDER,
 	V3D_TFU,
+	V3D_CSD,
+	V3D_CACHE_CLEAN,
 };
 
-#define V3D_MAX_QUEUES (V3D_TFU + 1)
+#define V3D_MAX_QUEUES (V3D_CACHE_CLEAN + 1)
 
 struct v3d_queue_state {
 	struct drm_gpu_scheduler sched;
@@ -67,9 +69,10 @@ struct v3d_dev {
 
 	struct work_struct overflow_mem_work;
 
-	struct v3d_exec_info *bin_job;
-	struct v3d_exec_info *render_job;
+	struct v3d_bin_job *bin_job;
+	struct v3d_render_job *render_job;
 	struct v3d_tfu_job *tfu_job;
+	struct v3d_csd_job *csd_job;
 
 	struct v3d_queue_state queue[V3D_MAX_QUEUES];
 
@@ -92,6 +95,12 @@ struct v3d_dev {
 	 */
 	struct mutex sched_lock;
 
+	/* Lock taken during a cache clean and when initiating an L2
+	 * flush, to keep L2 flushes from interfering with the
+	 * synchronous L2 cleans.
+	 */
+	struct mutex cache_clean_lock;
+
 	struct {
 		u32 num_allocated;
 		u32 pages_allocated;
@@ -104,6 +113,12 @@ to_v3d_dev(struct drm_device *dev)
 	return (struct v3d_dev *)dev->dev_private;
 }
 
+static inline bool
+v3d_has_csd(struct v3d_dev *v3d)
+{
+	return v3d->ver >= 41;
+}
+
 /* The per-fd struct, which tracks the MMU mappings. */
 struct v3d_file_priv {
 	struct v3d_dev *v3d;
@@ -117,7 +132,7 @@ struct v3d_bo {
 	struct drm_mm_node node;
 
 	/* List entry for the BO's position in
-	 * v3d_exec_info->unref_list
+	 * v3d_render_job->unref_list
 	 */
 	struct list_head unref_head;
 };
@@ -157,67 +172,74 @@ to_v3d_fence(struct dma_fence *fence)
 struct v3d_job {
 	struct drm_sched_job base;
 
-	struct v3d_exec_info *exec;
+	struct kref refcount;
 
-	/* An optional fence userspace can pass in for the job to depend on. */
-	struct dma_fence *in_fence;
+	struct v3d_dev *v3d;
+
+	/* This is the array of BOs that were looked up at the start
+	 * of submission.
+	 */
+	struct drm_gem_object **bo;
+	u32 bo_count;
+
+	/* Array of struct dma_fence * to block on before submitting this job.
+	 */
+	struct xarray deps;
+	unsigned long last_dep;
 
 	/* v3d fence to be signaled by IRQ handler when the job is complete. */
 	struct dma_fence *irq_fence;
 
+	/* scheduler fence for when the job is considered complete and
+	 * the BO reservations can be released.
+	 */
+	struct dma_fence *done_fence;
+
+	/* Callback for the freeing of the job on refcount going to 0. */
+	void (*free)(struct kref *ref);
+};
+
+struct v3d_bin_job {
+	struct v3d_job base;
+
 	/* GPU virtual addresses of the start/end of the CL job. */
 	u32 start, end;
 
 	u32 timedout_ctca, timedout_ctra;
-};
 
-struct v3d_exec_info {
-	struct v3d_dev *v3d;
+	/* Corresponding render job, for attaching our overflow memory. */
+	struct v3d_render_job *render;
 
-	struct v3d_job bin, render;
-
-	/* Fence for when the scheduler considers the binner to be
-	 * done, for render to depend on.
-	 */
-	struct dma_fence *bin_done_fence;
+	/* Submitted tile memory allocation start/size, tile state. */
+	u32 qma, qms, qts;
+};
 
-	/* Fence for when the scheduler considers the render to be
-	 * done, for when the BOs reservations should be complete.
-	 */
-	struct dma_fence *render_done_fence;
+struct v3d_render_job {
+	struct v3d_job base;
 
-	struct kref refcount;
+	/* GPU virtual addresses of the start/end of the CL job. */
+	u32 start, end;
 
-	/* This is the array of BOs that were looked up at the start of exec. */
-	struct v3d_bo **bo;
-	u32 bo_count;
+	u32 timedout_ctca, timedout_ctra;
 
 	/* List of overflow BOs used in the job that need to be
 	 * released once the job is complete.
 	 */
 	struct list_head unref_list;
-
-	/* Submitted tile memory allocation start/size, tile state. */
-	u32 qma, qms, qts;
 };
 
 struct v3d_tfu_job {
-	struct drm_sched_job base;
+	struct v3d_job base;
 
 	struct drm_v3d_submit_tfu args;
+};
 
-	/* An optional fence userspace can pass in for the job to depend on. */
-	struct dma_fence *in_fence;
-
-	/* v3d fence to be signaled by IRQ handler when the job is complete. */
-	struct dma_fence *irq_fence;
-
-	struct v3d_dev *v3d;
+struct v3d_csd_job {
+	struct v3d_job base;
 
-	struct kref refcount;
+	u32 timedout_batches;
 
-	/* This is the array of BOs that were looked up at the start of exec. */
-	struct v3d_bo *bo[4];
+	struct drm_v3d_submit_csd args;
 };
 
 /**
@@ -281,12 +303,14 @@ int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv);
 int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv);
+int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+			 struct drm_file *file_priv);
 int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv);
-void v3d_exec_put(struct v3d_exec_info *exec);
-void v3d_tfu_job_put(struct v3d_tfu_job *exec);
+void v3d_job_put(struct v3d_job *job);
 void v3d_reset(struct v3d_dev *v3d);
 void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_clean_caches(struct v3d_dev *v3d);
 
 /* v3d_irq.c */
 int v3d_irq_init(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
index b0a2a1ae2eb1..89840ed212c0 100644
--- a/drivers/gpu/drm/v3d/v3d_fence.c
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -36,6 +36,8 @@ static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
 		return "v3d-render";
 	case V3D_TFU:
 		return "v3d-tfu";
+	case V3D_CSD:
+		return "v3d-csd";
 	default:
 		return NULL;
 	}
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index 93ff8fcbe475..f736e021467a 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -162,10 +162,52 @@ v3d_flush_l2t(struct v3d_dev *v3d, int core)
 	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
 	 * need to wait for completion before dispatching the job --
 	 * L2T accesses will be stalled until the flush has completed.
+	 * However, we do need to make sure we don't try to trigger a
+	 * new flush while the L2_CLEAN queue is trying to
+	 * synchronously clean after a job.
 	 */
+	mutex_lock(&v3d->cache_clean_lock);
 	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
 		       V3D_L2TCACTL_L2TFLS |
 		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
+	mutex_unlock(&v3d->cache_clean_lock);
+}
+
+/* Cleans texture L1 and L2 cachelines (writing back dirty data).
+ *
+ * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
+ * executed, we need to make sure that the clean is done before
+ * signaling job completion.  So, we synchronously wait before
+ * returning, and we make sure that L2 invalidates don't happen in the
+ * meantime to confuse our are-we-done checks.
+ */
+void
+v3d_clean_caches(struct v3d_dev *v3d)
+{
+	struct drm_device *dev = &v3d->drm;
+	int core = 0;
+
+	trace_v3d_cache_clean_begin(dev);
+
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+	}
+
+	mutex_lock(&v3d->cache_clean_lock);
+	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
+		       V3D_L2TCACTL_L2TFLS |
+		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));
+
+	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+		       V3D_L2TCACTL_L2TFLS), 100)) {
+		DRM_ERROR("Timeout waiting for L2T clean\n");
+	}
+
+	mutex_unlock(&v3d->cache_clean_lock);
+
+	trace_v3d_cache_clean_end(dev);
 }
 
 /* Invalidates the slice caches.  These are read-only caches. */
@@ -193,28 +235,6 @@ v3d_invalidate_caches(struct v3d_dev *v3d)
 	v3d_invalidate_slices(v3d, 0);
 }
 
-static void
-v3d_attach_object_fences(struct v3d_bo **bos, int bo_count,
-			 struct dma_fence *fence)
-{
-	int i;
-
-	for (i = 0; i < bo_count; i++) {
-		/* XXX: Use shared fences for read-only objects. */
-		reservation_object_add_excl_fence(bos[i]->base.base.resv,
-						  fence);
-	}
-}
-
-static void
-v3d_unlock_bo_reservations(struct v3d_bo **bos,
-			   int bo_count,
-			   struct ww_acquire_ctx *acquire_ctx)
-{
-	drm_gem_unlock_reservations((struct drm_gem_object **)bos, bo_count,
-				    acquire_ctx);
-}
-
 /* Takes the reservation lock on all the BOs being referenced, so that
  * at queue submit time we can update the reservations.
  *
@@ -223,26 +243,21 @@ v3d_unlock_bo_reservations(struct v3d_bo **bos,
  * to v3d, so we don't attach dma-buf fences to them.
  */
 static int
-v3d_lock_bo_reservations(struct v3d_bo **bos,
-			 int bo_count,
+v3d_lock_bo_reservations(struct v3d_job *job,
 			 struct ww_acquire_ctx *acquire_ctx)
 {
 	int i, ret;
 
-	ret = drm_gem_lock_reservations((struct drm_gem_object **)bos,
-					bo_count, acquire_ctx);
+	ret = drm_gem_lock_reservations(job->bo, job->bo_count, acquire_ctx);
 	if (ret)
 		return ret;
 
-	/* Reserve space for our shared (read-only) fence references,
-	 * before we commit the CL to the hardware.
-	 */
-	for (i = 0; i < bo_count; i++) {
-		ret = reservation_object_reserve_shared(bos[i]->base.base.resv,
-							1);
+	for (i = 0; i < job->bo_count; i++) {
+		ret = drm_gem_fence_array_add_implicit(&job->deps,
+						       job->bo[i], true);
 		if (ret) {
-			v3d_unlock_bo_reservations(bos, bo_count,
-						   acquire_ctx);
+			drm_gem_unlock_reservations(job->bo, job->bo_count,
+						    acquire_ctx);
 			return ret;
 		}
 	}
@@ -251,11 +266,11 @@ v3d_lock_bo_reservations(struct v3d_bo **bos,
 }
 
 /**
- * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * v3d_lookup_bos() - Sets up job->bo[] with the GEM objects
  * referenced by the job.
  * @dev: DRM device
  * @file_priv: DRM file for this fd
- * @exec: V3D job being set up
+ * @job: V3D job being set up
  *
  * The command validator needs to reference BOs by their index within
  * the submitted job's BO list.  This does the validation of the job's
@@ -265,18 +280,19 @@ v3d_lock_bo_reservations(struct v3d_bo **bos,
  * failure, because that will happen at v3d_exec_cleanup() time.
  */
 static int
-v3d_cl_lookup_bos(struct drm_device *dev,
-		  struct drm_file *file_priv,
-		  struct drm_v3d_submit_cl *args,
-		  struct v3d_exec_info *exec)
+v3d_lookup_bos(struct drm_device *dev,
+	       struct drm_file *file_priv,
+	       struct v3d_job *job,
+	       u64 bo_handles,
+	       u32 bo_count)
 {
 	u32 *handles;
 	int ret = 0;
 	int i;
 
-	exec->bo_count = args->bo_handle_count;
+	job->bo_count = bo_count;
 
-	if (!exec->bo_count) {
+	if (!job->bo_count) {
 		/* See comment on bo_index for why we have to check
 		 * this.
 		 */
@@ -284,15 +300,15 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 		return -EINVAL;
 	}
 
-	exec->bo = kvmalloc_array(exec->bo_count,
-				  sizeof(struct drm_gem_cma_object *),
-				  GFP_KERNEL | __GFP_ZERO);
-	if (!exec->bo) {
+	job->bo = kvmalloc_array(job->bo_count,
+				 sizeof(struct drm_gem_cma_object *),
+				 GFP_KERNEL | __GFP_ZERO);
+	if (!job->bo) {
 		DRM_DEBUG("Failed to allocate validated BO pointers\n");
 		return -ENOMEM;
 	}
 
-	handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
+	handles = kvmalloc_array(job->bo_count, sizeof(u32), GFP_KERNEL);
 	if (!handles) {
 		ret = -ENOMEM;
 		DRM_DEBUG("Failed to allocate incoming GEM handles\n");
@@ -300,15 +316,15 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 	}
 
 	if (copy_from_user(handles,
-			   (void __user *)(uintptr_t)args->bo_handles,
-			   exec->bo_count * sizeof(u32))) {
+			   (void __user *)(uintptr_t)bo_handles,
+			   job->bo_count * sizeof(u32))) {
 		ret = -EFAULT;
 		DRM_DEBUG("Failed to copy in GEM handles\n");
 		goto fail;
 	}
 
 	spin_lock(&file_priv->table_lock);
-	for (i = 0; i < exec->bo_count; i++) {
+	for (i = 0; i < job->bo_count; i++) {
 		struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
 						     handles[i]);
 		if (!bo) {
@@ -319,7 +335,7 @@ v3d_cl_lookup_bos(struct drm_device *dev,
 			goto fail;
 		}
 		drm_gem_object_get(bo);
-		exec->bo[i] = to_v3d_bo(bo);
+		job->bo[i] = bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
@@ -329,67 +345,50 @@ fail:
 }
 
 static void
-v3d_exec_cleanup(struct kref *ref)
+v3d_job_free(struct kref *ref)
 {
-	struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
-						  refcount);
-	struct v3d_dev *v3d = exec->v3d;
-	unsigned int i;
-	struct v3d_bo *bo, *save;
-
-	dma_fence_put(exec->bin.in_fence);
-	dma_fence_put(exec->render.in_fence);
-
-	dma_fence_put(exec->bin.irq_fence);
-	dma_fence_put(exec->render.irq_fence);
-
-	dma_fence_put(exec->bin_done_fence);
-	dma_fence_put(exec->render_done_fence);
+	struct v3d_job *job = container_of(ref, struct v3d_job, refcount);
+	unsigned long index;
+	struct dma_fence *fence;
+	int i;
 
-	for (i = 0; i < exec->bo_count; i++)
-		drm_gem_object_put_unlocked(&exec->bo[i]->base.base);
-	kvfree(exec->bo);
+	for (i = 0; i < job->bo_count; i++) {
+		if (job->bo[i])
+			drm_gem_object_put_unlocked(job->bo[i]);
+	}
+	kvfree(job->bo);
 
-	list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
-		drm_gem_object_put_unlocked(&bo->base.base);
+	xa_for_each(&job->deps, index, fence) {
+		dma_fence_put(fence);
 	}
+	xa_destroy(&job->deps);
 
-	pm_runtime_mark_last_busy(v3d->dev);
-	pm_runtime_put_autosuspend(v3d->dev);
+	dma_fence_put(job->irq_fence);
+	dma_fence_put(job->done_fence);
 
-	kfree(exec);
-}
+	pm_runtime_mark_last_busy(job->v3d->dev);
+	pm_runtime_put_autosuspend(job->v3d->dev);
 
-void v3d_exec_put(struct v3d_exec_info *exec)
-{
-	kref_put(&exec->refcount, v3d_exec_cleanup);
+	kfree(job);
 }
 
 static void
-v3d_tfu_job_cleanup(struct kref *ref)
+v3d_render_job_free(struct kref *ref)
 {
-	struct v3d_tfu_job *job = container_of(ref, struct v3d_tfu_job,
-					       refcount);
-	struct v3d_dev *v3d = job->v3d;
-	unsigned int i;
-
-	dma_fence_put(job->in_fence);
-	dma_fence_put(job->irq_fence);
+	struct v3d_render_job *job = container_of(ref, struct v3d_render_job,
+						  base.refcount);
+	struct v3d_bo *bo, *save;
 
-	for (i = 0; i < ARRAY_SIZE(job->bo); i++) {
-		if (job->bo[i])
-			drm_gem_object_put_unlocked(&job->bo[i]->base.base);
+	list_for_each_entry_safe(bo, save, &job->unref_list, unref_head) {
+		drm_gem_object_put_unlocked(&bo->base.base);
 	}
 
-	pm_runtime_mark_last_busy(v3d->dev);
-	pm_runtime_put_autosuspend(v3d->dev);
-
-	kfree(job);
+	v3d_job_free(ref);
 }
 
-void v3d_tfu_job_put(struct v3d_tfu_job *job)
+void v3d_job_put(struct v3d_job *job)
 {
-	kref_put(&job->refcount, v3d_tfu_job_cleanup);
+	kref_put(&job->refcount, job->free);
 }
 
 int
@@ -425,6 +424,87 @@ v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int
+v3d_job_init(struct v3d_dev *v3d, struct drm_file *file_priv,
+	     struct v3d_job *job, void (*free)(struct kref *ref),
+	     u32 in_sync)
+{
+	struct dma_fence *in_fence = NULL;
+	int ret;
+
+	job->v3d = v3d;
+	job->free = free;
+
+	ret = pm_runtime_get_sync(v3d->dev);
+	if (ret < 0)
+		return ret;
+
+	xa_init_flags(&job->deps, XA_FLAGS_ALLOC);
+
+	ret = drm_syncobj_find_fence(file_priv, in_sync, 0, 0, &in_fence);
+	if (ret == -EINVAL)
+		goto fail;
+
+	ret = drm_gem_fence_array_add(&job->deps, in_fence);
+	if (ret)
+		goto fail;
+
+	kref_init(&job->refcount);
+
+	return 0;
+fail:
+	xa_destroy(&job->deps);
+	pm_runtime_put_autosuspend(v3d->dev);
+	return ret;
+}
+
+static int
+v3d_push_job(struct v3d_file_priv *v3d_priv,
+	     struct v3d_job *job, enum v3d_queue queue)
+{
+	int ret;
+
+	ret = drm_sched_job_init(&job->base, &v3d_priv->sched_entity[queue],
+				 v3d_priv);
+	if (ret)
+		return ret;
+
+	job->done_fence = dma_fence_get(&job->base.s_fence->finished);
+
+	/* put by scheduler job completion */
+	kref_get(&job->refcount);
+
+	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[queue]);
+
+	return 0;
+}
+
+static void
+v3d_attach_fences_and_unlock_reservation(struct drm_file *file_priv,
+					 struct v3d_job *job,
+					 struct ww_acquire_ctx *acquire_ctx,
+					 u32 out_sync,
+					 struct dma_fence *done_fence)
+{
+	struct drm_syncobj *sync_out;
+	int i;
+
+	for (i = 0; i < job->bo_count; i++) {
+		/* XXX: Use shared fences for read-only objects. */
+		reservation_object_add_excl_fence(job->bo[i]->resv,
+						  job->done_fence);
+	}
+
+	drm_gem_unlock_reservations(job->bo, job->bo_count, acquire_ctx);
+
+	/* Update the return sync object for the job */
+	sync_out = drm_syncobj_find(file_priv, out_sync);
+	if (sync_out) {
+		drm_syncobj_replace_fence(sync_out, done_fence);
+		drm_syncobj_put(sync_out);
+	}
+}
+
 /**
  * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
  * @dev: DRM device
@@ -444,9 +524,9 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct v3d_dev *v3d = to_v3d_dev(dev);
 	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
 	struct drm_v3d_submit_cl *args = data;
-	struct v3d_exec_info *exec;
+	struct v3d_bin_job *bin = NULL;
+	struct v3d_render_job *render;
 	struct ww_acquire_ctx acquire_ctx;
-	struct drm_syncobj *sync_out;
 	int ret = 0;
 
 	trace_v3d_submit_cl_ioctl(&v3d->drm, args->rcl_start, args->rcl_end);
@@ -456,100 +536,87 @@ v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
-	if (!exec)
+	render = kcalloc(1, sizeof(*render), GFP_KERNEL);
+	if (!render)
 		return -ENOMEM;
 
-	ret = pm_runtime_get_sync(v3d->dev);
-	if (ret < 0) {
-		kfree(exec);
+	render->start = args->rcl_start;
+	render->end = args->rcl_end;
+	INIT_LIST_HEAD(&render->unref_list);
+
+	ret = v3d_job_init(v3d, file_priv, &render->base,
+			   v3d_render_job_free, args->in_sync_rcl);
+	if (ret) {
+		kfree(render);
 		return ret;
 	}
 
-	kref_init(&exec->refcount);
+	if (args->bcl_start != args->bcl_end) {
+		bin = kcalloc(1, sizeof(*bin), GFP_KERNEL);
+		if (!bin)
+			return -ENOMEM;
 
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
-				     0, 0, &exec->bin.in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+		ret = v3d_job_init(v3d, file_priv, &bin->base,
+				   v3d_job_free, args->in_sync_bcl);
+		if (ret) {
+			v3d_job_put(&render->base);
+			return ret;
+		}
 
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
-				     0, 0, &exec->render.in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+		bin->start = args->bcl_start;
+		bin->end = args->bcl_end;
+		bin->qma = args->qma;
+		bin->qms = args->qms;
+		bin->qts = args->qts;
+		bin->render = render;
+	}
 
-	exec->qma = args->qma;
-	exec->qms = args->qms;
-	exec->qts = args->qts;
-	exec->bin.exec = exec;
-	exec->bin.start = args->bcl_start;
-	exec->bin.end = args->bcl_end;
-	exec->render.exec = exec;
-	exec->render.start = args->rcl_start;
-	exec->render.end = args->rcl_end;
-	exec->v3d = v3d;
-	INIT_LIST_HEAD(&exec->unref_list);
-
-	ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
+	ret = v3d_lookup_bos(dev, file_priv, &render->base,
+			     args->bo_handles, args->bo_handle_count);
 	if (ret)
 		goto fail;
 
-	ret = v3d_lock_bo_reservations(exec->bo, exec->bo_count,
-				       &acquire_ctx);
+	ret = v3d_lock_bo_reservations(&render->base, &acquire_ctx);
 	if (ret)
 		goto fail;
 
 	mutex_lock(&v3d->sched_lock);
-	if (exec->bin.start != exec->bin.end) {
-		ret = drm_sched_job_init(&exec->bin.base,
-					 &v3d_priv->sched_entity[V3D_BIN],
-					 v3d_priv);
+	if (bin) {
+		ret = v3d_push_job(v3d_priv, &bin->base, V3D_BIN);
 		if (ret)
 			goto fail_unreserve;
 
-		exec->bin_done_fence =
-			dma_fence_get(&exec->bin.base.s_fence->finished);
-
-		kref_get(&exec->refcount); /* put by scheduler job completion */
-		drm_sched_entity_push_job(&exec->bin.base,
-					  &v3d_priv->sched_entity[V3D_BIN]);
+		ret = drm_gem_fence_array_add(&render->base.deps,
+					      dma_fence_get(bin->base.done_fence));
+		if (ret)
+			goto fail_unreserve;
 	}
 
-	ret = drm_sched_job_init(&exec->render.base,
-				 &v3d_priv->sched_entity[V3D_RENDER],
-				 v3d_priv);
+	ret = v3d_push_job(v3d_priv, &render->base, V3D_RENDER);
 	if (ret)
 		goto fail_unreserve;
-
-	exec->render_done_fence =
-		dma_fence_get(&exec->render.base.s_fence->finished);
-
-	kref_get(&exec->refcount); /* put by scheduler job completion */
-	drm_sched_entity_push_job(&exec->render.base,
-				  &v3d_priv->sched_entity[V3D_RENDER]);
 	mutex_unlock(&v3d->sched_lock);
 
-	v3d_attach_object_fences(exec->bo, exec->bo_count,
-				 exec->render_done_fence);
-
-	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
-
-	/* Update the return sync object for the */
-	sync_out = drm_syncobj_find(file_priv, args->out_sync);
-	if (sync_out) {
-		drm_syncobj_replace_fence(sync_out, exec->render_done_fence);
-		drm_syncobj_put(sync_out);
-	}
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &render->base,
+						 &acquire_ctx,
+						 args->out_sync,
+						 render->base.done_fence);
 
-	v3d_exec_put(exec);
+	if (bin)
+		v3d_job_put(&bin->base);
+	v3d_job_put(&render->base);
 
 	return 0;
 
 fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
-	v3d_unlock_bo_reservations(exec->bo, exec->bo_count, &acquire_ctx);
+	drm_gem_unlock_reservations(render->base.bo,
+				    render->base.bo_count, &acquire_ctx);
 fail:
-	v3d_exec_put(exec);
+	if (bin)
+		v3d_job_put(&bin->base);
+	v3d_job_put(&render->base);
 
 	return ret;
 }
@@ -572,10 +639,7 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	struct drm_v3d_submit_tfu *args = data;
 	struct v3d_tfu_job *job;
 	struct ww_acquire_ctx acquire_ctx;
-	struct drm_syncobj *sync_out;
-	struct dma_fence *sched_done_fence;
 	int ret = 0;
-	int bo_count;
 
 	trace_v3d_submit_tfu_ioctl(&v3d->drm, args->iia);
 
@@ -583,81 +647,172 @@ v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
 	if (!job)
 		return -ENOMEM;
 
-	ret = pm_runtime_get_sync(v3d->dev);
-	if (ret < 0) {
+	ret = v3d_job_init(v3d, file_priv, &job->base,
+			   v3d_job_free, args->in_sync);
+	if (ret) {
 		kfree(job);
 		return ret;
 	}
 
-	kref_init(&job->refcount);
-
-	ret = drm_syncobj_find_fence(file_priv, args->in_sync,
-				     0, 0, &job->in_fence);
-	if (ret == -EINVAL)
-		goto fail;
+	job->base.bo = kcalloc(ARRAY_SIZE(args->bo_handles),
+			       sizeof(*job->base.bo), GFP_KERNEL);
+	if (!job->base.bo) {
+		v3d_job_put(&job->base);
+		return -ENOMEM;
+	}
 
 	job->args = *args;
-	job->v3d = v3d;
 
 	spin_lock(&file_priv->table_lock);
-	for (bo_count = 0; bo_count < ARRAY_SIZE(job->bo); bo_count++) {
+	for (job->base.bo_count = 0;
+	     job->base.bo_count < ARRAY_SIZE(args->bo_handles);
+	     job->base.bo_count++) {
 		struct drm_gem_object *bo;
 
-		if (!args->bo_handles[bo_count])
+		if (!args->bo_handles[job->base.bo_count])
 			break;
 
 		bo = idr_find(&file_priv->object_idr,
-			      args->bo_handles[bo_count]);
+			      args->bo_handles[job->base.bo_count]);
 		if (!bo) {
 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
-				  bo_count, args->bo_handles[bo_count]);
+				  job->base.bo_count,
+				  args->bo_handles[job->base.bo_count]);
 			ret = -ENOENT;
 			spin_unlock(&file_priv->table_lock);
 			goto fail;
 		}
 		drm_gem_object_get(bo);
-		job->bo[bo_count] = to_v3d_bo(bo);
+		job->base.bo[job->base.bo_count] = bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
-	ret = v3d_lock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+	ret = v3d_lock_bo_reservations(&job->base, &acquire_ctx);
 	if (ret)
 		goto fail;
 
 	mutex_lock(&v3d->sched_lock);
-	ret = drm_sched_job_init(&job->base,
-				 &v3d_priv->sched_entity[V3D_TFU],
-				 v3d_priv);
+	ret = v3d_push_job(v3d_priv, &job->base, V3D_TFU);
 	if (ret)
 		goto fail_unreserve;
+	mutex_unlock(&v3d->sched_lock);
 
-	sched_done_fence = dma_fence_get(&job->base.s_fence->finished);
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 &job->base, &acquire_ctx,
+						 args->out_sync,
+						 job->base.done_fence);
 
-	kref_get(&job->refcount); /* put by scheduler job completion */
-	drm_sched_entity_push_job(&job->base, &v3d_priv->sched_entity[V3D_TFU]);
+	v3d_job_put(&job->base);
+
+	return 0;
+
+fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
+	drm_gem_unlock_reservations(job->base.bo, job->base.bo_count,
+				    &acquire_ctx);
+fail:
+	v3d_job_put(&job->base);
 
-	v3d_attach_object_fences(job->bo, bo_count, sched_done_fence);
+	return ret;
+}
 
-	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+/**
+ * v3d_submit_csd_ioctl() - Submits a CSD (texture formatting) job to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * Userspace provides the register setup for the CSD, which we don't
+ * need to validate since the CSD is behind the MMU.
+ */
+int
+v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct v3d_dev *v3d = to_v3d_dev(dev);
+	struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+	struct drm_v3d_submit_csd *args = data;
+	struct v3d_csd_job *job;
+	struct v3d_job *clean_job;
+	struct ww_acquire_ctx acquire_ctx;
+	int ret;
 
-	/* Update the return sync object */
-	sync_out = drm_syncobj_find(file_priv, args->out_sync);
-	if (sync_out) {
-		drm_syncobj_replace_fence(sync_out, sched_done_fence);
-		drm_syncobj_put(sync_out);
+	trace_v3d_submit_csd_ioctl(&v3d->drm, args->cfg[5], args->cfg[6]);
+
+	if (!v3d_has_csd(v3d)) {
+		DRM_DEBUG("Attempting CSD submit on non-CSD hardware\n");
+		return -EINVAL;
+	}
+
+	job = kcalloc(1, sizeof(*job), GFP_KERNEL);
+	if (!job)
+		return -ENOMEM;
+
+	ret = v3d_job_init(v3d, file_priv, &job->base,
+			   v3d_job_free, args->in_sync);
+	if (ret) {
+		kfree(job);
+		return ret;
+	}
+
+	clean_job = kcalloc(1, sizeof(*clean_job), GFP_KERNEL);
+	if (!clean_job) {
+		v3d_job_put(&job->base);
+		kfree(job);
+		return -ENOMEM;
 	}
-	dma_fence_put(sched_done_fence);
 
-	v3d_tfu_job_put(job);
+	ret = v3d_job_init(v3d, file_priv, clean_job, v3d_job_free, 0);
+	if (ret) {
+		v3d_job_put(&job->base);
+		kfree(clean_job);
+		return ret;
+	}
+
+	job->args = *args;
+
+	ret = v3d_lookup_bos(dev, file_priv, clean_job,
+			     args->bo_handles, args->bo_handle_count);
+	if (ret)
+		goto fail;
+
+	ret = v3d_lock_bo_reservations(clean_job, &acquire_ctx);
+	if (ret)
+		goto fail;
+
+	mutex_lock(&v3d->sched_lock);
+	ret = v3d_push_job(v3d_priv, &job->base, V3D_CSD);
+	if (ret)
+		goto fail_unreserve;
+
+	ret = drm_gem_fence_array_add(&clean_job->deps,
+				      dma_fence_get(job->base.done_fence));
+	if (ret)
+		goto fail_unreserve;
+
+	ret = v3d_push_job(v3d_priv, clean_job, V3D_CACHE_CLEAN);
+	if (ret)
+		goto fail_unreserve;
+	mutex_unlock(&v3d->sched_lock);
+
+	v3d_attach_fences_and_unlock_reservation(file_priv,
+						 clean_job,
+						 &acquire_ctx,
+						 args->out_sync,
+						 clean_job->done_fence);
+
+	v3d_job_put(&job->base);
+	v3d_job_put(clean_job);
 
 	return 0;
 
 fail_unreserve:
 	mutex_unlock(&v3d->sched_lock);
-	v3d_unlock_bo_reservations(job->bo, bo_count, &acquire_ctx);
+	drm_gem_unlock_reservations(clean_job->bo, clean_job->bo_count,
+				    &acquire_ctx);
 fail:
-	v3d_tfu_job_put(job);
+	v3d_job_put(&job->base);
+	v3d_job_put(clean_job);
 
 	return ret;
 }
@@ -677,6 +832,7 @@ v3d_gem_init(struct drm_device *dev)
 	mutex_init(&v3d->bo_lock);
 	mutex_init(&v3d->reset_lock);
 	mutex_init(&v3d->sched_lock);
+	mutex_init(&v3d->cache_clean_lock);
 
 	/* Note: We don't allocate address 0.  Various bits of HW
 	 * treat 0 as special, such as the occlusion query counters
@@ -715,7 +871,7 @@ v3d_gem_destroy(struct drm_device *dev)
 
 	v3d_sched_fini(v3d);
 
-	/* Waiting for exec to finish would need to be done before
+	/* Waiting for jobs to finish would need to be done before
 	 * unregistering V3D.
 	 */
 	WARN_ON(v3d->bin_job);
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index aa0a180ae700..fac3c542860b 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -4,9 +4,9 @@
 /**
  * DOC: Interrupt management for the V3D engine
  *
- * When we take a bin, render, or TFU done interrupt, we need to
- * signal the fence for that job so that the scheduler can queue up
- * the next one and unblock any waiters.
+ * When we take a bin, render, TFU done, or CSD done interrupt, we
+ * need to signal the fence for that job so that the scheduler can
+ * queue up the next one and unblock any waiters.
  *
  * When we take the binner out of memory interrupt, we need to
  * allocate some new memory and pass it to the binner so that the
@@ -20,6 +20,7 @@
 #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM |	\
 			     V3D_INT_FLDONE |	\
 			     V3D_INT_FRDONE |	\
+			     V3D_INT_CSDDONE |	\
 			     V3D_INT_GMPV))
 
 #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV |	\
@@ -62,7 +63,7 @@ v3d_overflow_mem_work(struct work_struct *work)
 	}
 
 	drm_gem_object_get(obj);
-	list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
+	list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
 	V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
@@ -96,7 +97,7 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FLDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->bin_job->bin.irq_fence);
+			to_v3d_fence(v3d->bin_job->base.irq_fence);
 
 		trace_v3d_bcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
@@ -105,13 +106,22 @@ v3d_irq(int irq, void *arg)
 
 	if (intsts & V3D_INT_FRDONE) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->render_job->render.irq_fence);
+			to_v3d_fence(v3d->render_job->base.irq_fence);
 
 		trace_v3d_rcl_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
 		status = IRQ_HANDLED;
 	}
 
+	if (intsts & V3D_INT_CSDDONE) {
+		struct v3d_fence *fence =
+			to_v3d_fence(v3d->csd_job->base.irq_fence);
+
+		trace_v3d_csd_irq(&v3d->drm, fence->seqno);
+		dma_fence_signal(&fence->base);
+		status = IRQ_HANDLED;
+	}
+
 	/* We shouldn't be triggering these if we have GMP in
 	 * always-allowed mode.
 	 */
@@ -141,7 +151,7 @@ v3d_hub_irq(int irq, void *arg)
 
 	if (intsts & V3D_HUB_INT_TFUC) {
 		struct v3d_fence *fence =
-			to_v3d_fence(v3d->tfu_job->irq_fence);
+			to_v3d_fence(v3d->tfu_job->base.irq_fence);
 
 		trace_v3d_tfu_irq(&v3d->drm, fence->seqno);
 		dma_fence_signal(&fence->base);
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
index 8e88af237610..9a8ff0ce648e 100644
--- a/drivers/gpu/drm/v3d/v3d_regs.h
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -238,8 +238,11 @@
 #define V3D_CTL_L2TCACTL                               0x00030
 # define V3D_L2TCACTL_TMUWCF                           BIT(8)
 # define V3D_L2TCACTL_L2T_NO_WM                        BIT(4)
+/* Invalidates cache lines. */
 # define V3D_L2TCACTL_FLM_FLUSH                        0
+/* Removes cachelines without writing dirty lines back. */
 # define V3D_L2TCACTL_FLM_CLEAR                        1
+/* Writes out dirty cachelines and marks them clean, but doesn't invalidate. */
 # define V3D_L2TCACTL_FLM_CLEAN                        2
 # define V3D_L2TCACTL_FLM_MASK                         V3D_MASK(2, 1)
 # define V3D_L2TCACTL_FLM_SHIFT                        1
@@ -255,6 +258,8 @@
 #define V3D_CTL_INT_MSK_CLR                            0x00064
 # define V3D_INT_QPU_MASK                              V3D_MASK(27, 16)
 # define V3D_INT_QPU_SHIFT                             16
+# define V3D_INT_CSDDONE                               BIT(7)
+# define V3D_INT_PCTR                                  BIT(6)
 # define V3D_INT_GMPV                                  BIT(5)
 # define V3D_INT_TRFB                                  BIT(4)
 # define V3D_INT_SPILLUSE                              BIT(3)
@@ -374,4 +379,72 @@
 #define V3D_GMP_PRESERVE_LOAD                          0x00818
 #define V3D_GMP_VALID_LINES                            0x00820
 
+#define V3D_CSD_STATUS                                 0x00900
+# define V3D_CSD_STATUS_NUM_COMPLETED_MASK             V3D_MASK(11, 4)
+# define V3D_CSD_STATUS_NUM_COMPLETED_SHIFT            4
+# define V3D_CSD_STATUS_NUM_ACTIVE_MASK                V3D_MASK(3, 2)
+# define V3D_CSD_STATUS_NUM_ACTIVE_SHIFT               2
+# define V3D_CSD_STATUS_HAVE_CURRENT_DISPATCH          BIT(1)
+# define V3D_CSD_STATUS_HAVE_QUEUED_DISPATCH           BIT(0)
+
+#define V3D_CSD_QUEUED_CFG0                            0x00904
+# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG0_NUM_WGS_X_SHIFT           16
+# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG0_WG_X_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG1                            0x00908
+# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG1_NUM_WGS_Y_SHIFT           16
+# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG1_WG_Y_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG2                            0x0090c
+# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_MASK            V3D_MASK(31, 16)
+# define V3D_CSD_QUEUED_CFG2_NUM_WGS_Z_SHIFT           16
+# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_MASK          V3D_MASK(15, 0)
+# define V3D_CSD_QUEUED_CFG2_WG_Z_OFFSET_SHIFT         0
+
+#define V3D_CSD_QUEUED_CFG3                            0x00910
+# define V3D_CSD_QUEUED_CFG3_OVERLAP_WITH_PREV         BIT(26)
+# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_MASK            V3D_MASK(25, 20)
+# define V3D_CSD_QUEUED_CFG3_MAX_SG_ID_SHIFT           20
+# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_MASK    V3D_MASK(19, 12)
+# define V3D_CSD_QUEUED_CFG3_BATCHES_PER_SG_M1_SHIFT   12
+# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_MASK           V3D_MASK(11, 8)
+# define V3D_CSD_QUEUED_CFG3_WGS_PER_SG_SHIFT          8
+# define V3D_CSD_QUEUED_CFG3_WG_SIZE_MASK              V3D_MASK(7, 0)
+# define V3D_CSD_QUEUED_CFG3_WG_SIZE_SHIFT             0
+
+/* Number of batches, minus 1 */
+#define V3D_CSD_QUEUED_CFG4                            0x00914
+
+/* Shader address, pnan, singleseg, threading, like a shader record. */
+#define V3D_CSD_QUEUED_CFG5                            0x00918
+
+/* Uniforms address (4 byte aligned) */
+#define V3D_CSD_QUEUED_CFG6                            0x0091c
+
+#define V3D_CSD_CURRENT_CFG0                          0x00920
+#define V3D_CSD_CURRENT_CFG1                          0x00924
+#define V3D_CSD_CURRENT_CFG2                          0x00928
+#define V3D_CSD_CURRENT_CFG3                          0x0092c
+#define V3D_CSD_CURRENT_CFG4                          0x00930
+#define V3D_CSD_CURRENT_CFG5                          0x00934
+#define V3D_CSD_CURRENT_CFG6                          0x00938
+
+#define V3D_CSD_CURRENT_ID0                            0x0093c
+# define V3D_CSD_CURRENT_ID0_WG_X_MASK                 V3D_MASK(31, 16)
+# define V3D_CSD_CURRENT_ID0_WG_X_SHIFT                16
+# define V3D_CSD_CURRENT_ID0_WG_IN_SG_MASK             V3D_MASK(11, 8)
+# define V3D_CSD_CURRENT_ID0_WG_IN_SG_SHIFT            8
+# define V3D_CSD_CURRENT_ID0_L_IDX_MASK                V3D_MASK(7, 0)
+# define V3D_CSD_CURRENT_ID0_L_IDX_SHIFT               0
+
+#define V3D_CSD_CURRENT_ID1                            0x00940
+# define V3D_CSD_CURRENT_ID0_WG_Z_MASK                 V3D_MASK(31, 16)
+# define V3D_CSD_CURRENT_ID0_WG_Z_SHIFT                16
+# define V3D_CSD_CURRENT_ID0_WG_Y_MASK                 V3D_MASK(15, 0)
+# define V3D_CSD_CURRENT_ID0_WG_Y_SHIFT                0
+
 #endif /* V3D_REGS_H */
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index e740f3b99aa5..8c2df6d95283 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -30,158 +30,152 @@ to_v3d_job(struct drm_sched_job *sched_job)
 	return container_of(sched_job, struct v3d_job, base);
 }
 
-static struct v3d_tfu_job *
-to_tfu_job(struct drm_sched_job *sched_job)
+static struct v3d_bin_job *
+to_bin_job(struct drm_sched_job *sched_job)
 {
-	return container_of(sched_job, struct v3d_tfu_job, base);
+	return container_of(sched_job, struct v3d_bin_job, base.base);
 }
 
-static void
-v3d_job_free(struct drm_sched_job *sched_job)
+static struct v3d_render_job *
+to_render_job(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
+	return container_of(sched_job, struct v3d_render_job, base.base);
+}
 
-	drm_sched_job_cleanup(sched_job);
+static struct v3d_tfu_job *
+to_tfu_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_tfu_job, base.base);
+}
 
-	v3d_exec_put(job->exec);
+static struct v3d_csd_job *
+to_csd_job(struct drm_sched_job *sched_job)
+{
+	return container_of(sched_job, struct v3d_csd_job, base.base);
 }
 
 static void
-v3d_tfu_job_free(struct drm_sched_job *sched_job)
+v3d_job_free(struct drm_sched_job *sched_job)
 {
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_job *job = to_v3d_job(sched_job);
 
 	drm_sched_job_cleanup(sched_job);
-
-	v3d_tfu_job_put(job);
+	v3d_job_put(job);
 }
 
 /**
- * Returns the fences that the bin or render job depends on, one by one.
- * v3d_job_run() won't be called until all of them have been signaled.
+ * Returns the fences that the job depends on, one by one.
+ *
+ * If placed in the scheduler's .dependency method, the corresponding
+ * .run_job won't be called until all of them have been signaled.
  */
 static struct dma_fence *
 v3d_job_dependency(struct drm_sched_job *sched_job,
 		   struct drm_sched_entity *s_entity)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
-
-	if (q == V3D_RENDER) {
-		/* If we had a bin job, the render job definitely depends on
-		 * it. We first have to wait for bin to be scheduled, so that
-		 * its done_fence is created.
-		 */
-		fence = exec->bin_done_fence;
-		if (fence) {
-			exec->bin_done_fence = NULL;
-			return fence;
-		}
-	}
 
 	/* XXX: Wait on a fence for switching the GMP if necessary,
 	 * and then do so.
 	 */
 
-	return fence;
-}
-
-/**
- * Returns the fences that the TFU job depends on, one by one.
- * v3d_tfu_job_run() won't be called until all of them have been
- * signaled.
- */
-static struct dma_fence *
-v3d_tfu_job_dependency(struct drm_sched_job *sched_job,
-		       struct drm_sched_entity *s_entity)
-{
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
-	struct dma_fence *fence;
-
-	fence = job->in_fence;
-	if (fence) {
-		job->in_fence = NULL;
-		return fence;
-	}
+	if (!xa_empty(&job->deps))
+		return xa_erase(&job->deps, job->last_dep++);
 
 	return NULL;
 }
 
-static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
 {
-	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	struct v3d_dev *v3d = exec->v3d;
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 	unsigned long irqflags;
 
-	if (unlikely(job->base.s_fence->finished.error))
+	if (unlikely(job->base.base.s_fence->finished.error))
 		return NULL;
 
 	/* Lock required around bin_job update vs
 	 * v3d_overflow_mem_work().
 	 */
 	spin_lock_irqsave(&v3d->job_lock, irqflags);
-	if (q == V3D_BIN) {
-		v3d->bin_job = job->exec;
-
-		/* Clear out the overflow allocation, so we don't
-		 * reuse the overflow attached to a previous job.
-		 */
-		V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
-	} else {
-		v3d->render_job = job->exec;
-	}
+	v3d->bin_job = job;
+	/* Clear out the overflow allocation, so we don't
+	 * reuse the overflow attached to a previous job.
+	 */
+	V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
 	spin_unlock_irqrestore(&v3d->job_lock, irqflags);
 
-	/* Can we avoid this flush when q==RENDER?  We need to be
-	 * careful of scheduling, though -- imagine job0 rendering to
-	 * texture and job1 reading, and them being executed as bin0,
-	 * bin1, render0, render1, so that render1's flush at bin time
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_BIN);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_cl(dev, false, to_v3d_fence(fence)->seqno,
+			    job->start, job->end);
+
+	/* Set the current and end address of the control list.
+	 * Writing the end register is what starts the job.
+	 */
+	if (job->qma) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, job->qma);
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, job->qms);
+	}
+	if (job->qts) {
+		V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+			       V3D_CLE_CT0QTS_ENABLE |
+			       job->qts);
+	}
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT0QEA, job->end);
+
+	return fence;
+}
+
+static struct dma_fence *v3d_render_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+
+	if (unlikely(job->base.base.s_fence->finished.error))
+		return NULL;
+
+	v3d->render_job = job;
+
+	/* Can we avoid this flush?  We need to be careful of
+	 * scheduling, though -- imagine job0 rendering to texture and
+	 * job1 reading, and them being executed as bin0, bin1,
+	 * render0, render1, so that render1's flush at bin time
 	 * wasn't enough.
 	 */
 	v3d_invalidate_caches(v3d);
 
-	fence = v3d_fence_create(v3d, q);
+	fence = v3d_fence_create(v3d, V3D_RENDER);
 	if (IS_ERR(fence))
 		return NULL;
 
-	if (job->irq_fence)
-		dma_fence_put(job->irq_fence);
-	job->irq_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
-	trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+	trace_v3d_submit_cl(dev, true, to_v3d_fence(fence)->seqno,
 			    job->start, job->end);
 
-	if (q == V3D_BIN) {
-		if (exec->qma) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
-		}
-		if (exec->qts) {
-			V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
-				       V3D_CLE_CT0QTS_ENABLE |
-				       exec->qts);
-		}
-	} else {
-		/* XXX: Set the QCFG */
-	}
+	/* XXX: Set the QCFG */
 
 	/* Set the current and end address of the control list.
 	 * Writing the end register is what starts the job.
 	 */
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
-	V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QBA, job->start);
+	V3D_CORE_WRITE(0, V3D_CLE_CT1QEA, job->end);
 
 	return fence;
 }
@@ -190,7 +184,7 @@ static struct dma_fence *
 v3d_tfu_job_run(struct drm_sched_job *sched_job)
 {
 	struct v3d_tfu_job *job = to_tfu_job(sched_job);
-	struct v3d_dev *v3d = job->v3d;
+	struct v3d_dev *v3d = job->base.v3d;
 	struct drm_device *dev = &v3d->drm;
 	struct dma_fence *fence;
 
@@ -199,9 +193,9 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 		return NULL;
 
 	v3d->tfu_job = job;
-	if (job->irq_fence)
-		dma_fence_put(job->irq_fence);
-	job->irq_fence = dma_fence_get(fence);
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
 
 	trace_v3d_submit_tfu(dev, to_v3d_fence(fence)->seqno);
 
@@ -223,6 +217,48 @@ v3d_tfu_job_run(struct drm_sched_job *sched_job)
 	return fence;
 }
 
+static struct dma_fence *
+v3d_csd_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	struct drm_device *dev = &v3d->drm;
+	struct dma_fence *fence;
+	int i;
+
+	v3d->csd_job = job;
+
+	v3d_invalidate_caches(v3d);
+
+	fence = v3d_fence_create(v3d, V3D_CSD);
+	if (IS_ERR(fence))
+		return NULL;
+
+	if (job->base.irq_fence)
+		dma_fence_put(job->base.irq_fence);
+	job->base.irq_fence = dma_fence_get(fence);
+
+	trace_v3d_submit_csd(dev, to_v3d_fence(fence)->seqno);
+
+	for (i = 1; i <= 6; i++)
+		V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0 + 4 * i, job->args.cfg[i]);
+	/* CFG0 write kicks off the job. */
+	V3D_CORE_WRITE(0, V3D_CSD_QUEUED_CFG0, job->args.cfg[0]);
+
+	return fence;
+}
+
+static struct dma_fence *
+v3d_cache_clean_job_run(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
+	struct v3d_dev *v3d = job->v3d;
+
+	v3d_clean_caches(v3d);
+
+	return NULL;
+}
+
 static void
 v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 {
@@ -232,7 +268,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 
 	/* block scheduler */
 	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_stop(&v3d->queue[q].sched);
+		drm_sched_stop(&v3d->queue[q].sched, sched_job);
 
 	if (sched_job)
 		drm_sched_increase_karma(sched_job);
@@ -251,25 +287,23 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
 	mutex_unlock(&v3d->reset_lock);
 }
 
+/* If the current address or return address have changed, then the GPU
+ * has probably made progress and we should delay the reset.  This
+ * could fail if the GPU got in an infinite loop in the CL, but that
+ * is pretty unlikely outside of an i-g-t testcase.
+ */
 static void
-v3d_job_timedout(struct drm_sched_job *sched_job)
+v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
+		    u32 *timedout_ctca, u32 *timedout_ctra)
 {
 	struct v3d_job *job = to_v3d_job(sched_job);
-	struct v3d_exec_info *exec = job->exec;
-	struct v3d_dev *v3d = exec->v3d;
-	enum v3d_queue job_q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
-	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(job_q));
-	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(job_q));
-
-	/* If the current address or return address have changed, then
-	 * the GPU has probably made progress and we should delay the
-	 * reset.  This could fail if the GPU got in an infinite loop
-	 * in the CL, but that is pretty unlikely outside of an i-g-t
-	 * testcase.
-	 */
-	if (job->timedout_ctca != ctca || job->timedout_ctra != ctra) {
-		job->timedout_ctca = ctca;
-		job->timedout_ctra = ctra;
+	struct v3d_dev *v3d = job->v3d;
+	u32 ctca = V3D_CORE_READ(0, V3D_CLE_CTNCA(q));
+	u32 ctra = V3D_CORE_READ(0, V3D_CLE_CTNRA(q));
+
+	if (*timedout_ctca != ctca || *timedout_ctra != ctra) {
+		*timedout_ctca = ctca;
+		*timedout_ctra = ctra;
 		return;
 	}
 
@@ -277,25 +311,82 @@ v3d_job_timedout(struct drm_sched_job *sched_job)
 }
 
 static void
-v3d_tfu_job_timedout(struct drm_sched_job *sched_job)
+v3d_bin_job_timedout(struct drm_sched_job *sched_job)
 {
-	struct v3d_tfu_job *job = to_tfu_job(sched_job);
+	struct v3d_bin_job *job = to_bin_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_BIN,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_render_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_render_job *job = to_render_job(sched_job);
+
+	v3d_cl_job_timedout(sched_job, V3D_RENDER,
+			    &job->timedout_ctca, &job->timedout_ctra);
+}
+
+static void
+v3d_generic_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_job *job = to_v3d_job(sched_job);
 
 	v3d_gpu_reset_for_timeout(job->v3d, sched_job);
 }
 
-static const struct drm_sched_backend_ops v3d_sched_ops = {
+static void
+v3d_csd_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct v3d_csd_job *job = to_csd_job(sched_job);
+	struct v3d_dev *v3d = job->base.v3d;
+	u32 batches = V3D_CORE_READ(0, V3D_CSD_CURRENT_CFG4);
+
+	/* If we've made progress, skip reset and let the timer get
+	 * rearmed.
+	 */
+	if (job->timedout_batches != batches) {
+		job->timedout_batches = batches;
+		return;
+	}
+
+	v3d_gpu_reset_for_timeout(v3d, sched_job);
+}
+
+static const struct drm_sched_backend_ops v3d_bin_sched_ops = {
 	.dependency = v3d_job_dependency,
-	.run_job = v3d_job_run,
-	.timedout_job = v3d_job_timedout,
-	.free_job = v3d_job_free
+	.run_job = v3d_bin_job_run,
+	.timedout_job = v3d_bin_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_render_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_render_job_run,
+	.timedout_job = v3d_render_job_timedout,
+	.free_job = v3d_job_free,
 };
 
 static const struct drm_sched_backend_ops v3d_tfu_sched_ops = {
-	.dependency = v3d_tfu_job_dependency,
+	.dependency = v3d_job_dependency,
 	.run_job = v3d_tfu_job_run,
-	.timedout_job = v3d_tfu_job_timedout,
-	.free_job = v3d_tfu_job_free
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free,
+};
+
+static const struct drm_sched_backend_ops v3d_csd_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_csd_job_run,
+	.timedout_job = v3d_csd_job_timedout,
+	.free_job = v3d_job_free
+};
+
+static const struct drm_sched_backend_ops v3d_cache_clean_sched_ops = {
+	.dependency = v3d_job_dependency,
+	.run_job = v3d_cache_clean_job_run,
+	.timedout_job = v3d_generic_job_timedout,
+	.free_job = v3d_job_free
 };
 
 int
@@ -307,7 +398,7 @@ v3d_sched_init(struct v3d_dev *v3d)
 	int ret;
 
 	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
-			     &v3d_sched_ops,
+			     &v3d_bin_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_bin");
@@ -317,14 +408,14 @@ v3d_sched_init(struct v3d_dev *v3d)
 	}
 
 	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
-			     &v3d_sched_ops,
+			     &v3d_render_sched_ops,
 			     hw_jobs_limit, job_hang_limit,
 			     msecs_to_jiffies(hang_limit_ms),
 			     "v3d_render");
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create render scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
 	}
 
@@ -336,11 +427,36 @@ v3d_sched_init(struct v3d_dev *v3d)
 	if (ret) {
 		dev_err(v3d->dev, "Failed to create TFU scheduler: %d.",
 			ret);
-		drm_sched_fini(&v3d->queue[V3D_RENDER].sched);
-		drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+		v3d_sched_fini(v3d);
 		return ret;
 	}
 
+	if (v3d_has_csd(v3d)) {
+		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
+				     &v3d_csd_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_csd");
+		if (ret) {
+			dev_err(v3d->dev, "Failed to create CSD scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+
+		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
+				     &v3d_cache_clean_sched_ops,
+				     hw_jobs_limit, job_hang_limit,
+				     msecs_to_jiffies(hang_limit_ms),
+				     "v3d_cache_clean");
+		if (ret) {
+			dev_err(v3d->dev, "Failed to create CACHE_CLEAN scheduler: %d.",
+				ret);
+			v3d_sched_fini(v3d);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
@@ -349,6 +465,8 @@ v3d_sched_fini(struct v3d_dev *v3d)
 {
 	enum v3d_queue q;
 
-	for (q = 0; q < V3D_MAX_QUEUES; q++)
-		drm_sched_fini(&v3d->queue[q].sched);
+	for (q = 0; q < V3D_MAX_QUEUES; q++) {
+		if (v3d->queue[q].sched.ready)
+			drm_sched_fini(&v3d->queue[q].sched);
+	}
 }
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
index edd984afa33f..7aa8dc356e54 100644
--- a/drivers/gpu/drm/v3d/v3d_trace.h
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -124,6 +124,26 @@ TRACE_EVENT(v3d_tfu_irq,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(v3d_csd_irq,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
 TRACE_EVENT(v3d_submit_tfu_ioctl,
 	    TP_PROTO(struct drm_device *dev, u32 iia),
 	    TP_ARGS(dev, iia),
@@ -163,6 +183,80 @@ TRACE_EVENT(v3d_submit_tfu,
 		      __entry->seqno)
 );
 
+TRACE_EVENT(v3d_submit_csd_ioctl,
+	    TP_PROTO(struct drm_device *dev, u32 cfg5, u32 cfg6),
+	    TP_ARGS(dev, cfg5, cfg6),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u32, cfg5)
+			     __field(u32, cfg6)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->cfg5 = cfg5;
+			   __entry->cfg6 = cfg6;
+			   ),
+
+	    TP_printk("dev=%u, CFG5 0x%08x, CFG6 0x%08x",
+		      __entry->dev,
+		      __entry->cfg5,
+		      __entry->cfg6)
+);
+
+TRACE_EVENT(v3d_submit_csd,
+	    TP_PROTO(struct drm_device *dev,
+		     uint64_t seqno),
+	    TP_ARGS(dev, seqno),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     __field(u64, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   __entry->seqno = seqno;
+			   ),
+
+	    TP_printk("dev=%u, seqno=%llu",
+		      __entry->dev,
+		      __entry->seqno)
+);
+
+TRACE_EVENT(v3d_cache_clean_begin,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
+TRACE_EVENT(v3d_cache_clean_end,
+	    TP_PROTO(struct drm_device *dev),
+	    TP_ARGS(dev),
+
+	    TP_STRUCT__entry(
+			     __field(u32, dev)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->dev = dev->primary->index;
+			   ),
+
+	    TP_printk("dev=%u",
+		      __entry->dev)
+);
+
 TRACE_EVENT(v3d_reset_begin,
 	    TP_PROTO(struct drm_device *dev),
 	    TP_ARGS(dev),
diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile
index 4e90cc8fa651..42949a17ff70 100644
--- a/drivers/gpu/drm/virtio/Makefile
+++ b/drivers/gpu/drm/virtio/Makefile
@@ -6,6 +6,6 @@
 virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_gem.o \
 	virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \
 	virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o \
-	virtgpu_ioctl.o virtgpu_prime.o
+	virtgpu_ioctl.o virtgpu_prime.o virtgpu_trace_points.o
 
 obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio-gpu.o
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 491dec0712b3..2d3e5b1debb3 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -102,7 +102,6 @@ struct virtio_gpu_fence {
 	struct dma_fence f;
 	struct virtio_gpu_fence_driver *drv;
 	struct list_head node;
-	uint64_t seq;
 };
 #define to_virtio_fence(x) \
 	container_of(x, struct virtio_gpu_fence, f)
@@ -356,7 +355,7 @@ int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma);
 bool virtio_fence_signaled(struct dma_fence *f);
 struct virtio_gpu_fence *virtio_gpu_fence_alloc(
 	struct virtio_gpu_device *vgdev);
-int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
 			  struct virtio_gpu_fence *fence);
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
index 87d1966192f4..70d6c4329778 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fence.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -24,6 +24,7 @@
  */
 
 #include <drm/drmP.h>
+#include <trace/events/dma_fence.h>
 #include "virtgpu_drv.h"
 
 static const char *virtio_get_driver_name(struct dma_fence *f)
@@ -40,16 +41,14 @@ bool virtio_fence_signaled(struct dma_fence *f)
 {
 	struct virtio_gpu_fence *fence = to_virtio_fence(f);
 
-	if (atomic64_read(&fence->drv->last_seq) >= fence->seq)
+	if (atomic64_read(&fence->drv->last_seq) >= fence->f.seqno)
 		return true;
 	return false;
 }
 
 static void virtio_fence_value_str(struct dma_fence *f, char *str, int size)
 {
-	struct virtio_gpu_fence *fence = to_virtio_fence(f);
-
-	snprintf(str, size, "%llu", fence->seq);
+	snprintf(str, size, "%llu", f->seqno);
 }
 
 static void virtio_timeline_value_str(struct dma_fence *f, char *str, int size)
@@ -71,17 +70,22 @@ struct virtio_gpu_fence *virtio_gpu_fence_alloc(struct virtio_gpu_device *vgdev)
 {
 	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
 	struct virtio_gpu_fence *fence = kzalloc(sizeof(struct virtio_gpu_fence),
-							GFP_ATOMIC);
+							GFP_KERNEL);
 	if (!fence)
 		return fence;
 
 	fence->drv = drv;
+
+	/* This only partially initializes the fence because the seqno is
+	 * unknown yet.  The fence must not be used outside of the driver
+	 * until virtio_gpu_fence_emit is called.
+	 */
 	dma_fence_init(&fence->f, &virtio_fence_ops, &drv->lock, drv->context, 0);
 
 	return fence;
 }
 
-int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+void virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
 			  struct virtio_gpu_fence *fence)
 {
@@ -89,14 +93,15 @@ int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
 	unsigned long irq_flags;
 
 	spin_lock_irqsave(&drv->lock, irq_flags);
-	fence->seq = ++drv->sync_seq;
+	fence->f.seqno = ++drv->sync_seq;
 	dma_fence_get(&fence->f);
 	list_add_tail(&fence->node, &drv->fences);
 	spin_unlock_irqrestore(&drv->lock, irq_flags);
 
+	trace_dma_fence_emit(&fence->f);
+
 	cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE);
-	cmd_hdr->fence_id = cpu_to_le64(fence->seq);
-	return 0;
+	cmd_hdr->fence_id = cpu_to_le64(fence->f.seqno);
 }
 
 void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
@@ -109,7 +114,7 @@ void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
 	spin_lock_irqsave(&drv->lock, irq_flags);
 	atomic64_set(&vgdev->fence_drv.last_seq, last_seq);
 	list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
-		if (last_seq < fence->seq)
+		if (last_seq < fence->f.seqno)
 			continue;
 		dma_fence_signal_locked(&fence->f);
 		list_del(&fence->node);
diff --git a/drivers/gpu/drm/virtio/virtgpu_trace.h b/drivers/gpu/drm/virtio/virtgpu_trace.h
new file mode 100644
index 000000000000..711ecc2bd241
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_trace.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(_VIRTGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VIRTGPU_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM virtio_gpu
+#define TRACE_INCLUDE_FILE virtgpu_trace
+
+DECLARE_EVENT_CLASS(virtio_gpu_cmd,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr),
+	TP_STRUCT__entry(
+			 __field(int, dev)
+			 __field(unsigned int, vq)
+			 __field(const char *, name)
+			 __field(u32, type)
+			 __field(u32, flags)
+			 __field(u64, fence_id)
+			 __field(u32, ctx_id)
+			 ),
+	TP_fast_assign(
+		       __entry->dev = vq->vdev->index;
+		       __entry->vq = vq->index;
+		       __entry->name = vq->name;
+		       __entry->type = le32_to_cpu(hdr->type);
+		       __entry->flags = le32_to_cpu(hdr->flags);
+		       __entry->fence_id = le64_to_cpu(hdr->fence_id);
+		       __entry->ctx_id = le32_to_cpu(hdr->ctx_id);
+		       ),
+	TP_printk("vdev=%d vq=%u name=%s type=0x%x flags=0x%x fence_id=%llu ctx_id=%u",
+		  __entry->dev, __entry->vq, __entry->name,
+		  __entry->type, __entry->flags, __entry->fence_id,
+		  __entry->ctx_id)
+);
+
+DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_queue,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr)
+);
+
+DEFINE_EVENT(virtio_gpu_cmd, virtio_gpu_cmd_response,
+	TP_PROTO(struct virtqueue *vq, struct virtio_gpu_ctrl_hdr *hdr),
+	TP_ARGS(vq, hdr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/virtio
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/virtio/virtgpu_trace_points.c b/drivers/gpu/drm/virtio/virtgpu_trace_points.c
new file mode 100644
index 000000000000..1970cb6f24ef
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_trace_points.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "virtgpu_drv.h"
+
+#define CREATE_TRACE_POINTS
+#include "virtgpu_trace.h"
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index e62fe24b1a2e..2c5eeccb88c0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -28,6 +28,7 @@
 
 #include <drm/drmP.h>
 #include "virtgpu_drv.h"
+#include "virtgpu_trace.h"
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
@@ -192,6 +193,9 @@ void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
 
 	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
 		resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf;
+
+		trace_virtio_gpu_cmd_response(vgdev->ctrlq.vq, resp);
+
 		if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA)) {
 			if (resp->type >= cpu_to_le32(VIRTIO_GPU_RESP_ERR_UNSPEC)) {
 				struct virtio_gpu_ctrl_hdr *cmd;
@@ -284,6 +288,9 @@ retry:
 		spin_lock(&vgdev->ctrlq.qlock);
 		goto retry;
 	} else {
+		trace_virtio_gpu_cmd_queue(vq,
+			(struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+
 		virtqueue_kick(vq);
 	}
 
@@ -359,6 +366,9 @@ retry:
 		spin_lock(&vgdev->cursorq.qlock);
 		goto retry;
 	} else {
+		trace_virtio_gpu_cmd_queue(vq,
+			(struct virtio_gpu_ctrl_hdr *)vbuf->buf);
+
 		virtqueue_kick(vq);
 	}
 
diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c
index bb66dbcd5e3f..7508815fac11 100644
--- a/drivers/gpu/drm/vkms/vkms_crtc.c
+++ b/drivers/gpu/drm/vkms/vkms_crtc.c
@@ -83,26 +83,6 @@ bool vkms_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe,
 	return true;
 }
 
-static void vkms_atomic_crtc_reset(struct drm_crtc *crtc)
-{
-	struct vkms_crtc_state *vkms_state = NULL;
-
-	if (crtc->state) {
-		vkms_state = to_vkms_crtc_state(crtc->state);
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-		kfree(vkms_state);
-		crtc->state = NULL;
-	}
-
-	vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL);
-	if (!vkms_state)
-		return;
-	INIT_WORK(&vkms_state->crc_work, vkms_crc_work_handle);
-
-	crtc->state = &vkms_state->base;
-	crtc->state->crtc = crtc;
-}
-
 static struct drm_crtc_state *
 vkms_atomic_crtc_duplicate_state(struct drm_crtc *crtc)
 {
@@ -135,6 +115,19 @@ static void vkms_atomic_crtc_destroy_state(struct drm_crtc *crtc,
 	}
 }
 
+static void vkms_atomic_crtc_reset(struct drm_crtc *crtc)
+{
+	struct vkms_crtc_state *vkms_state =
+		kzalloc(sizeof(*vkms_state), GFP_KERNEL);
+
+	if (crtc->state)
+		vkms_atomic_crtc_destroy_state(crtc, crtc->state);
+
+	__drm_atomic_helper_crtc_reset(crtc, &vkms_state->base);
+	if (vkms_state)
+		INIT_WORK(&vkms_state->crc_work, vkms_crc_work_handle);
+}
+
 static const struct drm_crtc_funcs vkms_crtc_funcs = {
 	.set_config             = drm_atomic_helper_set_config,
 	.destroy                = drm_crtc_cleanup,