19 files changed, 261 insertions, 468 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5c400f4b87fd..de9312c02185 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -405,7 +405,6 @@ struct amdgpu_fence_driver {
 /* some special values for the owner field */
 #define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
 #define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
-#define AMDGPU_FENCE_OWNER_MOVE		((void*)2ul)
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
@@ -447,57 +446,11 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
 int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
 
-signed long amdgpu_fence_wait_any(struct fence **array,
-				  uint32_t count,
-				  bool intr,
-				  signed long t);
-struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence);
-void amdgpu_fence_unref(struct amdgpu_fence **fence);
-
 bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
 			    struct amdgpu_ring *ring);
 void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
 			    struct amdgpu_ring *ring);
 
-static inline struct amdgpu_fence *amdgpu_fence_later(struct amdgpu_fence *a,
-						      struct amdgpu_fence *b)
-{
-	if (!a) {
-		return b;
-	}
-
-	if (!b) {
-		return a;
-	}
-
-	BUG_ON(a->ring != b->ring);
-
-	if (a->seq > b->seq) {
-		return a;
-	} else {
-		return b;
-	}
-}
-
-static inline bool amdgpu_fence_is_earlier(struct amdgpu_fence *a,
-					   struct amdgpu_fence *b)
-{
-	if (!a) {
-		return false;
-	}
-
-	if (!b) {
-		return true;
-	}
-
-	BUG_ON(a->ring != b->ring);
-
-	return a->seq < b->seq;
-}
-
-int amdgpu_user_fence_emit(struct amdgpu_ring *ring, struct amdgpu_user_fence *user,
-			   void *owner, struct amdgpu_fence **fence);
-
 /*
  * TTM.
  */
@@ -708,7 +661,7 @@ void amdgpu_semaphore_free(struct amdgpu_device *adev,
  */
 struct amdgpu_sync {
 	struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
-	struct amdgpu_fence	*sync_to[AMDGPU_MAX_RINGS];
+	struct fence		*sync_to[AMDGPU_MAX_RINGS];
 	DECLARE_HASHTABLE(fences, 4);
 	struct fence	        *last_vm_update;
 };
@@ -974,7 +927,7 @@ struct amdgpu_vm_id {
 	/* last flushed PD/PT update */
 	struct fence	        *flushed_updates;
 	/* last use of vmid */
-	struct amdgpu_fence	*last_id_use;
+	struct fence		*last_id_use;
 };
 
 struct amdgpu_vm {
@@ -1007,7 +960,7 @@ struct amdgpu_vm {
 };
 
 struct amdgpu_vm_manager {
-	struct amdgpu_fence		*active[AMDGPU_NUM_VM];
+	struct fence			*active[AMDGPU_NUM_VM];
 	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
@@ -1235,6 +1188,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
 		     enum amdgpu_ring_type ring_type);
 void amdgpu_ring_fini(struct amdgpu_ring *ring);
+struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
 
 /*
  * CS.
@@ -1758,11 +1712,11 @@ void amdgpu_test_syncing(struct amdgpu_device *adev);
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
 #else
-static int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	return -ENODEV;
 }
-static void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
+static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 27ef52847e6d..dfc4d02c7a38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -876,8 +876,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		struct amdgpu_job *job;
 		struct amdgpu_ring * ring =  parser->ibs->ring;
 		job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
-		if (!job)
-			return -ENOMEM;
+		if (!job) {
+			r = -ENOMEM;
+			goto out;
+		}
 		job->base.sched = &ring->sched;
 		job->base.s_entity = &parser->ctx->rings[ring->idx].entity;
 		job->adev = parser->adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e0b80ccdfe8a..fec65f01c031 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -69,6 +69,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 	struct amdgpu_device *adev = ctx->adev;
 	unsigned i, j;
 
+	if (!adev)
+		return;
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
 			fence_put(ctx->rings[i].fences[j]);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 96290d9cddca..093a8c618931 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -207,6 +207,7 @@ static int amdgpufb_create(struct drm_fb_helper *helper,
 	}
 
 	info->par = rfbdev;
+	info->skip_vt_switch = true;
 
 	ret = amdgpu_framebuffer_init(adev->ddev, &rfbdev->rfb, &mode_cmd, gobj);
 	if (ret) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 003a219943f1..257d72205bb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -137,42 +137,6 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
 }
 
 /**
- * amdgpu_fence_check_signaled - callback from fence_queue
- *
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
- */
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
-{
-	struct amdgpu_fence *fence;
-	struct amdgpu_device *adev;
-	u64 seq;
-	int ret;
-
-	fence = container_of(wait, struct amdgpu_fence, fence_wake);
-	adev = fence->ring->adev;
-
-	/*
-	 * We cannot use amdgpu_fence_process here because we're already
-	 * in the waitqueue, in a call from wake_up_all.
-	 */
-	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
-	if (seq >= fence->seq) {
-		ret = fence_signal_locked(&fence->base);
-		if (!ret)
-			FENCE_TRACE(&fence->base, "signaled from irq context\n");
-		else
-			FENCE_TRACE(&fence->base, "was already signaled\n");
-
-		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
-		fence_put(&fence->base);
-	} else
-		FENCE_TRACE(&fence->base, "pending\n");
-	return 0;
-}
-
-/**
  * amdgpu_fence_activity - check for fence activity
  *
  * @ring: pointer to struct amdgpu_ring
@@ -305,47 +269,6 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
 	return false;
 }
 
-static bool amdgpu_fence_is_signaled(struct fence *f)
-{
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	struct amdgpu_ring *ring = fence->ring;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
-
-	amdgpu_fence_process(ring);
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return true;
-
-	return false;
-}
-
-/**
- * amdgpu_fence_enable_signaling - enable signalling on fence
- * @fence: fence
- *
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
- */
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	struct amdgpu_ring *ring = fence->ring;
-
-	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
-		return false;
-
-	fence->fence_wake.flags = 0;
-	fence->fence_wake.private = NULL;
-	fence->fence_wake.func = amdgpu_fence_check_signaled;
-	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
-	fence_get(f);
-	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
-	return true;
-}
-
 /*
  * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
  * @ring: ring to wait on for the seq number
@@ -367,6 +290,7 @@ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
 	if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
 		return 0;
 
+	amdgpu_fence_schedule_check(ring);
 	wait_event(ring->fence_drv.fence_queue, (
 		   (signaled = amdgpu_fence_seq_signaled(ring, seq))));
 
@@ -417,36 +341,6 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_fence_ref - take a ref on a fence
- *
- * @fence: amdgpu fence object
- *
- * Take a reference on a fence (all asics).
- * Returns the fence.
- */
-struct amdgpu_fence *amdgpu_fence_ref(struct amdgpu_fence *fence)
-{
-	fence_get(&fence->base);
-	return fence;
-}
-
-/**
- * amdgpu_fence_unref - remove a ref on a fence
- *
- * @fence: amdgpu fence object
- *
- * Remove a reference on a fence (all asics).
- */
-void amdgpu_fence_unref(struct amdgpu_fence **fence)
-{
-	struct amdgpu_fence *tmp = *fence;
-
-	*fence = NULL;
-	if (tmp)
-		fence_put(&tmp->base);
-}
-
-/**
  * amdgpu_fence_count_emitted - get the count of emitted fences
  *
  * @ring: ring the fence is associated with
@@ -761,6 +655,115 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
 	}
 }
 
+/*
+ * Common fence implementation
+ */
+
+static const char *amdgpu_fence_get_driver_name(struct fence *fence)
+{
+	return "amdgpu";
+}
+
+static const char *amdgpu_fence_get_timeline_name(struct fence *f)
+{
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	return (const char *)fence->ring->name;
+}
+
+/**
+ * amdgpu_fence_is_signaled - test if fence is signaled
+ *
+ * @f: fence to test
+ *
+ * Test the fence sequence number if it is already signaled. If it isn't
+ * signaled start fence processing. Returns True if the fence is signaled.
+ */
+static bool amdgpu_fence_is_signaled(struct fence *f)
+{
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	struct amdgpu_ring *ring = fence->ring;
+
+	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+		return true;
+
+	amdgpu_fence_process(ring);
+
+	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+		return true;
+
+	return false;
+}
+
+/**
+ * amdgpu_fence_check_signaled - callback from fence_queue
+ *
+ * this function is called with fence_queue lock held, which is also used
+ * for the fence locking itself, so unlocked variants are used for
+ * fence_signal, and remove_wait_queue.
+ */
+static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct amdgpu_fence *fence;
+	struct amdgpu_device *adev;
+	u64 seq;
+	int ret;
+
+	fence = container_of(wait, struct amdgpu_fence, fence_wake);
+	adev = fence->ring->adev;
+
+	/*
+	 * We cannot use amdgpu_fence_process here because we're already
+	 * in the waitqueue, in a call from wake_up_all.
+	 */
+	seq = atomic64_read(&fence->ring->fence_drv.last_seq);
+	if (seq >= fence->seq) {
+		ret = fence_signal_locked(&fence->base);
+		if (!ret)
+			FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		else
+			FENCE_TRACE(&fence->base, "was already signaled\n");
+
+		__remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
+		fence_put(&fence->base);
+	} else
+		FENCE_TRACE(&fence->base, "pending\n");
+	return 0;
+}
+
+/**
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
+ *
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
+ */
+static bool amdgpu_fence_enable_signaling(struct fence *f)
+{
+	struct amdgpu_fence *fence = to_amdgpu_fence(f);
+	struct amdgpu_ring *ring = fence->ring;
+
+	if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
+		return false;
+
+	fence->fence_wake.flags = 0;
+	fence->fence_wake.private = NULL;
+	fence->fence_wake.func = amdgpu_fence_check_signaled;
+	__add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
+	fence_get(f);
+	amdgpu_fence_schedule_check(ring);
+	FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
+	return true;
+}
+
+const struct fence_ops amdgpu_fence_ops = {
+	.get_driver_name = amdgpu_fence_get_driver_name,
+	.get_timeline_name = amdgpu_fence_get_timeline_name,
+	.enable_signaling = amdgpu_fence_enable_signaling,
+	.signaled = amdgpu_fence_is_signaled,
+	.wait = fence_default_wait,
+	.release = NULL,
+};
 
 /*
  * Fence debugfs
@@ -811,131 +814,3 @@ int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
 #endif
 }
 
-static const char *amdgpu_fence_get_driver_name(struct fence *fence)
-{
-	return "amdgpu";
-}
-
-static const char *amdgpu_fence_get_timeline_name(struct fence *f)
-{
-	struct amdgpu_fence *fence = to_amdgpu_fence(f);
-	return (const char *)fence->ring->name;
-}
-
-static inline bool amdgpu_test_signaled(struct amdgpu_fence *fence)
-{
-	return test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags);
-}
-
-static bool amdgpu_test_signaled_any(struct fence **fences, uint32_t count)
-{
-	int idx;
-	struct fence *fence;
-
-	for (idx = 0; idx < count; ++idx) {
-		fence = fences[idx];
-		if (fence) {
-			if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
-				return true;
-		}
-	}
-	return false;
-}
-
-struct amdgpu_wait_cb {
-	struct fence_cb base;
-	struct task_struct *task;
-};
-
-static void amdgpu_fence_wait_cb(struct fence *fence, struct fence_cb *cb)
-{
-	struct amdgpu_wait_cb *wait =
-		container_of(cb, struct amdgpu_wait_cb, base);
-	wake_up_process(wait->task);
-}
-
-static signed long amdgpu_fence_default_wait(struct fence *f, bool intr,
-					     signed long t)
-{
-	return amdgpu_fence_wait_any(&f, 1, intr, t);
-}
-
-/**
- * Wait the fence array with timeout
- *
- * @array:    the fence array with amdgpu fence pointer
- * @count:    the number of the fence array
- * @intr:     when sleep, set the current task interruptable or not
- * @t:        timeout to wait
- *
- * It will return when any fence is signaled or timeout.
- */
-signed long amdgpu_fence_wait_any(struct fence **array, uint32_t count,
-				  bool intr, signed long t)
-{
-	struct amdgpu_wait_cb *cb;
-	struct fence *fence;
-	unsigned idx;
-
-	BUG_ON(!array);
-
-	cb = kcalloc(count, sizeof(struct amdgpu_wait_cb), GFP_KERNEL);
-	if (cb == NULL) {
-		t = -ENOMEM;
-		goto err_free_cb;
-	}
-
-	for (idx = 0; idx < count; ++idx) {
-		fence = array[idx];
-		if (fence) {
-			cb[idx].task = current;
-			if (fence_add_callback(fence,
-					&cb[idx].base, amdgpu_fence_wait_cb)) {
-				/* The fence is already signaled */
-				goto fence_rm_cb;
-			}
-		}
-	}
-
-	while (t > 0) {
-		if (intr)
-			set_current_state(TASK_INTERRUPTIBLE);
-		else
-			set_current_state(TASK_UNINTERRUPTIBLE);
-
-		/*
-		 * amdgpu_test_signaled_any must be called after
-		 * set_current_state to prevent a race with wake_up_process
-		 */
-		if (amdgpu_test_signaled_any(array, count))
-			break;
-
-		t = schedule_timeout(t);
-
-		if (t > 0 && intr && signal_pending(current))
-			t = -ERESTARTSYS;
-	}
-
-	__set_current_state(TASK_RUNNING);
-
-fence_rm_cb:
-	for (idx = 0; idx < count; ++idx) {
-		fence = array[idx];
-		if (fence && cb[idx].base.func)
-			fence_remove_callback(fence, &cb[idx].base);
-	}
-
-err_free_cb:
-	kfree(cb);
-
-	return t;
-}
-
-const struct fence_ops amdgpu_fence_ops = {
-	.get_driver_name = amdgpu_fence_get_driver_name,
-	.get_timeline_name = amdgpu_fence_get_timeline_name,
-	.enable_signaling = amdgpu_fence_enable_signaling,
-	.signaled = amdgpu_fence_is_signaled,
-	.wait = amdgpu_fence_default_wait,
-	.release = NULL,
-};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index aad4c1c69448..e65987743871 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -95,7 +95,8 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
 {
 	amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
 	amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
-	amdgpu_fence_unref(&ib->fence);
+	if (ib->fence)
+		fence_put(&ib->fence->base);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index b2df348aa223..78e9b0f14661 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -436,6 +436,30 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	}
 }
 
+/**
+ * amdgpu_ring_from_fence - get ring from fence
+ *
+ * @f: fence structure
+ *
+ * Extract the ring a fence belongs to. Handles both scheduler as
+ * well as hardware fences.
+ */
+struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
+{
+	struct amdgpu_fence *a_fence;
+	struct amd_sched_fence *s_fence;
+
+	s_fence = to_amd_sched_fence(f);
+	if (s_fence)
+		return container_of(s_fence->sched, struct amdgpu_ring, sched);
+
+	a_fence = to_amdgpu_fence(f);
+	if (a_fence)
+		return a_fence->ring;
+
+	return NULL;
+}
+
 /*
  * Debugfs info
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 5cb27d525e43..0212b31dc194 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -139,25 +139,6 @@ int amdgpu_sa_bo_manager_suspend(struct amdgpu_device *adev,
 	return r;
 }
 
-static uint32_t amdgpu_sa_get_ring_from_fence(struct fence *f)
-{
-	struct amdgpu_fence *a_fence;
-	struct amd_sched_fence *s_fence;
-
-	s_fence = to_amd_sched_fence(f);
-	if (s_fence) {
-		struct amdgpu_ring *ring;
-
-		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
-		return ring->idx;
-	}
-
-	a_fence = to_amdgpu_fence(f);
-	if (a_fence)
-		return a_fence->ring->idx;
-	return 0;
-}
-
 static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo)
 {
 	struct amdgpu_sa_manager *sa_manager = sa_bo->manager;
@@ -318,7 +299,7 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
 	}
 
 	if (best_bo) {
-		uint32_t idx = amdgpu_sa_get_ring_from_fence(best_bo->fence);
+		uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
 		++tries[idx];
 		sa_manager->hole = best_bo->olist.prev;
 
@@ -337,6 +318,7 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
 {
 	struct fence *fences[AMDGPU_MAX_RINGS];
 	unsigned tries[AMDGPU_MAX_RINGS];
+	unsigned count;
 	int i, r;
 	signed long t;
 
@@ -371,13 +353,18 @@ int amdgpu_sa_bo_new(struct amdgpu_device *adev,
 			/* see if we can skip over some allocations */
 		} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
 
-		spin_unlock(&sa_manager->wq.lock);
-		t = amdgpu_fence_wait_any(fences, AMDGPU_MAX_RINGS,
-					  false, MAX_SCHEDULE_TIMEOUT);
-		r = (t > 0) ? 0 : t;
-		spin_lock(&sa_manager->wq.lock);
-		/* if we have nothing to wait for block */
-		if (r == -ENOENT) {
+		for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+			if (fences[i])
+				fences[count++] = fences[i];
+
+		if (count) {
+			spin_unlock(&sa_manager->wq.lock);
+			t = fence_wait_any_timeout(fences, count, false,
+						   MAX_SCHEDULE_TIMEOUT);
+			r = (t > 0) ? 0 : t;
+			spin_lock(&sa_manager->wq.lock);
+		} else {
+			/* if we have nothing to wait for block */
 			r = wait_event_interruptible_locked(
 				sa_manager->wq,
 				amdgpu_sa_event(sa_manager, size, align)
@@ -406,7 +393,7 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
 	if (fence && !fence_is_signaled(fence)) {
 		uint32_t idx;
 		(*sa_bo)->fence = fence_get(fence);
-		idx = amdgpu_sa_get_ring_from_fence(fence);
+		idx = amdgpu_ring_from_fence(fence)->idx;
 		list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
 	} else {
 		amdgpu_sa_bo_remove_locked(*sa_bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 2e946b2cad88..dcf4a8aca680 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -54,7 +54,8 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
 		goto err;
 	}
 
-	fence = amdgpu_fence_ref(job->ibs[job->num_ibs - 1].fence);
+	fence = job->ibs[job->num_ibs - 1].fence;
+	fence_get(&fence->base);
 
 err:
 	if (job->free_job)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 4921de15b451..a6697fd05217 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -87,6 +87,15 @@ static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
 	return false;
 }
 
+static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
+{
+	if (*keep && fence_is_later(*keep, fence))
+		return;
+
+	fence_put(*keep);
+	*keep = fence_get(fence);
+}
+
 /**
  * amdgpu_sync_fence - remember to sync to this fence
  *
@@ -99,35 +108,21 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 {
 	struct amdgpu_sync_entry *e;
 	struct amdgpu_fence *fence;
-	struct amdgpu_fence *other;
-	struct fence *tmp, *later;
 
 	if (!f)
 		return 0;
 
 	if (amdgpu_sync_same_dev(adev, f) &&
-	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) {
-		if (sync->last_vm_update) {
-			tmp = sync->last_vm_update;
-			BUG_ON(f->context != tmp->context);
-			later = (f->seqno - tmp->seqno <= INT_MAX) ? f : tmp;
-			sync->last_vm_update = fence_get(later);
-			fence_put(tmp);
-		} else
-			sync->last_vm_update = fence_get(f);
-	}
+	    amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
 	fence = to_amdgpu_fence(f);
 	if (!fence || fence->ring->adev != adev) {
 		hash_for_each_possible(sync->fences, e, node, f->context) {
-			struct fence *new;
 			if (unlikely(e->fence->context != f->context))
 				continue;
-			new = fence_get(fence_later(e->fence, f));
-			if (new) {
-				fence_put(e->fence);
-				e->fence = new;
-			}
+
+			amdgpu_sync_keep_later(&e->fence, f);
 			return 0;
 		}
 
@@ -140,10 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 		return 0;
 	}
 
-	other = sync->sync_to[fence->ring->idx];
-	sync->sync_to[fence->ring->idx] = amdgpu_fence_ref(
-		amdgpu_fence_later(fence, other));
-	amdgpu_fence_unref(&other);
+	amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
 
 	return 0;
 }
@@ -199,8 +191,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 			 * for other VM updates and moves.
 			 */
 			fence_owner = amdgpu_sync_get_owner(f);
-			if ((owner != AMDGPU_FENCE_OWNER_MOVE) &&
-			    (fence_owner != AMDGPU_FENCE_OWNER_MOVE) &&
+			if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
+			    (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
 			    ((owner == AMDGPU_FENCE_OWNER_VM) !=
 			     (fence_owner == AMDGPU_FENCE_OWNER_VM)))
 				continue;
@@ -262,11 +254,11 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
 		return 0;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_fence *fence = sync->sync_to[i];
+		struct fence *fence = sync->sync_to[i];
 		if (!fence)
 			continue;
 
-		r = fence_wait(&fence->base, false);
+		r = fence_wait(fence, false);
 		if (r)
 			return r;
 	}
@@ -291,9 +283,14 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
 	int i, r;
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_fence *fence = sync->sync_to[i];
-		struct amdgpu_semaphore *semaphore;
 		struct amdgpu_ring *other = adev->rings[i];
+		struct amdgpu_semaphore *semaphore;
+		struct amdgpu_fence *fence;
+
+		if (!sync->sync_to[i])
+			continue;
+
+		fence = to_amdgpu_fence(sync->sync_to[i]);
 
 		/* check if we really need to sync */
 		if (!amdgpu_fence_need_sync(fence, ring))
@@ -378,7 +375,7 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
 		amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
-		amdgpu_fence_unref(&sync->sync_to[i]);
+		fence_put(sync->sync_to[i]);
 
 	fence_put(sync->last_vm_update);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index a089e69e9927..81bb8e9fc26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1041,7 +1041,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
 	WARN_ON(ib->length_dw > num_dw);
 	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
 						 &amdgpu_vm_free_job,
-						 AMDGPU_FENCE_OWNER_MOVE,
+						 AMDGPU_FENCE_OWNER_UNDEFINED,
 						 fence);
 	if (r)
 		goto error_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ff26e330ccd6..633a32a48560 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -135,7 +135,7 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync)
 {
-	struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
+	struct fence *best[AMDGPU_MAX_RINGS] = {};
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
 	struct amdgpu_device *adev = ring->adev;
 
@@ -154,7 +154,8 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < adev->vm_manager.nvm; ++i) {
-		struct amdgpu_fence *fence = adev->vm_manager.active[i];
+		struct fence *fence = adev->vm_manager.active[i];
+		struct amdgpu_ring *fring;
 
 		if (fence == NULL) {
 			/* found a free one */
@@ -163,21 +164,23 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			return 0;
 		}
 
-		if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
-			best[fence->ring->idx] = fence;
-			choices[fence->ring == ring ? 0 : 1] = i;
+		fring = amdgpu_ring_from_fence(fence);
+		if (best[fring->idx] == NULL ||
+		    fence_is_later(best[fring->idx], fence)) {
+			best[fring->idx] = fence;
+			choices[fring == ring ? 0 : 1] = i;
 		}
 	}
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
-			struct amdgpu_fence *fence;
+			struct fence *fence;
 
 			fence  = adev->vm_manager.active[choices[i]];
 			vm_id->id = choices[i];
 
 			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-			return amdgpu_sync_fence(ring->adev, sync, &fence->base);
+			return amdgpu_sync_fence(ring->adev, sync, fence);
 		}
 	}
 
@@ -246,11 +249,11 @@ void amdgpu_vm_fence(struct amdgpu_device *adev,
 	unsigned ridx = fence->ring->idx;
 	unsigned vm_id = vm->ids[ridx].id;
 
-	amdgpu_fence_unref(&adev->vm_manager.active[vm_id]);
-	adev->vm_manager.active[vm_id] = amdgpu_fence_ref(fence);
+	fence_put(adev->vm_manager.active[vm_id]);
+	adev->vm_manager.active[vm_id] = fence_get(&fence->base);
 
-	amdgpu_fence_unref(&vm->ids[ridx].last_id_use);
-	vm->ids[ridx].last_id_use = amdgpu_fence_ref(fence);
+	fence_put(vm->ids[ridx].last_id_use);
+	vm->ids[ridx].last_id_use = fence_get(&fence->base);
 }
 
 /**
@@ -1313,7 +1316,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		fence_put(vm->ids[i].flushed_updates);
-		amdgpu_fence_unref(&vm->ids[i].last_id_use);
+		fence_put(vm->ids[i].last_id_use);
 	}
 
 	mutex_destroy(&vm->mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index a6ea2d8e85df..61320511c479 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -262,46 +262,22 @@ static u32 dce_v10_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
  * @crtc_id: crtc to cleanup pageflip on
  * @crtc_base: new address of the crtc (GPU MC address)
  *
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
  */
 static void dce_v10_0_page_flip(struct amdgpu_device *adev,
 			      int crtc_id, u64 crtc_base)
 {
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-	u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
-	int i;
-
-	/* Lock the graphics update lock */
-	tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
-
-	/* update the scanout addresses */
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
-	       upper_32_bits(crtc_base));
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
-	       lower_32_bits(crtc_base));
 
+	/* update the primary scanout address */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
 	       upper_32_bits(crtc_base));
+	/* writing to the low address triggers the update */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
 	       lower_32_bits(crtc_base));
-
-	/* Wait for update_pending to go high. */
-	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
-				GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
-			break;
-		udelay(1);
-	}
-	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
-	/* Unlock the lock, so double-buffering can take place inside vblank */
-	tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
+	/* post the write */
+	RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
 }
 
 static int dce_v10_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 784afb5978ac..3506c5b42106 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -252,46 +252,22 @@ static u32 dce_v11_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
  * @crtc_id: crtc to cleanup pageflip on
  * @crtc_base: new address of the crtc (GPU MC address)
  *
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
  */
 static void dce_v11_0_page_flip(struct amdgpu_device *adev,
 			      int crtc_id, u64 crtc_base)
 {
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-	u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
-	int i;
-
-	/* Lock the graphics update lock */
-	tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
 
 	/* update the scanout addresses */
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
-	       upper_32_bits(crtc_base));
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
-	       lower_32_bits(crtc_base));
-
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
 	       upper_32_bits(crtc_base));
+	/* writing to the low address triggers the update */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
 	       lower_32_bits(crtc_base));
-
-	/* Wait for update_pending to go high. */
-	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
-				GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
-			break;
-		udelay(1);
-	}
-	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
-	/* Unlock the lock, so double-buffering can take place inside vblank */
-	tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);	
+	/* post the write */
+	RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
 }
 
 static int dce_v11_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 00c34f87ac20..b17abbe1be99 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -211,46 +211,22 @@ static u32 dce_v8_0_vblank_get_counter(struct amdgpu_device *adev, int crtc)
  * @crtc_id: crtc to cleanup pageflip on
  * @crtc_base: new address of the crtc (GPU MC address)
  *
- * Does the actual pageflip (evergreen+).
- * During vblank we take the crtc lock and wait for the update_pending
- * bit to go high, when it does, we release the lock, and allow the
- * double buffered update to take place.
- * Returns the current update pending status.
+ * Triggers the actual pageflip by updating the primary
+ * surface base address.
  */
 static void dce_v8_0_page_flip(struct amdgpu_device *adev,
 			      int crtc_id, u64 crtc_base)
 {
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
-	u32 tmp = RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset);
-	int i;
-
-	/* Lock the graphics update lock */
-	tmp |= GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
-
-	/* update the scanout addresses */
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
-	       upper_32_bits(crtc_base));
-	WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
-	       (u32)crtc_base);
 
+	/* update the primary scanout addresses */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
 	       upper_32_bits(crtc_base));
+	/* writing to the low address triggers the update */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset,
-	       (u32)crtc_base);
-
-	/* Wait for update_pending to go high. */
-	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset) &
-				GRPH_UPDATE__GRPH_SURFACE_UPDATE_PENDING_MASK)
-			break;
-		udelay(1);
-	}
-	DRM_DEBUG("Update pending now high. Unlocking vupdate_lock.\n");
-
-	/* Unlock the lock, so double-buffering can take place inside vblank */
-	tmp &= ~GRPH_UPDATE__GRPH_UPDATE_LOCK_MASK;
-	WREG32(mmGRPH_UPDATE + amdgpu_crtc->crtc_offset, tmp);
+	       lower_32_bits(crtc_base));
+	/* post the write */
+	RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset);
 }
 
 static int dce_v8_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cbc46a34987c..6776cf756d40 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -235,11 +235,13 @@ static const u32 fiji_golden_common_all[] =
 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
 	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
-	mmGB_ADDR_CONFIG, 0xffffffff, 0x12011003,
+	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
 	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
 	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
 	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
-	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
+	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
+	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
+	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
 };
 
 static const u32 golden_settings_fiji_a10[] =
@@ -247,24 +249,26 @@ static const u32 golden_settings_fiji_a10[] =
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
-	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x00000100,
 	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
+	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
+	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
 	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
-	mmTCC_CTRL, 0x00100000, 0xf30fff7f,
+	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
+	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
 	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
-	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x7d6cf5e4,
-	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x3928b1a0,
+	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
 };
 
 static const u32 fiji_mgcg_cgcg_init[] =
 {
-	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffc0,
+	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
 	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
 	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
+	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
 	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
 	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
 	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
@@ -292,6 +296,10 @@ static const u32 fiji_mgcg_cgcg_init[] =
 	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
 	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
 	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
+	mmPCIE_INDEX, 0xffffffff, 0x0140001c,
+	mmPCIE_DATA, 0x000f0000, 0x00000000,
+	mmCGTT_DRM_CLK_CTRL0, 0xff000fff, 0x00000100,
+	mmHDP_XDP_CGTT_BLK_CTRL, 0xc0000fff, 0x00000104,
 	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
 };
 
@@ -1031,6 +1039,8 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		case 0x84:
 		case 0xc8:
 		case 0xcc:
+		case 0xe1:
+		case 0xe3:
 			/* B10 */
 			adev->gfx.config.max_cu_per_sh = 8;
 			break;
@@ -1039,18 +1049,23 @@ static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 		case 0x85:
 		case 0xc9:
 		case 0xcd:
+		case 0xe2:
+		case 0xe4:
 			/* B8 */
 			adev->gfx.config.max_cu_per_sh = 6;
 			break;
 		case 0xc6:
 		case 0xca:
 		case 0xce:
+		case 0x88:
 			/* B6 */
 			adev->gfx.config.max_cu_per_sh = 6;
 			break;
 		case 0xc7:
 		case 0x87:
 		case 0xcb:
+		case 0xe5:
+		case 0x89:
 		default:
 			/* B4 */
 			adev->gfx.config.max_cu_per_sh = 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 488348272c4d..85bbcdc73fff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -965,7 +965,7 @@ static int gmc_v7_0_sw_fini(void *handle)
 
 	if (adev->vm_manager.enabled) {
 		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			amdgpu_fence_unref(&adev->vm_manager.active[i]);
+			fence_put(adev->vm_manager.active[i]);
 		gmc_v7_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
@@ -1015,7 +1015,7 @@ static int gmc_v7_0_suspend(void *handle)
 
 	if (adev->vm_manager.enabled) {
 		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			amdgpu_fence_unref(&adev->vm_manager.active[i]);
+			fence_put(adev->vm_manager.active[i]);
 		gmc_v7_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 72e977b1685d..1bcc4e74e3b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -984,7 +984,7 @@ static int gmc_v8_0_sw_fini(void *handle)
 
 	if (adev->vm_manager.enabled) {
 		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			amdgpu_fence_unref(&adev->vm_manager.active[i]);
+			fence_put(adev->vm_manager.active[i]);
 		gmc_v8_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
@@ -1036,7 +1036,7 @@ static int gmc_v8_0_suspend(void *handle)
 
 	if (adev->vm_manager.enabled) {
 		for (i = 0; i < AMDGPU_NUM_VM; ++i)
-			amdgpu_fence_unref(&adev->vm_manager.active[i]);
+			fence_put(adev->vm_manager.active[i]);
 		gmc_v8_0_vm_fini(adev);
 		adev->vm_manager.enabled = false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index d8c93f14c3b5..2adc1c855e85 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1413,17 +1413,18 @@ static int vi_common_early_init(void *handle)
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = 0x1;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	case CHIP_FIJI:
+		adev->has_uvd = true;
+		adev->cg_flags = 0;
+		adev->pg_flags = 0;
+		adev->external_rev_id = adev->rev_id + 0x3c;
+		break;
 	case CHIP_TONGA:
 		adev->has_uvd = true;
 		adev->cg_flags = 0;
 		adev->pg_flags = 0;
 		adev->external_rev_id = adev->rev_id + 0x14;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	case CHIP_CARRIZO:
 	case CHIP_STONEY:
@@ -1432,14 +1433,15 @@ static int vi_common_early_init(void *handle)
 		/* Disable UVD pg */
 		adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
 		adev->external_rev_id = adev->rev_id + 0x1;
-		if (amdgpu_smc_load_fw && smc_enabled)
-			adev->firmware.smu_load = true;
 		break;
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
 	}
 
+	if (amdgpu_smc_load_fw && smc_enabled)
+		adev->firmware.smu_load = true;
+
 	return 0;
 }