From 8e9fbeb522fa3043dc65ef0e383af28843950799 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 17 Mar 2016 11:41:37 +0800
Subject: drm/amdgpu: improve vmid assigment V2

V2: the signaled items on the LRU maintain their order

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b6c011b83641..6630732ea1bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -216,6 +216,20 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 				      struct amdgpu_vm_manager_id,
 				      list);
 
+	if (id->mgr_id->active && !fence_is_signaled(id->mgr_id->active)) {
+		struct amdgpu_vm_manager_id *mgr_id, *tmp;
+		struct list_head *head = &adev->vm_manager.ids_lru;
+		list_for_each_entry_safe(mgr_id, tmp, &adev->vm_manager.ids_lru, list) {
+			if (mgr_id->active && fence_is_signaled(mgr_id->active)) {
+				list_move(&mgr_id->list, head);
+				head = &mgr_id->list;
+			}
+		}
+		id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+					      struct amdgpu_vm_manager_id,
+					      list);
+	}
+
 	r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
 	if (!r) {
 		fence_put(id->mgr_id->active);
-- 
cgit v1.2.3


From fa3ab3c7babf3c2c8a4a174a532732739a304885 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 18 Mar 2016 21:00:35 +0100
Subject: drm/amdgpu: change parameter passing in the VM code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make it more flexible by passing src and page addresses
directly instead of the structures they contain.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 100 ++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 46 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6630732ea1bd..9eec3e1be667 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -349,8 +349,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw access flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @ib: indirect buffer to fill with commands
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
@@ -362,8 +362,8 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
-				   struct amdgpu_gart *gtt,
-				   uint32_t gtt_flags,
+				   uint64_t src,
+				   dma_addr_t *pages_addr,
 				   struct amdgpu_ib *ib,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
@@ -371,12 +371,11 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
-		uint64_t src = gtt->table_addr + (addr >> 12) * 8;
+	if (src) {
+		src += (addr >> 12) * 8;
 		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
 
-	} else if (gtt) {
-		dma_addr_t *pages_addr = gtt->pages_addr;
+	} else if (pages_addr) {
 		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
 				    count, incr, flags);
 
@@ -426,7 +425,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
+	amdgpu_vm_update_pages(adev, 0, NULL, &job->ibs[0], addr, 0, entries,
 			       0, 0);
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
@@ -536,7 +535,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, NULL, 0, ib,
+				amdgpu_vm_update_pages(adev, 0, NULL, ib,
 						       last_pde, last_pt,
 						       count, incr,
 						       AMDGPU_PTE_VALID);
@@ -551,7 +550,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, last_pde, last_pt,
 				       count, incr, AMDGPU_PTE_VALID);
 
 	if (ib->length_dw != 0) {
@@ -584,8 +583,8 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw mapping flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @ib: IB for the update
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
@@ -593,8 +592,8 @@ error_free:
  * @flags: hw mapping flags
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
-				struct amdgpu_gart *gtt,
-				uint32_t gtt_flags,
+				uint64_t src,
+				dma_addr_t *pages_addr,
 				struct amdgpu_ib *ib,
 				uint64_t pe_start, uint64_t pe_end,
 				uint64_t addr, uint32_t flags)
@@ -632,10 +631,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 		return;
 
 	/* system pages are non continuously */
-	if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
+	if (src || pages_addr || !(flags & AMDGPU_PTE_VALID) ||
+	    (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
+		amdgpu_vm_update_pages(adev, src, pages_addr, ib, pe_start,
 				       addr, count, AMDGPU_GPU_PAGE_SIZE,
 				       flags);
 		return;
@@ -644,21 +644,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, pe_start, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
+	amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_start, addr, count,
 			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
+		amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_end, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
@@ -667,8 +667,8 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: GTT hw mapping flags
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
@@ -678,8 +678,8 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * Update the page tables in the range @start - @end.
  */
 static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				  struct amdgpu_gart *gtt,
-				  uint32_t gtt_flags,
+				  uint64_t src,
+				  dma_addr_t *pages_addr,
 				  struct amdgpu_vm *vm,
 				  struct amdgpu_ib *ib,
 				  uint64_t start, uint64_t end,
@@ -707,7 +707,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 
 		if (last_pe_end != pe_start) {
 
-			amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+			amdgpu_vm_frag_ptes(adev, src, pages_addr, ib,
 					    last_pe_start, last_pe_end,
 					    last_dst, flags);
 
@@ -722,17 +722,16 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
-			    last_pe_start, last_pe_end,
-			    last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, src, pages_addr, ib, last_pe_start,
+			    last_pe_end, last_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
- * @gtt_flags: flags as they are used for GTT
+ * @src: address where to copy page table entries from
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of mapped range
  * @last: last mapped entry
@@ -744,8 +743,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-				       struct amdgpu_gart *gtt,
-				       uint32_t gtt_flags,
+				       uint64_t src,
+				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
 				       uint32_t flags, uint64_t addr,
@@ -776,11 +775,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+	if (src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (gtt) {
+	} else if (pages_addr) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -810,8 +809,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
-			      addr, flags);
+	amdgpu_vm_update_ptes(adev, src, pages_addr, vm, ib, start,
+			      last + 1, addr, flags);
 
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > ndw);
@@ -853,12 +852,13 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      uint32_t gtt_flags,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
-				      uint64_t addr, struct fence **fence)
+				      uint32_t flags, uint64_t addr,
+				      struct fence **fence)
 {
 	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
 
-	uint64_t start = mapping->it.start;
-	uint32_t flags = gtt_flags;
+	uint64_t src = 0, start = mapping->it.start;
+	dma_addr_t *pages_addr = NULL;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -871,10 +871,17 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 
 	trace_amdgpu_vm_bo_update(mapping);
 
+	if (gtt) {
+		if (flags == gtt_flags)
+			src = adev->gart.table_addr + (addr >> 12) * 8;
+		else
+			pages_addr = &gtt->pages_addr[addr >> 12];
+		addr = 0;
+	}
 	addr += mapping->offset;
 
-	if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
-		return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+	if (!gtt || src)
+		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
 
@@ -882,7 +889,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;
 
 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -914,7 +921,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_vm *vm = bo_va->vm;
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_gart *gtt = NULL;
-	uint32_t flags;
+	uint32_t gtt_flags, flags;
 	uint64_t addr;
 	int r;
 
@@ -937,6 +944,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
+	gtt_flags = (adev == bo_va->bo->adev) ? flags : 0;
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->vm_status))
@@ -944,8 +952,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
-					       &bo_va->last_pt_update);
+		r = amdgpu_vm_bo_split_mapping(adev, gtt, gtt_flags, vm, mapping,
+					       flags, addr, &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -991,7 +999,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		list_del(&mapping->list);
 
 		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
-					       0, NULL);
+					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
 			return r;
-- 
cgit v1.2.3


From 8358dceed981cf389778fba217288da8dadbe103 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 30 Mar 2016 10:50:25 +0200
Subject: drm/amdgpu: use BO pages instead of GART array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9eec3e1be667..0c92e0450694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -836,11 +836,12 @@ error_free:
  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
  *
  * @adev: amdgpu_device pointer
- * @gtt: GART instance to use for mapping
+ * @gtt_flags: flags as they are used for GTT
+ * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
  * @addr: addr to set the area to
- * @gtt_flags: flags as they are used for GTT
+ * @flags: HW flags for the mapping
  * @fence: optional resulting fence
  *
  * Split the mapping into smaller chunks so that each update fits
@@ -848,8 +849,8 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
-				      struct amdgpu_gart *gtt,
 				      uint32_t gtt_flags,
+				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
 				      uint32_t flags, uint64_t addr,
@@ -858,7 +859,6 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
 
 	uint64_t src = 0, start = mapping->it.start;
-	dma_addr_t *pages_addr = NULL;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -871,16 +871,14 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 
 	trace_amdgpu_vm_bo_update(mapping);
 
-	if (gtt) {
+	if (pages_addr) {
 		if (flags == gtt_flags)
 			src = adev->gart.table_addr + (addr >> 12) * 8;
-		else
-			pages_addr = &gtt->pages_addr[addr >> 12];
 		addr = 0;
 	}
 	addr += mapping->offset;
 
-	if (!gtt || src)
+	if (!pages_addr || src)
 		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
@@ -920,16 +918,20 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm *vm = bo_va->vm;
 	struct amdgpu_bo_va_mapping *mapping;
-	struct amdgpu_gart *gtt = NULL;
+	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
 	uint64_t addr;
 	int r;
 
 	if (mem) {
+		struct ttm_dma_tt *ttm;
+
 		addr = (u64)mem->start << PAGE_SHIFT;
 		switch (mem->mem_type) {
 		case TTM_PL_TT:
-			gtt = &bo_va->bo->adev->gart;
+			ttm = container_of(bo_va->bo->tbo.ttm, struct
+					   ttm_dma_tt, ttm);
+			pages_addr = ttm->dma_address;
 			break;
 
 		case TTM_PL_VRAM:
@@ -952,8 +954,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt, gtt_flags, vm, mapping,
-					       flags, addr, &bo_va->last_pt_update);
+		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+					       mapping, flags, addr,
+					       &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -998,7 +1001,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
+		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
 					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
-- 
cgit v1.2.3


From bcb1ba35a87be34d1312f6e050f1b5cc4d32f096 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 8 Mar 2016 15:40:11 +0100
Subject: drm/amdgpu: merge VM manager and VM context ID structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

No need to have two of them any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  17 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 139 +++++++++++++++++----------------
 2 files changed, 78 insertions(+), 78 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b9a6fe9a2a31..c1b10046317e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -839,13 +839,6 @@ struct amdgpu_vm_pt {
 	uint64_t			addr;
 };
 
-struct amdgpu_vm_id {
-	struct amdgpu_vm_manager_id	*mgr_id;
-	uint64_t			pd_gpu_addr;
-	/* last flushed PD/PT update */
-	struct fence			*flushed_updates;
-};
-
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root		va;
@@ -871,7 +864,7 @@ struct amdgpu_vm {
 	struct amdgpu_vm_pt	*page_tables;
 
 	/* for id and flush management per ring */
-	struct amdgpu_vm_id	ids[AMDGPU_MAX_RINGS];
+	struct amdgpu_vm_id	*ids[AMDGPU_MAX_RINGS];
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
@@ -880,11 +873,15 @@ struct amdgpu_vm {
 	struct amd_sched_entity	entity;
 };
 
-struct amdgpu_vm_manager_id {
+struct amdgpu_vm_id {
 	struct list_head	list;
 	struct fence		*active;
 	atomic_long_t		owner;
 
+	uint64_t		pd_gpu_addr;
+	/* last flushed PD/PT update */
+	struct fence		*flushed_updates;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;
@@ -898,7 +895,7 @@ struct amdgpu_vm_manager {
 	struct mutex				lock;
 	unsigned				num_ids;
 	struct list_head			ids_lru;
-	struct amdgpu_vm_manager_id		ids[AMDGPU_NUM_VM];
+	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
 
 	uint32_t				max_pfn;
 	/* vram base address for page table entry  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0c92e0450694..8a758b4fb3a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -166,43 +166,41 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+	struct amdgpu_vm_id *id = vm->ids[ring->idx];
 	struct fence *updates = sync->last_vm_update;
 	int r;
 
 	mutex_lock(&adev->vm_manager.lock);
 
 	/* check if the id is still valid */
-	if (id->mgr_id) {
+	if (id) {
 		struct fence *flushed = id->flushed_updates;
-		bool is_later;
-		long owner;
+		long owner = atomic_long_read(&id->owner);
+		bool usable = pd_addr == id->pd_gpu_addr;
 
-		if (!flushed)
-			is_later = true;
+		if (owner != (long)&vm->ids[ring->idx])
+			usable = false;
+		else if (!flushed)
+			usable = false;
 		else if (!updates)
-			is_later = false;
+			usable = true;
 		else
-			is_later = fence_is_later(updates, flushed);
+			usable = !fence_is_later(updates, flushed);
 
-		owner = atomic_long_read(&id->mgr_id->owner);
-		if (!is_later && owner == (long)id &&
-		    pd_addr == id->pd_gpu_addr) {
+		if (usable) {
 
-			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->mgr_id->active);
+			r = amdgpu_sync_fence(ring->adev, sync, id->active);
 			if (r) {
 				mutex_unlock(&adev->vm_manager.lock);
 				return r;
 			}
 
-			fence_put(id->mgr_id->active);
-			id->mgr_id->active = fence_get(fence);
+			fence_put(id->active);
+			id->active = fence_get(fence);
 
-			list_move_tail(&id->mgr_id->list,
-				       &adev->vm_manager.ids_lru);
+			list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 
-			*vm_id = id->mgr_id - adev->vm_manager.ids;
+			*vm_id = id - adev->vm_manager.ids;
 			*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
 			trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
 						*vm_pd_addr);
@@ -212,38 +210,41 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		}
 	}
 
-	id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_manager_id,
-				      list);
+	id = list_first_entry(&adev->vm_manager.ids_lru,
+			      struct amdgpu_vm_id,
+			      list);
 
-	if (id->mgr_id->active && !fence_is_signaled(id->mgr_id->active)) {
-		struct amdgpu_vm_manager_id *mgr_id, *tmp;
+	if (id->active && !fence_is_signaled(id->active)) {
+		struct amdgpu_vm_id *tmp;
 		struct list_head *head = &adev->vm_manager.ids_lru;
-		list_for_each_entry_safe(mgr_id, tmp, &adev->vm_manager.ids_lru, list) {
-			if (mgr_id->active && fence_is_signaled(mgr_id->active)) {
-				list_move(&mgr_id->list, head);
-				head = &mgr_id->list;
+
+		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
+					 list) {
+			if (id->active && fence_is_signaled(id->active)) {
+				list_move(&id->list, head);
+				head = &id->list;
 			}
 		}
-		id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
-					      struct amdgpu_vm_manager_id,
-					      list);
+		id = list_first_entry(&adev->vm_manager.ids_lru,
+				      struct amdgpu_vm_id,
+				      list);
 	}
 
-	r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+	r = amdgpu_sync_fence(ring->adev, sync, id->active);
 	if (!r) {
-		fence_put(id->mgr_id->active);
-		id->mgr_id->active = fence_get(fence);
+		fence_put(id->active);
+		id->active = fence_get(fence);
 
 		fence_put(id->flushed_updates);
 		id->flushed_updates = fence_get(updates);
 
 		id->pd_gpu_addr = pd_addr;
 
-		list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
-		atomic_long_set(&id->mgr_id->owner, (long)id);
+		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+		atomic_long_set(&id->owner, (long)&vm->ids[ring->idx]);
+		vm->ids[ring->idx] = id;
 
-		*vm_id = id->mgr_id - adev->vm_manager.ids;
+		*vm_id = id - adev->vm_manager.ids;
 		*vm_pd_addr = pd_addr;
 		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 	}
@@ -268,14 +269,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		     uint32_t oa_base, uint32_t oa_size)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		mgr_id->gds_base != gds_base ||
-		mgr_id->gds_size != gds_size ||
-		mgr_id->gws_base != gws_base ||
-		mgr_id->gws_size != gws_size ||
-		mgr_id->oa_base != oa_base ||
-		mgr_id->oa_size != oa_size);
+		id->gds_base != gds_base ||
+		id->gds_size != gds_size ||
+		id->gws_base != gws_base ||
+		id->gws_size != gws_size ||
+		id->oa_base != oa_base ||
+		id->oa_size != oa_size);
 
 	if (ring->funcs->emit_pipeline_sync && (
 	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
@@ -287,12 +288,12 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
 	}
 
 	if (gds_switch_needed) {
-		mgr_id->gds_base = gds_base;
-		mgr_id->gds_size = gds_size;
-		mgr_id->gws_base = gws_base;
-		mgr_id->gws_size = gws_size;
-		mgr_id->oa_base = oa_base;
-		mgr_id->oa_size = oa_size;
+		id->gds_base = gds_base;
+		id->gds_size = gds_size;
+		id->gws_base = gws_base;
+		id->gws_size = gws_size;
+		id->oa_base = oa_base;
+		id->oa_size = oa_size;
 		amdgpu_ring_emit_gds_switch(ring, vm_id,
 					    gds_base, gds_size,
 					    gws_base, gws_size,
@@ -310,14 +311,14 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
 {
-	struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
-
-	mgr_id->gds_base = 0;
-	mgr_id->gds_size = 0;
-	mgr_id->gws_base = 0;
-	mgr_id->gws_size = 0;
-	mgr_id->oa_base = 0;
-	mgr_id->oa_size = 0;
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+
+	id->gds_base = 0;
+	id->gds_size = 0;
+	id->gws_base = 0;
+	id->gws_size = 0;
+	id->oa_base = 0;
+	id->oa_size = 0;
 }
 
 /**
@@ -1345,10 +1346,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	struct amd_sched_rq *rq;
 	int i, r;
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		vm->ids[i].mgr_id = NULL;
-		vm->ids[i].flushed_updates = NULL;
-	}
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		vm->ids[i] = NULL;
 	vm->va = RB_ROOT;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
@@ -1443,12 +1442,12 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	fence_put(vm->page_directory_fence);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_vm_id *id = &vm->ids[i];
+		struct amdgpu_vm_id *id = vm->ids[i];
 
-		if (id->mgr_id)
-			atomic_long_cmpxchg(&id->mgr_id->owner,
-					    (long)id, 0);
-		fence_put(id->flushed_updates);
+		if (!id)
+			continue;
+
+		atomic_long_cmpxchg(&id->owner, (long)&vm->ids[i], 0);
 	}
 }
 
@@ -1486,6 +1485,10 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
 	unsigned i;
 
-	for (i = 0; i < AMDGPU_NUM_VM; ++i)
-		fence_put(adev->vm_manager.ids[i].active);
+	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
+		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
+
+		fence_put(id->active);
+		fence_put(id->flushed_updates);
+	}
 }
-- 
cgit v1.2.3


From 832a902f9433b812f829e9f2257daf5d518cf0de Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 15 Feb 2016 12:33:02 +0100
Subject: drm/amdgpu: use a sync object for VMID fences v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2: rebase & cleanup

This way we can store more than one fence as user for each VMID.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      |   6 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 105 +++++++++++++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  53 ++++++++--------
 3 files changed, 132 insertions(+), 32 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c1b10046317e..148e2c61463c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -588,6 +588,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
+bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
+int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
+			     struct fence *fence);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
@@ -875,7 +878,8 @@ struct amdgpu_vm {
 
 struct amdgpu_vm_id {
 	struct list_head	list;
-	struct fence		*active;
+	struct fence		*first;
+	struct amdgpu_sync	active;
 	atomic_long_t		owner;
 
 	uint64_t		pd_gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c48b4fce5e57..34a92808bbd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -108,6 +108,29 @@ static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
 	*keep = fence_get(fence);
 }
 
+/**
+ * amdgpu_sync_add_later - add the fence to the hash
+ *
+ * @sync: sync object to add the fence to
+ * @f: fence to add
+ *
+ * Tries to add the fence to an existing hash entry. Returns true when an entry
+ * was found, false otherwise.
+ */
+static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct fence *f)
+{
+	struct amdgpu_sync_entry *e;
+
+	hash_for_each_possible(sync->fences, e, node, f->context) {
+		if (unlikely(e->fence->context != f->context))
+			continue;
+
+		amdgpu_sync_keep_later(&e->fence, f);
+		return true;
+	}
+	return false;
+}
+
 /**
  * amdgpu_sync_fence - remember to sync to this fence
  *
@@ -127,13 +150,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	    amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
 		amdgpu_sync_keep_later(&sync->last_vm_update, f);
 
-	hash_for_each_possible(sync->fences, e, node, f->context) {
-		if (unlikely(e->fence->context != f->context))
-			continue;
-
-		amdgpu_sync_keep_later(&e->fence, f);
+	if (amdgpu_sync_add_later(sync, f))
 		return 0;
-	}
 
 	e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
 	if (!e)
@@ -204,6 +222,81 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 	return r;
 }
 
+/**
+ * amdgpu_sync_is_idle - test if all fences are signaled
+ *
+ * @sync: the sync object
+ *
+ * Returns true if all fences in the sync object are signaled.
+ */
+bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(sync->fences, i, tmp, e, node) {
+		struct fence *f = e->fence;
+
+		if (fence_is_signaled(f)) {
+			hash_del(&e->node);
+			fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+			continue;
+		}
+
+		return false;
+	}
+
+	return true;
+}
+
+/**
+ * amdgpu_sync_cycle_fences - move fences from one sync object into another
+ *
+ * @dst: the destination sync object
+ * @src: the source sync object
+ * @fence: fence to add to source
+ *
+ * Remove all fences from source and put them into destination and add
+ * fence as new one into source.
+ */
+int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
+			     struct fence *fence)
+{
+	struct amdgpu_sync_entry *e, *newone;
+	struct hlist_node *tmp;
+	int i;
+
+	/* Allocate the new entry before moving the old ones */
+	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+	if (!newone)
+		return -ENOMEM;
+
+	hash_for_each_safe(src->fences, i, tmp, e, node) {
+		struct fence *f = e->fence;
+
+		hash_del(&e->node);
+		if (fence_is_signaled(f)) {
+			fence_put(f);
+			kmem_cache_free(amdgpu_sync_slab, e);
+			continue;
+		}
+
+		if (amdgpu_sync_add_later(dst, f)) {
+			kmem_cache_free(amdgpu_sync_slab, e);
+			continue;
+		}
+
+		hash_add(dst->fences, &e->node, f->context);
+	}
+
+	hash_add(src->fences, &newone->node, fence->context);
+	newone->fence = fence_get(fence);
+
+	return 0;
+}
+
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8a758b4fb3a9..d0cce7c3129a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 		if (usable) {
 
-			r = amdgpu_sync_fence(ring->adev, sync, id->active);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
+			r = amdgpu_sync_fence(ring->adev, sync, id->first);
+			if (r)
+				goto error;
 
-			fence_put(id->active);
-			id->active = fence_get(fence);
+			r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+			if (r)
+				goto error;
 
 			list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 
@@ -214,13 +213,13 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			      struct amdgpu_vm_id,
 			      list);
 
-	if (id->active && !fence_is_signaled(id->active)) {
-		struct amdgpu_vm_id *tmp;
+	if (!amdgpu_sync_is_idle(&id->active)) {
 		struct list_head *head = &adev->vm_manager.ids_lru;
+		struct amdgpu_vm_id *tmp;
 
 		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
 					 list) {
-			if (id->active && fence_is_signaled(id->active)) {
+			if (amdgpu_sync_is_idle(&id->active)) {
 				list_move(&id->list, head);
 				head = &id->list;
 			}
@@ -230,25 +229,27 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 				      list);
 	}
 
-	r = amdgpu_sync_fence(ring->adev, sync, id->active);
-	if (!r) {
-		fence_put(id->active);
-		id->active = fence_get(fence);
+	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	if (r)
+		goto error;
 
-		fence_put(id->flushed_updates);
-		id->flushed_updates = fence_get(updates);
+	fence_put(id->first);
+	id->first = fence_get(fence);
 
-		id->pd_gpu_addr = pd_addr;
+	fence_put(id->flushed_updates);
+	id->flushed_updates = fence_get(updates);
 
-		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-		atomic_long_set(&id->owner, (long)&vm->ids[ring->idx]);
-		vm->ids[ring->idx] = id;
+	id->pd_gpu_addr = pd_addr;
 
-		*vm_id = id - adev->vm_manager.ids;
-		*vm_pd_addr = pd_addr;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
-	}
+	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+	atomic_long_set(&id->owner, (long)id);
+	vm->ids[ring->idx] = id;
 
+	*vm_id = id - adev->vm_manager.ids;
+	*vm_pd_addr = pd_addr;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+
+error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }
@@ -1467,6 +1468,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	/* skip over VMID 0, since it is the system VM */
 	for (i = 1; i < adev->vm_manager.num_ids; ++i) {
 		amdgpu_vm_reset_id(adev, i);
+		amdgpu_sync_create(&adev->vm_manager.ids[i].active);
 		list_add_tail(&adev->vm_manager.ids[i].list,
 			      &adev->vm_manager.ids_lru);
 	}
@@ -1488,7 +1490,8 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
 		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
 
-		fence_put(id->active);
+		fence_put(adev->vm_manager.ids[i].first);
+		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
 		fence_put(id->flushed_updates);
 	}
 }
-- 
cgit v1.2.3


From 41d9eb2c5a2a21c9120e906d077e77562883510e Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 1 Mar 2016 16:46:18 +0100
Subject: drm/amdgpu: add a fence after the VM flush
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This way we can track when the flush is done.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 11 ++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 ++++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 26 +++++++++++++++++++++-----
 3 files changed, 35 insertions(+), 14 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 148e2c61463c..66e51f9e593b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -880,6 +880,7 @@ struct amdgpu_vm_id {
 	struct list_head	list;
 	struct fence		*first;
 	struct amdgpu_sync	active;
+	struct fence		*last_flush;
 	atomic_long_t		owner;
 
 	uint64_t		pd_gpu_addr;
@@ -926,11 +927,11 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
 		      unsigned *vm_id, uint64_t *vm_pd_addr);
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     unsigned vm_id, uint64_t pd_addr,
-		     uint32_t gds_base, uint32_t gds_size,
-		     uint32_t gws_base, uint32_t gws_size,
-		     uint32_t oa_base, uint32_t oa_size);
+int amdgpu_vm_flush(struct amdgpu_ring *ring,
+		    unsigned vm_id, uint64_t pd_addr,
+		    uint32_t gds_base, uint32_t gds_size,
+		    uint32_t gws_base, uint32_t gws_size,
+		    uint32_t oa_base, uint32_t oa_size);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 644336d76aca..83973d051080 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -155,10 +155,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
 	if (vm) {
 		/* do context switch */
-		amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
-				ib->gds_base, ib->gds_size,
-				ib->gws_base, ib->gws_size,
-				ib->oa_base, ib->oa_size);
+		r = amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+				    ib->gds_base, ib->gds_size,
+				    ib->gws_base, ib->gws_size,
+				    ib->oa_base, ib->oa_size);
+		if (r) {
+			amdgpu_ring_undo(ring);
+			return r;
+		}
 
 		if (ring->funcs->emit_hdp_flush)
 			amdgpu_ring_emit_hdp_flush(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d0cce7c3129a..252445f578f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -236,6 +236,9 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->first);
 	id->first = fence_get(fence);
 
+	fence_put(id->last_flush);
+	id->last_flush = NULL;
+
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);
 
@@ -263,11 +266,11 @@ error:
  *
  * Emit a VM flush when it is necessary.
  */
-void amdgpu_vm_flush(struct amdgpu_ring *ring,
-		     unsigned vm_id, uint64_t pd_addr,
-		     uint32_t gds_base, uint32_t gds_size,
-		     uint32_t gws_base, uint32_t gws_size,
-		     uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring,
+		    unsigned vm_id, uint64_t pd_addr,
+		    uint32_t gds_base, uint32_t gds_size,
+		    uint32_t gws_base, uint32_t gws_size,
+		    uint32_t oa_base, uint32_t oa_size)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
@@ -278,14 +281,25 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		id->gws_size != gws_size ||
 		id->oa_base != oa_base ||
 		id->oa_size != oa_size);
+	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
 	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+		struct fence *fence;
+
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
 		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;
+
+		mutex_lock(&adev->vm_manager.lock);
+		fence_put(id->last_flush);
+		id->last_flush = fence;
+		mutex_unlock(&adev->vm_manager.lock);
 	}
 
 	if (gds_switch_needed) {
@@ -300,6 +314,8 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring,
 					    gws_base, gws_size,
 					    oa_base, oa_size);
 	}
+
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3


From 794f50b95d40bbde905c3c4c514a33fdda54f497 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 9 Mar 2016 22:11:53 +0100
Subject: drm/amdgpu: reuse VMIDs already assigned to a process
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If we don't need to flush we can easily use another VMID
already assigned to the process.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 77 ++++++++++++++++++++--------------
 1 file changed, 46 insertions(+), 31 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 252445f578f6..1425aab31233 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -166,48 +166,63 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = vm->ids[ring->idx];
 	struct fence *updates = sync->last_vm_update;
+	struct amdgpu_vm_id *id;
+	unsigned i = ring->idx;
 	int r;
 
 	mutex_lock(&adev->vm_manager.lock);
 
-	/* check if the id is still valid */
-	if (id) {
-		struct fence *flushed = id->flushed_updates;
-		long owner = atomic_long_read(&id->owner);
-		bool usable = pd_addr == id->pd_gpu_addr;
-
-		if (owner != (long)&vm->ids[ring->idx])
-			usable = false;
-		else if (!flushed)
-			usable = false;
-		else if (!updates)
-			usable = true;
-		else
-			usable = !fence_is_later(updates, flushed);
+	/* Check if we can use a VMID already assigned to this VM */
+	do {
+		struct fence *flushed;
 
-		if (usable) {
+		id = vm->ids[i++];
+		if (i == AMDGPU_MAX_RINGS)
+			i = 0;
 
-			r = amdgpu_sync_fence(ring->adev, sync, id->first);
-			if (r)
-				goto error;
+		/* Check all the prerequisites to using this VMID */
+		if (!id)
+			continue;
+
+		if (atomic_long_read(&id->owner) != (long)vm)
+			continue;
+
+		if (pd_addr != id->pd_gpu_addr)
+			continue;
+
+		if (id != vm->ids[ring->idx] &&
+		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+			continue;
+
+		flushed  = id->flushed_updates;
+		if (updates && (!flushed || fence_is_later(updates, flushed)))
+			continue;
 
-			r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+		/* Good we can use this VMID */
+		if (id == vm->ids[ring->idx]) {
+			r = amdgpu_sync_fence(ring->adev, sync,
+					      id->first);
 			if (r)
 				goto error;
+		}
+
+		/* And remember this submission as user of the VMID */
+		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
+		if (r)
+			goto error;
 
-			list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+		vm->ids[ring->idx] = id;
 
-			*vm_id = id - adev->vm_manager.ids;
-			*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-			trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
-						*vm_pd_addr);
+		*vm_id = id - adev->vm_manager.ids;
+		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
 
-			mutex_unlock(&adev->vm_manager.lock);
-			return 0;
-		}
-	}
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	} while (i != ring->idx);
 
 	id = list_first_entry(&adev->vm_manager.ids_lru,
 			      struct amdgpu_vm_id,
@@ -245,7 +260,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	id->pd_gpu_addr = pd_addr;
 
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	atomic_long_set(&id->owner, (long)id);
+	atomic_long_set(&id->owner, (long)vm);
 	vm->ids[ring->idx] = id;
 
 	*vm_id = id - adev->vm_manager.ids;
@@ -1464,7 +1479,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		if (!id)
 			continue;
 
-		atomic_long_cmpxchg(&id->owner, (long)&vm->ids[i], 0);
+		atomic_long_cmpxchg(&id->owner, (long)vm, 0);
 	}
 }
 
-- 
cgit v1.2.3


From 68befebee4927036b0f350825a20d46104f375b5 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 14 Apr 2016 13:42:32 +0800
Subject: drm/amdgpu: only update last_flush when vmid doesn't have other new
 owner
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4a2b9a37271d..e4e781658cf1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -881,6 +881,7 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
+	struct amdgpu_ring      *last_user;
 	atomic_long_t		owner;
 
 	uint64_t		pd_gpu_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1425aab31233..2aff938f94c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -260,6 +260,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	id->pd_gpu_addr = pd_addr;
 
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
+	id->last_user = ring;
 	atomic_long_set(&id->owner, (long)vm);
 	vm->ids[ring->idx] = id;
 
@@ -307,13 +308,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
 		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
-		r = amdgpu_fence_emit(ring, &fence);
-		if (r)
-			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		fence_put(id->last_flush);
-		id->last_flush = fence;
+		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
+			r = amdgpu_fence_emit(ring, &fence);
+			if (r) {
+				mutex_unlock(&adev->vm_manager.lock);
+				return r;
+			}
+			fence_put(id->last_flush);
+			id->last_flush = fence;
+		}
 		mutex_unlock(&adev->vm_manager.lock);
 	}
 
-- 
cgit v1.2.3


From 178d7cb8d576ff837fea8e1b6460c769297615ec Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Thu, 14 Apr 2016 15:53:55 +0800
Subject: drm/amdgpu: fix error checking when reuse vmid on same ring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2aff938f94c6..856116a874bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -191,7 +191,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (id != vm->ids[ring->idx] &&
+		if (id->last_user != ring &&
 		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
 			continue;
 
@@ -200,7 +200,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			continue;
 
 		/* Good we can use this VMID */
-		if (id == vm->ids[ring->idx]) {
+		if (id->last_user == ring) {
 			r = amdgpu_sync_fence(ring->adev, sync,
 					      id->first);
 			if (r)
-- 
cgit v1.2.3


From 444066b915c1b9d0aa4ec7b2d2bbe627e08bf7a6 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 25 Apr 2016 10:28:24 +0800
Subject: drm/amdgpu: fix wrong release of vmid owner

The release of the vmid owner was not handled
correctly.  We need to take the lock and walk
the lru list.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 856116a874bb..e06d0661549f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1454,6 +1454,7 @@ error_free_sched_entity:
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
+	struct amdgpu_vm_id *id, *id_tmp;
 	int i;
 
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
@@ -1478,14 +1479,17 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
 
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_vm_id *id = vm->ids[i];
-
+	mutex_lock(&adev->vm_manager.lock);
+	list_for_each_entry_safe(id, id_tmp, &adev->vm_manager.ids_lru,
+				 list) {
 		if (!id)
 			continue;
-
-		atomic_long_cmpxchg(&id->owner, (long)vm, 0);
+		if (atomic_long_read(&id->owner) == (long)vm) {
+			atomic_long_set(&id->owner, 0);
+			id->pd_gpu_addr = 0;
+		}
 	}
+	mutex_unlock(&adev->vm_manager.lock);
 }
 
 /**
-- 
cgit v1.2.3


From 031e2983e8e385b9c99367586decabf6323ae049 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 25 Apr 2016 10:19:13 +0800
Subject: drm/amdgpu: add client id for every vm

This adds a unique id for each vm client so we can
properly track them.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 113fd4bf9b64..e72cf4518c30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -363,6 +363,7 @@ struct amdgpu_fence_driver {
 /* some special values for the owner field */
 #define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
 #define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
+#define AMDGPU_CLIENT_ID_RESERVED       2
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
@@ -885,6 +886,9 @@ struct amdgpu_vm {
 
 	/* Scheduler entity for page table updates */
 	struct amd_sched_entity	entity;
+
+	/* client id */
+	u64                     client_id;
 };
 
 struct amdgpu_vm_id {
@@ -924,6 +928,8 @@ struct amdgpu_vm_manager {
 	struct amdgpu_ring                      *vm_pte_rings[AMDGPU_MAX_RINGS];
 	unsigned				vm_pte_num_rings;
 	atomic_t				vm_pte_next_ring;
+	/* client id counter */
+	atomic64_t				client_counter;
 };
 
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index e06d0661549f..275378c46b9f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1386,6 +1386,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		vm->ids[i] = NULL;
 	vm->va = RB_ROOT;
+	vm->client_id = atomic64_inc_return(&adev->vm_manager.client_counter);
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->cleared);
@@ -1514,6 +1515,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	}
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
+	atomic64_set(&adev->vm_manager.client_counter, AMDGPU_CLIENT_ID_RESERVED);
 }
 
 /**
-- 
cgit v1.2.3


From 1f207f81e371bed8aa4c898de091d5cfbf68f59e Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Mon, 25 Apr 2016 10:23:34 +0800
Subject: drm/amdgpu: make vmid owner be client_id

Using the pointer is not adequate.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 275378c46b9f..2c3d9557e1a2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -185,7 +185,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (!id)
 			continue;
 
-		if (atomic_long_read(&id->owner) != (long)vm)
+		if (atomic_long_read(&id->owner) != vm->client_id)
 			continue;
 
 		if (pd_addr != id->pd_gpu_addr)
@@ -261,7 +261,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	id->last_user = ring;
-	atomic_long_set(&id->owner, (long)vm);
+	atomic_long_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
 	*vm_id = id - adev->vm_manager.ids;
@@ -1485,7 +1485,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 				 list) {
 		if (!id)
 			continue;
-		if (atomic_long_read(&id->owner) == (long)vm) {
+		if (atomic_long_read(&id->owner) == vm->client_id) {
 			atomic_long_set(&id->owner, 0);
 			id->pd_gpu_addr = 0;
 		}
-- 
cgit v1.2.3


From c5637837ba5d5b5e962e73f5a1a7c5456fa85a68 Mon Sep 17 00:00:00 2001
From: Monk Liu <Monk.Liu@amd.com>
Date: Tue, 19 Apr 2016 20:11:32 +0800
Subject: drm/amdgpu: keep vm in job instead of ib (v2)

ib.vm is a legacy way to get vm, after scheduler
implemented vm should be get from job, and all ibs
from one job share the same vm, no need to keep ib.vm
just move vm field to job.

this patch as well add job as paramter to ib_schedule
so it can get vm from job->vm.

v2: agd: sqaush in:
drm/amdgpu: check if ring emit_vm_flush exists in vm flush

No vm flush on engines that don't support VM.

bug:
https://bugs.freedesktop.org/show_bug.cgi?id=95195

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c  | 16 ++++------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c |  9 +++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  |  3 ++-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  |  2 +-
 12 files changed, 25 insertions(+), 30 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e72cf4518c30..959008ad65a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -740,7 +740,6 @@ struct amdgpu_ib {
 	uint64_t			gpu_addr;
 	uint32_t			*ptr;
 	struct amdgpu_user_fence        *user;
-	struct amdgpu_vm		*vm;
 	unsigned			vm_id;
 	uint64_t			vm_pd_addr;
 	struct amdgpu_ctx		*ctx;
@@ -763,7 +762,7 @@ enum amdgpu_ring_type {
 extern const struct amd_sched_backend_ops amdgpu_sched_ops;
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-		     struct amdgpu_job **job);
+		     struct amdgpu_job **job, struct amdgpu_vm *vm);
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
@@ -1191,7 +1190,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		       struct amdgpu_ib *ib, struct fence *last_vm_update,
-		       struct fence **f);
+		       struct amdgpu_job *job, struct fence **f);
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
@@ -1247,6 +1246,7 @@ struct amdgpu_cs_parser {
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
+	struct amdgpu_vm 	*vm;
 	struct amdgpu_ring	*ring;
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 2ebba295d0e4..1a065961981a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -120,6 +120,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	union drm_amdgpu_cs *cs = data;
 	uint64_t *chunk_array_user;
 	uint64_t *chunk_array;
@@ -214,7 +215,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		}
 	}
 
-	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+	ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm);
 	if (ret)
 		goto free_all_kdata;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 0129617a7962..0ed643036361 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -74,7 +74,6 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
 	}
 
-	ib->vm = vm;
 	ib->vm_id = 0;
 
 	return 0;
@@ -117,13 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fen
  */
 int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		       struct amdgpu_ib *ibs, struct fence *last_vm_update,
-		       struct fence **f)
+		       struct amdgpu_job *job, struct fence **f)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib *ib = &ibs[0];
 	struct amdgpu_ctx *ctx, *old_ctx;
-	struct amdgpu_vm *vm;
 	struct fence *hwf;
+	struct amdgpu_vm *vm = NULL;
 	unsigned i, patch_offset = ~0;
 
 	int r = 0;
@@ -132,7 +131,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		return -EINVAL;
 
 	ctx = ibs->ctx;
-	vm = ibs->vm;
+	if (job) /* for domain0 job like ring test, ibs->job is not assigned */
+		vm = job->vm;
 
 	if (!ring->ready) {
 		dev_err(adev->dev, "couldn't schedule ib\n");
@@ -174,14 +174,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	old_ctx = ring->current_ctx;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
-
-		if (ib->ctx != ctx || ib->vm != vm) {
-			ring->current_ctx = old_ctx;
-			if (ib->vm_id)
-				amdgpu_vm_reset_id(adev, ib->vm_id);
-			amdgpu_ring_undo(ring);
-			return -EINVAL;
-		}
 		amdgpu_ring_emit_ib(ring, ib);
 		ring->current_ctx = ctx;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4eea2a18d8bb..917c6f3bfa09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -46,7 +46,7 @@ void amdgpu_job_timeout_func(struct work_struct *work)
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-		     struct amdgpu_job **job)
+		     struct amdgpu_job **job, struct amdgpu_vm *vm)
 {
 	size_t size = sizeof(struct amdgpu_job);
 
@@ -60,6 +60,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 		return -ENOMEM;
 
 	(*job)->adev = adev;
+	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
 	INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);
@@ -74,7 +75,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 {
 	int r;
 
-	r = amdgpu_job_alloc(adev, 1, job);
+	r = amdgpu_job_alloc(adev, 1, job, NULL);
 	if (r)
 		return r;
 
@@ -138,7 +139,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 {
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	struct amdgpu_vm *vm = job->ibs->vm;
+	struct amdgpu_vm *vm = job->vm;
 
 	struct fence *fence = amdgpu_sync_get_fence(&job->sync);
 
@@ -186,7 +187,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 
 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
-			       job->sync.last_vm_update, &fence);
+			       job->sync.last_vm_update, job, &fence);
 	if (r) {
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
 		goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index abda242980ba..3f953759002f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -910,7 +910,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	ib->length_dw = 16;
 
 	if (direct) {
-		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
 		job->fence = f;
 		if (r)
 			goto err_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 22a4d96fedb7..79ba2aae0d7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -436,7 +436,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	for (i = ib->length_dw; i < ib_size_dw; ++i)
 		ib->ptr[i] = 0x0;
 
-	r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
 	job->fence = f;
 	if (r)
 		goto err;
@@ -498,7 +498,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;
 
 	if (direct) {
-		r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
 		job->fence = f;
 		if (r)
 			goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2c3d9557e1a2..692d0d02b644 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -303,7 +303,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush &&
+	    pd_addr != AMDGPU_VM_NO_FLUSH) {
 		struct fence *fence;
 
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index b7ed9d376001..8d69c6555e02 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -643,7 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[3] = 1;
 	ib.ptr[4] = 0xDEADBEEF;
 	ib.length_dw = 5;
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 6686c9c3005d..03108909a275 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2136,7 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r)
 		goto err2;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 021c17e50d51..a82945f3a5d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -800,7 +800,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[2] = 0xDEADBEEF;
 	ib.length_dw = 3;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r)
 		goto err2;
 
@@ -1551,7 +1551,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
 	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
 
 	/* shedule the ib on the ring */
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r) {
 		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
 		goto fail;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index e6d3544fda06..27ca46d16bc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -701,7 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r)
 		goto err1;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 00b43700c956..278b1fe35385 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -925,7 +925,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
 	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
 	ib.length_dw = 8;
 
-	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
+	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
 	if (r)
 		goto err1;
 
-- 
cgit v1.2.3


From fe707664879b15b9caafd3b9f0f9897aa26a7edd Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Wed, 27 Apr 2016 18:07:41 +0800
Subject: drm/amdgpu: add pipeline sync for compute job
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hardware ring is async processed, the job is executed in parallel.
In some case, this will result vm fault, like jobs with different vmids.

This works around a CPC hw issue which will eventually be fixed in fw.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Monk Liu <monk.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 692d0d02b644..62ce7253e917 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -300,7 +300,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
+		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush &&
-- 
cgit v1.2.3


From 0ea54b9b6c4ebc04cc6f68246b03577a25dbd4bb Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 4 May 2016 10:20:01 +0200
Subject: drm/amdgpu: make the VMID owner always 64bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Otherwise we could (in theory) run into problems on 32bit systems.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e42b0a357fb8..130c0a7c65d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -897,7 +897,7 @@ struct amdgpu_vm_id {
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
 	struct amdgpu_ring      *last_user;
-	atomic_long_t		owner;
+	atomic64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 62ce7253e917..cd578987d6c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -185,7 +185,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (!id)
 			continue;
 
-		if (atomic_long_read(&id->owner) != vm->client_id)
+		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
 		if (pd_addr != id->pd_gpu_addr)
@@ -261,7 +261,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 	id->last_user = ring;
-	atomic_long_set(&id->owner, vm->client_id);
+	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
 	*vm_id = id - adev->vm_manager.ids;
-- 
cgit v1.2.3


From 79aa03893df937967201b6ea34e8d4216b893a38 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 4 May 2016 10:33:11 +0200
Subject: drm/amdgpu: remove owner cleanup v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The client ID is now unique, so no need to resert the owner fields any more.

v2: remove unused variables as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com> (v1)
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 -------------
 1 file changed, 13 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index cd578987d6c0..67f6c2eb8282 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1457,7 +1457,6 @@ error_free_sched_entity:
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
-	struct amdgpu_vm_id *id, *id_tmp;
 	int i;
 
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
@@ -1481,18 +1480,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	amdgpu_bo_unref(&vm->page_directory);
 	fence_put(vm->page_directory_fence);
-
-	mutex_lock(&adev->vm_manager.lock);
-	list_for_each_entry_safe(id, id_tmp, &adev->vm_manager.ids_lru,
-				 list) {
-		if (!id)
-			continue;
-		if (atomic_long_read(&id->owner) == vm->client_id) {
-			atomic_long_set(&id->owner, 0);
-			id->pd_gpu_addr = 0;
-		}
-	}
-	mutex_unlock(&adev->vm_manager.lock);
 }
 
 /**
-- 
cgit v1.2.3


From b1c8a81fdd346274e3c38909740eec7182ef8f8a Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 4 May 2016 10:34:03 +0200
Subject: drm/amdgpu: remove define for reserved client ID
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Just set it to zero instead.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 130c0a7c65d1..0ab5fcc72273 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -363,7 +363,6 @@ struct amdgpu_fence_driver {
 /* some special values for the owner field */
 #define AMDGPU_FENCE_OWNER_UNDEFINED	((void*)0ul)
 #define AMDGPU_FENCE_OWNER_VM		((void*)1ul)
-#define AMDGPU_CLIENT_ID_RESERVED       2
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67f6c2eb8282..ea708cb94862 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1504,7 +1504,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	}
 
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
-	atomic64_set(&adev->vm_manager.client_counter, AMDGPU_CLIENT_ID_RESERVED);
+	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
 
 /**
-- 
cgit v1.2.3


From f4833c4fd4d76ddee9e647814a64251801844db7 Mon Sep 17 00:00:00 2001
From: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Date: Thu, 21 Apr 2016 10:40:18 -0400
Subject: drm/amdgpu: Encapsulate some VM table update parameters (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bundle some VM table parameters into amdgpu_vm_update_params structure,
so that number of function parameters can be reduced. Only structural
change, no logic change.

v2: agd: squash in fix from Harish

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 111 ++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 49 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ea708cb94862..9f36ed30ba11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -53,6 +53,18 @@
 /* Special value that no flush is necessary */
 #define AMDGPU_VM_NO_FLUSH (~0ll)
 
+/* Local structure. Encapsulate some VM table update parameters to reduce
+ * the number of function parameters
+ */
+struct amdgpu_vm_update_params {
+	/* address where to copy page table entries from */
+	uint64_t src;
+	/* DMA addresses to use for mapping */
+	dma_addr_t *pages_addr;
+	/* indirect buffer to fill with commands */
+	struct amdgpu_ib *ib;
+};
+
 /**
  * amdgpu_vm_num_pde - return the number of page directory entries
  *
@@ -389,9 +401,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * amdgpu_vm_update_pages - helper to call the right asic function
  *
  * @adev: amdgpu_device pointer
- * @src: address where to copy page table entries from
- * @pages_addr: DMA addresses to use for mapping
- * @ib: indirect buffer to fill with commands
+ * @vm_update_params: see amdgpu_vm_update_params definition
  * @pe: addr of the page entry
  * @addr: dst addr to write into pe
  * @count: number of page entries to update
@@ -402,29 +412,29 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
  * to setup the page table using the DMA.
  */
 static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
-				   uint64_t src,
-				   dma_addr_t *pages_addr,
-				   struct amdgpu_ib *ib,
+				   struct amdgpu_vm_update_params
+					*vm_update_params,
 				   uint64_t pe, uint64_t addr,
 				   unsigned count, uint32_t incr,
 				   uint32_t flags)
 {
 	trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
 
-	if (src) {
-		src += (addr >> 12) * 8;
-		amdgpu_vm_copy_pte(adev, ib, pe, src, count);
+	if (vm_update_params->src) {
+		amdgpu_vm_copy_pte(adev, vm_update_params->ib,
+			pe, (vm_update_params->src + (addr >> 12) * 8), count);
 
-	} else if (pages_addr) {
-		amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
-				    count, incr, flags);
+	} else if (vm_update_params->pages_addr) {
+		amdgpu_vm_write_pte(adev, vm_update_params->ib,
+			vm_update_params->pages_addr,
+			pe, addr, count, incr, flags);
 
 	} else if (count < 3) {
-		amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
+		amdgpu_vm_write_pte(adev, vm_update_params->ib, NULL, pe, addr,
 				    count, incr, flags);
 
 	} else {
-		amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
+		amdgpu_vm_set_pte_pde(adev, vm_update_params->ib, pe, addr,
 				      count, incr, flags);
 	}
 }
@@ -444,10 +454,12 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	struct amdgpu_ring *ring;
 	struct fence *fence = NULL;
 	struct amdgpu_job *job;
+	struct amdgpu_vm_update_params vm_update_params;
 	unsigned entries;
 	uint64_t addr;
 	int r;
 
+	memset(&vm_update_params, 0, sizeof(vm_update_params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	r = reservation_object_reserve_shared(bo->tbo.resv);
@@ -465,7 +477,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	amdgpu_vm_update_pages(adev, 0, NULL, &job->ibs[0], addr, 0, entries,
+	vm_update_params.ib = &job->ibs[0];
+	amdgpu_vm_update_pages(adev, &vm_update_params, addr, 0, entries,
 			       0, 0);
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 
@@ -538,11 +551,12 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
 	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
+	struct amdgpu_vm_update_params vm_update_params;
 	struct fence *fence = NULL;
 
 	int r;
 
+	memset(&vm_update_params, 0, sizeof(vm_update_params));
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
 
 	/* padding, etc. */
@@ -555,7 +569,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	ib = &job->ibs[0];
+	vm_update_params.ib = &job->ibs[0];
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -575,7 +589,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, 0, NULL, ib,
+				amdgpu_vm_update_pages(adev, &vm_update_params,
 						       last_pde, last_pt,
 						       count, incr,
 						       AMDGPU_PTE_VALID);
@@ -590,14 +604,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, 0, NULL, ib, last_pde, last_pt,
-				       count, incr, AMDGPU_PTE_VALID);
+		amdgpu_vm_update_pages(adev, &vm_update_params,
+					last_pde, last_pt,
+					count, incr, AMDGPU_PTE_VALID);
 
-	if (ib->length_dw != 0) {
-		amdgpu_ring_pad_ib(ring, ib);
+	if (vm_update_params.ib->length_dw != 0) {
+		amdgpu_ring_pad_ib(ring, vm_update_params.ib);
 		amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
 				 AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(ib->length_dw > ndw);
+		WARN_ON(vm_update_params.ib->length_dw > ndw);
 		r = amdgpu_job_submit(job, ring, &vm->entity,
 				      AMDGPU_FENCE_OWNER_VM, &fence);
 		if (r)
@@ -623,18 +638,15 @@ error_free:
  * amdgpu_vm_frag_ptes - add fragment information to PTEs
  *
  * @adev: amdgpu_device pointer
- * @src: address where to copy page table entries from
- * @pages_addr: DMA addresses to use for mapping
- * @ib: IB for the update
+ * @vm_update_params: see amdgpu_vm_update_params definition
  * @pe_start: first PTE to handle
  * @pe_end: last PTE to handle
  * @addr: addr those PTEs should point to
  * @flags: hw mapping flags
  */
 static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
-				uint64_t src,
-				dma_addr_t *pages_addr,
-				struct amdgpu_ib *ib,
+				struct amdgpu_vm_update_params
+					*vm_update_params,
 				uint64_t pe_start, uint64_t pe_end,
 				uint64_t addr, uint32_t flags)
 {
@@ -671,11 +683,11 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 		return;
 
 	/* system pages are non continuously */
-	if (src || pages_addr || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end)) {
+	if (vm_update_params->src || vm_update_params->pages_addr ||
+		!(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
 
 		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, src, pages_addr, ib, pe_start,
+		amdgpu_vm_update_pages(adev, vm_update_params, pe_start,
 				       addr, count, AMDGPU_GPU_PAGE_SIZE,
 				       flags);
 		return;
@@ -684,21 +696,21 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 	/* handle the 4K area at the beginning */
 	if (pe_start != frag_start) {
 		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(adev, 0, NULL, ib, pe_start, addr,
+		amdgpu_vm_update_pages(adev, vm_update_params, pe_start, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 	}
 
 	/* handle the area in the middle */
 	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_start, addr, count,
+	amdgpu_vm_update_pages(adev, vm_update_params, frag_start, addr, count,
 			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
 
 	/* handle the 4K area at the end */
 	if (frag_end != pe_end) {
 		addr += AMDGPU_GPU_PAGE_SIZE * count;
 		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(adev, 0, NULL, ib, frag_end, addr,
+		amdgpu_vm_update_pages(adev, vm_update_params, frag_end, addr,
 				       count, AMDGPU_GPU_PAGE_SIZE, flags);
 	}
 }
@@ -707,8 +719,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
  * @adev: amdgpu_device pointer
- * @src: address where to copy page table entries from
- * @pages_addr: DMA addresses to use for mapping
+ * @vm_update_params: see amdgpu_vm_update_params definition
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
@@ -718,10 +729,9 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * Update the page tables in the range @start - @end.
  */
 static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
-				  uint64_t src,
-				  dma_addr_t *pages_addr,
+				  struct amdgpu_vm_update_params
+					*vm_update_params,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_ib *ib,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint32_t flags)
 {
@@ -747,7 +757,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 
 		if (last_pe_end != pe_start) {
 
-			amdgpu_vm_frag_ptes(adev, src, pages_addr, ib,
+			amdgpu_vm_frag_ptes(adev, vm_update_params,
 					    last_pe_start, last_pe_end,
 					    last_dst, flags);
 
@@ -762,7 +772,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, src, pages_addr, ib, last_pe_start,
+	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
 			    last_pe_end, last_dst, flags);
 }
 
@@ -794,11 +804,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned nptes, ncmds, ndw;
 	struct amdgpu_job *job;
-	struct amdgpu_ib *ib;
+	struct amdgpu_vm_update_params vm_update_params;
 	struct fence *f = NULL;
 	int r;
 
 	ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+	memset(&vm_update_params, 0, sizeof(vm_update_params));
+	vm_update_params.src = src;
+	vm_update_params.pages_addr = pages_addr;
 
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
@@ -815,11 +828,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* padding, etc. */
 	ndw = 64;
 
-	if (src) {
+	if (vm_update_params.src) {
 		/* only copy commands needed */
 		ndw += ncmds * 7;
 
-	} else if (pages_addr) {
+	} else if (vm_update_params.pages_addr) {
 		/* header for write data commands */
 		ndw += ncmds * 4;
 
@@ -838,7 +851,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		return r;
 
-	ib = &job->ibs[0];
+	vm_update_params.ib = &job->ibs[0];
 
 	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
 			     owner);
@@ -849,11 +862,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(adev, src, pages_addr, vm, ib, start,
+	amdgpu_vm_update_ptes(adev, &vm_update_params, vm, start,
 			      last + 1, addr, flags);
 
-	amdgpu_ring_pad_ib(ring, ib);
-	WARN_ON(ib->length_dw > ndw);
+	amdgpu_ring_pad_ib(ring, vm_update_params.ib);
+	WARN_ON(vm_update_params.ib->length_dw > ndw);
 	r = amdgpu_job_submit(job, ring, &vm->entity,
 			      AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
-- 
cgit v1.2.3


From 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989 Mon Sep 17 00:00:00 2001
From: Chunming Zhou <David1.Zhou@amd.com>
Date: Wed, 27 Apr 2016 18:07:41 +0800
Subject: drm/amdgpu: add pipeline sync while vmid switch in same ctx

Since vmid-mgr supports vmid sharing in one vm, the same ctx could
get different vmids for two emits without vm flush, vm_flush could
be done in another ring.

Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 +++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++---
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 992f00b65be4..01c36b8d6222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -799,6 +799,7 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64				cond_exe_gpu_addr;
 	volatile u32	*cond_exe_cpu_addr;
+	int                     vmid;
 };
 
 /*
@@ -936,7 +937,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		    uint32_t oa_base, uint32_t oa_size,
+		    bool vmid_switch);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e35423b78e..7a0b1e50f293 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -122,6 +122,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
+	int vmid = 0, old_vmid = ring->vmid;
 	struct fence *hwf;
 	uint64_t ctx;
 
@@ -135,9 +136,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (job) {
 		vm = job->vm;
 		ctx = job->ctx;
+		vmid = job->vm_id;
 	} else {
 		vm = NULL;
 		ctx = 0;
+		vmid = 0;
 	}
 
 	if (!ring->ready) {
@@ -163,7 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
 				    job->gds_base, job->gds_size,
 				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size);
+				    job->oa_base, job->oa_size,
+				    (ring->current_ctx == ctx) && (old_vmid != vmid));
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -180,7 +184,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	need_ctx_switch = ring->current_ctx != ctx;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
-
 		/* drop preamble IBs if we don't have a context switch */
 		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
 			continue;
@@ -188,6 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
+		ring->vmid = vmid;
 	}
 
 	if (ring->funcs->emit_hdp_invalidate)
@@ -198,6 +202,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
 			amdgpu_vm_reset_id(adev, job->vm_id);
+		ring->vmid = old_vmid;
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9f36ed30ba11..62a4c127620f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -298,7 +298,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size)
+		    uint32_t oa_base, uint32_t oa_size,
+		    bool vmid_switch)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
@@ -312,8 +313,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush &&
-- 
cgit v1.2.3


From 7c4021d403ca72ce52d39c17d8154974521a82be Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Mon, 13 Jun 2016 18:59:17 -0400
Subject: Revert "drm/amdgpu: add pipeline sync while vmid switch in same ctx"

This reverts commit 2ba272d7bde27e1db2cf1c6cee49b01b7ea08989.

The issue fixed by this patch is specific to compute rings and the
previous patch was enough.  Additionally, this patch as been traced
to strange behavior on some CZ systems so we might as well drop it.
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 4 +---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 9 ++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 +++---
 3 files changed, 6 insertions(+), 13 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 70af26d97d28..e055d5be1c3c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -799,7 +799,6 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64				cond_exe_gpu_addr;
 	volatile u32	*cond_exe_cpu_addr;
-	int                     vmid;
 };
 
 /*
@@ -937,8 +936,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size,
-		    bool vmid_switch);
+		    uint32_t oa_base, uint32_t oa_size);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7a0b1e50f293..34e35423b78e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -122,7 +122,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
-	int vmid = 0, old_vmid = ring->vmid;
 	struct fence *hwf;
 	uint64_t ctx;
 
@@ -136,11 +135,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (job) {
 		vm = job->vm;
 		ctx = job->ctx;
-		vmid = job->vm_id;
 	} else {
 		vm = NULL;
 		ctx = 0;
-		vmid = 0;
 	}
 
 	if (!ring->ready) {
@@ -166,8 +163,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
 				    job->gds_base, job->gds_size,
 				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size,
-				    (ring->current_ctx == ctx) && (old_vmid != vmid));
+				    job->oa_base, job->oa_size);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -184,6 +180,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	need_ctx_switch = ring->current_ctx != ctx;
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];
+
 		/* drop preamble IBs if we don't have a context switch */
 		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
 			continue;
@@ -191,7 +188,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
 				    need_ctx_switch);
 		need_ctx_switch = false;
-		ring->vmid = vmid;
 	}
 
 	if (ring->funcs->emit_hdp_invalidate)
@@ -202,7 +198,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
 			amdgpu_vm_reset_id(adev, job->vm_id);
-		ring->vmid = old_vmid;
 		amdgpu_ring_undo(ring);
 		return r;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 62a4c127620f..9f36ed30ba11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -298,8 +298,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 		    unsigned vm_id, uint64_t pd_addr,
 		    uint32_t gds_base, uint32_t gds_size,
 		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size,
-		    bool vmid_switch)
+		    uint32_t oa_base, uint32_t oa_size)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
@@ -313,7 +312,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring,
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed || vmid_switch))
+	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
+		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
 	if (ring->funcs->emit_vm_flush &&
-- 
cgit v1.2.3