From f10379503e2b1814e27957899bcdfd0132a1915e Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Sat, 30 Jul 2016 00:48:39 +0200 Subject: drm/amdgpu: print more accurate error messages on IB submission failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's useful for debugging. Signed-off-by: Marek Olšák Reviewed-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 0307ff5887c5..8eb93dff69d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -386,8 +386,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + DRM_ERROR("ttm_eu_reserve_buffers failed.\n"); goto error_free_pages; + } /* Without a BO list we don't have userptr BOs */ if (!p->bo_list) @@ -427,9 +429,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, /* Unreserve everything again. */ ttm_eu_backoff_reservation(&p->ticket, &p->validated); - /* We tried to often, just abort */ + /* We tried too many times, just abort */ if (!--tries) { r = -EDEADLK; + DRM_ERROR("deadlock in %s\n", __func__); goto error_free_pages; } @@ -441,11 +444,13 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, sizeof(struct page*)); if (!e->user_pages) { r = -ENOMEM; + DRM_ERROR("calloc failure in %s\n", __func__); goto error_free_pages; } r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); if (r) { + DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n"); drm_free_large(e->user_pages); e->user_pages = NULL; goto error_free_pages; @@ -462,12 +467,16 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved = 0; r = amdgpu_cs_list_validate(p, &duplicates); - if (r) + if (r) { + DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n"); goto error_validate; + } r = amdgpu_cs_list_validate(p, &p->validated); - if (r) + if (r) { + DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n"); goto error_validate; + } fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); -- cgit v1.2.3 From 14fd833efa3f13619623501de5e2221dfdab7f7f Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 4 Aug 2016 13:05:46 +0800 Subject: drm/amdgpu: validate shadow as well when validating bo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 76 ++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 30 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8eb93dff69d6..396a412d70d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -287,18 +287,56 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) return max(bytes_moved_threshold, 1024*1024ull); } +static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, + struct amdgpu_bo *bo) +{ + u64 initial_bytes_moved; + uint32_t domain; + int r; + + if (bo->pin_count) + return 0; + + /* Avoid moving this one if we have moved too many buffers + * for this IB already. + * + * Note that this allows moving at least one buffer of + * any size, because it doesn't take the current "bo" + * into account. We don't want to disallow buffer moves + * completely. + */ + if (p->bytes_moved <= p->bytes_moved_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + +retry: + amdgpu_ttm_placement_from_domain(bo, domain); + initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - + initial_bytes_moved; + + if (unlikely(r)) { + if (r != -ERESTARTSYS && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; + } + } + + return r; +} + int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { struct amdgpu_bo_list_entry *lobj; - u64 initial_bytes_moved; int r; list_for_each_entry(lobj, validated, tv.head) { struct amdgpu_bo *bo = lobj->robj; bool binding_userptr = false; struct mm_struct *usermm; - uint32_t domain; usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); if (usermm && usermm != current->mm) @@ -313,35 +351,13 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, binding_userptr = true; } - if (bo->pin_count) - continue; - - /* Avoid moving this one if we have moved too many buffers - * for this IB already. - * - * Note that this allows moving at least one buffer of - * any size, because it doesn't take the current "bo" - * into account. We don't want to disallow buffer moves - * completely. - */ - if (p->bytes_moved <= p->bytes_moved_threshold) - domain = bo->prefered_domains; - else - domain = bo->allowed_domains; - - retry: - amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - - initial_bytes_moved; - - if (unlikely(r)) { - if (r != -ERESTARTSYS && domain != bo->allowed_domains) { - domain = bo->allowed_domains; - goto retry; - } + r = amdgpu_cs_bo_validate(p, bo); + if (r) return r; + if (bo->shadow) { + r = amdgpu_cs_bo_validate(p, bo); + if (r) + return r; } if (binding_userptr) { -- cgit v1.2.3 From 99e124f402d6d649498e2aa3cbcf4563a37fea0e Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Aug 2016 14:43:17 +0200 Subject: drm/amdgpu: cleanup amdgpu_vm_bo_update params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make it more obvious what we are doing here. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++++++---------- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e30a0d6353cd..3cc2629eb158 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -928,7 +928,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_sync *sync); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - struct ttm_mem_reg *mem); + bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 396a412d70d4..d80e5d3a4add 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -642,7 +642,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (bo_va == NULL) continue; - r = amdgpu_vm_bo_update(adev, bo_va, &bo->tbo.mem); + r = amdgpu_vm_bo_update(adev, bo_va, false); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index d2824d97b7e5..bf56f1814437 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1117,28 +1117,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @bo_va: requested BO and VM object - * @mem: ttm mem + * @clear: if true clear the entries * * Fill in the page table entries for @bo_va. * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved and mutex must be locked! */ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, - struct ttm_mem_reg *mem) + bool clear) { struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; dma_addr_t *pages_addr = NULL; uint32_t gtt_flags, flags; + struct ttm_mem_reg *mem; struct fence *exclusive; uint64_t addr; int r; - if (mem) { + if (clear) { + mem = NULL; + addr = 0; + exclusive = NULL; + } else { struct ttm_dma_tt *ttm; + mem = &bo_va->bo->tbo.mem; addr = (u64)mem->start << PAGE_SHIFT; switch (mem->mem_type) { case TTM_PL_TT: @@ -1156,9 +1160,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv); - } else { - addr = 0; - exclusive = NULL; } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); @@ -1189,7 +1190,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_lock(&vm->status_lock); list_splice_init(&bo_va->invalids, &bo_va->valids); list_del_init(&bo_va->vm_status); - if (!mem) + if (clear) list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); @@ -1252,7 +1253,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, struct amdgpu_bo_va, vm_status); spin_unlock(&vm->status_lock); - r = amdgpu_vm_bo_update(adev, bo_va, NULL); + r = amdgpu_vm_bo_update(adev, bo_va, true); if (r) return r; -- cgit v1.2.3 From 95844d20ae024b5d553c9923a0d3145c3956bf69 Mon Sep 17 00:00:00 2001 From: Marek Olšák Date: Wed, 17 Aug 2016 23:49:27 +0200 Subject: drm/amdgpu: throttle buffer migrations at CS using a fixed MBps limit (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old mechanism used a per-submission limit that didn't take previous submissions within the same time frame into account. It also filled VRAM slowly when VRAM usage dropped due to a big eviction or buffer deallocation. This new method establishes a configurable MBps limit that is obeyed when VRAM usage is very high. When VRAM usage is not very high, it gives the driver the freedom to fill it quickly. The result is more consistent performance. It can't keep the BO move rate low if lots of evictions are happening due to VRAM fragmentation, or if a big buffer is being migrated. The amdgpu.moverate parameter can be used to set a non-default limit. Measurements can be done to find out which amdgpu.moverate setting gives the best results. Mainly APUs and cards with small VRAM will benefit from this. For F1 2015, anything with 2 GB VRAM or less will benefit. Some benchmark results - F1 2015 (Tonga 2GB): Limit MinFPS AvgFPS Old code: 14 32.6 128 MB/s: 28 41 64 MB/s: 15.5 43 32 MB/s: 28.7 43.4 8 MB/s: 27.8 44.4 8 MB/s: 21.9 42.8 (different run) Random drops in Min FPS can still occur (due to fragmented VRAM?), but the average FPS is much better. 8 MB/s is probably a good limit for this game & the current VRAM management. The random FPS drops are still to be tackled. v2: use a spinlock Signed-off-by: Marek Olšák Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 152 ++++++++++++++++++++--------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 10 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 + 4 files changed, 127 insertions(+), 48 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4cfcf9c37800..938ef1cb68cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -64,6 +64,7 @@ extern int amdgpu_modeset; extern int amdgpu_vram_limit; extern int amdgpu_gart_size; +extern int amdgpu_moverate; extern int amdgpu_benchmarking; extern int amdgpu_testing; extern int amdgpu_audio; @@ -2034,6 +2035,14 @@ struct amdgpu_device { atomic64_t num_evictions; atomic_t gpu_reset_counter; + /* data for buffer migration throttling */ + struct { + spinlock_t lock; + s64 last_update_us; + s64 accum_us; /* accumulated microseconds */ + u32 log2_max_MBps; + } mm_stats; + /* display */ bool enable_virtual_display; struct amdgpu_mode_info mode_info; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d80e5d3a4add..82927570333a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -235,56 +235,115 @@ free_chunk: return ret; } -/* Returns how many bytes TTM can move per IB. +/* Convert microseconds to bytes. */ +static u64 us_to_bytes(struct amdgpu_device *adev, s64 us) +{ + if (us <= 0 || !adev->mm_stats.log2_max_MBps) + return 0; + + /* Since accum_us is incremented by a million per second, just + * multiply it by the number of MB/s to get the number of bytes. + */ + return us << adev->mm_stats.log2_max_MBps; +} + +static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes) +{ + if (!adev->mm_stats.log2_max_MBps) + return 0; + + return bytes >> adev->mm_stats.log2_max_MBps; +} + +/* Returns how many bytes TTM can move right now. If no bytes can be moved, + * it returns 0. If it returns non-zero, it's OK to move at least one buffer, + * which means it can go over the threshold once. If that happens, the driver + * will be in debt and no other buffer migrations can be done until that debt + * is repaid. + * + * This approach allows moving a buffer of any size (it's important to allow + * that). + * + * The currency is simply time in microseconds and it increases as the clock + * ticks. The accumulated microseconds (us) are converted to bytes and + * returned. */ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) { - u64 real_vram_size = adev->mc.real_vram_size; - u64 vram_usage = atomic64_read(&adev->vram_usage); + s64 time_us, increment_us; + u64 max_bytes; + u64 free_vram, total_vram, used_vram; - /* This function is based on the current VRAM usage. + /* Allow a maximum of 200 accumulated ms. This is basically per-IB + * throttling. * - * - If all of VRAM is free, allow relocating the number of bytes that - * is equal to 1/4 of the size of VRAM for this IB. + * It means that in order to get full max MBps, at least 5 IBs per + * second must be submitted and not more than 200ms apart from each + * other. + */ + const s64 us_upper_bound = 200000; - * - If more than one half of VRAM is occupied, only allow relocating - * 1 MB of data for this IB. - * - * - From 0 to one half of used VRAM, the threshold decreases - * linearly. - * __________________ - * 1/4 of -|\ | - * VRAM | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \ | - * | \________|1 MB - * |----------------| - * VRAM 0 % 100 % - * used used - * - * Note: It's a threshold, not a limit. The threshold must be crossed - * for buffer relocations to stop, so any buffer of an arbitrary size - * can be moved as long as the threshold isn't crossed before - * the relocation takes place. We don't want to disable buffer - * relocations completely. + if (!adev->mm_stats.log2_max_MBps) + return 0; + + total_vram = adev->mc.real_vram_size - adev->vram_pin_size; + used_vram = atomic64_read(&adev->vram_usage); + free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; + + spin_lock(&adev->mm_stats.lock); + + /* Increase the amount of accumulated us. */ + time_us = ktime_to_us(ktime_get()); + increment_us = time_us - adev->mm_stats.last_update_us; + adev->mm_stats.last_update_us = time_us; + adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, + us_upper_bound); + + /* This prevents the short period of low performance when the VRAM + * usage is low and the driver is in debt or doesn't have enough + * accumulated us to fill VRAM quickly. * - * The idea is that buffers should be placed in VRAM at creation time - * and TTM should only do a minimum number of relocations during - * command submission. In practice, you need to submit at least - * a dozen IBs to move all buffers to VRAM if they are in GTT. + * The situation can occur in these cases: + * - a lot of VRAM is freed by userspace + * - the presence of a big buffer causes a lot of evictions + * (solution: split buffers into smaller ones) * - * Also, things can get pretty crazy under memory pressure and actual - * VRAM usage can change a lot, so playing safe even at 50% does - * consistently increase performance. + * If 128 MB or 1/8th of VRAM is free, start filling it now by setting + * accum_us to a positive number. */ + if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) { + s64 min_us; + + /* Be more aggresive on dGPUs. Try to fill a portion of free + * VRAM now. + */ + if (!(adev->flags & AMD_IS_APU)) + min_us = bytes_to_us(adev, free_vram / 4); + else + min_us = 0; /* Reset accum_us on APUs. */ + + adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); + } - u64 half_vram = real_vram_size >> 1; - u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage; - u64 bytes_moved_threshold = half_free_vram >> 1; - return max(bytes_moved_threshold, 1024*1024ull); + /* This returns 0 if the driver is in debt to disallow (optional) + * buffer moves. + */ + max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); + + spin_unlock(&adev->mm_stats.lock); + return max_bytes; +} + +/* Report how many bytes have really been moved for the last command + * submission. This can result in a debt that can stop buffer migrations + * temporarily. + */ +static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, + u64 num_bytes) +{ + spin_lock(&adev->mm_stats.lock); + adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); + spin_unlock(&adev->mm_stats.lock); } static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, @@ -297,15 +356,10 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, if (bo->pin_count) return 0; - /* Avoid moving this one if we have moved too many buffers - * for this IB already. - * - * Note that this allows moving at least one buffer of - * any size, because it doesn't take the current "bo" - * into account. We don't want to disallow buffer moves - * completely. + /* Don't move this buffer if we have depleted our allowance + * to move it. Don't move anything if the threshold is zero. */ - if (p->bytes_moved <= p->bytes_moved_threshold) + if (p->bytes_moved < p->bytes_moved_threshold) domain = bo->prefered_domains; else domain = bo->allowed_domains; @@ -494,6 +548,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, goto error_validate; } + amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved); + fpriv->vm.last_eviction_counter = atomic64_read(&p->adev->num_evictions); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1ef4034b3be5..847583d8a3b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1490,6 +1490,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, { int r, i; bool runtime = false; + u32 max_MBps; adev->shutdown = false; adev->dev = &pdev->dev; @@ -1549,6 +1550,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->didt_idx_lock); spin_lock_init(&adev->gc_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); + spin_lock_init(&adev->mm_stats.lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); @@ -1660,6 +1662,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->accel_working = true; + /* Initialize the buffer migration limit. */ + if (amdgpu_moverate >= 0) + max_MBps = amdgpu_moverate; + else + max_MBps = 8; /* Allow 8 MB/s. */ + /* Get a log2 for easy divisions. */ + adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); + amdgpu_fbdev_init(adev); r = amdgpu_ib_pool_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 1b787d974515..6fed75454800 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -62,6 +62,7 @@ int amdgpu_vram_limit = 0; int amdgpu_gart_size = -1; /* auto */ +int amdgpu_moverate = -1; /* auto */ int amdgpu_benchmarking = 0; int amdgpu_testing = 0; int amdgpu_audio = -1; @@ -100,6 +101,9 @@ module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); MODULE_PARM_DESC(gartsize, "Size of PCIE/IGP gart to setup in megabytes (32, 64, etc., -1 = auto)"); module_param_named(gartsize, amdgpu_gart_size, int, 0600); +MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)"); +module_param_named(moverate, amdgpu_moverate, int, 0600); + MODULE_PARM_DESC(benchmark, "Run benchmark"); module_param_named(benchmark, amdgpu_benchmarking, int, 0444); -- cgit v1.2.3 From 1abdc3d73dd9dc2f3dc619d466d378e70cbcc24a Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 31 Aug 2016 17:28:11 +0200 Subject: drm/amdgpu: only try again if we actually run into -ENOMEM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All other errors can't be fixed by using a different memory domain. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82927570333a..d7a957376a90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -371,11 +371,9 @@ retry: p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - initial_bytes_moved; - if (unlikely(r)) { - if (r != -ERESTARTSYS && domain != bo->allowed_domains) { - domain = bo->allowed_domains; - goto retry; - } + if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; } return r; -- cgit v1.2.3 From 662bfa61fff1de4dd96029d556d7b301e27c3556 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 1 Sep 2016 12:13:18 +0200 Subject: drm/amdgpu: prevent command submission failures under memory pressure v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As last resort try to evict BOs from the current working set into other memory domains. This effectively prevents command submission failures when VM page tables have been swapped out. v2: fix typos Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 63 +++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2cfaa6429c64..39baabe5f092 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1225,6 +1225,7 @@ struct amdgpu_cs_parser { struct fence *fence; uint64_t bytes_moved_threshold; uint64_t bytes_moved; + struct amdgpu_bo_list_entry *evictable; /* user fence */ struct amdgpu_bo_list_entry uf_entry; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index d7a957376a90..e29e7b9ca3a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -379,6 +379,58 @@ retry: return r; } +/* Last resort, try to evict something from the current working set */ +static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, + struct amdgpu_bo_list_entry *lobj) +{ + uint32_t domain = lobj->robj->allowed_domains; + int r; + + if (!p->evictable) + return false; + + for (;&p->evictable->tv.head != &p->validated; + p->evictable = list_prev_entry(p->evictable, tv.head)) { + + struct amdgpu_bo_list_entry *candidate = p->evictable; + struct amdgpu_bo *bo = candidate->robj; + u64 initial_bytes_moved; + uint32_t other; + + /* If we reached our current BO we can forget it */ + if (candidate == lobj) + break; + + other = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + + /* Check if this BO is in one of the domains we need space for */ + if (!(other & domain)) + continue; + + /* Check if we can move this BO somewhere else */ + other = bo->allowed_domains & ~domain; + if (!other) + continue; + + /* Good we can try to move this BO somewhere else */ + amdgpu_ttm_placement_from_domain(bo, other); + initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - + initial_bytes_moved; + + if (unlikely(r)) + break; + + p->evictable = list_prev_entry(p->evictable, tv.head); + list_move(&candidate->tv.head, &p->validated); + + return true; + } + + return false; +} + int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { @@ -403,9 +455,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, binding_userptr = true; } - r = amdgpu_cs_bo_validate(p, bo); + if (p->evictable == lobj) + p->evictable = NULL; + + do { + r = amdgpu_cs_bo_validate(p, bo); + } while (r == -ENOMEM && amdgpu_cs_try_evict(p, lobj)); if (r) return r; + if (bo->shadow) { r = amdgpu_cs_bo_validate(p, bo); if (r) @@ -533,6 +591,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); p->bytes_moved = 0; + p->evictable = list_last_entry(&p->validated, + struct amdgpu_bo_list_entry, + tv.head); r = amdgpu_cs_list_validate(p, &duplicates); if (r) { -- cgit v1.2.3 From 3aecd24c65b9539b6faac2a52a9aaa7bc90f4677 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Thu, 25 Aug 2016 15:40:48 +0800 Subject: drm/amdgpu: change job->ctx field name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit job->ctx actually is a fence_context of the entity it belongs to, naming it as ctx is too vague, and we'll need add amdgpu_ctx into the job structure later. Signed-off-by: Monk Liu Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 39baabe5f092..10ec29c50077 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1241,7 +1241,7 @@ struct amdgpu_job { struct fence *fence; /* the hw fence */ uint32_t num_ibs; void *owner; - uint64_t ctx; + uint64_t fence_ctx; /* the fence_context this job uses */ bool vm_needs_flush; unsigned vm_id; uint64_t vm_pd_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e29e7b9ca3a6..56bde6436a1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -989,7 +989,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, } job->owner = p->filp; - job->ctx = entity->fence_context; + job->fence_ctx = entity->fence_context; p->fence = fence_get(&job->base.s_fence->finished); cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence); job->uf_sequence = cs->out.handle; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 11f2fba4653d..04263f0fd1af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -124,7 +124,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, bool skip_preamble, need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; - uint64_t ctx; + uint64_t fence_ctx; unsigned i; int r = 0; @@ -135,10 +135,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* ring tests don't use a job */ if (job) { vm = job->vm; - ctx = job->ctx; + fence_ctx = job->fence_ctx; } else { vm = NULL; - ctx = 0; + fence_ctx = 0; } if (!ring->ready) { @@ -174,8 +174,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - skip_preamble = ring->current_ctx == ctx; - need_ctx_switch = ring->current_ctx != ctx; + skip_preamble = ring->current_ctx == fence_ctx; + need_ctx_switch = ring->current_ctx != fence_ctx; for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; @@ -209,7 +209,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, if (patch_offset != ~0 && ring->funcs->patch_cond_exec) amdgpu_ring_patch_cond_exec(ring, patch_offset); - ring->current_ctx = ctx; + ring->current_ctx = fence_ctx; if (ring->funcs->emit_switch_buffer) amdgpu_ring_emit_switch_buffer(ring); amdgpu_ring_commit(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 6674d40eb3ab..ac8d40168a47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -124,7 +124,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, return r; job->owner = owner; - job->ctx = entity->fence_context; + job->fence_ctx = entity->fence_context; *f = fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); amd_sched_entity_push_job(&job->base); -- cgit v1.2.3 From 753ad49c9fdfc732972b0d03f2889f473ed35e59 Mon Sep 17 00:00:00 2001 From: Monk Liu Date: Fri, 26 Aug 2016 13:28:28 +0800 Subject: drm/amdgpu:implement CONTEXT_CONTROL (v5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: for gfx8, use CONTEXT_CONTROL package to dynamically skip preamble CEIB and other load_xxx command in sequence. v2: support GFX7 as well. remove cntxcntl in compute ring funcs because CPC doesn't support this packet. v3: fix reduntant judgement in cntxcntl. v4: some cleanups, don't change cs_submit() v5: keep old MESA supported & bump up KMS version. Signed-off-by: Monk Liu Ack-by: Chunming Zhou Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++ 6 files changed, 79 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 10ec29c50077..717c3b4e1d54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -320,6 +320,7 @@ struct amdgpu_ring_funcs { void (*begin_use)(struct amdgpu_ring *ring); void (*end_use)(struct amdgpu_ring *ring); void (*emit_switch_buffer) (struct amdgpu_ring *ring); + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); }; /* @@ -966,6 +967,7 @@ struct amdgpu_ctx { spinlock_t ring_lock; struct fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; + bool preamble_presented; }; struct amdgpu_ctx_mgr { @@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser { struct amdgpu_bo_list_entry uf_entry; }; +#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ +#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ +#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ + struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; @@ -1239,6 +1245,7 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_ib *ibs; struct fence *fence; /* the hw fence */ + uint32_t preamble_status; uint32_t num_ibs; void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ @@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 56bde6436a1f..61b7e25535bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) { + parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; + if (!parser->ctx->preamble_presented) { + parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST; + parser->ctx->preamble_presented = true; + } + } + if (parser->job->ring && parser->job->ring != ring) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 3bbc0faf48c8..ca3d87aac7fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -55,9 +55,10 @@ * - 3.3.0 - Add VM support for UVD on supported hardware. * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. * - 3.5.0 - Add support for new UVD_NO_OP register. + * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer. */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 5 +#define KMS_DRIVER_MINOR 6 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 04263f0fd1af..2aa741c2a64c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, unsigned patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; + uint32_t status = 0; unsigned i; int r = 0; @@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, skip_preamble = ring->current_ctx == fence_ctx; need_ctx_switch = ring->current_ctx != fence_ctx; + if (job && ring->funcs->emit_cntxcntl) { + if (need_ctx_switch) + status |= AMDGPU_HAVE_CTX_SWITCH; + status |= job->preamble_status; + amdgpu_ring_emit_cntxcntl(ring, status); + } + for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; /* drop preamble IBs if we don't have a context switch */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && + skip_preamble && + !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST)) continue; amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index a93a803b659e..8c780f6c1276 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, control); } +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs */ + dw2 |= 0x10002; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + /** * gfx_v7_0_ring_test_ib - basic ring IB test * @@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 44915056297b..dca8b368728c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6076,6 +6076,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & AMDGPU_HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (AMDGPU_PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -6258,6 +6287,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v8_ring_emit_sb, + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { -- cgit v1.2.3 From c855e25090cdafffb87119028eb018030a46dd9e Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 5 Sep 2016 17:00:57 +0200 Subject: drm/amdgpu: bind GTT on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't really need the GTT table any more most of the time. So bind it only on demand. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 34 ++++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 7 ++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 33 ++++++++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 6 +++++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 ++- 8 files changed, 84 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 717c3b4e1d54..2e8f469159c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -2521,6 +2521,7 @@ static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } struct amdgpu_bo_va_mapping * amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo); +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser); #include "amdgpu_object.h" #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 61b7e25535bf..f7dccb10d965 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -639,8 +639,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, } } - if (p->uf_entry.robj) - p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj); + if (!r && p->uf_entry.robj) { + struct amdgpu_bo *uf = p->uf_entry.robj; + + r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem); + p->job->uf_addr += amdgpu_bo_gpu_offset(uf); + } error_validate: if (r) { @@ -1163,3 +1167,29 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, return NULL; } + +/** + * amdgpu_cs_sysvm_access_required - make BOs accessible by the system VM + * + * @parser: command submission parser context + * + * Helper for UVD/VCE VM emulation, make sure BOs are accessible by the system VM. + */ +int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) +{ + unsigned i; + int r; + + if (!parser->bo_list) + return 0; + + for (i = 0; i < parser->bo_list->num_entries; i++) { + struct amdgpu_bo *bo = parser->bo_list->array[i].robj; + + r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem); + if (unlikely(r)) + return r; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 162411bd8145..5a6216c9c007 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -675,6 +675,11 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, dev_err(bo->adev->dev, "%p pin failed\n", bo); goto error; } + r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem); + if (unlikely(r)) { + dev_err(bo->adev->dev, "%p bind failed\n", bo); + goto error; + } bo->pin_count = 1; if (gpu_addr != NULL) @@ -947,6 +952,8 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct fence *fence, u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); + WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT && + !amdgpu_ttm_is_bound(bo->tbo.ttm)); WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && !bo->pin_count); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index a4914b788517..73298fadd7f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -256,8 +256,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, new_start = new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { - case TTM_PL_VRAM: case TTM_PL_TT: + r = amdgpu_ttm_bind(bo->ttm, old_mem); + if (r) + return r; + + case TTM_PL_VRAM: old_start += bo->bdev->man[old_mem->mem_type].gpu_offset; break; default: @@ -265,8 +269,12 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, return -EINVAL; } switch (new_mem->mem_type) { - case TTM_PL_VRAM: case TTM_PL_TT: + r = amdgpu_ttm_bind(bo->ttm, new_mem); + if (r) + return r; + + case TTM_PL_VRAM: new_start += bo->bdev->man[new_mem->mem_type].gpu_offset; break; default: @@ -638,7 +646,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) { struct amdgpu_ttm_tt *gtt = (void*)ttm; - uint32_t flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); int r; if (gtt->userptr) { @@ -659,6 +666,26 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, bo_mem->mem_type == AMDGPU_PL_OA) return -EINVAL; + return 0; +} + +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + + return gtt && !list_empty(>t->list); +} + +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + uint32_t flags; + int r; + + if (!ttm || amdgpu_ttm_is_bound(ttm)) + return 0; + + flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 72f6bfc15d8f..214bae965fc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -77,4 +77,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); +bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); +int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index a71efeb5083b..25dd58a65905 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -882,6 +882,10 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) return -EINVAL; } + r = amdgpu_cs_sysvm_access_required(parser); + if (r) + return r; + ctx.parser = parser; ctx.buf_sizes = buf_sizes; ctx.ib_idx = ib_idx; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 9b71d6c2a968..b0c670294e2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -634,7 +634,11 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) uint32_t allocated = 0; uint32_t tmp, handle = 0; uint32_t *size = &tmp; - int i, r = 0, idx = 0; + int i, r, idx = 0; + + r = amdgpu_cs_sysvm_access_required(p); + if (r) + return r; while (idx < ib->length_dw) { uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bf56f1814437..bd5af328154f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1163,7 +1163,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, } flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem); - gtt_flags = (adev == bo_va->bo->adev) ? flags : 0; + gtt_flags = (amdgpu_ttm_is_bound(bo_va->bo->tbo.ttm) && + adev == bo_va->bo->adev) ? flags : 0; spin_lock(&vm->status_lock); if (!list_empty(&bo_va->vm_status)) -- cgit v1.2.3 From 761c2e82054fda665bcdec95b9daf3d468c5fd5b Mon Sep 17 00:00:00 2001 From: Baoyou Xie Date: Sat, 3 Sep 2016 13:57:14 +0800 Subject: drm/amdgpu: mark symbols static where possible MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get a few warnings when building kernel with W=1: drivers/gpu/drm/amd/amdgpu/cz_smc.c:51:5: warning: no previous prototype for 'cz_send_msg_to_smc_async' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/cz_smc.c:143:5: warning: no previous prototype for 'cz_write_smc_sram_dword' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/iceland_smc.c:124:6: warning: no previous prototype for 'iceland_start_smc' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c:3926:6: warning: no previous prototype for 'gfx_v8_0_rlc_stop' [-Wmissing-prototypes] drivers/gpu/drm/amd/amdgpu/amdgpu_job.c:94:6: warning: no previous prototype for 'amdgpu_job_free_cb' [-Wmissing-prototypes] .... In fact, these functions are only used in the file in which they are declared and don't need a declaration, but can be made static. So this patch marks these functions with 'static'. Reviewed-by: Christian König Acked-by: Huang Rui Reviewed-by: Edward O'Callaghan Signed-off-by: Baoyou Xie Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 +- drivers/gpu/drm/amd/amdgpu/cz_smc.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/iceland_smc.c | 8 ++++---- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 040993c998d4..f1c53a2b09c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -616,7 +616,7 @@ static int amdgpu_cgs_irq_put(struct cgs_device *cgs_device, unsigned src_id, un return amdgpu_irq_put(adev, adev->irq.sources[src_id], type); } -int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, +static int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_clockgating_state state) { @@ -637,7 +637,7 @@ int amdgpu_cgs_set_clockgating_state(struct cgs_device *cgs_device, return r; } -int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, +static int amdgpu_cgs_set_powergating_state(struct cgs_device *cgs_device, enum amd_ip_block_type block_type, enum amd_powergating_state state) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f7dccb10d965..9480be45b45f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -431,7 +431,7 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, return false; } -int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, +static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { struct amdgpu_bo_list_entry *lobj; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index ac8d40168a47..8c5807994073 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -91,7 +91,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) amdgpu_ib_free(job->adev, &job->ibs[i], f); } -void amdgpu_job_free_cb(struct amd_sched_job *s_job) +static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); diff --git a/drivers/gpu/drm/amd/amdgpu/cz_smc.c b/drivers/gpu/drm/amd/amdgpu/cz_smc.c index 69ac373c4d0f..95887e484c51 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_smc.c @@ -50,7 +50,7 @@ static struct cz_smu_private_data *cz_smu_get_priv(struct amdgpu_device *adev) return priv; } -int cz_send_msg_to_smc_async(struct amdgpu_device *adev, u16 msg) +static int cz_send_msg_to_smc_async(struct amdgpu_device *adev, u16 msg) { int i; u32 content = 0, tmp; @@ -142,7 +142,7 @@ int cz_read_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, return 0; } -int cz_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, +static int cz_write_smc_sram_dword(struct amdgpu_device *adev, u32 smc_address, u32 value, u32 limit) { int ret; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index dca8b368728c..e2ed71d5fd53 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -3862,7 +3862,7 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev) } } -void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) +static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev) { WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c index 211839913728..ef7c27d7356a 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c @@ -121,7 +121,7 @@ out: return result; } -void iceland_start_smc(struct amdgpu_device *adev) +static void iceland_start_smc(struct amdgpu_device *adev) { uint32_t val = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL); @@ -129,7 +129,7 @@ void iceland_start_smc(struct amdgpu_device *adev) WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, val); } -void iceland_reset_smc(struct amdgpu_device *adev) +static void iceland_reset_smc(struct amdgpu_device *adev) { uint32_t val = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL); @@ -145,7 +145,7 @@ static int iceland_program_jump_on_start(struct amdgpu_device *adev) return 0; } -void iceland_stop_smc_clock(struct amdgpu_device *adev) +static void iceland_stop_smc_clock(struct amdgpu_device *adev) { uint32_t val = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); @@ -153,7 +153,7 @@ void iceland_stop_smc_clock(struct amdgpu_device *adev) WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, val); } -void iceland_start_smc_clock(struct amdgpu_device *adev) +static void iceland_start_smc_clock(struct amdgpu_device *adev) { uint32_t val = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0); -- cgit v1.2.3 From aa29040b437be9e4255062aa38bcbe7cb50da8c1 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 9 Sep 2016 11:21:43 +0200 Subject: drm/amdgpu: validate size and offset of user fence BO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to validate the offset to make sure that we don't write after the BO. Additional to that a page should be enough and can make address space handling much easier. Signed-off-by: Christian König Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 9480be45b45f..b8412bcbad2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -91,6 +91,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, uint32_t *offset) { struct drm_gem_object *gobj; + unsigned long size; gobj = drm_gem_object_lookup(p->filp, data->handle); if (gobj == NULL) @@ -101,6 +102,11 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; p->uf_entry.user_pages = NULL; + + size = amdgpu_bo_size(p->uf_entry.robj); + if (size != PAGE_SIZE || (data->offset + 8) > size) + return -EINVAL; + *offset = data->offset; drm_gem_object_unreference_unlocked(gobj); -- cgit v1.2.3 From bb990bb09235a3cc38fd4600d48c7bfb7a0a167c Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 9 Sep 2016 16:32:33 +0200 Subject: drm/amdgpu: add a custom GTT memory manager v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only allocate address space when we really need it. v2: fix a typo, add correct function description, stop leaking the node in the error case. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 238 ++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 20 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 9 +- 6 files changed, 265 insertions(+), 11 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d15e9b080ce1..9ec262d4b8a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -23,7 +23,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ - amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o + amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ + amdgpu_gtt_mgr.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b8412bcbad2a..b0f6e6957536 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -648,7 +648,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (!r && p->uf_entry.robj) { struct amdgpu_bo *uf = p->uf_entry.robj; - r = amdgpu_ttm_bind(uf->tbo.ttm, &uf->tbo.mem); + r = amdgpu_ttm_bind(&uf->tbo, &uf->tbo.mem); p->job->uf_addr += amdgpu_bo_gpu_offset(uf); } @@ -1192,7 +1192,7 @@ int amdgpu_cs_sysvm_access_required(struct amdgpu_cs_parser *parser) for (i = 0; i < parser->bo_list->num_entries; i++) { struct amdgpu_bo *bo = parser->bo_list->array[i].robj; - r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem); + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); if (unlikely(r)) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c new file mode 100644 index 000000000000..262e872bea0e --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -0,0 +1,238 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Christian König + */ + +#include +#include "amdgpu.h" + +struct amdgpu_gtt_mgr { + struct drm_mm mm; + spinlock_t lock; + uint64_t available; +}; + +/** + * amdgpu_gtt_mgr_init - init GTT manager and DRM MM + * + * @man: TTM memory type manager + * @p_size: maximum size of GTT + * + * Allocate and initialize the GTT manager. + */ +static int amdgpu_gtt_mgr_init(struct ttm_mem_type_manager *man, + unsigned long p_size) +{ + struct amdgpu_gtt_mgr *mgr; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return -ENOMEM; + + drm_mm_init(&mgr->mm, 0, p_size); + spin_lock_init(&mgr->lock); + mgr->available = p_size; + man->priv = mgr; + return 0; +} + +/** + * amdgpu_gtt_mgr_fini - free and destroy GTT manager + * + * @man: TTM memory type manager + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. + */ +static int amdgpu_gtt_mgr_fini(struct ttm_mem_type_manager *man) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + + spin_lock(&mgr->lock); + if (!drm_mm_clean(&mgr->mm)) { + spin_unlock(&mgr->lock); + return -EBUSY; + } + + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + kfree(mgr); + man->priv = NULL; + return 0; +} + +/** + * amdgpu_gtt_mgr_alloc - allocate new ranges + * + * @man: TTM memory type manager + * @tbo: TTM BO we need this range for + * @place: placement flags and restrictions + * @mem: the resulting mem object + * + * Allocate the address space for a node. + */ +int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + struct drm_mm_node *node = mem->mm_node; + enum drm_mm_search_flags sflags = DRM_MM_SEARCH_BEST; + enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT; + unsigned long fpfn, lpfn; + int r; + + if (node->start != AMDGPU_BO_INVALID_OFFSET) + return 0; + + if (place) + fpfn = place->fpfn; + else + fpfn = 0; + + if (place && place->lpfn) + lpfn = place->lpfn; + else + lpfn = man->size; + + if (place && place->flags & TTM_PL_FLAG_TOPDOWN) { + sflags = DRM_MM_SEARCH_BELOW; + aflags = DRM_MM_CREATE_TOP; + } + + spin_lock(&mgr->lock); + r = drm_mm_insert_node_in_range_generic(&mgr->mm, node, mem->num_pages, + mem->page_alignment, 0, + fpfn, lpfn, sflags, aflags); + spin_unlock(&mgr->lock); + + if (!r) { + mem->start = node->start; + tbo->offset = (tbo->mem.start << PAGE_SHIFT) + + tbo->bdev->man[tbo->mem.mem_type].gpu_offset; + } + + return r; +} + +/** + * amdgpu_gtt_mgr_new - allocate a new node + * + * @man: TTM memory type manager + * @tbo: TTM BO we need this range for + * @place: placement flags and restrictions + * @mem: the resulting mem object + * + * Dummy, allocate the node but no space for it yet. + */ +static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + struct drm_mm_node *node; + int r; + + spin_lock(&mgr->lock); + if (mgr->available < mem->num_pages) { + spin_unlock(&mgr->lock); + return 0; + } + mgr->available -= mem->num_pages; + spin_unlock(&mgr->lock); + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + node->start = AMDGPU_BO_INVALID_OFFSET; + mem->mm_node = node; + + if (place->fpfn || place->lpfn || place->flags & TTM_PL_FLAG_TOPDOWN) { + r = amdgpu_gtt_mgr_alloc(man, tbo, place, mem); + if (unlikely(r)) { + kfree(node); + mem->mm_node = NULL; + } + } else { + mem->start = node->start; + } + + return 0; +} + +/** + * amdgpu_gtt_mgr_del - free ranges + * + * @man: TTM memory type manager + * @tbo: TTM BO we need this range for + * @place: placement flags and restrictions + * @mem: TTM memory object + * + * Free the allocated GTT again. + */ +static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, + struct ttm_mem_reg *mem) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + struct drm_mm_node *node = mem->mm_node; + + if (!node) + return; + + spin_lock(&mgr->lock); + if (node->start != AMDGPU_BO_INVALID_OFFSET) + drm_mm_remove_node(node); + mgr->available += mem->num_pages; + spin_unlock(&mgr->lock); + + kfree(node); + mem->mm_node = NULL; +} + +/** + * amdgpu_gtt_mgr_debug - dump VRAM table + * + * @man: TTM memory type manager + * @prefix: text prefix + * + * Dump the table content using printk. + */ +static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, + const char *prefix) +{ + struct amdgpu_gtt_mgr *mgr = man->priv; + + spin_lock(&mgr->lock); + drm_mm_debug_table(&mgr->mm, prefix); + spin_unlock(&mgr->lock); +} + +const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func = { + amdgpu_gtt_mgr_init, + amdgpu_gtt_mgr_fini, + amdgpu_gtt_mgr_new, + amdgpu_gtt_mgr_del, + amdgpu_gtt_mgr_debug +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 428aa00025e4..9b80dfedcb4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -673,7 +673,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, dev_err(bo->adev->dev, "%p pin failed\n", bo); goto error; } - r = amdgpu_ttm_bind(bo->tbo.ttm, &bo->tbo.mem); + r = amdgpu_ttm_bind(&bo->tbo, &bo->tbo.mem); if (unlikely(r)) { dev_err(bo->adev->dev, "%p bind failed\n", bo); goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index dfb12237a6b0..e21e823f67a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -160,7 +160,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, man->default_caching = TTM_PL_FLAG_CACHED; break; case TTM_PL_TT: - man->func = &ttm_bo_manager_func; + man->func = &amdgpu_gtt_mgr_func; man->gpu_offset = adev->mc.gtt_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; @@ -277,7 +277,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, switch (old_mem->mem_type) { case TTM_PL_TT: - r = amdgpu_ttm_bind(bo->ttm, old_mem); + r = amdgpu_ttm_bind(bo, old_mem); if (r) return r; @@ -290,7 +290,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, } switch (new_mem->mem_type) { case TTM_PL_TT: - r = amdgpu_ttm_bind(bo->ttm, new_mem); + r = amdgpu_ttm_bind(bo, new_mem); if (r) return r; @@ -675,7 +675,6 @@ static int amdgpu_ttm_backend_bind(struct ttm_tt *ttm, return r; } } - gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; if (!ttm->num_pages) { WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n", ttm->num_pages, bo_mem, ttm); @@ -696,16 +695,25 @@ bool amdgpu_ttm_is_bound(struct ttm_tt *ttm) return gtt && !list_empty(>t->list); } -int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem) +int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) { - struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct ttm_tt *ttm = bo->ttm; + struct amdgpu_ttm_tt *gtt = (void *)bo->ttm; uint32_t flags; int r; if (!ttm || amdgpu_ttm_is_bound(ttm)) return 0; + r = amdgpu_gtt_mgr_alloc(&bo->bdev->man[TTM_PL_TT], bo, + NULL, bo_mem); + if (r) { + DRM_ERROR("Failed to allocate GTT address space (%d)\n", r); + return r; + } + flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); + gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, ttm->pages, gtt->ttm.dma_address, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 3ee825f4de28..9812c805326c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -65,6 +65,13 @@ struct amdgpu_mman { struct amdgpu_mman_lru guard; }; +extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; + +int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *tbo, + const struct ttm_place *place, + struct ttm_mem_reg *mem); + int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, uint64_t dst_offset, @@ -78,6 +85,6 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); bool amdgpu_ttm_is_bound(struct ttm_tt *ttm); -int amdgpu_ttm_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem); +int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem); #endif -- cgit v1.2.3