diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
41 files changed, 563 insertions, 477 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c8ad6bf6618a..88db3c263e5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -82,6 +82,7 @@ #include "amdgpu_bo_list.h" #include "amdgpu_gem.h" #include "amdgpu_doorbell.h" +#include "amdgpu_amdkfd.h" #define MAX_GPU_INSTANCE 16 @@ -163,6 +164,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -862,6 +864,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* KFD */ + struct amdgpu_kfd_dev kfd; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -875,9 +880,6 @@ struct amdgpu_device { atomic64_t visible_pin_size; atomic64_t gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; @@ -910,7 +912,9 @@ struct amdgpu_device { bool in_gpu_reset; struct mutex lock_reset; struct amdgpu_doorbell_index doorbell_index; + int asic_reset_res; + struct work_struct xgmi_reset_work; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 47db65926d71..4376b17ca594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -886,6 +886,5 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); - if (adev->atif) - kfree(adev->atif); + kfree(adev->atif); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d693b8047653..2dfaf158ef07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -26,15 +26,26 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include <linux/module.h> +#include <linux/dma-buf.h> const struct kgd2kfd_calls *kgd2kfd; static const unsigned int compute_vmid_bitmap = 0xFF00; +/* Total memory size in system memory and all GPU VRAM. Used to + * estimate worst case amount of memory to reserve for page tables + */ +uint64_t amdgpu_amdkfd_total_mem_size; + int amdgpu_amdkfd_init(void) { + struct sysinfo si; int ret; + si_meminfo(&si); + amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; + amdgpu_amdkfd_total_mem_size *= si.mem_unit; + #ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) @@ -87,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); + + if (adev->kfd.dev) + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -128,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { int i, n; int last_valid_bit; - if (adev->kfd) { + + if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, @@ -167,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_start_offset); if (adev->asic_type < CHIP_VEGA10) { - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); return; } @@ -196,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) gpu_resources.reserved_doorbell_mask = 0x1e0; gpu_resources.reserved_doorbell_val = 0x0e0; - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (adev->kfd) { - kgd2kfd->device_exit(adev->kfd); - adev->kfd = NULL; + if (adev->kfd.dev) { + kgd2kfd->device_exit(adev->kfd.dev); + adev->kfd.dev = NULL; } } void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (adev->kfd) - kgd2kfd->interrupt(adev->kfd, ih_ring_entry); + if (adev->kfd.dev) + kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); } void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (adev->kfd) - kgd2kfd->suspend(adev->kfd); + if (adev->kfd.dev) + kgd2kfd->suspend(adev->kfd.dev); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->resume(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->resume(adev->kfd.dev); return r; } @@ -235,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->pre_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->pre_reset(adev->kfd.dev); return r; } @@ -245,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->post_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->post_reset(adev->kfd.dev); return r; } @@ -419,6 +434,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dma_buf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + uint64_t metadata_flags; + int r = -EINVAL; + + dma_buf = dma_buf_get(dma_buf_fd); + if (IS_ERR(dma_buf)) + return PTR_ERR(dma_buf); + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + goto out_put; + + obj = dma_buf->priv; + if (obj->dev->driver != adev->ddev->driver) + /* Can't handle buffers from different drivers */ + goto out_put; + + adev = obj->dev->dev_private; + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + goto out_put; + + r = 0; + if (dma_buf_kgd) + *dma_buf_kgd = (struct kgd_dev *)adev; + if (bo_size) + *bo_size = amdgpu_bo_size(bo); + if (metadata_size) + *metadata_size = bo->metadata_size; + if (metadata_buffer) + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, + metadata_size, &metadata_flags); + if (flags) { + *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + *flags |= ALLOC_MEM_FLAGS_PUBLIC; + } + +out_put: + dma_buf_put(dma_buf); + return r; +} + uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -501,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd) { + if (adev->kfd.dev) { if ((1 << vmid) & compute_vmid_bitmap) return true; } @@ -515,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) return false; } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index bcf587b4ba98..70429f7aa9a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -35,6 +34,7 @@ #include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; +extern uint64_t amdgpu_amdkfd_total_mem_size; struct amdgpu_device; @@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence { char timeline_name[TASK_COMM_LEN]; }; +struct amdgpu_kfd_dev { + struct kfd_dev *dev; + uint64_t vram_used; +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -144,6 +149,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dmabuf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); @@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dmabuf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 72a357dae070..ff7fac7df34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -23,6 +23,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 0e2a56b6a9b6..56ea929f524b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -24,6 +24,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 03b604c96d94..5c51d4910650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -26,6 +26,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f3129b912714..be1ab43473c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> +#include <linux/dma-buf.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -110,17 +111,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } -static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, +static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { - size_t acc_size, system_mem_needed, ttm_mem_needed; + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); - spin_lock(&kfd_mem_limit.mem_limit_lock); - + vram_needed = 0; if (domain == AMDGPU_GEM_DOMAIN_GTT) { /* TTM GTT memory */ system_mem_needed = acc_size + size; @@ -133,23 +134,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, /* VRAM and SG */ system_mem_needed = acc_size; ttm_mem_needed = acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) + vram_needed = size; } + spin_lock(&kfd_mem_limit.mem_limit_lock); + if ((kfd_mem_limit.system_mem_used + system_mem_needed > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > - kfd_mem_limit.max_ttm_mem_limit)) + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) || + (adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - reserved_for_pt)) { ret = -ENOMEM; - else { + } else { kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + adev->kfd.vram_used += vram_needed; } spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } -static void unreserve_system_mem_limit(struct amdgpu_device *adev, +static void unreserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -167,6 +175,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, } else { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + adev->kfd.vram_used -= size; + WARN_ONCE(adev->kfd.vram_used < 0, + "kfd VRAM memory accounting unbalanced"); + } } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); @@ -176,29 +189,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_unlock(&kfd_mem_limit.mem_limit_lock); } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { - spin_lock(&kfd_mem_limit.mem_limit_lock); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + u32 domain = bo->preferred_domains; + bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - kfd_mem_limit.ttm_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); - } else { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size; + domain = AMDGPU_GEM_DOMAIN_CPU; + sg = false; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, - "kfd TTM memory accounting unbalanced"); - spin_unlock(&kfd_mem_limit.mem_limit_lock); + unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); } @@ -535,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdgpu_bo *bo = mem->bo; INIT_LIST_HEAD(&entry->head); - entry->shared = true; + entry->num_shared = 1; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) @@ -676,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -740,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -885,6 +887,24 @@ update_gpuvm_pte_failed: return ret; } +static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg->sgl->dma_address = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -1168,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; @@ -1196,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + bo_type = ttm_bo_type_sg; + alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; } else { return -EINVAL; } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) - return -ENOMEM; + if (!*mem) { + ret = -ENOMEM; + goto err; + } INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1235,8 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, - alloc_domain, false); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); if (ret) { pr_debug("Insufficient system memory\n"); goto err_reserve_limit; @@ -1250,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; - bp.type = ttm_bo_type_device; + bp.type = bo_type; bp.resv = NULL; ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { @@ -1258,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + if (bo_type == ttm_bo_type_sg) { + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + } bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) @@ -1289,10 +1326,15 @@ allocate_init_user_pages_failed: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain, false); + unreserve_mem_limit(adev, size, alloc_domain, !!sg); err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); +err: + if (sg) { + sg_free_table(sg); + kfree(sg); + } return ret; } @@ -1362,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. + */ + if (mem->bo->tbo.sg) { + sg_free_table(mem->bo->tbo.sg); + kfree(mem->bo->tbo.sg); + } + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); @@ -1664,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dma_buf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + return -EINVAL; + + obj = dma_buf->priv; + if (obj->dev->dev_private != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + if (size) + *size = amdgpu_bo_size(bo); + + if (mmap_offset) + *mmap_offset = amdgpu_bo_mmap_offset(bo); + + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -1830,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) validate_list.head) { list_add_tail(&mem->resv_list.head, &resv_list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } /* Reserve all BOs and page tables for validation */ @@ -2049,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) list_add_tail(&mem->resv_list.head, &ctx.list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 14d2982a47cc..5c79da8e1150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); entry->tv.bo = &bo->tbo; - entry->tv.shared = !bo->prime_shared_count; if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) list->gds_obj = bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index dc54e9efd910..5dc3ee372e2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &bo->tbo; - p->uf_entry.tv.shared = true; + /* One for TTM and one for the CS job */ + p->uf_entry.tv.num_shared = 2; p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); @@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + /* One for TTM and one for the CS job */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 2; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); @@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo)); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* Make sure we use the exclusive slot for shared BOs */ + if (bo->prime_shared_count) + e->tv.num_shared = 0; + e->bo_va = amdgpu_vm_bo_find(vm, bo); + } if (gds) { p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; @@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); - if (r) - return r; - p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); if (amdgpu_vm_debug) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c index 5b550706ee76..7e22be7ca68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -74,7 +74,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); csa_tv.bo = &bo->tbo; - csa_tv.shared = true; + csa_tv.num_shared = 1; list_add(&csa_tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &pd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 95f4c4139fc6..d85184b5b35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return -ENOMEM; mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL); if (r < 0) { mutex_unlock(&mgr->lock); kfree(ctx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index c75badfa5c4c..b60afeade50a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -515,7 +515,6 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { - amdgpu_asic_init_doorbell_index(adev); /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { @@ -529,6 +528,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) return -EINVAL; + amdgpu_asic_init_doorbell_index(adev); + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); @@ -1864,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_device_fini(adev); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); @@ -2353,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } + +static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) +{ + struct amdgpu_device *adev = + container_of(__work, struct amdgpu_device, xgmi_reset_work); + + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) + DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + adev->asic_reset_res, adev->ddev->unique); +} + + /** * amdgpu_device_init - initialize the driver * @@ -2451,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, amdgpu_device_delay_enable_gfx_off); + INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; @@ -3239,6 +3258,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) if (amdgpu_gpu_recovery == -1) { switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: case CHIP_TOPAZ: case CHIP_TONGA: case CHIP_FIJI: @@ -3328,10 +3349,31 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, */ if (need_full_reset) { list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { - r = amdgpu_asic_reset(tmp_adev); - if (r) - DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", r, tmp_adev->ddev->unique); + break; + } + } + + /* For XGMI wait for all PSP resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } } } @@ -3518,8 +3560,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */ if (tmp_adev == adev) continue; - dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique); - amdgpu_device_lock_adev(tmp_adev); r = amdgpu_device_pre_asic_reset(tmp_adev, NULL, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7b3d1ebda9df..f4f00217546e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; - tv.shared = true; + tv.num_shared = 1; list_add(&tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); @@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); + if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + tv.num_shared = 1; + else + tv.num_shared = 0; list_add(&tv.head, &list); } else { gobj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h index d63daba9b17c..f1ddfc50bcc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + /* * GEM objects. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 8c57924c075f..81e6070d255b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -99,6 +99,7 @@ struct amdgpu_xgmi { unsigned num_physical_nodes; /* gpu list in the same hive */ struct list_head head; + bool supported; }; struct amdgpu_gmc { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 9ce8c93ec19b..f877bb78d10a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -51,14 +51,12 @@ struct amdgpu_ih_ring { struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ u32 (*get_wptr)(struct amdgpu_device *adev); - bool (*prescreen_iv)(struct amdgpu_device *adev); void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); void (*set_rptr)(struct amdgpu_device *adev); }; #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) -#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 6b6524f04ce0..b7968f426862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -145,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev, u32 ring_index = ih->rptr >> 2; struct amdgpu_iv_entry entry; - /* Prescreening of high-frequency interrupts */ - if (!amdgpu_ih_prescreen_iv(adev)) - return; - - /* Before dispatching irq to IP blocks, send it to amdkfd */ - amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]); - entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; amdgpu_ih_decode_iv(adev, &entry); @@ -371,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, unsigned client_id = entry->client_id; unsigned src_id = entry->src_id; struct amdgpu_irq_src *src; + bool handled = false; int r; trace_amdgpu_iv(entry); if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); - return; - } - if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { + } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - return; - } - if (adev->irq.virq[src_id]) { + } else if (adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); - } else { - if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); - return; - } - src = adev->irq.client[client_id].sources[src_id]; - if (!src) { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); - return; - } + } else if (!adev->irq.client[client_id].sources) { + DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); + } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, entry); - if (r) + if (r < 0) DRM_ERROR("error processing interrupt (%d)\n", r); + else if (r) + handled = true; + + } else { + DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); } + + /* Send it to amdkfd as well if it isn't already handled */ + if (!handled) + amdgpu_amdkfd_interrupt(adev, entry->iv_entry); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index cf768acb51dc..fd271f9746a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_subtract_pin_size(bo); if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_amdkfd_unreserve_memory_limit(bo); amdgpu_bo_kunmap(bo); @@ -608,53 +608,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, } /** - * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: &amdgpu_bo buffer to be backed up - * @resv: reservation object with embedded fence - * @fence: dma_fence associated with the operation - * @direct: whether to submit the job directly - * - * Copies an &amdgpu_bo buffer object to its shadow object. - * Not used for now. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct) - -{ - struct amdgpu_bo *shadow = bo->shadow; - uint64_t bo_addr, shadow_addr; - int r; - - if (!shadow) - return -EINVAL; - - bo_addr = amdgpu_bo_gpu_offset(bo); - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); - - r = reservation_object_reserve_shared(bo->tbo.resv, 1); - if (r) - goto err; - - r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, - amdgpu_bo_size(bo), resv, fence, - direct, false); - if (!r) - amdgpu_bo_fence(bo, *fence, true); - -err: - return r; -} - -/** * amdgpu_bo_validate - validate an &amdgpu_bo buffer object * @bo: pointer to the buffer object * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 7d3312d0da11..9291c2f837e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, bool direct); int amdgpu_bo_validate(struct amdgpu_bo *bo); int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 3e44d889f7af..71913a18d142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -39,8 +39,6 @@ #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> -static const struct dma_buf_ops amdgpu_dmabuf_ops; - /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table * implementation @@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, return ret; } -static const struct dma_buf_ops amdgpu_dmabuf_ops = { +const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_gem_map_attach, .detach = amdgpu_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index e05dc66b1090..6759d898b3ab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -155,10 +155,22 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } -static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, +bool psp_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + else + return false; +} + +static void psp_prep_tmr_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { - cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + if (psp_support_vmr_ring(psp)) + cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; + else + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; @@ -192,7 +204,7 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", PSP_TMR_SIZE, psp->tmr_mc_addr); @@ -536,8 +548,10 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0) + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { + psp_ring_destroy(psp, PSP_RING_TYPE__KM); goto skip_memalloc; + } psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 9ec5d1a666a6..10decf70c9aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -217,6 +217,7 @@ extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +bool psp_support_vmr_ring(struct psp_context *psp); extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 5b75bdc8dc28..335a0edf114b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, { ktime_t deadline = ktime_add_us(ktime_get(), 10000); - if (!ring->funcs->soft_recovery) + if (!ring->funcs->soft_recovery || !fence) return false; atomic_inc(&ring->adev->gpu_reset_counter); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 58a2363040dd..fc91f3e54a87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, { entry->priority = 0; entry->tv.bo = &vm->root.base.bo->tbo; - entry->tv.shared = true; + /* One for the VM updates, one for TTM and one for the CS job */ + entry->tv.num_shared = 3; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } @@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = reservation_object_reserve_shared(bo->tbo.resv, 1); - if (r) - return r; - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto error; @@ -1842,10 +1839,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -3026,6 +3019,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = reservation_object_reserve_shared(root->tbo.resv, 1); + if (r) + goto error_unreserve; + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); @@ -3055,7 +3052,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, } INIT_KFIFO(vm->faults); - vm->fault_credit = 16; return 0; @@ -3268,42 +3264,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID - * - * @adev: amdgpu_device pointer - * @pasid: PASID do identify the VM - * - * This function is expected to be called in interrupt context. - * - * Returns: - * True if there was fault credit, false otherwise - */ -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid) -{ - struct amdgpu_vm *vm; - - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (!vm) { - /* VM not found, can't track fault credit */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) { - /* Too many faults in this VM */ - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - vm->fault_credit--; - spin_unlock(&adev->vm_manager.pasid_lock); - return true; -} - -/** * amdgpu_vm_manager_init - init the VM manager * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 2a8898d19c8b..e8dcfd59fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -229,9 +229,6 @@ struct amdgpu_vm { /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); - /* Limit non-retry fault storms */ - unsigned int fault_credit; - /* Points to the KFD process VM info */ struct amdkfd_process_info *process_info; @@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index fb37e69f1bba..0b263a9857c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -78,7 +78,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id, ret); else - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n", + dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n", adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id); @@ -94,9 +94,9 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev) int count = 0, ret = -EINVAL; - if ((adev->asic_type < CHIP_VEGA20) || - (adev->flags & AMD_IS_APU) ) + if (!adev->gmc.xgmi.supported) return 0; + adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp); adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp); @@ -135,3 +135,23 @@ exit: mutex_unlock(&xgmi_mutex); return ret; } + +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive; + + if (!adev->gmc.xgmi.supported) + return; + + mutex_lock(&xgmi_mutex); + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + goto exit; + + if (!(hive->number_devices--)) + mutex_destroy(&hive->hive_lock); + +exit: + mutex_unlock(&xgmi_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 6335bfdcc51d..6151eb9c8ad3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -35,5 +35,6 @@ struct amdgpu_hive_info { struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index b5775c6a857b..8a8b4967a101 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * [127:96] - reserved */ -/** - * cik_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - /** * cik_ih_decode_iv - decode an interrupt vector * @@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { static const struct amdgpu_ih_funcs cik_ih_funcs = { .get_wptr = cik_ih_get_wptr, - .prescreen_iv = cik_ih_prescreen_iv, .decode_iv = cik_ih_decode_iv, .set_rptr = cik_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index df5ac4d85a00..9d3ea298e116 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -208,34 +208,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) } /** - * cz_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * cz_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { static const struct amdgpu_ih_funcs cz_ih_funcs = { .get_wptr = cz_ih_get_wptr, - .prescreen_iv = cz_ih_prescreen_iv, .decode_iv = cz_ih_decode_iv, .set_rptr = cz_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 1454fc306783..381f593b0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4068,6 +4068,11 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) { + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_init_csb(adev); + return 0; + } + adev->gfx.rlc.funcs->stop(adev); adev->gfx.rlc.funcs->reset(adev); gfx_v8_0_init_pg(adev); @@ -4947,14 +4952,13 @@ static bool gfx_v8_0_check_soft_reset(void *handle) static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; /* stop the rlc */ adev->gfx.rlc.funcs->stop(adev); @@ -5051,14 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle) static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index af8ccb014be3..f62d570a81a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); @@ -645,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + /* + * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin + * instead of picasso_rlc.bin. + * Judgment method: + * PCO AM4: revision >= 0xC8 && revision <= 0xCF + * or revision >= 0xD8 && revision <= 0xDF + * otherwise is PCO FP5 + */ + if (!strcmp(chip_name, "picasso") && + (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || + ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3a4e5d8d5162..ce150de723c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } +/** + * vega10_ih_prescreen_iv - prescreen an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Returns true if the interrupt vector should be further processed. + */ +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + uint64_t addr) +{ + struct amdgpu_vm *vm; + u64 key; + int r; + + /* No PASID, can't identify faulting process */ + if (!entry->pasid) + return true; + + /* Not a retry fault */ + if (!(entry->src_data[1] & 0x80)) + return true; + + /* Track retry faults in per-VM fault FIFO. */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); + if (!vm) { + /* VM not found, process it normally */ + spin_unlock(&adev->vm_manager.pasid_lock); + return true; + } + + key = AMDGPU_VM_FAULT(entry->pasid, addr); + r = amdgpu_vm_add_fault(vm->fault_hash, key); + + /* Hash table is full or the fault is already being processed, + * ignore further page faults + */ + if (r != 0) { + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + /* No locking required with single writer and single reader */ + r = kfifo_put(&vm->faults, key); + if (!r) { + /* FIFO is full. Ignore it until there is space */ + amdgpu_vm_clear_fault(vm->fault_hash, key); + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + + spin_unlock(&adev->vm_manager.pasid_lock); + /* It's the first fault for this address, process it normally */ + return true; +} + static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); @@ -902,6 +961,9 @@ static int gmc_v9_0_sw_init(void *handle) /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -934,7 +996,7 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); - if (adev->asic_type == CHIP_VEGA20) { + if (adev->gmc.xgmi.supported) { r = gfxhub_v1_1_get_xgmi_info(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index cf0fc61aebe6..a3984d10b604 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -208,34 +208,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) } /** - * iceland_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * iceland_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -440,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { static const struct amdgpu_ih_funcs iceland_ih_funcs = { .get_wptr = iceland_ih_get_wptr, - .prescreen_iv = iceland_ih_prescreen_iv, .decode_iv = iceland_ih_decode_iv, .set_rptr = iceland_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 882bd83a28c4..0de00fbe9233 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; @@ -89,7 +91,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ - + GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ + GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ }; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index e5dd052d9e06..6c9a1b748ca7 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -171,8 +171,11 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) * are already been loaded. */ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; + } /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), @@ -296,26 +299,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp, struct psp_ring *ring = &psp->km_ring; struct amdgpu_device *adev = psp->adev; - /* Write low address of the ring to C2PMSG_69 */ - psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); - /* Write high address of the ring to C2PMSG_70 */ - psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); - /* Write size of ring to C2PMSG_71 */ - psp_ring_reg = ring->ring_size; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); - /* Write the ring initialization command to C2PMSG_64 */ - psp_ring_reg = ring_type; - psp_ring_reg = psp_ring_reg << 16; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); - - /* there might be handshake issue with hardware which needs delay */ - mdelay(20); - - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); + if (psp_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } return ret; } @@ -326,15 +350,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp, int ret = 0; struct amdgpu_device *adev = psp->adev; - /* Write the ring destroy command to C2PMSG_64 */ - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS); + /* Write the ring destroy command*/ + if (psp_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); /* there might be handshake issue with hardware which needs delay */ mdelay(20); - /* Wait for response flag (bit 31) in C2PMSG_64 */ - ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x80000000, false); + /* Wait for response flag (bit 31) */ + if (psp_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); return ret; } @@ -373,7 +406,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; /* KM (GPCOM) prepare write pointer */ - psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + if (psp_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); /* Update KM RB frame pointer to new frame */ /* write_frame ptr increments by size of rb_frame in bytes */ @@ -402,7 +438,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp, /* Update the write Pointer in DWORDs */ psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + if (psp_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); return 0; } @@ -547,7 +587,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) /*send the mode 1 reset command*/ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 7efb823dd3b1..7357fd56e614 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -592,7 +592,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) /*send the mode 1 reset command*/ WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index b3d7d9f83202..2938fb9f17cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * si_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool si_ih_prescreen_iv(struct amdgpu_device *adev) -{ - /* Process all interrupts */ - return true; -} - static void si_ih_decode_iv(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { @@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { static const struct amdgpu_ih_funcs si_ih_funcs = { .get_wptr = si_ih_get_wptr, - .prescreen_iv = si_ih_prescreen_iv, .decode_iv = si_ih_decode_iv, .set_rptr = si_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 83624e150ca7..8849b74078d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -507,6 +507,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } + if (adev->asic_type == CHIP_VEGA20) + adev->gmc.xgmi.supported = true; + if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; else if (adev->asic_type == CHIP_VEGA20) diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index dcdbb4d72472..15da06ddeb75 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,34 +219,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) } /** - * tonga_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * tonga_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -506,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { static const struct amdgpu_ih_funcs tonga_ih_funcs = { .get_wptr = tonga_ih_get_wptr, - .prescreen_iv = tonga_ih_prescreen_iv, .decode_iv = tonga_ih_decode_iv, .set_rptr = tonga_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index d84b687240d1..2c250b01a903 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -220,90 +220,6 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) } /** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u32 dw0, dw3, dw4, dw5; - u16 pasid; - u64 addr, key; - struct amdgpu_vm *vm; - int r; - - dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); - dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); - - /* Filter retry page faults, let only the first one pass. If - * there are too many outstanding faults, ignore them until - * some faults get cleared. - */ - switch (dw0 & 0xff) { - case SOC15_IH_CLIENTID_VMC: - case SOC15_IH_CLIENTID_UTCL2: - break; - default: - /* Not a VM fault */ - return true; - } - - pasid = dw3 & 0xffff; - /* No PASID, can't identify faulting process */ - if (!pasid) - return true; - - /* Not a retry fault, check fault credit */ - if (!(dw5 & 0x80)) { - if (!amdgpu_vm_pasid_fault_credit(adev, pasid)) - goto ignore_iv; - return true; - } - - /* Track retry faults in per-VM fault FIFO. */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12); - key = AMDGPU_VM_FAULT(pasid, addr); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } else { - r = amdgpu_vm_add_fault(vm->fault_hash, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) { - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. Ignore it until there is space */ - amdgpu_vm_clear_fault(vm->fault_hash, key); - spin_unlock(&adev->vm_manager.pasid_lock); - goto ignore_iv; - } - - spin_unlock(&adev->vm_manager.pasid_lock); - /* It's the first fault for this address, process it normally */ - return true; - -ignore_iv: - adev->irq.ih.rptr += 32; - return false; -} - -/** * vega10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -487,7 +403,6 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, - .prescreen_iv = vega10_ih_prescreen_iv, .decode_iv = vega10_ih_decode_iv, .set_rptr = vega10_ih_set_rptr }; |