diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 201 |
1 files changed, 117 insertions, 84 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 65803e153a22..d97e330a5022 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -505,7 +505,7 @@ static int kfd_sysfs_create_file(struct kfd_process *p, struct attribute *attr, static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) { int ret = 0; - struct kfd_process_device *pdd; + int i; char stats_dir_filename[MAX_SYSFS_FILENAME_LEN]; if (!p) @@ -520,7 +520,8 @@ static int kfd_procfs_add_sysfs_stats(struct kfd_process *p) * - proc/<pid>/stats_<gpuid>/evicted_ms * - proc/<pid>/stats_<gpuid>/cu_occupancy */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; struct kobject *kobj_stats; snprintf(stats_dir_filename, MAX_SYSFS_FILENAME_LEN, @@ -571,7 +572,7 @@ err: static int kfd_procfs_add_sysfs_files(struct kfd_process *p) { int ret = 0; - struct kfd_process_device *pdd; + int i; if (!p) return -EINVAL; @@ -584,7 +585,9 @@ static int kfd_procfs_add_sysfs_files(struct kfd_process *p) * - proc/<pid>/vram_<gpuid> * - proc/<pid>/sdma_<gpuid> */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + snprintf(pdd->vram_filename, MAX_SYSFS_FILENAME_LEN, "vram_%u", pdd->dev->id); ret = kfd_sysfs_create_file(p, &pdd->attr_vram, pdd->vram_filename); @@ -775,10 +778,8 @@ struct kfd_process *kfd_create_process(struct file *filep) goto out; ret = kfd_process_init_cwsr_apu(process, filep); - if (ret) { - process = ERR_PTR(ret); - goto out; - } + if (ret) + goto out_destroy; if (!procfs.kobj) goto out; @@ -826,6 +827,14 @@ out: mutex_unlock(&kfd_processes_mutex); return process; + +out_destroy: + hash_del_rcu(&process->kfd_processes); + mutex_unlock(&kfd_processes_mutex); + synchronize_srcu(&kfd_processes_srcu); + /* kfd_process_free_notifier will trigger the cleanup */ + mmu_notifier_put(&process->mmu_notifier); + return ERR_PTR(ret); } struct kfd_process *kfd_get_process(const struct task_struct *thread) @@ -875,21 +884,23 @@ void kfd_unref_process(struct kfd_process *p) kref_put(&p->ref, kfd_process_ref_release); } + static void kfd_process_device_free_bos(struct kfd_process_device *pdd) { struct kfd_process *p = pdd->process; void *mem; int id; + int i; /* * Remove all handles from idr and release appropriate * local memory object */ idr_for_each_entry(&pdd->alloc_idr, mem, id) { - struct kfd_process_device *peer_pdd; - list_for_each_entry(peer_pdd, &p->per_device_data, - per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *peer_pdd = p->pdds[i]; + if (!peer_pdd->vm) continue; amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( @@ -903,18 +914,19 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) static void kfd_process_free_outstanding_kfd_bos(struct kfd_process *p) { - struct kfd_process_device *pdd; + int i; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) - kfd_process_device_free_bos(pdd); + for (i = 0; i < p->n_pdds; i++) + kfd_process_device_free_bos(p->pdds[i]); } static void kfd_process_destroy_pdds(struct kfd_process *p) { - struct kfd_process_device *pdd, *temp; + int i; + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; - list_for_each_entry_safe(pdd, temp, &p->per_device_data, - per_device_list) { pr_debug("Releasing pdd (topology id %d) for process (pasid 0x%x)\n", pdd->dev->id, p->pasid); @@ -923,11 +935,6 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) pdd->dev->kgd, pdd->vm); fput(pdd->drm_file); } - else if (pdd->vm) - amdgpu_amdkfd_gpuvm_destroy_process_vm( - pdd->dev->kgd, pdd->vm); - - list_del(&pdd->per_device_list); if (pdd->qpd.cwsr_kaddr && !pdd->qpd.cwsr_base) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, @@ -949,7 +956,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) } kfree(pdd); + p->pdds[i] = NULL; } + p->n_pdds = 0; } /* No process locking is needed in this function, because the process @@ -961,7 +970,7 @@ static void kfd_process_wq_release(struct work_struct *work) { struct kfd_process *p = container_of(work, struct kfd_process, release_work); - struct kfd_process_device *pdd; + int i; /* Remove the procfs files */ if (p->kobj) { @@ -970,7 +979,9 @@ static void kfd_process_wq_release(struct work_struct *work) kobject_put(p->kobj_queues); p->kobj_queues = NULL; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + sysfs_remove_file(p->kobj, &pdd->attr_vram); sysfs_remove_file(p->kobj, &pdd->attr_sdma); sysfs_remove_file(p->kobj, &pdd->attr_evict); @@ -1011,6 +1022,16 @@ static void kfd_process_ref_release(struct kref *ref) queue_work(kfd_process_wq, &p->release_work); } +static struct mmu_notifier *kfd_process_alloc_notifier(struct mm_struct *mm) +{ + int idx = srcu_read_lock(&kfd_processes_srcu); + struct kfd_process *p = find_process_by_mm(mm); + + srcu_read_unlock(&kfd_processes_srcu, idx); + + return p ? &p->mmu_notifier : ERR_PTR(-ESRCH); +} + static void kfd_process_free_notifier(struct mmu_notifier *mn) { kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); @@ -1020,7 +1041,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { struct kfd_process *p; - struct kfd_process_device *pdd = NULL; + int i; /* * The kfd_process structure can not be free because the @@ -1044,8 +1065,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, * pdd is in debug mode, we should first force unregistration, * then we will be able to destroy the queues */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - struct kfd_dev *dev = pdd->dev; + for (i = 0; i < p->n_pdds; i++) { + struct kfd_dev *dev = p->pdds[i]->dev; mutex_lock(kfd_get_dbgmgr_mutex()); if (dev && dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { @@ -1075,17 +1096,18 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, + .alloc_notifier = kfd_process_alloc_notifier, .free_notifier = kfd_process_free_notifier, }; static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; - struct kfd_process_device *pdd; + int i; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { - struct kfd_dev *dev = pdd->dev; - struct qcm_process_device *qpd = &pdd->qpd; + for (i = 0; i < p->n_pdds; i++) { + struct kfd_dev *dev = p->pdds[i]->dev; + struct qcm_process_device *qpd = &p->pdds[i]->qpd; if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; @@ -1145,6 +1167,25 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) return 0; } +void kfd_process_set_trap_handler(struct qcm_process_device *qpd, + uint64_t tba_addr, + uint64_t tma_addr) +{ + if (qpd->cwsr_kaddr) { + /* KFD trap handler is bound, record as second-level TBA/TMA + * in first-level TMA. First-level trap will jump to second. + */ + uint64_t *tma = + (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[0] = tba_addr; + tma[1] = tma_addr; + } else { + /* No trap handler bound, bind as first-level TBA/TMA. */ + qpd->tba_addr = tba_addr; + qpd->tma_addr = tma_addr; + } +} + /* * On return the kfd_process is fully operational and will be freed when the * mm is released @@ -1152,6 +1193,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) static struct kfd_process *create_process(const struct task_struct *thread) { struct kfd_process *process; + struct mmu_notifier *mn; int err = -ENOMEM; process = kzalloc(sizeof(*process), GFP_KERNEL); @@ -1162,7 +1204,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) mutex_init(&process->mutex); process->mm = thread->mm; process->lead_thread = thread->group_leader; - INIT_LIST_HEAD(&process->per_device_data); + process->n_pdds = 0; INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker); INIT_DELAYED_WORK(&process->restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); @@ -1182,19 +1224,28 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; - /* Must be last, have to use release destruction after this */ - process->mmu_notifier.ops = &kfd_process_mmu_notifier_ops; - err = mmu_notifier_register(&process->mmu_notifier, process->mm); - if (err) + /* alloc_notifier needs to find the process in the hash table */ + hash_add_rcu(kfd_processes_table, &process->kfd_processes, + (uintptr_t)process->mm); + + /* MMU notifier registration must be the last call that can fail + * because after this point we cannot unwind the process creation. + * After this point, mmu_notifier_put will trigger the cleanup by + * dropping the last process reference in the free_notifier. + */ + mn = mmu_notifier_get(&kfd_process_mmu_notifier_ops, process->mm); + if (IS_ERR(mn)) { + err = PTR_ERR(mn); goto err_register_notifier; + } + BUG_ON(mn != &process->mmu_notifier); get_task_struct(process->lead_thread); - hash_add_rcu(kfd_processes_table, &process->kfd_processes, - (uintptr_t)process->mm); return process; err_register_notifier: + hash_del_rcu(&process->kfd_processes); kfd_process_free_outstanding_kfd_bos(process); kfd_process_destroy_pdds(process); err_init_apertures: @@ -1244,11 +1295,11 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p) { - struct kfd_process_device *pdd = NULL; + int i; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) - if (pdd->dev == dev) - return pdd; + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i]->dev == dev) + return p->pdds[i]; return NULL; } @@ -1258,6 +1309,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, { struct kfd_process_device *pdd = NULL; + if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE)) + return NULL; pdd = kzalloc(sizeof(*pdd), GFP_KERNEL); if (!pdd) return NULL; @@ -1286,7 +1339,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->vram_usage = 0; pdd->sdma_past_activity_counter = 0; atomic64_set(&pdd->evict_duration_counter, 0); - list_add(&pdd->per_device_list, &p->per_device_data); + p->pdds[p->n_pdds++] = pdd; /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); @@ -1319,19 +1372,18 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct kfd_dev *dev; int ret; + if (!drm_file) + return -EINVAL; + if (pdd->vm) - return drm_file ? -EBUSY : 0; + return -EBUSY; p = pdd->process; dev = pdd->dev; - if (drm_file) - ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( - dev->kgd, drm_file, p->pasid, - &pdd->vm, &p->kgd_process_info, &p->ef); - else - ret = amdgpu_amdkfd_gpuvm_create_process_vm(dev->kgd, p->pasid, - &pdd->vm, &p->kgd_process_info, &p->ef); + ret = amdgpu_amdkfd_gpuvm_acquire_process_vm( + dev->kgd, drm_file, p->pasid, + &pdd->vm, &p->kgd_process_info, &p->ef); if (ret) { pr_err("Failed to create process VM object\n"); return ret; @@ -1353,8 +1405,6 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, err_init_cwsr: err_reserve_ib_mem: kfd_process_device_free_bos(pdd); - if (!drm_file) - amdgpu_amdkfd_gpuvm_destroy_process_vm(dev->kgd, pdd->vm); pdd->vm = NULL; return ret; @@ -1379,6 +1429,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, return ERR_PTR(-ENOMEM); } + if (!pdd->vm) + return ERR_PTR(-ENODEV); + /* * signal runtime-pm system to auto resume and prevent * further runtime suspend once device pdd is created until @@ -1396,10 +1449,6 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, if (err) goto out; - err = kfd_process_device_init_vm(pdd, NULL); - if (err) - goto out; - /* * make sure that runtime_usage counter is incremented just once * per pdd @@ -1418,28 +1467,6 @@ out: return ERR_PTR(err); } -struct kfd_process_device *kfd_get_first_process_device_data( - struct kfd_process *p) -{ - return list_first_entry(&p->per_device_data, - struct kfd_process_device, - per_device_list); -} - -struct kfd_process_device *kfd_get_next_process_device_data( - struct kfd_process *p, - struct kfd_process_device *pdd) -{ - if (list_is_last(&pdd->per_device_list, &p->per_device_data)) - return NULL; - return list_next_entry(pdd, per_device_list); -} - -bool kfd_has_process_device_data(struct kfd_process *p) -{ - return !(list_empty(&p->per_device_data)); -} - /* Create specific handle mapped to mem from process local memory idr * Assumes that the process lock is held. */ @@ -1515,11 +1542,13 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) */ int kfd_process_evict_queues(struct kfd_process *p) { - struct kfd_process_device *pdd; int r = 0; + int i; unsigned int n_evicted = 0; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, &pdd->qpd); if (r) { @@ -1535,7 +1564,9 @@ fail: /* To keep state consistent, roll back partial eviction by * restoring queues */ - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + if (n_evicted == 0) break; if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, @@ -1551,10 +1582,12 @@ fail: /* kfd_process_restore_queues - Restore all user queues of a process */ int kfd_process_restore_queues(struct kfd_process *p) { - struct kfd_process_device *pdd; int r, ret = 0; + int i; + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); if (r) { |