diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau')
34 files changed, 807 insertions, 560 deletions
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 7840b6428afb..8d37a694b772 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -38,7 +38,9 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_fixed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -945,7 +947,8 @@ nv50_msto_prepare(struct drm_atomic_state *state, if (ret == 0) { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, payload->vc_start_slot, payload->time_slots, - payload->pbn, payload->time_slots * mst_state->pbn_div); + payload->pbn, + payload->time_slots * dfixed_trunc(mst_state->pbn_div)); } else { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0); } @@ -982,15 +985,14 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, const int clock = crtc_state->adjusted_mode.clock; asyh->or.bpc = connector->display_info.bpc; - asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3, - false); + asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4); } mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr); if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) { + if (!mst_state->pbn_div.full) { struct nouveau_encoder *outp = mstc->mstm->outp; mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr, @@ -2474,7 +2476,7 @@ nv50_disp_atomic_commit(struct drm_device *dev, err_cleanup: if (ret) - drm_atomic_helper_cleanup_planes(dev, state); + drm_atomic_helper_unprepare_planes(dev, state); done: pm_runtime_put_autosuspend(dev->dev); return ret; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h index 82b267c11147..460459af272d 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h @@ -14,7 +14,7 @@ struct nvkm_event { int index_nr; spinlock_t refs_lock; - spinlock_t list_lock; + rwlock_t list_lock; int *refs; struct list_head ntfy; @@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev, int types_nr, int index_nr, struct nvkm_event *event) { spin_lock_init(&event->refs_lock); - spin_lock_init(&event->list_lock); + rwlock_init(&event->list_lock); return __nvkm_event_init(func, subdev, types_nr, index_nr, event); } diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h index 2fa0445d8928..d1437c08645f 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h @@ -187,7 +187,7 @@ struct nvkm_gsp { void (*rpc_done)(struct nvkm_gsp *gsp, void *repv); void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc); - void *(*rm_ctrl_push)(struct nvkm_gsp_object *, void *argv, u32 repc); + int (*rm_ctrl_push)(struct nvkm_gsp_object *, void **argv, u32 repc); void (*rm_ctrl_done)(struct nvkm_gsp_object *, void *repv); void *(*rm_alloc_get)(struct nvkm_gsp_object *, u32 oclass, u32 argc); @@ -265,7 +265,7 @@ nvkm_gsp_rm_ctrl_get(struct nvkm_gsp_object *object, u32 cmd, u32 argc) return object->client->gsp->rm->rm_ctrl_get(object, cmd, argc); } -static inline void * +static inline int nvkm_gsp_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc) { return object->client->gsp->rm->rm_ctrl_push(object, argv, repc); @@ -275,21 +275,24 @@ static inline void * nvkm_gsp_rm_ctrl_rd(struct nvkm_gsp_object *object, u32 cmd, u32 repc) { void *argv = nvkm_gsp_rm_ctrl_get(object, cmd, repc); + int ret; if (IS_ERR(argv)) return argv; - return nvkm_gsp_rm_ctrl_push(object, argv, repc); + ret = nvkm_gsp_rm_ctrl_push(object, &argv, repc); + if (ret) + return ERR_PTR(ret); + return argv; } static inline int nvkm_gsp_rm_ctrl_wr(struct nvkm_gsp_object *object, void *argv) { - void *repv = nvkm_gsp_rm_ctrl_push(object, argv, 0); - - if (IS_ERR(repv)) - return PTR_ERR(repv); + int ret = nvkm_gsp_rm_ctrl_push(object, &argv, 0); + if (ret) + return ret; return 0; } diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h index 5a2f273d95c8..0e32e71e123f 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h @@ -26,6 +26,49 @@ * DEALINGS IN THE SOFTWARE. */ +/** + * msgqTxHeader -- TX queue data structure + * @version: the version of this structure, must be 0 + * @size: the size of the entire queue, including this header + * @msgSize: the padded size of queue element, 16 is minimum + * @msgCount: the number of elements in this queue + * @writePtr: head index of this queue + * @flags: 1 = swap the RX pointers + * @rxHdrOff: offset of readPtr in this structure + * @entryOff: offset of beginning of queue (msgqRxHeader), relative to + * beginning of this structure + * + * The command queue is a queue of RPCs that are sent from the driver to the + * GSP. The status queue is a queue of messages/responses from GSP-RM to the + * driver. Although the driver allocates memory for both queues, the command + * queue is owned by the driver and the status queue is owned by GSP-RM. In + * addition, the headers of the two queues must not share the same 4K page. + * + * Each queue is prefixed with this data structure. The idea is that a queue + * and its header are written to only by their owner. That is, only the + * driver writes to the command queue and command queue header, and only the + * GSP writes to the status (receive) queue and its header. + * + * This is enforced by the concept of "swapping" the RX pointers. This is + * why the 'flags' field must be set to 1. 'rxHdrOff' is how the GSP knows + * where the where the tail pointer of its status queue. + * + * When the driver writes a new RPC to the command queue, it updates writePtr. + * When it reads a new message from the status queue, it updates readPtr. In + * this way, the GSP knows when a new command is in the queue (it polls + * writePtr) and it knows how much free space is in the status queue (it + * checks readPtr). The driver never cares about how much free space is in + * the status queue. + * + * As usual, producers write to the head pointer, and consumers read from the + * tail pointer. When head == tail, the queue is empty. + * + * So to summarize: + * command.writePtr = head of command queue + * command.readPtr = tail of status queue + * status.writePtr = head of status queue + * status.readPtr = tail of command queue + */ typedef struct { NvU32 version; // queue version @@ -38,6 +81,14 @@ typedef struct NvU32 entryOff; // Offset of entries from start of backing store. } msgqTxHeader; +/** + * msgqRxHeader - RX queue data structure + * @readPtr: tail index of the other queue + * + * Although this is a separate struct, it could easily be merged into + * msgqTxHeader. msgqTxHeader.rxHdrOff is simply the offset of readPtr + * from the beginning of msgqTxHeader. + */ typedef struct { NvU32 readPtr; // message id of last message read diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h index 754c6af42f30..10121218f4d3 100644 --- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h +++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h @@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE { NvU32 size; NvU32 numEntries; - PACKED_REGISTRY_ENTRY entries[0]; + PACKED_REGISTRY_ENTRY entries[] __counted_by(numEntries); } PACKED_REGISTRY_TABLE; #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 2edd7bb13fae..a04156ca8390 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; - /* When a client exits without waiting for it's queued up jobs to - * finish it might happen that we fault the channel. This is due to - * drm_file_free() calling drm_gem_release() before the postclose() - * callback. Hence, we can't tear down this scheduler entity before - * uvmm mappings are unmapped. Currently, we can't detect this case. - * - * However, this should be rare and harmless, since the channel isn't - * needed anymore. - */ - nouveau_sched_entity_fini(&chan->sched_entity); + /* Cancel all jobs from the entity's queue. */ + drm_sched_entity_fini(&chan->sched.entity); - /* wait for all activity to stop before cleaning up */ if (chan->chan) nouveau_channel_idle(chan->chan); + nouveau_sched_fini(&chan->sched); + /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { nouveau_abi16_ntfy_fini(chan, ntfy); @@ -344,8 +337,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched, - drm->sched_wq); + ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); if (ret) goto done; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 9f538486c10e..1f5e243c0c75 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0f3bd187ede6..00cc7d1abaa3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt to release it. */ - if (bo->base.dev) + if (bo->base.dev) { + /* Gem objects not being shared with other VMs get their + * dma_resv from a root GEM object. + */ + if (nvbo->no_share) + drm_gem_object_put(nvbo->r_obj); + drm_gem_object_release(&bo->base); - else + } else { dma_resv_fini(&bo->base._resv); + } kfree(nvbo); } @@ -318,8 +325,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain, (!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT)) continue; - if (pi < 0) - pi = i; + /* pick the last one as it will be smallest. */ + pi = i; + /* Stop once the buffer is larger than the current page size. */ if (*size >= 1ULL << vmm->page[i].shift) break; @@ -1054,17 +1062,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); + struct drm_gem_object *obj = &bo->base; struct ttm_resource *old_reg = bo->resource; struct nouveau_drm_tile *new_tile = NULL; int ret = 0; - if (new_reg->mem_type == TTM_PL_TT) { ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg); if (ret) return ret; } + drm_gpuvm_bo_gem_evict(obj, evict); nouveau_bo_move_ntfy(bo, new_reg); ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -1129,6 +1138,7 @@ out: out_ntfy: if (ret) { nouveau_bo_move_ntfy(bo, bo->resource); + drm_gpuvm_bo_gem_evict(obj, !evict); } return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 07f671cf895e..70c551921a9e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -26,6 +26,11 @@ struct nouveau_bo { struct list_head entry; int pbbo_index; bool validate_mapped; + + /* Root GEM object we derive the dma_resv of in case this BO is not + * shared between VMs. + */ + struct drm_gem_object *r_obj; bool no_share; /* GPU address space is independent of CPU word size */ diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index d8c92521226d..f28f9a857458 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev) if (nouveau_modeset != 2) { ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp); + /* no display hw */ + if (ret == -ENODEV) { + ret = 0; + goto disp_create_err; + } if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) { nouveau_display_create_properties(dev); diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 50589f982d1a..6f6c31a9937b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, static void nouveau_cli_fini(struct nouveau_cli *cli) { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli); + /* All our channels are dead now, which means all the fences they * own are signalled, and all callback functions have been called. * @@ -199,8 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_uvmm_fini(&cli->uvmm); - nouveau_sched_entity_fini(&cli->sched_entity); + nouveau_sched_fini(&cli->sched); + if (uvmm) + nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->vmm); nvif_mmu_dtor(&cli->mmu); @@ -307,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; - ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched, - drm->sched_wq); + /* Don't pass in the (shared) sched_wq in order to let + * nouveau_sched_init() create a dedicated one for VM_BIND jobs. + * + * This is required to ensure that for VM_BIND jobs free_job() work and + * run_job() work can always run concurrently and hence, free_job() work + * can never stall run_job() work. For EXEC jobs we don't have this + * requirement, since EXEC job's free_job() does not require to take any + * locks which indirectly or directly are held for allocations + * elsewhere. + */ + ret = nouveau_sched_init(&cli->sched, drm, NULL, 1); if (ret) goto done; @@ -579,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev) nvif_parent_ctor(&nouveau_parent, &drm->parent); drm->master.base.object.parent = &drm->parent; - ret = nouveau_sched_init(drm); - if (ret) + drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0, + WQ_MAX_ACTIVE); + if (!drm->sched_wq) { + ret = -ENOMEM; goto fail_alloc; + } ret = nouveau_cli_init(drm, "DRM-master", &drm->master); if (ret) - goto fail_sched; + goto fail_wq; ret = nouveau_cli_init(drm, "DRM", &drm->client); if (ret) @@ -655,8 +670,8 @@ fail_ttm: nouveau_cli_fini(&drm->client); fail_master: nouveau_cli_fini(&drm->master); -fail_sched: - nouveau_sched_fini(drm); +fail_wq: + destroy_workqueue(drm->sched_wq); fail_alloc: nvif_parent_dtor(&drm->parent); kfree(drm); @@ -708,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev) } mutex_unlock(&drm->clients_lock); - nouveau_sched_fini(drm); - nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->master); + destroy_workqueue(drm->sched_wq); nvif_parent_dtor(&drm->parent); mutex_destroy(&drm->clients_lock); kfree(drm); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e73a233c6572..8a6d94c8b163 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -93,9 +93,12 @@ struct nouveau_cli { struct nvif_mmu mmu; struct nouveau_vmm vmm; struct nouveau_vmm svm; - struct nouveau_uvmm uvmm; + struct { + struct nouveau_uvmm *ptr; + bool disabled; + } uvmm; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched sched; const struct nvif_mclass *mem; @@ -121,10 +124,7 @@ struct nouveau_cli_work { static inline struct nouveau_uvmm * nouveau_cli_uvmm(struct nouveau_cli *cli) { - if (!cli || !cli->uvmm.vmm.cli) - return NULL; - - return &cli->uvmm; + return cli ? cli->uvmm.ptr : NULL; } static inline struct nouveau_uvmm * @@ -258,6 +258,9 @@ struct nouveau_drm { u64 context_base; } *runl; + /* Workqueue used for channel schedulers. */ + struct workqueue_struct *sched_wq; + /* context for accelerated drm-internal operations */ struct nouveau_channel *cechan; struct nouveau_channel *channel; @@ -298,10 +301,6 @@ struct nouveau_drm { struct mutex lock; bool component_registered; } audio; - - struct drm_gpu_scheduler sched; - struct workqueue_struct *sched_wq; - }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 9a5ef574744b..bc5d71b79ab2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: MIT -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_gem.h" #include "nouveau_mem.h" @@ -86,14 +84,12 @@ */ static int -nouveau_exec_job_submit(struct nouveau_job *job) +nouveau_exec_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); struct nouveau_cli *cli = job->cli; struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; int ret; /* Create a new fence, but do not emit yet. */ @@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job) return ret; nouveau_uvmm_lock(uvmm); - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); - drm_exec_until_all_locked(exec) { - struct drm_gpuva *va; - - drm_gpuvm_for_each_va(va, &uvmm->base) { - if (unlikely(va == &uvmm->base.kernel_alloc_node)) - continue; - - ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) - goto err_uvmm_unlock; - } + ret = drm_gpuvm_exec_lock(vme); + if (ret) { + nouveau_uvmm_unlock(uvmm); + return ret; } nouveau_uvmm_unlock(uvmm); - drm_exec_for_each_locked_object(exec, index, obj) { - struct nouveau_bo *nvbo = nouveau_gem_object(obj); - - ret = nouveau_bo_validate(nvbo, true, false); - if (ret) - goto err_exec_fini; + ret = drm_gpuvm_exec_validate(vme); + if (ret) { + drm_gpuvm_exec_unlock(vme); + return ret; } return 0; - -err_uvmm_unlock: - nouveau_uvmm_unlock(uvmm); -err_exec_fini: - drm_exec_fini(exec); - return ret; - } static void -nouveau_exec_job_armed_submit(struct nouveau_job *job) +nouveau_exec_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -192,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); + nouveau_job_done(job); nouveau_job_free(job); kfree(exec_job->fence); @@ -211,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - nouveau_sched_entity_fini(job->entity); - return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -259,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob, } } + args.file_priv = __args->file_priv; job->chan = __args->chan; - args.sched_entity = __args->sched_entity; - args.file_priv = __args->file_priv; + args.sched = __args->sched; + /* Plus one to account for the HW fence. */ + args.credits = job->push.count + 1; args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -415,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched_entity = &chan16->sched_entity; + args.sched = &chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h index 5488d337bcc0..9b3b151facfd 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.h +++ b/drivers/gpu/drm/nouveau/nouveau_exec.h @@ -3,16 +3,12 @@ #ifndef __NOUVEAU_EXEC_H__ #define __NOUVEAU_EXEC_H__ -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_sched.h" struct nouveau_exec_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; - - struct drm_exec exec; + struct nouveau_sched *sched; struct nouveau_channel *chan; struct { diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ca762ea55413..5057d976fa57 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence) if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) { struct nouveau_fence_chan *fctx = nouveau_fctx(fence); - if (!--fctx->notify_ref) + if (atomic_dec_and_test(&fctx->notify_ref)) drop = 1; } @@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error) void nouveau_fence_context_del(struct nouveau_fence_chan *fctx) { + cancel_work_sync(&fctx->allow_block_work); nouveau_fence_context_kill(fctx, 0); nvif_event_dtor(&fctx->event); fctx->dead = 1; @@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc return ret; } +static void +nouveau_fence_work_allow_block(struct work_struct *work) +{ + struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan, + allow_block_work); + + if (atomic_read(&fctx->notify_ref) == 0) + nvif_event_block(&fctx->event); + else + nvif_event_allow(&fctx->event); +} + void nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx) { @@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha } args; int ret; + INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block); INIT_LIST_HEAD(&fctx->flip); INIT_LIST_HEAD(&fctx->pending); spin_lock_init(&fctx->lock); @@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f) struct nouveau_fence *fence = from_fence(f); struct nouveau_fence_chan *fctx = nouveau_fctx(fence); bool ret; + bool do_work; - if (!fctx->notify_ref++) - nvif_event_allow(&fctx->event); + if (atomic_inc_return(&fctx->notify_ref) == 0) + do_work = true; ret = nouveau_fence_no_signaling(f); if (ret) set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags); - else if (!--fctx->notify_ref) - nvif_event_block(&fctx->event); + else if (atomic_dec_and_test(&fctx->notify_ref)) + do_work = true; + + if (do_work) + schedule_work(&fctx->allow_block_work); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 64d33ae7f356..28f5cf013b89 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -3,6 +3,7 @@ #define __NOUVEAU_FENCE_H__ #include <linux/dma-fence.h> +#include <linux/workqueue.h> #include <nvif/event.h> struct nouveau_drm; @@ -45,7 +46,9 @@ struct nouveau_fence_chan { char name[32]; struct nvif_event event; - int notify_ref, dead, killed; + struct work_struct allow_block_work; + atomic_t notify_ref; + int dead, killed; }; struct nouveau_fence_priv { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index a0d303e5ce3d..49c2bcbef129 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; - if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv) + if (nvbo->no_share && uvmm && + drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv) return -EPERM; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); @@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (unlikely(!uvmm)) return -EINVAL; - resv = &uvmm->resv; + resv = drm_gpuvm_resv(&uvmm->base); } if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART))) @@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) nvbo->valid_domains &= domain; + if (nvbo->no_share) { + nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base); + drm_gem_object_get(nvbo->r_obj); + } + *pnvbo = nvbo; return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 23cd43a7fd19..bf2dc7567ea4 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev) return 0; } -static int nouveau_platform_remove(struct platform_device *pdev) +static void nouveau_platform_remove(struct platform_device *pdev) { struct drm_device *dev = platform_get_drvdata(pdev); nouveau_drm_device_remove(dev); - return 0; } #if IS_ENABLED(CONFIG_OF) @@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = { .of_match_table = of_match_ptr(nouveau_platform_match), }, .probe = nouveau_platform_probe, - .remove = nouveau_platform_remove, + .remove_new = nouveau_platform_remove, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7c376c4ccdcf..dd98f6910f9c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -12,30 +12,28 @@ #include "nouveau_abi16.h" #include "nouveau_sched.h" -/* FIXME - * - * We want to make sure that jobs currently executing can't be deferred by - * other jobs competing for the hardware. Otherwise we might end up with job - * timeouts just because of too many clients submitting too many jobs. We don't - * want jobs to time out because of system load, but because of the job being - * too bulky. - * - * For now allow for up to 16 concurrent jobs in flight until we know how many - * rings the hardware can process in parallel. - */ -#define NOUVEAU_SCHED_HW_SUBMISSIONS 16 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 +/* Starts at 0, since the DRM scheduler interprets those parameters as (initial) + * index to the run-queue array. + */ +enum nouveau_sched_priority { + NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL, + NOUVEAU_SCHED_PRIORITY_COUNT, +}; + int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args) { - struct nouveau_sched_entity *entity = args->sched_entity; + struct nouveau_sched *sched = args->sched; int ret; + INIT_LIST_HEAD(&job->entry); + job->file_priv = args->file_priv; job->cli = nouveau_cli(args->file_priv); - job->entity = entity; + job->sched = sched; job->sync = args->sync; job->resv_usage = args->resv_usage; @@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job, ret = -ENOMEM; goto err_free_objs; } - } - ret = drm_sched_job_init(&job->base, &entity->base, NULL); + ret = drm_sched_job_init(&job->base, &sched->entity, + args->credits, NULL); if (ret) goto err_free_chains; @@ -109,6 +107,27 @@ return ret; } void +nouveau_job_fini(struct nouveau_job *job) +{ + dma_fence_put(job->done_fence); + drm_sched_job_cleanup(&job->base); + + job->ops->free(job); +} + +void +nouveau_job_done(struct nouveau_job *job) +{ + struct nouveau_sched *sched = job->sched; + + spin_lock(&sched->job.list.lock); + list_del(&job->entry); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); +} + +void nouveau_job_free(struct nouveau_job *job) { kfree(job->in_sync.data); @@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job) kfree(job->out_sync.chains); } -void nouveau_job_fini(struct nouveau_job *job) -{ - dma_fence_put(job->done_fence); - drm_sched_job_cleanup(&job->base); - job->ops->free(job); -} - static int sync_find_fence(struct nouveau_job *job, struct drm_nouveau_sync *sync, @@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job) int nouveau_job_submit(struct nouveau_job *job) { - struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct nouveau_sched *sched = job->sched; struct dma_fence *done_fence = NULL; + struct drm_gpuvm_exec vm_exec = { + .vm = &nouveau_cli_uvmm(job->cli)->base, + .flags = DRM_EXEC_IGNORE_DUPLICATES, + .num_fences = 1, + }; int ret; ret = nouveau_job_add_deps(job); @@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job) /* Make sure the job appears on the sched_entity's queue in the same * order as it was submitted. */ - mutex_lock(&entity->mutex); + mutex_lock(&sched->mutex); /* Guarantee we won't fail after the submit() callback returned * successfully. */ if (job->ops->submit) { - ret = job->ops->submit(job); + ret = job->ops->submit(job, &vm_exec); if (ret) goto err_cleanup; } + /* Submit was successful; add the job to the schedulers job list. */ + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); + drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); if (job->sync) done_fence = dma_fence_get(job->done_fence); - /* If a sched job depends on a dma-fence from a job from the same GPU - * scheduler instance, but a different scheduler entity, the GPU - * scheduler does only wait for the particular job to be scheduled, - * rather than for the job to fully complete. This is due to the GPU - * scheduler assuming that there is a scheduler instance per ring. - * However, the current implementation, in order to avoid arbitrary - * amounts of kthreads, has a single scheduler instance while scheduler - * entities represent rings. - * - * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all - * out-fences in order to force the scheduler to wait for full job - * completion for dependent jobs from different entities and same - * scheduler instance. - * - * There is some work in progress [1] to address the issues of firmware - * schedulers; once it is in-tree the scheduler topology in Nouveau - * should be re-worked accordingly. - * - * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/ - */ - set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); - if (job->ops->armed_submit) - job->ops->armed_submit(job); + job->ops->armed_submit(job, &vm_exec); nouveau_job_fence_attach(job); @@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job) drm_sched_entity_push_job(&job->base); - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); if (done_fence) { dma_fence_wait(done_fence, true); @@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job) return 0; err_cleanup: - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); nouveau_job_fence_attach_cleanup(job); err: job->state = NOUVEAU_JOB_SUBMIT_FAILED; return ret; } -bool -nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work) -{ - return queue_work(entity->sched_wq, work); -} - static struct dma_fence * nouveau_job_run(struct nouveau_job *job) { @@ -399,50 +392,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq) -{ - mutex_init(&entity->mutex); - spin_lock_init(&entity->job.list.lock); - INIT_LIST_HEAD(&entity->job.list.head); - init_waitqueue_head(&entity->job.wq); - - entity->sched_wq = sched_wq; - return drm_sched_entity_init(&entity->base, - DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); -} - -void -nouveau_sched_entity_fini(struct nouveau_sched_entity *entity) -{ - drm_sched_entity_destroy(&entity->base); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, }; -int nouveau_sched_init(struct nouveau_drm *drm) +int +nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) { - struct drm_gpu_scheduler *sched = &drm->sched; + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS); + int ret; - drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq"); - if (!drm->sched_wq) - return -ENOMEM; + if (!wq) { + wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE, + current->pid); + if (!wq) + return -ENOMEM; + + sched->wq = wq; + } - return drm_sched_init(sched, &nouveau_sched_ops, - DRM_SCHED_PRIORITY_COUNT, - NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, - NULL, NULL, "nouveau_sched", drm->dev->dev); + ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq, + NOUVEAU_SCHED_PRIORITY_COUNT, + credit_limit, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + if (ret) + goto fail_wq; + + /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use + * when we want to have a single run-queue only. + * + * It's not documented, but one will find out when trying to use any + * other priority running into faults, because the scheduler uses the + * priority as array index. + * + * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not + * matching the enum type used in drm_sched_entity_init(). + */ + ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL, + &drm_sched, 1, NULL); + if (ret) + goto fail_sched; + + mutex_init(&sched->mutex); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); + + return 0; + +fail_sched: + drm_sched_fini(drm_sched); +fail_wq: + if (sched->wq) + destroy_workqueue(sched->wq); + return ret; } -void nouveau_sched_fini(struct nouveau_drm *drm) +void +nouveau_sched_fini(struct nouveau_sched *sched) { - destroy_workqueue(drm->sched_wq); - drm_sched_fini(&drm->sched); + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; + + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + + drm_sched_entity_fini(entity); + drm_sched_fini(drm_sched); + + /* Destroy workqueue after scheduler tear down, otherwise it might still + * be in use. + */ + if (sched->wq) + destroy_workqueue(sched->wq); } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index 27ac19792597..a6528f5981e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -5,7 +5,7 @@ #include <linux/types.h> -#include <drm/drm_exec.h> +#include <drm/drm_gpuvm.h> #include <drm/gpu_scheduler.h> #include "nouveau_drv.h" @@ -26,7 +26,8 @@ enum nouveau_job_state { struct nouveau_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; + u32 credits; enum dma_resv_usage resv_usage; bool sync; @@ -49,12 +50,12 @@ struct nouveau_job { enum nouveau_job_state state; - struct nouveau_sched_entity *entity; + struct nouveau_sched *sched; + struct list_head entry; struct drm_file *file_priv; struct nouveau_cli *cli; - struct drm_exec exec; enum dma_resv_usage resv_usage; struct dma_fence *done_fence; @@ -76,8 +77,8 @@ struct nouveau_job { /* If .submit() returns without any error, it is guaranteed that * armed_submit() is called. */ - int (*submit)(struct nouveau_job *); - void (*armed_submit)(struct nouveau_job *); + int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *); + void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *); struct dma_fence *(*run)(struct nouveau_job *); void (*free)(struct nouveau_job *); enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *); @@ -90,20 +91,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args, int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args); -void nouveau_job_free(struct nouveau_job *job); - -int nouveau_job_submit(struct nouveau_job *job); void nouveau_job_fini(struct nouveau_job *job); +int nouveau_job_submit(struct nouveau_job *job); +void nouveau_job_done(struct nouveau_job *job); +void nouveau_job_free(struct nouveau_job *job); -#define to_nouveau_sched_entity(entity) \ - container_of((entity), struct nouveau_sched_entity, base) - -struct nouveau_sched_entity { - struct drm_sched_entity base; +struct nouveau_sched { + struct drm_gpu_scheduler base; + struct drm_sched_entity entity; + struct workqueue_struct *wq; struct mutex mutex; - struct workqueue_struct *sched_wq; - struct { struct { struct list_head head; @@ -113,15 +111,8 @@ struct nouveau_sched_entity { } job; }; -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq); -void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity); - -bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work); - -int nouveau_sched_init(struct nouveau_drm *drm); -void nouveau_sched_fini(struct nouveau_drm *drm); +int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_fini(struct nouveau_sched *sched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 5cf892c50f43..4f223c972c6a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -62,6 +62,8 @@ struct bind_job_op { enum vm_bind_op op; u32 flags; + struct drm_gpuvm_bo *vm_bo; + struct { u64 addr; u64 range; @@ -436,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg) static void op_map_prepare_unwind(struct nouveau_uvma *uvma) { + struct drm_gpuva *va = &uvma->va; nouveau_uvma_gem_put(uvma); - drm_gpuva_remove(&uvma->va); + drm_gpuva_remove(va); nouveau_uvma_free(uvma); } @@ -466,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, break; case DRM_GPUVA_OP_REMAP: { struct drm_gpuva_op_remap *r = &op->remap; + struct drm_gpuva *va = r->unmap->va; if (r->next) op_map_prepare_unwind(new->next); @@ -473,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (r->prev) op_map_prepare_unwind(new->prev); - op_unmap_prepare_unwind(r->unmap->va); + op_unmap_prepare_unwind(va); break; } case DRM_GPUVA_OP_UNMAP: @@ -604,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u) drm_gpuva_unmap(u); } +/* + * Note: @args should not be NULL when calling for a map operation. + */ static int nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -624,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (ret) goto unwind; - if (args && vmm_get_range) { + if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, vmm_get_range); if (ret) { @@ -632,6 +639,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, goto unwind; } } + break; } case DRM_GPUVA_OP_REMAP: { @@ -929,25 +937,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range) { - u64 end = addr + range; - u64 kernel_managed_end = uvmm->kernel_managed_addr + - uvmm->kernel_managed_size; - if (addr & ~PAGE_MASK) return -EINVAL; if (range & ~PAGE_MASK) return -EINVAL; - if (end <= addr) - return -EINVAL; - - if (addr < NOUVEAU_VA_SPACE_START || - end > NOUVEAU_VA_SPACE_END) - return -EINVAL; - - if (addr < kernel_managed_end && - end > uvmm->kernel_managed_addr) + if (!drm_gpuvm_range_valid(&uvmm->base, addr, range)) return -EINVAL; return 0; @@ -970,6 +966,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref) { struct nouveau_uvmm_bind_job *job = container_of(kref, struct nouveau_uvmm_bind_job, kref); + struct bind_job_op *op, *next; + + list_for_each_op_safe(op, next, &job->ops) { + list_del(&op->entry); + kfree(op); + } nouveau_job_free(&job->base); kfree(job); @@ -1011,14 +1013,16 @@ bind_validate_op(struct nouveau_job *job, static void bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) { - struct nouveau_uvmm_bind_job *bind_job; - struct nouveau_sched_entity *entity = job->entity; + struct nouveau_sched *sched = job->sched; + struct nouveau_job *__job; struct bind_job_op *op; u64 end = addr + range; again: - spin_lock(&entity->job.list.lock); - list_for_each_entry(bind_job, &entity->job.list.head, entry) { + spin_lock(&sched->job.list.lock); + list_for_each_entry(__job, &sched->job.list.head, entry) { + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); + list_for_each_op(op, &bind_job->ops) { if (op->op == OP_UNMAP) { u64 op_addr = op->va.addr; @@ -1026,7 +1030,7 @@ again: if (!(end <= op_addr || addr >= op_end)) { nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1034,7 +1038,7 @@ again: } } } - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); } static int @@ -1113,22 +1117,28 @@ bind_validate_region(struct nouveau_job *job) } static void -bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) +bind_link_gpuvas(struct bind_job_op *bop) { + struct nouveau_uvma_prealloc *new = &bop->new; + struct drm_gpuvm_bo *vm_bo = bop->vm_bo; + struct drm_gpuva_ops *ops = bop->ops; struct drm_gpuva_op *op; drm_gpuva_for_each_op(op, ops) { switch (op->op) { case DRM_GPUVA_OP_MAP: - drm_gpuva_link(&new->map->va); + drm_gpuva_link(&new->map->va, vm_bo); break; - case DRM_GPUVA_OP_REMAP: + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva *va = op->remap.unmap->va; + if (op->remap.prev) - drm_gpuva_link(&new->prev->va); + drm_gpuva_link(&new->prev->va, va->vm_bo); if (op->remap.next) - drm_gpuva_link(&new->next->va); - drm_gpuva_unlink(op->remap.unmap->va); + drm_gpuva_link(&new->next->va, va->vm_bo); + drm_gpuva_unlink(va); break; + } case DRM_GPUVA_OP_UNMAP: drm_gpuva_unlink(op->unmap.va); break; @@ -1139,21 +1149,70 @@ bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) } static int -nouveau_uvmm_bind_job_submit(struct nouveau_job *job) +bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec, + unsigned int num_fences) +{ + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); + struct bind_job_op *op; + int ret; + + list_for_each_op(op, &bind_job->ops) { + struct drm_gpuva_op *va_op; + + if (!op->ops) + continue; + + drm_gpuva_for_each_op(va_op, op->ops) { + struct drm_gem_object *obj = op_gem_obj(va_op); + + if (unlikely(!obj)) + continue; + + ret = drm_exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + + /* Don't validate GEMs backing mappings we're about to + * unmap, it's not worth the effort. + */ + if (va_op->op == DRM_GPUVA_OP_UNMAP) + continue; + + ret = nouveau_bo_validate(nouveau_gem_object(obj), + true, false); + if (ret) + return ret; + } + } + + return 0; +} + +static int +nouveau_uvmm_bind_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - struct drm_exec *exec = &job->exec; + struct drm_exec *exec = &vme->exec; struct bind_job_op *op; int ret; list_for_each_op(op, &bind_job->ops) { if (op->op == OP_MAP) { - op->gem.obj = drm_gem_object_lookup(job->file_priv, - op->gem.handle); - if (!op->gem.obj) + struct drm_gem_object *obj = op->gem.obj = + drm_gem_object_lookup(job->file_priv, + op->gem.handle); + if (!obj) return -ENOENT; + + dma_resv_lock(obj->resv, NULL); + op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); + + drm_gpuvm_bo_extobj_add(op->vm_bo); } ret = bind_validate_op(job, op); @@ -1176,6 +1235,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) * unwind all GPU VA space changes on failure. */ nouveau_uvmm_lock(uvmm); + list_for_each_op(op, &bind_job->ops) { switch (op->op) { case OP_MAP_SPARSE: @@ -1287,55 +1347,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } } - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(exec, vme->flags, 0); drm_exec_until_all_locked(exec) { - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - ret = drm_exec_prepare_obj(exec, obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } - } - } - } - - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - /* Don't validate GEMs backing mappings we're about to - * unmap, it's not worth the effort. - */ - if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP)) - continue; - - ret = nouveau_bo_validate(nouveau_gem_object(obj), - true, false); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } + ret = bind_lock_validate(job, exec, vme->num_fences); + drm_exec_retry_on_contention(exec); + if (ret) { + op = list_last_op(&bind_job->ops); + goto unwind; } } @@ -1364,7 +1382,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) case OP_UNMAP_SPARSE: case OP_MAP: case OP_UNMAP: - bind_link_gpuvas(op->ops, &op->new); + bind_link_gpuvas(op); break; default: break; @@ -1372,10 +1390,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } nouveau_uvmm_unlock(uvmm); - spin_lock(&entity->job.list.lock); - list_add(&bind_job->entry, &entity->job.list.head); - spin_unlock(&entity->job.list.lock); - return 0; unwind_continue: @@ -1410,21 +1424,17 @@ unwind: } nouveau_uvmm_unlock(uvmm); - drm_exec_fini(exec); + drm_gpuvm_exec_unlock(vme); return ret; } static void -nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job) +nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -1462,14 +1472,11 @@ out: } static void -nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) +nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job) { - struct nouveau_uvmm_bind_job *bind_job = - container_of(work, struct nouveau_uvmm_bind_job, work); - struct nouveau_job *job = &bind_job->base; + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); - struct nouveau_sched_entity *entity = job->entity; - struct bind_job_op *op, *next; + struct bind_job_op *op; list_for_each_op(op, &bind_job->ops) { struct drm_gem_object *obj = op->gem.obj; @@ -1511,42 +1518,27 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) if (!IS_ERR_OR_NULL(op->ops)) drm_gpuva_ops_free(&uvmm->base, op->ops); + if (!IS_ERR_OR_NULL(op->vm_bo)) { + dma_resv_lock(obj->resv, NULL); + drm_gpuvm_bo_put(op->vm_bo); + dma_resv_unlock(obj->resv); + } + if (obj) drm_gem_object_put(obj); } - spin_lock(&entity->job.list.lock); - list_del(&bind_job->entry); - spin_unlock(&entity->job.list.lock); - + nouveau_job_done(job); complete_all(&bind_job->complete); - wake_up(&entity->job.wq); - - /* Remove and free ops after removing the bind job from the job list to - * avoid races against bind_validate_map_sparse(). - */ - list_for_each_op_safe(op, next, &bind_job->ops) { - list_del(&op->entry); - kfree(op); - } nouveau_uvmm_bind_job_put(bind_job); } -static void -nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job) -{ - struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - - nouveau_sched_entity_qwork(entity, &bind_job->work); -} - static struct nouveau_job_ops nouveau_bind_job_ops = { .submit = nouveau_uvmm_bind_job_submit, .armed_submit = nouveau_uvmm_bind_job_armed_submit, .run = nouveau_uvmm_bind_job_run, - .free = nouveau_uvmm_bind_job_free_qwork, + .free = nouveau_uvmm_bind_job_cleanup, }; static int @@ -1607,7 +1599,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, return ret; INIT_LIST_HEAD(&job->ops); - INIT_LIST_HEAD(&job->entry); for (i = 0; i < __args->op.count; i++) { ret = bind_job_op_from_uop(&op, &__args->op.s[i]); @@ -1618,11 +1609,12 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, } init_completion(&job->complete); - INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn); - args.sched_entity = __args->sched_entity; args.file_priv = __args->file_priv; + args.sched = __args->sched; + args.credits = 1; + args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -1648,18 +1640,6 @@ err_free: return ret; } -int -nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, - void *data, - struct drm_file *file_priv) -{ - struct nouveau_cli *cli = nouveau_cli(file_priv); - struct drm_nouveau_vm_init *init = data; - - return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr, - init->kernel_managed_size); -} - static int nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args) { @@ -1760,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched_entity = &cli->sched_entity; + args.sched = &cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); @@ -1776,15 +1756,18 @@ void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_map(uvma, mem); - drm_gpuva_invalidate(va, false); + nouveau_uvma_map(uvma, mem); + drm_gpuva_invalidate(va, false); + } } } @@ -1792,29 +1775,62 @@ void nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_unmap(uvma); - drm_gpuva_invalidate(va, true); + nouveau_uvma_unmap(uvma); + drm_gpuva_invalidate(va, true); + } } } +static void +nouveau_uvmm_free(struct drm_gpuvm *gpuvm) +{ + struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); + + kfree(uvmm); +} + +static int +nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + + return nouveau_bo_validate(nvbo, true, false); +} + +static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, + .vm_bo_validate = nouveau_uvmm_bo_validate, +}; + int -nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size) +nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, + void *data, + struct drm_file *file_priv) { + struct nouveau_uvmm *uvmm; + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_device *drm = cli->drm->dev; + struct drm_gem_object *r_obj; + struct drm_nouveau_vm_init *init = data; + u64 kernel_managed_end; int ret; - u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; - mutex_init(&uvmm->mutex); - dma_resv_init(&uvmm->resv); - mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); - mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); + if (check_add_overflow(init->kernel_managed_addr, + init->kernel_managed_size, + &kernel_managed_end)) + return -EINVAL; + + if (kernel_managed_end > NOUVEAU_VA_SPACE_END) + return -EINVAL; mutex_lock(&cli->mutex); @@ -1823,39 +1839,48 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, goto out_unlock; } - if (kernel_managed_end <= kernel_managed_addr) { - ret = -EINVAL; + uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL); + if (!uvmm) { + ret = -ENOMEM; goto out_unlock; } - if (kernel_managed_end > NOUVEAU_VA_SPACE_END) { - ret = -EINVAL; + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { + kfree(uvmm); + ret = -ENOMEM; goto out_unlock; } - uvmm->kernel_managed_addr = kernel_managed_addr; - uvmm->kernel_managed_size = kernel_managed_size; + mutex_init(&uvmm->mutex); + mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); - drm_gpuvm_init(&uvmm->base, cli->name, + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, - kernel_managed_addr, kernel_managed_size, - NULL); + init->kernel_managed_addr, + init->kernel_managed_size, + &gpuvm_ops); + /* GPUVM takes care from here on. */ + drm_gem_object_put(r_obj); ret = nvif_vmm_ctor(&cli->mmu, "uvmm", cli->vmm.vmm.object.oclass, RAW, - kernel_managed_addr, kernel_managed_size, - NULL, 0, &cli->uvmm.vmm.vmm); + init->kernel_managed_addr, + init->kernel_managed_size, + NULL, 0, &uvmm->vmm.vmm); if (ret) - goto out_free_gpuva_mgr; + goto out_gpuvm_fini; - cli->uvmm.vmm.cli = cli; + uvmm->vmm.cli = cli; + cli->uvmm.ptr = uvmm; mutex_unlock(&cli->mutex); return 0; -out_free_gpuva_mgr: - drm_gpuvm_destroy(&uvmm->base); +out_gpuvm_fini: + drm_gpuvm_put(&uvmm->base); out_unlock: mutex_unlock(&cli->mutex); return ret; @@ -1867,15 +1892,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) MA_STATE(mas, &uvmm->region_mt, 0, 0); struct nouveau_uvma_region *reg; struct nouveau_cli *cli = uvmm->vmm.cli; - struct nouveau_sched_entity *entity = &cli->sched_entity; struct drm_gpuva *va, *next; - if (!cli) - return; - - rmb(); /* for list_empty to work without lock */ - wait_event(entity->job.wq, list_empty(&entity->job.list.head)); - nouveau_uvmm_lock(uvmm); drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { struct nouveau_uvma *uvma = uvma_from_va(va); @@ -1910,8 +1928,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) mutex_lock(&cli->mutex); nouveau_vmm_fini(&uvmm->vmm); - drm_gpuvm_destroy(&uvmm->base); + drm_gpuvm_put(&uvmm->base); mutex_unlock(&cli->mutex); - - dma_resv_fini(&uvmm->resv); } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index a308c59760a5..9d3c348581eb 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -12,12 +12,6 @@ struct nouveau_uvmm { struct nouveau_vmm vmm; struct maple_tree region_mt; struct mutex mutex; - struct dma_resv resv; - - u64 kernel_managed_addr; - u64 kernel_managed_size; - - bool disabled; }; struct nouveau_uvma_region { @@ -50,8 +44,6 @@ struct nouveau_uvmm_bind_job { struct nouveau_job base; struct kref kref; - struct list_head entry; - struct work_struct work; struct completion complete; /* struct bind_job_op */ @@ -60,7 +52,7 @@ struct nouveau_uvmm_bind_job { struct nouveau_uvmm_bind_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; unsigned int flags; @@ -82,8 +74,6 @@ struct nouveau_uvmm_bind_job_args { #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base) -int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size); void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm); void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem); diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c index 5b71a5a5cd85..cdbc75e3d1f6 100644 --- a/drivers/gpu/drm/nouveau/nv04_fence.c +++ b/drivers/gpu/drm/nouveau/nv04_fence.c @@ -39,7 +39,7 @@ struct nv04_fence_priv { static int nv04_fence_emit(struct nouveau_fence *fence) { - struct nvif_push *push = fence->channel->chan.push; + struct nvif_push *push = unrcu_pointer(fence->channel)->chan.push; int ret = PUSH_WAIT(push, 2); if (ret == 0) { PUSH_NVSQ(push, NV_SW, 0x0150, fence->base.seqno); diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c index a6c877135598..61fed7792e41 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/event.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c @@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy) static void nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_del_init(&ntfy->head); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy) { - spin_lock_irq(&ntfy->event->list_lock); + write_lock_irq(&ntfy->event->list_lock); list_add_tail(&ntfy->head, &ntfy->event->ntfy); - spin_unlock_irq(&ntfy->event->list_lock); + write_unlock_irq(&ntfy->event->list_lock); } static void @@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) return; nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id); - spin_lock_irqsave(&event->list_lock, flags); + read_lock_irqsave(&event->list_lock, flags); list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) { if (ntfy->id == id && ntfy->bits & bits) { @@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits) } } - spin_unlock_irqrestore(&event->list_lock, flags); + read_unlock_irqrestore(&event->list_lock, flags); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c index 457ec5db794d..b24eb1e560bc 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c @@ -209,7 +209,7 @@ nvkm_disp_dtor(struct nvkm_engine *engine) nvkm_head_del(&head); } - if (disp->func->dtor) + if (disp->func && disp->func->dtor) disp->func->dtor(disp); return data; @@ -243,8 +243,10 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device, spin_lock_init(&disp->client.lock); ret = nvkm_engine_ctor(&nvkm_disp, device, type, inst, true, &disp->engine); - if (ret) + if (ret) { + disp->func = NULL; return ret; + } if (func->super) { disp->super.wq = create_singlethread_workqueue("nvkm-disp"); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c index 298035070b3a..6a0a4d3b8902 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c @@ -282,7 +282,7 @@ r535_sor_bl_get(struct nvkm_ior *sor) { struct nvkm_disp *disp = sor->disp; NV0073_CTRL_SPECIFIC_BACKLIGHT_BRIGHTNESS_PARAMS *ctrl; - int lvl; + int ret, lvl; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_SPECIFIC_GET_BACKLIGHT_BRIGHTNESS, @@ -292,9 +292,11 @@ r535_sor_bl_get(struct nvkm_ior *sor) ctrl->displayId = BIT(sor->asy.outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } lvl = ctrl->brightness; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -649,9 +651,11 @@ r535_conn_new(struct nvkm_disp *disp, u32 id) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(id); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return (void *)ctrl; + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ERR_PTR(ret); + } list_for_each_entry(conn, &disp->conns, head) { if (conn->index == ctrl->data[0].index) { @@ -686,7 +690,7 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda) struct nvkm_disp *disp = outp->disp; struct nvkm_ior *ior; NV0073_CTRL_DFP_ASSIGN_SOR_PARAMS *ctrl; - int or; + int ret, or; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_ASSIGN_SOR, sizeof(*ctrl)); @@ -699,9 +703,11 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda) if (hda) ctrl->flags |= NVDEF(NV0073_CTRL, DFP_ASSIGN_SOR_FLAGS, AUDIO, OPTIMAL); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } for (or = 0; or < ARRAY_SIZE(ctrl->sorAssignListWithTag); or++) { if (ctrl->sorAssignListWithTag[or].displayMask & BIT(outp->index)) { @@ -727,6 +733,7 @@ static int r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid) { NV0073_CTRL_SYSTEM_GET_ACTIVE_PARAMS *ctrl; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_SYSTEM_GET_ACTIVE, sizeof(*ctrl)); @@ -736,9 +743,11 @@ r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid) ctrl->subDeviceInstance = 0; ctrl->head = head; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } *displayid = ctrl->displayId; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -772,9 +781,11 @@ r535_outp_inherit(struct nvkm_outp *outp) ctrl->subDeviceInstance = 0; ctrl->displayId = displayid; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return NULL; + } id = ctrl->index; proto = ctrl->protocol; @@ -825,6 +836,7 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp) { NV0073_CTRL_DFP_GET_INFO_PARAMS *ctrl; struct nvkm_disp *disp = outp->disp; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_GET_INFO, sizeof(*ctrl)); if (IS_ERR(ctrl)) @@ -832,9 +844,11 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp) ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } nvkm_debug(&disp->engine.subdev, "DFP %08x: flags:%08x flags2:%08x\n", ctrl->displayId, ctrl->flags, ctrl->flags2); @@ -858,9 +872,11 @@ r535_outp_detect(struct nvkm_outp *outp) ctrl->subDeviceInstance = 0; ctrl->displayMask = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } if (ctrl->displayMask & BIT(outp->index)) { ret = r535_outp_dfp_get_info(outp); @@ -895,6 +911,7 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid) { NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID_PARAMS *ctrl; struct nvkm_disp *disp = outp->disp; + int ret; ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID, @@ -904,9 +921,11 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } *pid = ctrl->displayIdAssigned; nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); @@ -938,38 +957,60 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, bool mst, u8 link_nr, u8 { struct nvkm_disp *disp = outp->disp; NV0073_CTRL_DP_CTRL_PARAMS *ctrl; - int ret; - - ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + int ret, retries; + u32 cmd, data; - ctrl->subDeviceInstance = 0; - ctrl->displayId = BIT(outp->index); - ctrl->cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) | - NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) | - NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES); - ctrl->data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) | - NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) | - NVVAL(NV0073_CTRL, DP_DATA, TARGET, target); + cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) | + NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) | + NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES); + data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) | + NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) | + NVVAL(NV0073_CTRL, DP_DATA, TARGET, target); if (mst) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM); if (outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE); if (target == 0 && (outp->dp.dpcd[DPCD_RC02] & 0x20) && !(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED)) - ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES); + cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + /* We should retry up to 3 times, but only if GSP asks politely */ + for (retries = 0; retries < 3; ++retries) { + ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, + sizeof(*ctrl)); + if (IS_ERR(ctrl)) + return PTR_ERR(ctrl); + + ctrl->subDeviceInstance = 0; + ctrl->displayId = BIT(outp->index); + ctrl->retryTimeMs = 0; + ctrl->cmd = cmd; + ctrl->data = data; + + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret == -EAGAIN && ctrl->retryTimeMs) { + /* + * Device (likely an eDP panel) isn't ready yet, wait for the time specified + * by GSP before retrying again + */ + nvkm_debug(&disp->engine.subdev, + "Waiting %dms for GSP LT panel delay before retrying\n", + ctrl->retryTimeMs); + msleep(ctrl->retryTimeMs); + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + } else { + /* GSP didn't say to retry, or we were successful */ + if (ctrl->err) + ret = -EIO; + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + break; + } + } - ret = ctrl->err ? -EIO : 0; - nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return ret; } @@ -1036,9 +1077,11 @@ r535_dp_aux_xfer(struct nvkm_outp *outp, u8 type, u32 addr, u8 *data, u8 *psize) ctrl->size = !ctrl->bAddrOnly ? (size - 1) : 0; memcpy(ctrl->data, data, size); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); return PTR_ERR(ctrl); + } memcpy(data, ctrl->data, size); *psize = ctrl->size; @@ -1111,10 +1154,13 @@ r535_tmds_edid_get(struct nvkm_outp *outp, u8 *data, u16 *psize) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(outp->index); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } + ret = -E2BIG; if (ctrl->bufferSize <= *psize) { memcpy(data, ctrl->edidBuffer, ctrl->bufferSize); *psize = ctrl->bufferSize; @@ -1153,9 +1199,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id) ctrl->subDeviceInstance = 0; ctrl->displayId = BIT(id); - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } switch (ctrl->type) { case NV0073_CTRL_SPECIFIC_OR_TYPE_NONE: @@ -1229,9 +1277,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id) ctrl->sorIndex = ~0; - ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl)); - if (IS_ERR(ctrl)) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl)); + if (ret) { + nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl); + return ret; + } switch (NVVAL_GET(ctrl->maxLinkRate, NV0073_CTRL_CMD, DP_GET_CAPS, MAX_LINK_RATE)) { case NV0073_CTRL_CMD_DP_GET_CAPS_MAX_LINK_RATE_1_62: @@ -1465,8 +1515,6 @@ r535_disp_oneinit(struct nvkm_disp *disp) bool nvhg = acpi_check_dsm(handle, &NVHG_DSM_GUID, NVHG_DSM_REV, 1ULL << 0x00000014); - printk(KERN_ERR "bl: nbci:%d nvhg:%d\n", nbci, nvhg); - if (nbci || nvhg) { union acpi_object argv4 = { .buffer.type = ACPI_TYPE_BUFFER, @@ -1479,9 +1527,6 @@ r535_disp_oneinit(struct nvkm_disp *disp) if (!obj) { acpi_handle_info(handle, "failed to evaluate _DSM\n"); } else { - printk(KERN_ERR "bl: obj type %d\n", obj->type); - printk(KERN_ERR "bl: obj len %d\n", obj->package.count); - for (int i = 0; i < obj->package.count; i++) { union acpi_object *elt = &obj->package.elements[i]; u32 size; @@ -1491,12 +1536,10 @@ r535_disp_oneinit(struct nvkm_disp *disp) else size = 4; - printk(KERN_ERR "elt %03d: type %d size %d\n", i, elt->type, size); memcpy(&ctrl->backLightData[ctrl->backLightDataSize], &elt->integer.value, size); ctrl->backLightDataSize += size; } - printk(KERN_ERR "bl: data size %d\n", ctrl->backLightDataSize); ctrl->status = 0; ACPI_FREE(obj); } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c index e4279f1772a1..377d0e0cef84 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c @@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc) /* Ensure an ior is hooked up to this outp already */ ior = outp->func->inherit(outp); - if (!ior) + if (!ior || !ior->arm.head) return -ENODEV; /* With iors, there will be a separate output path for each type of connector - and all of diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index 87a62d4ff4bd..7d4716dcd512 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -24,7 +24,6 @@ #include "chan.h" #include "chid.h" #include "cgrp.h" -#include "chid.h" #include "runl.h" #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c index c8ce7ff18713..e74493a4569e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c @@ -550,6 +550,10 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo) struct nvkm_engn *engn = list_first_entry(&runl->engns, typeof(*engn), head); runl->nonstall.vector = engn->func->nonstall(engn); + + /* if no nonstall vector just keep going */ + if (runl->nonstall.vector == -1) + continue; if (runl->nonstall.vector < 0) { RUNL_ERROR(runl, "nonstall %d", runl->nonstall.vector); return runl->nonstall.vector; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c index 3adbb05ff587..3454c7d29502 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c @@ -242,6 +242,7 @@ r535_chan_id_put(struct nvkm_chan *chan) nvkm_memory_unref(&userd->mem); nvkm_chid_put(runl->chid, userd->chid, &chan->cgrp->lock); list_del(&userd->head); + kfree(userd); } break; @@ -350,7 +351,7 @@ r535_engn_nonstall(struct nvkm_engn *engn) int ret; ret = nvkm_gsp_intr_nonstall(subdev->device->gsp, subdev->type, subdev->inst); - WARN_ON(ret < 0); + WARN_ON(ret == -ENOENT); return ret; } @@ -539,7 +540,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo) struct nvkm_runl *runl; struct nvkm_engn *engn; u32 cgids = 2048; - u32 chids = 2048 / CHID_PER_USERD; + u32 chids = 2048; int ret; NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c index 04bceaa28a19..da1bebb896f7 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c @@ -25,12 +25,8 @@ int nvkm_gsp_intr_nonstall(struct nvkm_gsp *gsp, enum nvkm_subdev_type type, int inst) { for (int i = 0; i < gsp->intr_nr; i++) { - if (gsp->intr[i].type == type && gsp->intr[i].inst == inst) { - if (gsp->intr[i].nonstall != ~0) - return gsp->intr[i].nonstall; - - return -EINVAL; - } + if (gsp->intr[i].type == type && gsp->intr[i].inst == inst) + return gsp->intr[i].nonstall; } return -ENOENT; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index e31f9641114b..9ee58e2a0eb2 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -70,6 +70,20 @@ struct r535_gsp_msg { #define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data) +static int +r535_rpc_status_to_errno(uint32_t rpc_status) +{ + switch (rpc_status) { + case 0x55: /* NV_ERR_NOT_READY */ + case 0x66: /* NV_ERR_TIMEOUT_RETRY */ + return -EAGAIN; + case 0x51: /* NV_ERR_NO_MEMORY */ + return -ENOMEM; + default: + return -EINVAL; + } +} + static void * r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime) { @@ -298,7 +312,8 @@ retry: struct nvkm_gsp_msgq_ntfy *ntfy = &gsp->msgq.ntfy[i]; if (ntfy->fn == msg->function) { - ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg)); + if (ntfy->func) + ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg)); break; } } @@ -365,10 +380,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc) } ret = r535_gsp_cmdq_push(gsp, rpc); - if (ret) { - mutex_unlock(&gsp->cmdq.mutex); + if (ret) return ERR_PTR(ret); - } if (wait) { msg = r535_gsp_msg_recv(gsp, fn, repc); @@ -585,14 +598,14 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc) return rpc; if (rpc->status) { - nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status); - ret = ERR_PTR(-EINVAL); + ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status)); + if (PTR_ERR(ret) != -EAGAIN) + nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status); } else { ret = repc ? rpc->params : NULL; } - if (IS_ERR_OR_NULL(ret)) - nvkm_gsp_rpc_done(gsp, rpc); + nvkm_gsp_rpc_done(gsp, rpc); return ret; } @@ -625,29 +638,34 @@ r535_gsp_rpc_rm_ctrl_done(struct nvkm_gsp_object *object, void *repv) { rpc_gsp_rm_control_v03_00 *rpc = container_of(repv, typeof(*rpc), params); + if (!repv) + return; nvkm_gsp_rpc_done(object->client->gsp, rpc); } -static void * -r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc) +static int +r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **argv, u32 repc) { - rpc_gsp_rm_control_v03_00 *rpc = container_of(argv, typeof(*rpc), params); + rpc_gsp_rm_control_v03_00 *rpc = container_of((*argv), typeof(*rpc), params); struct nvkm_gsp *gsp = object->client->gsp; - void *ret; + int ret = 0; rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc); - if (IS_ERR_OR_NULL(rpc)) - return rpc; + if (IS_ERR_OR_NULL(rpc)) { + *argv = NULL; + return PTR_ERR(rpc); + } if (rpc->status) { - nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n", - object->client->object.handle, object->handle, rpc->cmd, rpc->status); - ret = ERR_PTR(-EINVAL); - } else { - ret = repc ? rpc->params : NULL; + ret = r535_rpc_status_to_errno(rpc->status); + if (ret != -EAGAIN) + nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n", + object->client->object.handle, object->handle, rpc->cmd, rpc->status); } - if (IS_ERR_OR_NULL(ret)) + if (repc) + *argv = rpc->params; + else nvkm_gsp_rpc_done(gsp, rpc); return ret; @@ -689,8 +707,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc) struct nvfw_gsp_rpc *rpc; rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64))); - if (!rpc) - return NULL; + if (IS_ERR(rpc)) + return ERR_CAST(rpc); rpc->header_version = 0x03000000; rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V'; @@ -845,9 +863,11 @@ r535_gsp_intr_get_table(struct nvkm_gsp *gsp) if (IS_ERR(ctrl)) return PTR_ERR(ctrl); - ctrl = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, ctrl, sizeof(*ctrl)); - if (WARN_ON(IS_ERR(ctrl))) - return PTR_ERR(ctrl); + ret = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, &ctrl, sizeof(*ctrl)); + if (WARN_ON(ret)) { + nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, ctrl); + return ret; + } for (unsigned i = 0; i < ctrl->tableLen; i++) { enum nvkm_subdev_type type; @@ -1048,7 +1068,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp) char *strings; int str_offset; int i; - size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES; + size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES); /* add strings + null terminator */ for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++) @@ -1101,16 +1121,12 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps) if (!obj) return; - printk(KERN_ERR "nvop: obj type %d\n", obj->type); - printk(KERN_ERR "nvop: obj len %d\n", obj->buffer.length); - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) return; caps->status = 0; caps->optimusCaps = *(u32 *)obj->buffer.pointer; - printk(KERN_ERR "nvop: caps %08x\n", caps->optimusCaps); ACPI_FREE(obj); @@ -1137,9 +1153,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) if (!obj) return; - printk(KERN_ERR "jt: obj type %d\n", obj->type); - printk(KERN_ERR "jt: obj len %d\n", obj->buffer.length); - if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) || WARN_ON(obj->buffer.length != 4)) return; @@ -1148,7 +1161,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt) jt->jtCaps = *(u32 *)obj->buffer.pointer; jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20; jt->bSBIOSCaps = 0; - printk(KERN_ERR "jt: caps %08x rev:%04x\n", jt->jtCaps, jt->jtRevId); ACPI_FREE(obj); @@ -1159,7 +1171,9 @@ static void r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, MUX_METHOD_DATA_ELEMENT *part) { - acpi_handle iter = NULL, handle_mux; + union acpi_object mux_arg = { ACPI_TYPE_INTEGER }; + struct acpi_object_list input = { 1, &mux_arg }; + acpi_handle iter = NULL, handle_mux = NULL; acpi_status status; unsigned long long value; @@ -1181,14 +1195,18 @@ r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode, if (!handle_mux) return; - status = acpi_evaluate_integer(handle_mux, "MXDM", NULL, &value); + /* I -think- 0 means "acquire" according to nvidia's driver source */ + input.pointer->integer.type = ACPI_TYPE_INTEGER; + input.pointer->integer.value = 0; + + status = acpi_evaluate_integer(handle_mux, "MXDM", &input, &value); if (ACPI_SUCCESS(status)) { mode->acpiId = id; mode->mode = value; mode->status = 0; } - status = acpi_evaluate_integer(handle_mux, "MXDS", NULL, &value); + status = acpi_evaluate_integer(handle_mux, "MXDS", &input, &value); if (ACPI_SUCCESS(status)) { part->acpiId = id; part->mode = value; @@ -1234,8 +1252,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod) dod->acpiIdListLen += sizeof(dod->acpiIdList[0]); } - printk(KERN_ERR "_DOD: ok! len:%d\n", dod->acpiIdListLen); dod->status = 0; + kfree(output.pointer); } #endif @@ -1379,6 +1397,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc) return 0; } +/** + * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP + * + * The GSP sequencer is a list of I/O commands that the GSP can send to + * the driver to perform for various purposes. The most common usage is to + * perform a special mid-initialization reset. + */ static int r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc) { @@ -1718,6 +1743,23 @@ r535_gsp_libos_id8(const char *name) return id; } +/** + * create_pte_array() - creates a PTE array of a physically contiguous buffer + * @ptes: pointer to the array + * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned) + * @size: size of the buffer + * + * GSP-RM sometimes expects physically-contiguous buffers to have an array of + * "PTEs" for each page in that buffer. Although in theory that allows for + * the buffer to be physically discontiguous, GSP-RM does not currently + * support that. + * + * In this case, the PTEs are DMA addresses of each page of the buffer. Since + * the buffer is physically contiguous, calculating all the PTEs is simple + * math. + * + * See memdescGetPhysAddrsForGpu() + */ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) { unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE); @@ -1727,6 +1769,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size) ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT); } +/** + * r535_gsp_libos_init() -- create the libos arguments structure + * + * The logging buffers are byte queues that contain encoded printf-like + * messages from GSP-RM. They need to be decoded by a special application + * that can parse the buffers. + * + * The 'loginit' buffer contains logs from early GSP-RM init and + * exception dumps. The 'logrm' buffer contains the subsequent logs. Both are + * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE. + * + * The physical address map for the log buffer is stored in the buffer + * itself, starting with offset 1. Offset 0 contains the "put" pointer. + * + * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is + * configured for a larger page size (e.g. 64K pages), we need to give + * the GSP an array of 4K pages. Fortunately, since the buffer is + * physically contiguous, it's simple math to calculate the addresses. + * + * The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently + * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the + * buffers to be physically contiguous anyway. + * + * The memory allocated for the arguments must remain until the GSP sends the + * init_done RPC. + * + * See _kgspInitLibosLoggingStructures (allocates memory for buffers) + * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array) + */ static int r535_gsp_libos_init(struct nvkm_gsp *gsp) { @@ -1837,6 +1908,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3) nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]); } +/** + * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list + * + * The GSP uses a three-level page table, called radix3, to map the firmware. + * Each 64-bit "pointer" in the table is either the bus address of an entry in + * the next table (for levels 0 and 1) or the bus address of the next page in + * the GSP firmware image itself. + * + * Level 0 contains a single entry in one page that points to the first page + * of level 1. + * + * Level 1, since it's also only one page in size, contains up to 512 entries, + * one for each page in Level 2. + * + * Level 2 can be up to 512 pages in size, and each of those entries points to + * the next page of the firmware image. Since there can be up to 512*512 + * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB. + * + * Internally, the GSP has its window into system memory, but the base + * physical address of the aperture is not 0. In fact, it varies depending on + * the GPU architecture. Since the GPU is a PCI device, this window is + * accessed via DMA and is therefore bound by IOMMU translation. The end + * result is that GSP-RM must translate the bus addresses in the table to GSP + * physical addresses. All this should happen transparently. + * + * Returns 0 on success, or negative error code + * + * See kgspCreateRadix3_IMPL + */ static int nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size, struct nvkm_gsp_radix3 *rx3) @@ -2106,7 +2206,9 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp) r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED, r535_gsp_msg_mmu_fault_queued, gsp); r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_OS_ERROR_LOG, r535_gsp_msg_os_error_log, gsp); - + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE, NULL, NULL); + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, NULL, NULL); + r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_GSP_SEND_USER_SHARED_DATA, NULL, NULL); ret = r535_gsp_rm_boot_ctor(gsp); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 1b811d6972a1..201022ae9214 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -49,14 +49,14 @@ #include <subdev/mmu.h> struct gk20a_instobj { - struct nvkm_memory memory; + struct nvkm_instobj base; struct nvkm_mm_node *mn; struct gk20a_instmem *imem; /* CPU mapping */ u32 *vaddr; }; -#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory) +#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory) /* * Used for objects allocated using the DMA API @@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj) list_del(&obj->vaddr_node); vunmap(obj->base.vaddr); obj->base.vaddr = NULL; - imem->vaddr_use -= nvkm_memory_size(&obj->base.memory); + imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory); nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use, imem->vaddr_max); } @@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm, { struct gk20a_instobj *node = gk20a_instobj(memory); struct nvkm_vmm_map map = { - .memory = &node->memory, + .memory = &node->base.memory, .offset = offset, .mem = node->mn, }; @@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align, return -ENOMEM; *_node = &node->base; - nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT, &node->handle, GFP_KERNEL, @@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align, *_node = &node->base; node->dma_addrs = (void *)(node->pages + npages); - nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory); - node->base.memory.ptrs = &gk20a_instobj_ptrs; + nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory); + node->base.base.memory.ptrs = &gk20a_instobj_ptrs; /* Allocate backing memory */ for (i = 0; i < npages; i++) { @@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero, else ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT, align, &node); - *pmemory = node ? &node->memory : NULL; + *pmemory = node ? &node->base.memory : NULL; if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c index e34bc6076401..8379e72d77ab 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c @@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth) type |= 0x00000001; /* PAGE_ALL */ if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR])) - type |= 0x00000004; /* HUB_ONLY */ + type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */ mutex_lock(&vmm->mmu->mutex); |