summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/nouveau
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/nouveau')
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c12
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/core/event.h4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h17
-rw-r--r--drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h51
-rw-r--r--drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.c19
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_abi16.h2
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c20
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_display.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drm.c36
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_drv.h19
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.c68
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_exec.h6
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.c28
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_fence.h5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c10
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_platform.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.c207
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_sched.h43
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.c380
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_uvmm.h12
-rw-r--r--drivers/gpu/drm/nouveau/nv04_fence.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/core/event.c12
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c6
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c171
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c2
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c5
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c8
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c180
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c18
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c2
34 files changed, 807 insertions, 560 deletions
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 7840b6428afb..8d37a694b772 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -38,7 +38,9 @@
#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
#include <drm/drm_fb_helper.h>
+#include <drm/drm_fixed.h>
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
@@ -945,7 +947,8 @@ nv50_msto_prepare(struct drm_atomic_state *state,
if (ret == 0) {
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index,
payload->vc_start_slot, payload->time_slots,
- payload->pbn, payload->time_slots * mst_state->pbn_div);
+ payload->pbn,
+ payload->time_slots * dfixed_trunc(mst_state->pbn_div));
} else {
nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
}
@@ -982,15 +985,14 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
const int clock = crtc_state->adjusted_mode.clock;
asyh->or.bpc = connector->display_info.bpc;
- asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3,
- false);
+ asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3 << 4);
}
mst_state = drm_atomic_get_mst_topology_state(state, &mstm->mgr);
if (IS_ERR(mst_state))
return PTR_ERR(mst_state);
- if (!mst_state->pbn_div) {
+ if (!mst_state->pbn_div.full) {
struct nouveau_encoder *outp = mstc->mstm->outp;
mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr,
@@ -2474,7 +2476,7 @@ nv50_disp_atomic_commit(struct drm_device *dev,
err_cleanup:
if (ret)
- drm_atomic_helper_cleanup_planes(dev, state);
+ drm_atomic_helper_unprepare_planes(dev, state);
done:
pm_runtime_put_autosuspend(dev->dev);
return ret;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
index 82b267c11147..460459af272d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
@@ -14,7 +14,7 @@ struct nvkm_event {
int index_nr;
spinlock_t refs_lock;
- spinlock_t list_lock;
+ rwlock_t list_lock;
int *refs;
struct list_head ntfy;
@@ -38,7 +38,7 @@ nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
int types_nr, int index_nr, struct nvkm_event *event)
{
spin_lock_init(&event->refs_lock);
- spin_lock_init(&event->list_lock);
+ rwlock_init(&event->list_lock);
return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
}
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index 2fa0445d8928..d1437c08645f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -187,7 +187,7 @@ struct nvkm_gsp {
void (*rpc_done)(struct nvkm_gsp *gsp, void *repv);
void *(*rm_ctrl_get)(struct nvkm_gsp_object *, u32 cmd, u32 argc);
- void *(*rm_ctrl_push)(struct nvkm_gsp_object *, void *argv, u32 repc);
+ int (*rm_ctrl_push)(struct nvkm_gsp_object *, void **argv, u32 repc);
void (*rm_ctrl_done)(struct nvkm_gsp_object *, void *repv);
void *(*rm_alloc_get)(struct nvkm_gsp_object *, u32 oclass, u32 argc);
@@ -265,7 +265,7 @@ nvkm_gsp_rm_ctrl_get(struct nvkm_gsp_object *object, u32 cmd, u32 argc)
return object->client->gsp->rm->rm_ctrl_get(object, cmd, argc);
}
-static inline void *
+static inline int
nvkm_gsp_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
{
return object->client->gsp->rm->rm_ctrl_push(object, argv, repc);
@@ -275,21 +275,24 @@ static inline void *
nvkm_gsp_rm_ctrl_rd(struct nvkm_gsp_object *object, u32 cmd, u32 repc)
{
void *argv = nvkm_gsp_rm_ctrl_get(object, cmd, repc);
+ int ret;
if (IS_ERR(argv))
return argv;
- return nvkm_gsp_rm_ctrl_push(object, argv, repc);
+ ret = nvkm_gsp_rm_ctrl_push(object, &argv, repc);
+ if (ret)
+ return ERR_PTR(ret);
+ return argv;
}
static inline int
nvkm_gsp_rm_ctrl_wr(struct nvkm_gsp_object *object, void *argv)
{
- void *repv = nvkm_gsp_rm_ctrl_push(object, argv, 0);
-
- if (IS_ERR(repv))
- return PTR_ERR(repv);
+ int ret = nvkm_gsp_rm_ctrl_push(object, &argv, 0);
+ if (ret)
+ return ret;
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
index 5a2f273d95c8..0e32e71e123f 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/common/shared/msgq/inc/msgq/msgq_priv.h
@@ -26,6 +26,49 @@
* DEALINGS IN THE SOFTWARE.
*/
+/**
+ * msgqTxHeader -- TX queue data structure
+ * @version: the version of this structure, must be 0
+ * @size: the size of the entire queue, including this header
+ * @msgSize: the padded size of queue element, 16 is minimum
+ * @msgCount: the number of elements in this queue
+ * @writePtr: head index of this queue
+ * @flags: 1 = swap the RX pointers
+ * @rxHdrOff: offset of readPtr in this structure
+ * @entryOff: offset of beginning of queue (msgqRxHeader), relative to
+ * beginning of this structure
+ *
+ * The command queue is a queue of RPCs that are sent from the driver to the
+ * GSP. The status queue is a queue of messages/responses from GSP-RM to the
+ * driver. Although the driver allocates memory for both queues, the command
+ * queue is owned by the driver and the status queue is owned by GSP-RM. In
+ * addition, the headers of the two queues must not share the same 4K page.
+ *
+ * Each queue is prefixed with this data structure. The idea is that a queue
+ * and its header are written to only by their owner. That is, only the
+ * driver writes to the command queue and command queue header, and only the
+ * GSP writes to the status (receive) queue and its header.
+ *
+ * This is enforced by the concept of "swapping" the RX pointers. This is
+ * why the 'flags' field must be set to 1. 'rxHdrOff' is how the GSP knows
+ * where the where the tail pointer of its status queue.
+ *
+ * When the driver writes a new RPC to the command queue, it updates writePtr.
+ * When it reads a new message from the status queue, it updates readPtr. In
+ * this way, the GSP knows when a new command is in the queue (it polls
+ * writePtr) and it knows how much free space is in the status queue (it
+ * checks readPtr). The driver never cares about how much free space is in
+ * the status queue.
+ *
+ * As usual, producers write to the head pointer, and consumers read from the
+ * tail pointer. When head == tail, the queue is empty.
+ *
+ * So to summarize:
+ * command.writePtr = head of command queue
+ * command.readPtr = tail of status queue
+ * status.writePtr = head of status queue
+ * status.readPtr = tail of command queue
+ */
typedef struct
{
NvU32 version; // queue version
@@ -38,6 +81,14 @@ typedef struct
NvU32 entryOff; // Offset of entries from start of backing store.
} msgqTxHeader;
+/**
+ * msgqRxHeader - RX queue data structure
+ * @readPtr: tail index of the other queue
+ *
+ * Although this is a separate struct, it could easily be merged into
+ * msgqTxHeader. msgqTxHeader.rxHdrOff is simply the offset of readPtr
+ * from the beginning of msgqTxHeader.
+ */
typedef struct
{
NvU32 readPtr; // message id of last message read
diff --git a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
index 754c6af42f30..10121218f4d3 100644
--- a/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
+++ b/drivers/gpu/drm/nouveau/include/nvrm/535.113.01/nvidia/generated/g_os_nvoc.h
@@ -38,7 +38,7 @@ typedef struct PACKED_REGISTRY_TABLE
{
NvU32 size;
NvU32 numEntries;
- PACKED_REGISTRY_ENTRY entries[0];
+ PACKED_REGISTRY_ENTRY entries[] __counted_by(numEntries);
} PACKED_REGISTRY_TABLE;
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 2edd7bb13fae..a04156ca8390 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -127,21 +127,14 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16,
{
struct nouveau_abi16_ntfy *ntfy, *temp;
- /* When a client exits without waiting for it's queued up jobs to
- * finish it might happen that we fault the channel. This is due to
- * drm_file_free() calling drm_gem_release() before the postclose()
- * callback. Hence, we can't tear down this scheduler entity before
- * uvmm mappings are unmapped. Currently, we can't detect this case.
- *
- * However, this should be rare and harmless, since the channel isn't
- * needed anymore.
- */
- nouveau_sched_entity_fini(&chan->sched_entity);
+ /* Cancel all jobs from the entity's queue. */
+ drm_sched_entity_fini(&chan->sched.entity);
- /* wait for all activity to stop before cleaning up */
if (chan->chan)
nouveau_channel_idle(chan->chan);
+ nouveau_sched_fini(&chan->sched);
+
/* cleanup notifier state */
list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) {
nouveau_abi16_ntfy_fini(chan, ntfy);
@@ -344,8 +337,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
if (ret)
goto done;
- ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched,
- drm->sched_wq);
+ ret = nouveau_sched_init(&chan->sched, drm, drm->sched_wq,
+ chan->chan->dma.ib_max);
if (ret)
goto done;
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h
index 9f538486c10e..1f5e243c0c75 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.h
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h
@@ -26,7 +26,7 @@ struct nouveau_abi16_chan {
struct nouveau_bo *ntfy;
struct nouveau_vma *ntfy_vma;
struct nvkm_mm heap;
- struct nouveau_sched_entity sched_entity;
+ struct nouveau_sched sched;
};
struct nouveau_abi16 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0f3bd187ede6..00cc7d1abaa3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
* If nouveau_bo_new() allocated this buffer, the GEM object was never
* initialized, so don't attempt to release it.
*/
- if (bo->base.dev)
+ if (bo->base.dev) {
+ /* Gem objects not being shared with other VMs get their
+ * dma_resv from a root GEM object.
+ */
+ if (nvbo->no_share)
+ drm_gem_object_put(nvbo->r_obj);
+
drm_gem_object_release(&bo->base);
- else
+ } else {
dma_resv_fini(&bo->base._resv);
+ }
kfree(nvbo);
}
@@ -318,8 +325,9 @@ nouveau_bo_alloc(struct nouveau_cli *cli, u64 *size, int *align, u32 domain,
(!vmm->page[i].host || vmm->page[i].shift > PAGE_SHIFT))
continue;
- if (pi < 0)
- pi = i;
+ /* pick the last one as it will be smallest. */
+ pi = i;
+
/* Stop once the buffer is larger than the current page size. */
if (*size >= 1ULL << vmm->page[i].shift)
break;
@@ -1054,17 +1062,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict,
{
struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
struct nouveau_bo *nvbo = nouveau_bo(bo);
+ struct drm_gem_object *obj = &bo->base;
struct ttm_resource *old_reg = bo->resource;
struct nouveau_drm_tile *new_tile = NULL;
int ret = 0;
-
if (new_reg->mem_type == TTM_PL_TT) {
ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg);
if (ret)
return ret;
}
+ drm_gpuvm_bo_gem_evict(obj, evict);
nouveau_bo_move_ntfy(bo, new_reg);
ret = ttm_bo_wait_ctx(bo, ctx);
if (ret)
@@ -1129,6 +1138,7 @@ out:
out_ntfy:
if (ret) {
nouveau_bo_move_ntfy(bo, bo->resource);
+ drm_gpuvm_bo_gem_evict(obj, !evict);
}
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index 07f671cf895e..70c551921a9e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,11 @@ struct nouveau_bo {
struct list_head entry;
int pbbo_index;
bool validate_mapped;
+
+ /* Root GEM object we derive the dma_resv of in case this BO is not
+ * shared between VMs.
+ */
+ struct drm_gem_object *r_obj;
bool no_share;
/* GPU address space is independent of CPU word size */
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index d8c92521226d..f28f9a857458 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -726,6 +726,11 @@ nouveau_display_create(struct drm_device *dev)
if (nouveau_modeset != 2) {
ret = nvif_disp_ctor(&drm->client.device, "kmsDisp", 0, &disp->disp);
+ /* no display hw */
+ if (ret == -ENODEV) {
+ ret = 0;
+ goto disp_create_err;
+ }
if (!ret && (disp->disp.outp_mask || drm->vbios.dcb.entries)) {
nouveau_display_create_properties(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 50589f982d1a..6f6c31a9937b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence,
static void
nouveau_cli_fini(struct nouveau_cli *cli)
{
+ struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli);
+
/* All our channels are dead now, which means all the fences they
* own are signalled, and all callback functions have been called.
*
@@ -199,8 +201,9 @@ nouveau_cli_fini(struct nouveau_cli *cli)
WARN_ON(!list_empty(&cli->worker));
usif_client_fini(cli);
- nouveau_uvmm_fini(&cli->uvmm);
- nouveau_sched_entity_fini(&cli->sched_entity);
+ nouveau_sched_fini(&cli->sched);
+ if (uvmm)
+ nouveau_uvmm_fini(uvmm);
nouveau_vmm_fini(&cli->svm);
nouveau_vmm_fini(&cli->vmm);
nvif_mmu_dtor(&cli->mmu);
@@ -307,8 +310,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
cli->mem = &mems[ret];
- ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched,
- drm->sched_wq);
+ /* Don't pass in the (shared) sched_wq in order to let
+ * nouveau_sched_init() create a dedicated one for VM_BIND jobs.
+ *
+ * This is required to ensure that for VM_BIND jobs free_job() work and
+ * run_job() work can always run concurrently and hence, free_job() work
+ * can never stall run_job() work. For EXEC jobs we don't have this
+ * requirement, since EXEC job's free_job() does not require to take any
+ * locks which indirectly or directly are held for allocations
+ * elsewhere.
+ */
+ ret = nouveau_sched_init(&cli->sched, drm, NULL, 1);
if (ret)
goto done;
@@ -579,13 +591,16 @@ nouveau_drm_device_init(struct drm_device *dev)
nvif_parent_ctor(&nouveau_parent, &drm->parent);
drm->master.base.object.parent = &drm->parent;
- ret = nouveau_sched_init(drm);
- if (ret)
+ drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0,
+ WQ_MAX_ACTIVE);
+ if (!drm->sched_wq) {
+ ret = -ENOMEM;
goto fail_alloc;
+ }
ret = nouveau_cli_init(drm, "DRM-master", &drm->master);
if (ret)
- goto fail_sched;
+ goto fail_wq;
ret = nouveau_cli_init(drm, "DRM", &drm->client);
if (ret)
@@ -655,8 +670,8 @@ fail_ttm:
nouveau_cli_fini(&drm->client);
fail_master:
nouveau_cli_fini(&drm->master);
-fail_sched:
- nouveau_sched_fini(drm);
+fail_wq:
+ destroy_workqueue(drm->sched_wq);
fail_alloc:
nvif_parent_dtor(&drm->parent);
kfree(drm);
@@ -708,10 +723,9 @@ nouveau_drm_device_fini(struct drm_device *dev)
}
mutex_unlock(&drm->clients_lock);
- nouveau_sched_fini(drm);
-
nouveau_cli_fini(&drm->client);
nouveau_cli_fini(&drm->master);
+ destroy_workqueue(drm->sched_wq);
nvif_parent_dtor(&drm->parent);
mutex_destroy(&drm->clients_lock);
kfree(drm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index e73a233c6572..8a6d94c8b163 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -93,9 +93,12 @@ struct nouveau_cli {
struct nvif_mmu mmu;
struct nouveau_vmm vmm;
struct nouveau_vmm svm;
- struct nouveau_uvmm uvmm;
+ struct {
+ struct nouveau_uvmm *ptr;
+ bool disabled;
+ } uvmm;
- struct nouveau_sched_entity sched_entity;
+ struct nouveau_sched sched;
const struct nvif_mclass *mem;
@@ -121,10 +124,7 @@ struct nouveau_cli_work {
static inline struct nouveau_uvmm *
nouveau_cli_uvmm(struct nouveau_cli *cli)
{
- if (!cli || !cli->uvmm.vmm.cli)
- return NULL;
-
- return &cli->uvmm;
+ return cli ? cli->uvmm.ptr : NULL;
}
static inline struct nouveau_uvmm *
@@ -258,6 +258,9 @@ struct nouveau_drm {
u64 context_base;
} *runl;
+ /* Workqueue used for channel schedulers. */
+ struct workqueue_struct *sched_wq;
+
/* context for accelerated drm-internal operations */
struct nouveau_channel *cechan;
struct nouveau_channel *channel;
@@ -298,10 +301,6 @@ struct nouveau_drm {
struct mutex lock;
bool component_registered;
} audio;
-
- struct drm_gpu_scheduler sched;
- struct workqueue_struct *sched_wq;
-
};
static inline struct nouveau_drm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c
index 9a5ef574744b..bc5d71b79ab2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.c
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: MIT
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_gem.h"
#include "nouveau_mem.h"
@@ -86,14 +84,12 @@
*/
static int
-nouveau_exec_job_submit(struct nouveau_job *job)
+nouveau_exec_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
struct nouveau_cli *cli = job->cli;
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli);
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
int ret;
/* Create a new fence, but do not emit yet. */
@@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job)
return ret;
nouveau_uvmm_lock(uvmm);
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
- drm_exec_until_all_locked(exec) {
- struct drm_gpuva *va;
-
- drm_gpuvm_for_each_va(va, &uvmm->base) {
- if (unlikely(va == &uvmm->base.kernel_alloc_node))
- continue;
-
- ret = drm_exec_prepare_obj(exec, va->gem.obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret)
- goto err_uvmm_unlock;
- }
+ ret = drm_gpuvm_exec_lock(vme);
+ if (ret) {
+ nouveau_uvmm_unlock(uvmm);
+ return ret;
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_for_each_locked_object(exec, index, obj) {
- struct nouveau_bo *nvbo = nouveau_gem_object(obj);
-
- ret = nouveau_bo_validate(nvbo, true, false);
- if (ret)
- goto err_exec_fini;
+ ret = drm_gpuvm_exec_validate(vme);
+ if (ret) {
+ drm_gpuvm_exec_unlock(vme);
+ return ret;
}
return 0;
-
-err_uvmm_unlock:
- nouveau_uvmm_unlock(uvmm);
-err_exec_fini:
- drm_exec_fini(exec);
- return ret;
-
}
static void
-nouveau_exec_job_armed_submit(struct nouveau_job *job)
+nouveau_exec_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
@@ -192,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job)
{
struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job);
+ nouveau_job_done(job);
nouveau_job_free(job);
kfree(exec_job->fence);
@@ -211,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job)
NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n",
chan->chid);
- nouveau_sched_entity_fini(job->entity);
-
return DRM_GPU_SCHED_STAT_NOMINAL;
}
@@ -259,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob,
}
}
+ args.file_priv = __args->file_priv;
job->chan = __args->chan;
- args.sched_entity = __args->sched_entity;
- args.file_priv = __args->file_priv;
+ args.sched = __args->sched;
+ /* Plus one to account for the HW fence. */
+ args.credits = job->push.count + 1;
args.in_sync.count = __args->in_sync.count;
args.in_sync.s = __args->in_sync.s;
@@ -415,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev,
if (ret)
goto out;
- args.sched_entity = &chan16->sched_entity;
+ args.sched = &chan16->sched;
args.file_priv = file_priv;
args.chan = chan;
diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h
index 5488d337bcc0..9b3b151facfd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_exec.h
+++ b/drivers/gpu/drm/nouveau/nouveau_exec.h
@@ -3,16 +3,12 @@
#ifndef __NOUVEAU_EXEC_H__
#define __NOUVEAU_EXEC_H__
-#include <drm/drm_exec.h>
-
#include "nouveau_drv.h"
#include "nouveau_sched.h"
struct nouveau_exec_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
-
- struct drm_exec exec;
+ struct nouveau_sched *sched;
struct nouveau_channel *chan;
struct {
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index ca762ea55413..5057d976fa57 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -62,7 +62,7 @@ nouveau_fence_signal(struct nouveau_fence *fence)
if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
- if (!--fctx->notify_ref)
+ if (atomic_dec_and_test(&fctx->notify_ref))
drop = 1;
}
@@ -103,6 +103,7 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
void
nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
{
+ cancel_work_sync(&fctx->allow_block_work);
nouveau_fence_context_kill(fctx, 0);
nvif_event_dtor(&fctx->event);
fctx->dead = 1;
@@ -167,6 +168,18 @@ nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc
return ret;
}
+static void
+nouveau_fence_work_allow_block(struct work_struct *work)
+{
+ struct nouveau_fence_chan *fctx = container_of(work, struct nouveau_fence_chan,
+ allow_block_work);
+
+ if (atomic_read(&fctx->notify_ref) == 0)
+ nvif_event_block(&fctx->event);
+ else
+ nvif_event_allow(&fctx->event);
+}
+
void
nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_chan *fctx)
{
@@ -178,6 +191,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
} args;
int ret;
+ INIT_WORK(&fctx->allow_block_work, nouveau_fence_work_allow_block);
INIT_LIST_HEAD(&fctx->flip);
INIT_LIST_HEAD(&fctx->pending);
spin_lock_init(&fctx->lock);
@@ -521,15 +535,19 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
struct nouveau_fence *fence = from_fence(f);
struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
bool ret;
+ bool do_work;
- if (!fctx->notify_ref++)
- nvif_event_allow(&fctx->event);
+ if (atomic_inc_return(&fctx->notify_ref) == 0)
+ do_work = true;
ret = nouveau_fence_no_signaling(f);
if (ret)
set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
- else if (!--fctx->notify_ref)
- nvif_event_block(&fctx->event);
+ else if (atomic_dec_and_test(&fctx->notify_ref))
+ do_work = true;
+
+ if (do_work)
+ schedule_work(&fctx->allow_block_work);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64d33ae7f356..28f5cf013b89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -3,6 +3,7 @@
#define __NOUVEAU_FENCE_H__
#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
#include <nvif/event.h>
struct nouveau_drm;
@@ -45,7 +46,9 @@ struct nouveau_fence_chan {
char name[32];
struct nvif_event event;
- int notify_ref, dead, killed;
+ struct work_struct allow_block_work;
+ atomic_t notify_ref;
+ int dead, killed;
};
struct nouveau_fence_priv {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index a0d303e5ce3d..49c2bcbef129 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv)
if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50)
return 0;
- if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv)
+ if (nvbo->no_share && uvmm &&
+ drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv)
return -EPERM;
ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL);
@@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
if (unlikely(!uvmm))
return -EINVAL;
- resv = &uvmm->resv;
+ resv = drm_gpuvm_resv(&uvmm->base);
}
if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART)))
@@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
nvbo->valid_domains &= domain;
+ if (nvbo->no_share) {
+ nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base);
+ drm_gem_object_get(nvbo->r_obj);
+ }
+
*pnvbo = nvbo;
return 0;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c
index 23cd43a7fd19..bf2dc7567ea4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_platform.c
+++ b/drivers/gpu/drm/nouveau/nouveau_platform.c
@@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev)
return 0;
}
-static int nouveau_platform_remove(struct platform_device *pdev)
+static void nouveau_platform_remove(struct platform_device *pdev)
{
struct drm_device *dev = platform_get_drvdata(pdev);
nouveau_drm_device_remove(dev);
- return 0;
}
#if IS_ENABLED(CONFIG_OF)
@@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = {
.of_match_table = of_match_ptr(nouveau_platform_match),
},
.probe = nouveau_platform_probe,
- .remove = nouveau_platform_remove,
+ .remove_new = nouveau_platform_remove,
};
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c
index 7c376c4ccdcf..dd98f6910f9c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.c
@@ -12,30 +12,28 @@
#include "nouveau_abi16.h"
#include "nouveau_sched.h"
-/* FIXME
- *
- * We want to make sure that jobs currently executing can't be deferred by
- * other jobs competing for the hardware. Otherwise we might end up with job
- * timeouts just because of too many clients submitting too many jobs. We don't
- * want jobs to time out because of system load, but because of the job being
- * too bulky.
- *
- * For now allow for up to 16 concurrent jobs in flight until we know how many
- * rings the hardware can process in parallel.
- */
-#define NOUVEAU_SCHED_HW_SUBMISSIONS 16
#define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000
+/* Starts at 0, since the DRM scheduler interprets those parameters as (initial)
+ * index to the run-queue array.
+ */
+enum nouveau_sched_priority {
+ NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL,
+ NOUVEAU_SCHED_PRIORITY_COUNT,
+};
+
int
nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args)
{
- struct nouveau_sched_entity *entity = args->sched_entity;
+ struct nouveau_sched *sched = args->sched;
int ret;
+ INIT_LIST_HEAD(&job->entry);
+
job->file_priv = args->file_priv;
job->cli = nouveau_cli(args->file_priv);
- job->entity = entity;
+ job->sched = sched;
job->sync = args->sync;
job->resv_usage = args->resv_usage;
@@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job,
ret = -ENOMEM;
goto err_free_objs;
}
-
}
- ret = drm_sched_job_init(&job->base, &entity->base, NULL);
+ ret = drm_sched_job_init(&job->base, &sched->entity,
+ args->credits, NULL);
if (ret)
goto err_free_chains;
@@ -109,6 +107,27 @@ return ret;
}
void
+nouveau_job_fini(struct nouveau_job *job)
+{
+ dma_fence_put(job->done_fence);
+ drm_sched_job_cleanup(&job->base);
+
+ job->ops->free(job);
+}
+
+void
+nouveau_job_done(struct nouveau_job *job)
+{
+ struct nouveau_sched *sched = job->sched;
+
+ spin_lock(&sched->job.list.lock);
+ list_del(&job->entry);
+ spin_unlock(&sched->job.list.lock);
+
+ wake_up(&sched->job.wq);
+}
+
+void
nouveau_job_free(struct nouveau_job *job)
{
kfree(job->in_sync.data);
@@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job)
kfree(job->out_sync.chains);
}
-void nouveau_job_fini(struct nouveau_job *job)
-{
- dma_fence_put(job->done_fence);
- drm_sched_job_cleanup(&job->base);
- job->ops->free(job);
-}
-
static int
sync_find_fence(struct nouveau_job *job,
struct drm_nouveau_sync *sync,
@@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job)
int
nouveau_job_submit(struct nouveau_job *job)
{
- struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity);
+ struct nouveau_sched *sched = job->sched;
struct dma_fence *done_fence = NULL;
+ struct drm_gpuvm_exec vm_exec = {
+ .vm = &nouveau_cli_uvmm(job->cli)->base,
+ .flags = DRM_EXEC_IGNORE_DUPLICATES,
+ .num_fences = 1,
+ };
int ret;
ret = nouveau_job_add_deps(job);
@@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job)
/* Make sure the job appears on the sched_entity's queue in the same
* order as it was submitted.
*/
- mutex_lock(&entity->mutex);
+ mutex_lock(&sched->mutex);
/* Guarantee we won't fail after the submit() callback returned
* successfully.
*/
if (job->ops->submit) {
- ret = job->ops->submit(job);
+ ret = job->ops->submit(job, &vm_exec);
if (ret)
goto err_cleanup;
}
+ /* Submit was successful; add the job to the schedulers job list. */
+ spin_lock(&sched->job.list.lock);
+ list_add(&job->entry, &sched->job.list.head);
+ spin_unlock(&sched->job.list.lock);
+
drm_sched_job_arm(&job->base);
job->done_fence = dma_fence_get(&job->base.s_fence->finished);
if (job->sync)
done_fence = dma_fence_get(job->done_fence);
- /* If a sched job depends on a dma-fence from a job from the same GPU
- * scheduler instance, but a different scheduler entity, the GPU
- * scheduler does only wait for the particular job to be scheduled,
- * rather than for the job to fully complete. This is due to the GPU
- * scheduler assuming that there is a scheduler instance per ring.
- * However, the current implementation, in order to avoid arbitrary
- * amounts of kthreads, has a single scheduler instance while scheduler
- * entities represent rings.
- *
- * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all
- * out-fences in order to force the scheduler to wait for full job
- * completion for dependent jobs from different entities and same
- * scheduler instance.
- *
- * There is some work in progress [1] to address the issues of firmware
- * schedulers; once it is in-tree the scheduler topology in Nouveau
- * should be re-worked accordingly.
- *
- * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/
- */
- set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags);
-
if (job->ops->armed_submit)
- job->ops->armed_submit(job);
+ job->ops->armed_submit(job, &vm_exec);
nouveau_job_fence_attach(job);
@@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job)
drm_sched_entity_push_job(&job->base);
- mutex_unlock(&entity->mutex);
+ mutex_unlock(&sched->mutex);
if (done_fence) {
dma_fence_wait(done_fence, true);
@@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job)
return 0;
err_cleanup:
- mutex_unlock(&entity->mutex);
+ mutex_unlock(&sched->mutex);
nouveau_job_fence_attach_cleanup(job);
err:
job->state = NOUVEAU_JOB_SUBMIT_FAILED;
return ret;
}
-bool
-nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
- struct work_struct *work)
-{
- return queue_work(entity->sched_wq, work);
-}
-
static struct dma_fence *
nouveau_job_run(struct nouveau_job *job)
{
@@ -399,50 +392,82 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job)
nouveau_job_fini(job);
}
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
- struct drm_gpu_scheduler *sched,
- struct workqueue_struct *sched_wq)
-{
- mutex_init(&entity->mutex);
- spin_lock_init(&entity->job.list.lock);
- INIT_LIST_HEAD(&entity->job.list.head);
- init_waitqueue_head(&entity->job.wq);
-
- entity->sched_wq = sched_wq;
- return drm_sched_entity_init(&entity->base,
- DRM_SCHED_PRIORITY_NORMAL,
- &sched, 1, NULL);
-}
-
-void
-nouveau_sched_entity_fini(struct nouveau_sched_entity *entity)
-{
- drm_sched_entity_destroy(&entity->base);
-}
-
static const struct drm_sched_backend_ops nouveau_sched_ops = {
.run_job = nouveau_sched_run_job,
.timedout_job = nouveau_sched_timedout_job,
.free_job = nouveau_sched_free_job,
};
-int nouveau_sched_init(struct nouveau_drm *drm)
+int
+nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit)
{
- struct drm_gpu_scheduler *sched = &drm->sched;
+ struct drm_gpu_scheduler *drm_sched = &sched->base;
+ struct drm_sched_entity *entity = &sched->entity;
long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS);
+ int ret;
- drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq");
- if (!drm->sched_wq)
- return -ENOMEM;
+ if (!wq) {
+ wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE,
+ current->pid);
+ if (!wq)
+ return -ENOMEM;
+
+ sched->wq = wq;
+ }
- return drm_sched_init(sched, &nouveau_sched_ops,
- DRM_SCHED_PRIORITY_COUNT,
- NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit,
- NULL, NULL, "nouveau_sched", drm->dev->dev);
+ ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq,
+ NOUVEAU_SCHED_PRIORITY_COUNT,
+ credit_limit, 0, job_hang_limit,
+ NULL, NULL, "nouveau_sched", drm->dev->dev);
+ if (ret)
+ goto fail_wq;
+
+ /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use
+ * when we want to have a single run-queue only.
+ *
+ * It's not documented, but one will find out when trying to use any
+ * other priority running into faults, because the scheduler uses the
+ * priority as array index.
+ *
+ * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not
+ * matching the enum type used in drm_sched_entity_init().
+ */
+ ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL,
+ &drm_sched, 1, NULL);
+ if (ret)
+ goto fail_sched;
+
+ mutex_init(&sched->mutex);
+ spin_lock_init(&sched->job.list.lock);
+ INIT_LIST_HEAD(&sched->job.list.head);
+ init_waitqueue_head(&sched->job.wq);
+
+ return 0;
+
+fail_sched:
+ drm_sched_fini(drm_sched);
+fail_wq:
+ if (sched->wq)
+ destroy_workqueue(sched->wq);
+ return ret;
}
-void nouveau_sched_fini(struct nouveau_drm *drm)
+void
+nouveau_sched_fini(struct nouveau_sched *sched)
{
- destroy_workqueue(drm->sched_wq);
- drm_sched_fini(&drm->sched);
+ struct drm_gpu_scheduler *drm_sched = &sched->base;
+ struct drm_sched_entity *entity = &sched->entity;
+
+ rmb(); /* for list_empty to work without lock */
+ wait_event(sched->job.wq, list_empty(&sched->job.list.head));
+
+ drm_sched_entity_fini(entity);
+ drm_sched_fini(drm_sched);
+
+ /* Destroy workqueue after scheduler tear down, otherwise it might still
+ * be in use.
+ */
+ if (sched->wq)
+ destroy_workqueue(sched->wq);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h
index 27ac19792597..a6528f5981e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sched.h
+++ b/drivers/gpu/drm/nouveau/nouveau_sched.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
-#include <drm/drm_exec.h>
+#include <drm/drm_gpuvm.h>
#include <drm/gpu_scheduler.h>
#include "nouveau_drv.h"
@@ -26,7 +26,8 @@ enum nouveau_job_state {
struct nouveau_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
+ struct nouveau_sched *sched;
+ u32 credits;
enum dma_resv_usage resv_usage;
bool sync;
@@ -49,12 +50,12 @@ struct nouveau_job {
enum nouveau_job_state state;
- struct nouveau_sched_entity *entity;
+ struct nouveau_sched *sched;
+ struct list_head entry;
struct drm_file *file_priv;
struct nouveau_cli *cli;
- struct drm_exec exec;
enum dma_resv_usage resv_usage;
struct dma_fence *done_fence;
@@ -76,8 +77,8 @@ struct nouveau_job {
/* If .submit() returns without any error, it is guaranteed that
* armed_submit() is called.
*/
- int (*submit)(struct nouveau_job *);
- void (*armed_submit)(struct nouveau_job *);
+ int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
+ void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *);
struct dma_fence *(*run)(struct nouveau_job *);
void (*free)(struct nouveau_job *);
enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *);
@@ -90,20 +91,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args,
int nouveau_job_init(struct nouveau_job *job,
struct nouveau_job_args *args);
-void nouveau_job_free(struct nouveau_job *job);
-
-int nouveau_job_submit(struct nouveau_job *job);
void nouveau_job_fini(struct nouveau_job *job);
+int nouveau_job_submit(struct nouveau_job *job);
+void nouveau_job_done(struct nouveau_job *job);
+void nouveau_job_free(struct nouveau_job *job);
-#define to_nouveau_sched_entity(entity) \
- container_of((entity), struct nouveau_sched_entity, base)
-
-struct nouveau_sched_entity {
- struct drm_sched_entity base;
+struct nouveau_sched {
+ struct drm_gpu_scheduler base;
+ struct drm_sched_entity entity;
+ struct workqueue_struct *wq;
struct mutex mutex;
- struct workqueue_struct *sched_wq;
-
struct {
struct {
struct list_head head;
@@ -113,15 +111,8 @@ struct nouveau_sched_entity {
} job;
};
-int nouveau_sched_entity_init(struct nouveau_sched_entity *entity,
- struct drm_gpu_scheduler *sched,
- struct workqueue_struct *sched_wq);
-void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity);
-
-bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity,
- struct work_struct *work);
-
-int nouveau_sched_init(struct nouveau_drm *drm);
-void nouveau_sched_fini(struct nouveau_drm *drm);
+int nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm,
+ struct workqueue_struct *wq, u32 credit_limit);
+void nouveau_sched_fini(struct nouveau_sched *sched);
#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 5cf892c50f43..4f223c972c6a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -62,6 +62,8 @@ struct bind_job_op {
enum vm_bind_op op;
u32 flags;
+ struct drm_gpuvm_bo *vm_bo;
+
struct {
u64 addr;
u64 range;
@@ -436,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg)
static void
op_map_prepare_unwind(struct nouveau_uvma *uvma)
{
+ struct drm_gpuva *va = &uvma->va;
nouveau_uvma_gem_put(uvma);
- drm_gpuva_remove(&uvma->va);
+ drm_gpuva_remove(va);
nouveau_uvma_free(uvma);
}
@@ -466,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
break;
case DRM_GPUVA_OP_REMAP: {
struct drm_gpuva_op_remap *r = &op->remap;
+ struct drm_gpuva *va = r->unmap->va;
if (r->next)
op_map_prepare_unwind(new->next);
@@ -473,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm,
if (r->prev)
op_map_prepare_unwind(new->prev);
- op_unmap_prepare_unwind(r->unmap->va);
+ op_unmap_prepare_unwind(va);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -604,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u)
drm_gpuva_unmap(u);
}
+/*
+ * Note: @args should not be NULL when calling for a map operation.
+ */
static int
nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
struct nouveau_uvma_prealloc *new,
@@ -624,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
if (ret)
goto unwind;
- if (args && vmm_get_range) {
+ if (vmm_get_range) {
ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start,
vmm_get_range);
if (ret) {
@@ -632,6 +639,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm,
goto unwind;
}
}
+
break;
}
case DRM_GPUVA_OP_REMAP: {
@@ -929,25 +937,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm,
static int
nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range)
{
- u64 end = addr + range;
- u64 kernel_managed_end = uvmm->kernel_managed_addr +
- uvmm->kernel_managed_size;
-
if (addr & ~PAGE_MASK)
return -EINVAL;
if (range & ~PAGE_MASK)
return -EINVAL;
- if (end <= addr)
- return -EINVAL;
-
- if (addr < NOUVEAU_VA_SPACE_START ||
- end > NOUVEAU_VA_SPACE_END)
- return -EINVAL;
-
- if (addr < kernel_managed_end &&
- end > uvmm->kernel_managed_addr)
+ if (!drm_gpuvm_range_valid(&uvmm->base, addr, range))
return -EINVAL;
return 0;
@@ -970,6 +966,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref)
{
struct nouveau_uvmm_bind_job *job =
container_of(kref, struct nouveau_uvmm_bind_job, kref);
+ struct bind_job_op *op, *next;
+
+ list_for_each_op_safe(op, next, &job->ops) {
+ list_del(&op->entry);
+ kfree(op);
+ }
nouveau_job_free(&job->base);
kfree(job);
@@ -1011,14 +1013,16 @@ bind_validate_op(struct nouveau_job *job,
static void
bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range)
{
- struct nouveau_uvmm_bind_job *bind_job;
- struct nouveau_sched_entity *entity = job->entity;
+ struct nouveau_sched *sched = job->sched;
+ struct nouveau_job *__job;
struct bind_job_op *op;
u64 end = addr + range;
again:
- spin_lock(&entity->job.list.lock);
- list_for_each_entry(bind_job, &entity->job.list.head, entry) {
+ spin_lock(&sched->job.list.lock);
+ list_for_each_entry(__job, &sched->job.list.head, entry) {
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job);
+
list_for_each_op(op, &bind_job->ops) {
if (op->op == OP_UNMAP) {
u64 op_addr = op->va.addr;
@@ -1026,7 +1030,7 @@ again:
if (!(end <= op_addr || addr >= op_end)) {
nouveau_uvmm_bind_job_get(bind_job);
- spin_unlock(&entity->job.list.lock);
+ spin_unlock(&sched->job.list.lock);
wait_for_completion(&bind_job->complete);
nouveau_uvmm_bind_job_put(bind_job);
goto again;
@@ -1034,7 +1038,7 @@ again:
}
}
}
- spin_unlock(&entity->job.list.lock);
+ spin_unlock(&sched->job.list.lock);
}
static int
@@ -1113,22 +1117,28 @@ bind_validate_region(struct nouveau_job *job)
}
static void
-bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
+bind_link_gpuvas(struct bind_job_op *bop)
{
+ struct nouveau_uvma_prealloc *new = &bop->new;
+ struct drm_gpuvm_bo *vm_bo = bop->vm_bo;
+ struct drm_gpuva_ops *ops = bop->ops;
struct drm_gpuva_op *op;
drm_gpuva_for_each_op(op, ops) {
switch (op->op) {
case DRM_GPUVA_OP_MAP:
- drm_gpuva_link(&new->map->va);
+ drm_gpuva_link(&new->map->va, vm_bo);
break;
- case DRM_GPUVA_OP_REMAP:
+ case DRM_GPUVA_OP_REMAP: {
+ struct drm_gpuva *va = op->remap.unmap->va;
+
if (op->remap.prev)
- drm_gpuva_link(&new->prev->va);
+ drm_gpuva_link(&new->prev->va, va->vm_bo);
if (op->remap.next)
- drm_gpuva_link(&new->next->va);
- drm_gpuva_unlink(op->remap.unmap->va);
+ drm_gpuva_link(&new->next->va, va->vm_bo);
+ drm_gpuva_unlink(va);
break;
+ }
case DRM_GPUVA_OP_UNMAP:
drm_gpuva_unlink(op->unmap.va);
break;
@@ -1139,21 +1149,70 @@ bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new)
}
static int
-nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
+bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec,
+ unsigned int num_fences)
+{
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
+ struct bind_job_op *op;
+ int ret;
+
+ list_for_each_op(op, &bind_job->ops) {
+ struct drm_gpuva_op *va_op;
+
+ if (!op->ops)
+ continue;
+
+ drm_gpuva_for_each_op(va_op, op->ops) {
+ struct drm_gem_object *obj = op_gem_obj(va_op);
+
+ if (unlikely(!obj))
+ continue;
+
+ ret = drm_exec_prepare_obj(exec, obj, num_fences);
+ if (ret)
+ return ret;
+
+ /* Don't validate GEMs backing mappings we're about to
+ * unmap, it's not worth the effort.
+ */
+ if (va_op->op == DRM_GPUVA_OP_UNMAP)
+ continue;
+
+ ret = nouveau_bo_validate(nouveau_gem_object(obj),
+ true, false);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int
+nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
- struct nouveau_sched_entity *entity = job->entity;
- struct drm_exec *exec = &job->exec;
+ struct drm_exec *exec = &vme->exec;
struct bind_job_op *op;
int ret;
list_for_each_op(op, &bind_job->ops) {
if (op->op == OP_MAP) {
- op->gem.obj = drm_gem_object_lookup(job->file_priv,
- op->gem.handle);
- if (!op->gem.obj)
+ struct drm_gem_object *obj = op->gem.obj =
+ drm_gem_object_lookup(job->file_priv,
+ op->gem.handle);
+ if (!obj)
return -ENOENT;
+
+ dma_resv_lock(obj->resv, NULL);
+ op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj);
+ dma_resv_unlock(obj->resv);
+ if (IS_ERR(op->vm_bo))
+ return PTR_ERR(op->vm_bo);
+
+ drm_gpuvm_bo_extobj_add(op->vm_bo);
}
ret = bind_validate_op(job, op);
@@ -1176,6 +1235,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
* unwind all GPU VA space changes on failure.
*/
nouveau_uvmm_lock(uvmm);
+
list_for_each_op(op, &bind_job->ops) {
switch (op->op) {
case OP_MAP_SPARSE:
@@ -1287,55 +1347,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}
}
- drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES);
+ drm_exec_init(exec, vme->flags, 0);
drm_exec_until_all_locked(exec) {
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
-
- if (unlikely(!obj))
- continue;
-
- ret = drm_exec_prepare_obj(exec, obj, 1);
- drm_exec_retry_on_contention(exec);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
- }
- }
- }
-
- list_for_each_op(op, &bind_job->ops) {
- struct drm_gpuva_op *va_op;
-
- if (IS_ERR_OR_NULL(op->ops))
- continue;
-
- drm_gpuva_for_each_op(va_op, op->ops) {
- struct drm_gem_object *obj = op_gem_obj(va_op);
-
- if (unlikely(!obj))
- continue;
-
- /* Don't validate GEMs backing mappings we're about to
- * unmap, it's not worth the effort.
- */
- if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP))
- continue;
-
- ret = nouveau_bo_validate(nouveau_gem_object(obj),
- true, false);
- if (ret) {
- op = list_last_op(&bind_job->ops);
- goto unwind;
- }
+ ret = bind_lock_validate(job, exec, vme->num_fences);
+ drm_exec_retry_on_contention(exec);
+ if (ret) {
+ op = list_last_op(&bind_job->ops);
+ goto unwind;
}
}
@@ -1364,7 +1382,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
case OP_UNMAP_SPARSE:
case OP_MAP:
case OP_UNMAP:
- bind_link_gpuvas(op->ops, &op->new);
+ bind_link_gpuvas(op);
break;
default:
break;
@@ -1372,10 +1390,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job)
}
nouveau_uvmm_unlock(uvmm);
- spin_lock(&entity->job.list.lock);
- list_add(&bind_job->entry, &entity->job.list.head);
- spin_unlock(&entity->job.list.lock);
-
return 0;
unwind_continue:
@@ -1410,21 +1424,17 @@ unwind:
}
nouveau_uvmm_unlock(uvmm);
- drm_exec_fini(exec);
+ drm_gpuvm_exec_unlock(vme);
return ret;
}
static void
-nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job)
+nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job,
+ struct drm_gpuvm_exec *vme)
{
- struct drm_exec *exec = &job->exec;
- struct drm_gem_object *obj;
- unsigned long index;
-
- drm_exec_for_each_locked_object(exec, index, obj)
- dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage);
-
- drm_exec_fini(exec);
+ drm_gpuvm_exec_resv_add_fence(vme, job->done_fence,
+ job->resv_usage, job->resv_usage);
+ drm_gpuvm_exec_unlock(vme);
}
static struct dma_fence *
@@ -1462,14 +1472,11 @@ out:
}
static void
-nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
+nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job)
{
- struct nouveau_uvmm_bind_job *bind_job =
- container_of(work, struct nouveau_uvmm_bind_job, work);
- struct nouveau_job *job = &bind_job->base;
+ struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli);
- struct nouveau_sched_entity *entity = job->entity;
- struct bind_job_op *op, *next;
+ struct bind_job_op *op;
list_for_each_op(op, &bind_job->ops) {
struct drm_gem_object *obj = op->gem.obj;
@@ -1511,42 +1518,27 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work)
if (!IS_ERR_OR_NULL(op->ops))
drm_gpuva_ops_free(&uvmm->base, op->ops);
+ if (!IS_ERR_OR_NULL(op->vm_bo)) {
+ dma_resv_lock(obj->resv, NULL);
+ drm_gpuvm_bo_put(op->vm_bo);
+ dma_resv_unlock(obj->resv);
+ }
+
if (obj)
drm_gem_object_put(obj);
}
- spin_lock(&entity->job.list.lock);
- list_del(&bind_job->entry);
- spin_unlock(&entity->job.list.lock);
-
+ nouveau_job_done(job);
complete_all(&bind_job->complete);
- wake_up(&entity->job.wq);
-
- /* Remove and free ops after removing the bind job from the job list to
- * avoid races against bind_validate_map_sparse().
- */
- list_for_each_op_safe(op, next, &bind_job->ops) {
- list_del(&op->entry);
- kfree(op);
- }
nouveau_uvmm_bind_job_put(bind_job);
}
-static void
-nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job)
-{
- struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job);
- struct nouveau_sched_entity *entity = job->entity;
-
- nouveau_sched_entity_qwork(entity, &bind_job->work);
-}
-
static struct nouveau_job_ops nouveau_bind_job_ops = {
.submit = nouveau_uvmm_bind_job_submit,
.armed_submit = nouveau_uvmm_bind_job_armed_submit,
.run = nouveau_uvmm_bind_job_run,
- .free = nouveau_uvmm_bind_job_free_qwork,
+ .free = nouveau_uvmm_bind_job_cleanup,
};
static int
@@ -1607,7 +1599,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
return ret;
INIT_LIST_HEAD(&job->ops);
- INIT_LIST_HEAD(&job->entry);
for (i = 0; i < __args->op.count; i++) {
ret = bind_job_op_from_uop(&op, &__args->op.s[i]);
@@ -1618,11 +1609,12 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob,
}
init_completion(&job->complete);
- INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn);
- args.sched_entity = __args->sched_entity;
args.file_priv = __args->file_priv;
+ args.sched = __args->sched;
+ args.credits = 1;
+
args.in_sync.count = __args->in_sync.count;
args.in_sync.s = __args->in_sync.s;
@@ -1648,18 +1640,6 @@ err_free:
return ret;
}
-int
-nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
- void *data,
- struct drm_file *file_priv)
-{
- struct nouveau_cli *cli = nouveau_cli(file_priv);
- struct drm_nouveau_vm_init *init = data;
-
- return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr,
- init->kernel_managed_size);
-}
-
static int
nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args)
{
@@ -1760,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev,
if (ret)
return ret;
- args.sched_entity = &cli->sched_entity;
+ args.sched = &cli->sched;
args.file_priv = file_priv;
ret = nouveau_uvmm_vm_bind(&args);
@@ -1776,15 +1756,18 @@ void
nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem)
{
struct drm_gem_object *obj = &nvbo->bo.base;
+ struct drm_gpuvm_bo *vm_bo;
struct drm_gpuva *va;
dma_resv_assert_held(obj->resv);
- drm_gem_for_each_gpuva(va, obj) {
- struct nouveau_uvma *uvma = uvma_from_va(va);
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ drm_gpuvm_bo_for_each_va(va, vm_bo) {
+ struct nouveau_uvma *uvma = uvma_from_va(va);
- nouveau_uvma_map(uvma, mem);
- drm_gpuva_invalidate(va, false);
+ nouveau_uvma_map(uvma, mem);
+ drm_gpuva_invalidate(va, false);
+ }
}
}
@@ -1792,29 +1775,62 @@ void
nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo)
{
struct drm_gem_object *obj = &nvbo->bo.base;
+ struct drm_gpuvm_bo *vm_bo;
struct drm_gpuva *va;
dma_resv_assert_held(obj->resv);
- drm_gem_for_each_gpuva(va, obj) {
- struct nouveau_uvma *uvma = uvma_from_va(va);
+ drm_gem_for_each_gpuvm_bo(vm_bo, obj) {
+ drm_gpuvm_bo_for_each_va(va, vm_bo) {
+ struct nouveau_uvma *uvma = uvma_from_va(va);
- nouveau_uvma_unmap(uvma);
- drm_gpuva_invalidate(va, true);
+ nouveau_uvma_unmap(uvma);
+ drm_gpuva_invalidate(va, true);
+ }
}
}
+static void
+nouveau_uvmm_free(struct drm_gpuvm *gpuvm)
+{
+ struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm);
+
+ kfree(uvmm);
+}
+
+static int
+nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
+{
+ struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj);
+
+ return nouveau_bo_validate(nvbo, true, false);
+}
+
+static const struct drm_gpuvm_ops gpuvm_ops = {
+ .vm_free = nouveau_uvmm_free,
+ .vm_bo_validate = nouveau_uvmm_bo_validate,
+};
+
int
-nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size)
+nouveau_uvmm_ioctl_vm_init(struct drm_device *dev,
+ void *data,
+ struct drm_file *file_priv)
{
+ struct nouveau_uvmm *uvmm;
+ struct nouveau_cli *cli = nouveau_cli(file_priv);
+ struct drm_device *drm = cli->drm->dev;
+ struct drm_gem_object *r_obj;
+ struct drm_nouveau_vm_init *init = data;
+ u64 kernel_managed_end;
int ret;
- u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size;
- mutex_init(&uvmm->mutex);
- dma_resv_init(&uvmm->resv);
- mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
- mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
+ if (check_add_overflow(init->kernel_managed_addr,
+ init->kernel_managed_size,
+ &kernel_managed_end))
+ return -EINVAL;
+
+ if (kernel_managed_end > NOUVEAU_VA_SPACE_END)
+ return -EINVAL;
mutex_lock(&cli->mutex);
@@ -1823,39 +1839,48 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
goto out_unlock;
}
- if (kernel_managed_end <= kernel_managed_addr) {
- ret = -EINVAL;
+ uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL);
+ if (!uvmm) {
+ ret = -ENOMEM;
goto out_unlock;
}
- if (kernel_managed_end > NOUVEAU_VA_SPACE_END) {
- ret = -EINVAL;
+ r_obj = drm_gpuvm_resv_object_alloc(drm);
+ if (!r_obj) {
+ kfree(uvmm);
+ ret = -ENOMEM;
goto out_unlock;
}
- uvmm->kernel_managed_addr = kernel_managed_addr;
- uvmm->kernel_managed_size = kernel_managed_size;
+ mutex_init(&uvmm->mutex);
+ mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN);
+ mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex);
- drm_gpuvm_init(&uvmm->base, cli->name,
+ drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj,
NOUVEAU_VA_SPACE_START,
NOUVEAU_VA_SPACE_END,
- kernel_managed_addr, kernel_managed_size,
- NULL);
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
+ &gpuvm_ops);
+ /* GPUVM takes care from here on. */
+ drm_gem_object_put(r_obj);
ret = nvif_vmm_ctor(&cli->mmu, "uvmm",
cli->vmm.vmm.object.oclass, RAW,
- kernel_managed_addr, kernel_managed_size,
- NULL, 0, &cli->uvmm.vmm.vmm);
+ init->kernel_managed_addr,
+ init->kernel_managed_size,
+ NULL, 0, &uvmm->vmm.vmm);
if (ret)
- goto out_free_gpuva_mgr;
+ goto out_gpuvm_fini;
- cli->uvmm.vmm.cli = cli;
+ uvmm->vmm.cli = cli;
+ cli->uvmm.ptr = uvmm;
mutex_unlock(&cli->mutex);
return 0;
-out_free_gpuva_mgr:
- drm_gpuvm_destroy(&uvmm->base);
+out_gpuvm_fini:
+ drm_gpuvm_put(&uvmm->base);
out_unlock:
mutex_unlock(&cli->mutex);
return ret;
@@ -1867,15 +1892,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
MA_STATE(mas, &uvmm->region_mt, 0, 0);
struct nouveau_uvma_region *reg;
struct nouveau_cli *cli = uvmm->vmm.cli;
- struct nouveau_sched_entity *entity = &cli->sched_entity;
struct drm_gpuva *va, *next;
- if (!cli)
- return;
-
- rmb(); /* for list_empty to work without lock */
- wait_event(entity->job.wq, list_empty(&entity->job.list.head));
-
nouveau_uvmm_lock(uvmm);
drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) {
struct nouveau_uvma *uvma = uvma_from_va(va);
@@ -1910,8 +1928,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm)
mutex_lock(&cli->mutex);
nouveau_vmm_fini(&uvmm->vmm);
- drm_gpuvm_destroy(&uvmm->base);
+ drm_gpuvm_put(&uvmm->base);
mutex_unlock(&cli->mutex);
-
- dma_resv_fini(&uvmm->resv);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
index a308c59760a5..9d3c348581eb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h
@@ -12,12 +12,6 @@ struct nouveau_uvmm {
struct nouveau_vmm vmm;
struct maple_tree region_mt;
struct mutex mutex;
- struct dma_resv resv;
-
- u64 kernel_managed_addr;
- u64 kernel_managed_size;
-
- bool disabled;
};
struct nouveau_uvma_region {
@@ -50,8 +44,6 @@ struct nouveau_uvmm_bind_job {
struct nouveau_job base;
struct kref kref;
- struct list_head entry;
- struct work_struct work;
struct completion complete;
/* struct bind_job_op */
@@ -60,7 +52,7 @@ struct nouveau_uvmm_bind_job {
struct nouveau_uvmm_bind_job_args {
struct drm_file *file_priv;
- struct nouveau_sched_entity *sched_entity;
+ struct nouveau_sched *sched;
unsigned int flags;
@@ -82,8 +74,6 @@ struct nouveau_uvmm_bind_job_args {
#define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base)
-int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli,
- u64 kernel_managed_addr, u64 kernel_managed_size);
void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm);
void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem);
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 5b71a5a5cd85..cdbc75e3d1f6 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -39,7 +39,7 @@ struct nv04_fence_priv {
static int
nv04_fence_emit(struct nouveau_fence *fence)
{
- struct nvif_push *push = fence->channel->chan.push;
+ struct nvif_push *push = unrcu_pointer(fence->channel)->chan.push;
int ret = PUSH_WAIT(push, 2);
if (ret == 0) {
PUSH_NVSQ(push, NV_SW, 0x0150, fence->base.seqno);
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c
index a6c877135598..61fed7792e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/event.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c
@@ -81,17 +81,17 @@ nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
static void
nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_del_init(&ntfy->head);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
{
- spin_lock_irq(&ntfy->event->list_lock);
+ write_lock_irq(&ntfy->event->list_lock);
list_add_tail(&ntfy->head, &ntfy->event->ntfy);
- spin_unlock_irq(&ntfy->event->list_lock);
+ write_unlock_irq(&ntfy->event->list_lock);
}
static void
@@ -176,7 +176,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
return;
nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
- spin_lock_irqsave(&event->list_lock, flags);
+ read_lock_irqsave(&event->list_lock, flags);
list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
if (ntfy->id == id && ntfy->bits & bits) {
@@ -185,7 +185,7 @@ nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
}
}
- spin_unlock_irqrestore(&event->list_lock, flags);
+ read_unlock_irqrestore(&event->list_lock, flags);
}
void
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index 457ec5db794d..b24eb1e560bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -209,7 +209,7 @@ nvkm_disp_dtor(struct nvkm_engine *engine)
nvkm_head_del(&head);
}
- if (disp->func->dtor)
+ if (disp->func && disp->func->dtor)
disp->func->dtor(disp);
return data;
@@ -243,8 +243,10 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device,
spin_lock_init(&disp->client.lock);
ret = nvkm_engine_ctor(&nvkm_disp, device, type, inst, true, &disp->engine);
- if (ret)
+ if (ret) {
+ disp->func = NULL;
return ret;
+ }
if (func->super) {
disp->super.wq = create_singlethread_workqueue("nvkm-disp");
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
index 298035070b3a..6a0a4d3b8902 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/r535.c
@@ -282,7 +282,7 @@ r535_sor_bl_get(struct nvkm_ior *sor)
{
struct nvkm_disp *disp = sor->disp;
NV0073_CTRL_SPECIFIC_BACKLIGHT_BRIGHTNESS_PARAMS *ctrl;
- int lvl;
+ int ret, lvl;
ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
NV0073_CTRL_CMD_SPECIFIC_GET_BACKLIGHT_BRIGHTNESS,
@@ -292,9 +292,11 @@ r535_sor_bl_get(struct nvkm_ior *sor)
ctrl->displayId = BIT(sor->asy.outp->index);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
lvl = ctrl->brightness;
nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -649,9 +651,11 @@ r535_conn_new(struct nvkm_disp *disp, u32 id)
ctrl->subDeviceInstance = 0;
ctrl->displayId = BIT(id);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return (void *)ctrl;
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ERR_PTR(ret);
+ }
list_for_each_entry(conn, &disp->conns, head) {
if (conn->index == ctrl->data[0].index) {
@@ -686,7 +690,7 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
struct nvkm_disp *disp = outp->disp;
struct nvkm_ior *ior;
NV0073_CTRL_DFP_ASSIGN_SOR_PARAMS *ctrl;
- int or;
+ int ret, or;
ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
NV0073_CTRL_CMD_DFP_ASSIGN_SOR, sizeof(*ctrl));
@@ -699,9 +703,11 @@ r535_outp_acquire(struct nvkm_outp *outp, bool hda)
if (hda)
ctrl->flags |= NVDEF(NV0073_CTRL, DFP_ASSIGN_SOR_FLAGS, AUDIO, OPTIMAL);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
for (or = 0; or < ARRAY_SIZE(ctrl->sorAssignListWithTag); or++) {
if (ctrl->sorAssignListWithTag[or].displayMask & BIT(outp->index)) {
@@ -727,6 +733,7 @@ static int
r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid)
{
NV0073_CTRL_SYSTEM_GET_ACTIVE_PARAMS *ctrl;
+ int ret;
ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
NV0073_CTRL_CMD_SYSTEM_GET_ACTIVE, sizeof(*ctrl));
@@ -736,9 +743,11 @@ r535_disp_head_displayid(struct nvkm_disp *disp, int head, u32 *displayid)
ctrl->subDeviceInstance = 0;
ctrl->head = head;
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
*displayid = ctrl->displayId;
nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -772,9 +781,11 @@ r535_outp_inherit(struct nvkm_outp *outp)
ctrl->subDeviceInstance = 0;
ctrl->displayId = displayid;
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
return NULL;
+ }
id = ctrl->index;
proto = ctrl->protocol;
@@ -825,6 +836,7 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp)
{
NV0073_CTRL_DFP_GET_INFO_PARAMS *ctrl;
struct nvkm_disp *disp = outp->disp;
+ int ret;
ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DFP_GET_INFO, sizeof(*ctrl));
if (IS_ERR(ctrl))
@@ -832,9 +844,11 @@ r535_outp_dfp_get_info(struct nvkm_outp *outp)
ctrl->displayId = BIT(outp->index);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
nvkm_debug(&disp->engine.subdev, "DFP %08x: flags:%08x flags2:%08x\n",
ctrl->displayId, ctrl->flags, ctrl->flags2);
@@ -858,9 +872,11 @@ r535_outp_detect(struct nvkm_outp *outp)
ctrl->subDeviceInstance = 0;
ctrl->displayMask = BIT(outp->index);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
if (ctrl->displayMask & BIT(outp->index)) {
ret = r535_outp_dfp_get_info(outp);
@@ -895,6 +911,7 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid)
{
NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID_PARAMS *ctrl;
struct nvkm_disp *disp = outp->disp;
+ int ret;
ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom,
NV0073_CTRL_CMD_DP_TOPOLOGY_ALLOCATE_DISPLAYID,
@@ -904,9 +921,11 @@ r535_dp_mst_id_get(struct nvkm_outp *outp, u32 *pid)
ctrl->subDeviceInstance = 0;
ctrl->displayId = BIT(outp->index);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
*pid = ctrl->displayIdAssigned;
nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
@@ -938,38 +957,60 @@ r535_dp_train_target(struct nvkm_outp *outp, u8 target, bool mst, u8 link_nr, u8
{
struct nvkm_disp *disp = outp->disp;
NV0073_CTRL_DP_CTRL_PARAMS *ctrl;
- int ret;
-
- ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ int ret, retries;
+ u32 cmd, data;
- ctrl->subDeviceInstance = 0;
- ctrl->displayId = BIT(outp->index);
- ctrl->cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
- NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
- NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
- ctrl->data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
- NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
- NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
+ cmd = NVDEF(NV0073_CTRL, DP_CMD, SET_LANE_COUNT, TRUE) |
+ NVDEF(NV0073_CTRL, DP_CMD, SET_LINK_BW, TRUE) |
+ NVDEF(NV0073_CTRL, DP_CMD, TRAIN_PHY_REPEATER, YES);
+ data = NVVAL(NV0073_CTRL, DP_DATA, SET_LANE_COUNT, link_nr) |
+ NVVAL(NV0073_CTRL, DP_DATA, SET_LINK_BW, link_bw) |
+ NVVAL(NV0073_CTRL, DP_DATA, TARGET, target);
if (mst)
- ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM);
+ cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_FORMAT_MODE, MULTI_STREAM);
if (outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_ENHANCED_FRAME_CAP)
- ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE);
+ cmd |= NVDEF(NV0073_CTRL, DP_CMD, SET_ENHANCED_FRAMING, TRUE);
if (target == 0 &&
(outp->dp.dpcd[DPCD_RC02] & 0x20) &&
!(outp->dp.dpcd[DPCD_RC03] & DPCD_RC03_TPS4_SUPPORTED))
- ctrl->cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES);
+ cmd |= NVDEF(NV0073_CTRL, DP_CMD, POST_LT_ADJ_REQ_GRANTED, YES);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ /* We should retry up to 3 times, but only if GSP asks politely */
+ for (retries = 0; retries < 3; ++retries) {
+ ctrl = nvkm_gsp_rm_ctrl_get(&disp->rm.objcom, NV0073_CTRL_CMD_DP_CTRL,
+ sizeof(*ctrl));
+ if (IS_ERR(ctrl))
+ return PTR_ERR(ctrl);
+
+ ctrl->subDeviceInstance = 0;
+ ctrl->displayId = BIT(outp->index);
+ ctrl->retryTimeMs = 0;
+ ctrl->cmd = cmd;
+ ctrl->data = data;
+
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret == -EAGAIN && ctrl->retryTimeMs) {
+ /*
+ * Device (likely an eDP panel) isn't ready yet, wait for the time specified
+ * by GSP before retrying again
+ */
+ nvkm_debug(&disp->engine.subdev,
+ "Waiting %dms for GSP LT panel delay before retrying\n",
+ ctrl->retryTimeMs);
+ msleep(ctrl->retryTimeMs);
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ } else {
+ /* GSP didn't say to retry, or we were successful */
+ if (ctrl->err)
+ ret = -EIO;
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ break;
+ }
+ }
- ret = ctrl->err ? -EIO : 0;
- nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
return ret;
}
@@ -1036,9 +1077,11 @@ r535_dp_aux_xfer(struct nvkm_outp *outp, u8 type, u32 addr, u8 *data, u8 *psize)
ctrl->size = !ctrl->bAddrOnly ? (size - 1) : 0;
memcpy(ctrl->data, data, size);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
return PTR_ERR(ctrl);
+ }
memcpy(data, ctrl->data, size);
*psize = ctrl->size;
@@ -1111,10 +1154,13 @@ r535_tmds_edid_get(struct nvkm_outp *outp, u8 *data, u16 *psize)
ctrl->subDeviceInstance = 0;
ctrl->displayId = BIT(outp->index);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
+ ret = -E2BIG;
if (ctrl->bufferSize <= *psize) {
memcpy(data, ctrl->edidBuffer, ctrl->bufferSize);
*psize = ctrl->bufferSize;
@@ -1153,9 +1199,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id)
ctrl->subDeviceInstance = 0;
ctrl->displayId = BIT(id);
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
switch (ctrl->type) {
case NV0073_CTRL_SPECIFIC_OR_TYPE_NONE:
@@ -1229,9 +1277,11 @@ r535_outp_new(struct nvkm_disp *disp, u32 id)
ctrl->sorIndex = ~0;
- ctrl = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, ctrl, sizeof(*ctrl));
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&disp->rm.objcom, &ctrl, sizeof(*ctrl));
+ if (ret) {
+ nvkm_gsp_rm_ctrl_done(&disp->rm.objcom, ctrl);
+ return ret;
+ }
switch (NVVAL_GET(ctrl->maxLinkRate, NV0073_CTRL_CMD, DP_GET_CAPS, MAX_LINK_RATE)) {
case NV0073_CTRL_CMD_DP_GET_CAPS_MAX_LINK_RATE_1_62:
@@ -1465,8 +1515,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
bool nvhg = acpi_check_dsm(handle, &NVHG_DSM_GUID, NVHG_DSM_REV,
1ULL << 0x00000014);
- printk(KERN_ERR "bl: nbci:%d nvhg:%d\n", nbci, nvhg);
-
if (nbci || nvhg) {
union acpi_object argv4 = {
.buffer.type = ACPI_TYPE_BUFFER,
@@ -1479,9 +1527,6 @@ r535_disp_oneinit(struct nvkm_disp *disp)
if (!obj) {
acpi_handle_info(handle, "failed to evaluate _DSM\n");
} else {
- printk(KERN_ERR "bl: obj type %d\n", obj->type);
- printk(KERN_ERR "bl: obj len %d\n", obj->package.count);
-
for (int i = 0; i < obj->package.count; i++) {
union acpi_object *elt = &obj->package.elements[i];
u32 size;
@@ -1491,12 +1536,10 @@ r535_disp_oneinit(struct nvkm_disp *disp)
else
size = 4;
- printk(KERN_ERR "elt %03d: type %d size %d\n", i, elt->type, size);
memcpy(&ctrl->backLightData[ctrl->backLightDataSize], &elt->integer.value, size);
ctrl->backLightDataSize += size;
}
- printk(KERN_ERR "bl: data size %d\n", ctrl->backLightDataSize);
ctrl->status = 0;
ACPI_FREE(obj);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
index e4279f1772a1..377d0e0cef84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
@@ -385,7 +385,7 @@ nvkm_uoutp_mthd_inherit(struct nvkm_outp *outp, void *argv, u32 argc)
/* Ensure an ior is hooked up to this outp already */
ior = outp->func->inherit(outp);
- if (!ior)
+ if (!ior || !ior->arm.head)
return -ENODEV;
/* With iors, there will be a separate output path for each type of connector - and all of
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index 87a62d4ff4bd..7d4716dcd512 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -24,7 +24,6 @@
#include "chan.h"
#include "chid.h"
#include "cgrp.h"
-#include "chid.h"
#include "runl.h"
#include "priv.h"
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
index c8ce7ff18713..e74493a4569e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
@@ -550,6 +550,10 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
struct nvkm_engn *engn = list_first_entry(&runl->engns, typeof(*engn), head);
runl->nonstall.vector = engn->func->nonstall(engn);
+
+ /* if no nonstall vector just keep going */
+ if (runl->nonstall.vector == -1)
+ continue;
if (runl->nonstall.vector < 0) {
RUNL_ERROR(runl, "nonstall %d", runl->nonstall.vector);
return runl->nonstall.vector;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
index 3adbb05ff587..3454c7d29502 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/r535.c
@@ -242,6 +242,7 @@ r535_chan_id_put(struct nvkm_chan *chan)
nvkm_memory_unref(&userd->mem);
nvkm_chid_put(runl->chid, userd->chid, &chan->cgrp->lock);
list_del(&userd->head);
+ kfree(userd);
}
break;
@@ -350,7 +351,7 @@ r535_engn_nonstall(struct nvkm_engn *engn)
int ret;
ret = nvkm_gsp_intr_nonstall(subdev->device->gsp, subdev->type, subdev->inst);
- WARN_ON(ret < 0);
+ WARN_ON(ret == -ENOENT);
return ret;
}
@@ -539,7 +540,7 @@ r535_fifo_runl_ctor(struct nvkm_fifo *fifo)
struct nvkm_runl *runl;
struct nvkm_engn *engn;
u32 cgids = 2048;
- u32 chids = 2048 / CHID_PER_USERD;
+ u32 chids = 2048;
int ret;
NV2080_CTRL_FIFO_GET_DEVICE_INFO_TABLE_PARAMS *ctrl;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
index 04bceaa28a19..da1bebb896f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
@@ -25,12 +25,8 @@ int
nvkm_gsp_intr_nonstall(struct nvkm_gsp *gsp, enum nvkm_subdev_type type, int inst)
{
for (int i = 0; i < gsp->intr_nr; i++) {
- if (gsp->intr[i].type == type && gsp->intr[i].inst == inst) {
- if (gsp->intr[i].nonstall != ~0)
- return gsp->intr[i].nonstall;
-
- return -EINVAL;
- }
+ if (gsp->intr[i].type == type && gsp->intr[i].inst == inst)
+ return gsp->intr[i].nonstall;
}
return -ENOENT;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
index e31f9641114b..9ee58e2a0eb2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c
@@ -70,6 +70,20 @@ struct r535_gsp_msg {
#define GSP_MSG_HDR_SIZE offsetof(struct r535_gsp_msg, data)
+static int
+r535_rpc_status_to_errno(uint32_t rpc_status)
+{
+ switch (rpc_status) {
+ case 0x55: /* NV_ERR_NOT_READY */
+ case 0x66: /* NV_ERR_TIMEOUT_RETRY */
+ return -EAGAIN;
+ case 0x51: /* NV_ERR_NO_MEMORY */
+ return -ENOMEM;
+ default:
+ return -EINVAL;
+ }
+}
+
static void *
r535_gsp_msgq_wait(struct nvkm_gsp *gsp, u32 repc, u32 *prepc, int *ptime)
{
@@ -298,7 +312,8 @@ retry:
struct nvkm_gsp_msgq_ntfy *ntfy = &gsp->msgq.ntfy[i];
if (ntfy->fn == msg->function) {
- ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg));
+ if (ntfy->func)
+ ntfy->func(ntfy->priv, ntfy->fn, msg->data, msg->length - sizeof(*msg));
break;
}
}
@@ -365,10 +380,8 @@ r535_gsp_rpc_send(struct nvkm_gsp *gsp, void *argv, bool wait, u32 repc)
}
ret = r535_gsp_cmdq_push(gsp, rpc);
- if (ret) {
- mutex_unlock(&gsp->cmdq.mutex);
+ if (ret)
return ERR_PTR(ret);
- }
if (wait) {
msg = r535_gsp_msg_recv(gsp, fn, repc);
@@ -585,14 +598,14 @@ r535_gsp_rpc_rm_alloc_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
return rpc;
if (rpc->status) {
- nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status);
- ret = ERR_PTR(-EINVAL);
+ ret = ERR_PTR(r535_rpc_status_to_errno(rpc->status));
+ if (PTR_ERR(ret) != -EAGAIN)
+ nvkm_error(&gsp->subdev, "RM_ALLOC: 0x%x\n", rpc->status);
} else {
ret = repc ? rpc->params : NULL;
}
- if (IS_ERR_OR_NULL(ret))
- nvkm_gsp_rpc_done(gsp, rpc);
+ nvkm_gsp_rpc_done(gsp, rpc);
return ret;
}
@@ -625,29 +638,34 @@ r535_gsp_rpc_rm_ctrl_done(struct nvkm_gsp_object *object, void *repv)
{
rpc_gsp_rm_control_v03_00 *rpc = container_of(repv, typeof(*rpc), params);
+ if (!repv)
+ return;
nvkm_gsp_rpc_done(object->client->gsp, rpc);
}
-static void *
-r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void *argv, u32 repc)
+static int
+r535_gsp_rpc_rm_ctrl_push(struct nvkm_gsp_object *object, void **argv, u32 repc)
{
- rpc_gsp_rm_control_v03_00 *rpc = container_of(argv, typeof(*rpc), params);
+ rpc_gsp_rm_control_v03_00 *rpc = container_of((*argv), typeof(*rpc), params);
struct nvkm_gsp *gsp = object->client->gsp;
- void *ret;
+ int ret = 0;
rpc = nvkm_gsp_rpc_push(gsp, rpc, true, repc);
- if (IS_ERR_OR_NULL(rpc))
- return rpc;
+ if (IS_ERR_OR_NULL(rpc)) {
+ *argv = NULL;
+ return PTR_ERR(rpc);
+ }
if (rpc->status) {
- nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n",
- object->client->object.handle, object->handle, rpc->cmd, rpc->status);
- ret = ERR_PTR(-EINVAL);
- } else {
- ret = repc ? rpc->params : NULL;
+ ret = r535_rpc_status_to_errno(rpc->status);
+ if (ret != -EAGAIN)
+ nvkm_error(&gsp->subdev, "cli:0x%08x obj:0x%08x ctrl cmd:0x%08x failed: 0x%08x\n",
+ object->client->object.handle, object->handle, rpc->cmd, rpc->status);
}
- if (IS_ERR_OR_NULL(ret))
+ if (repc)
+ *argv = rpc->params;
+ else
nvkm_gsp_rpc_done(gsp, rpc);
return ret;
@@ -689,8 +707,8 @@ r535_gsp_rpc_get(struct nvkm_gsp *gsp, u32 fn, u32 argc)
struct nvfw_gsp_rpc *rpc;
rpc = r535_gsp_cmdq_get(gsp, ALIGN(sizeof(*rpc) + argc, sizeof(u64)));
- if (!rpc)
- return NULL;
+ if (IS_ERR(rpc))
+ return ERR_CAST(rpc);
rpc->header_version = 0x03000000;
rpc->signature = ('C' << 24) | ('P' << 16) | ('R' << 8) | 'V';
@@ -845,9 +863,11 @@ r535_gsp_intr_get_table(struct nvkm_gsp *gsp)
if (IS_ERR(ctrl))
return PTR_ERR(ctrl);
- ctrl = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, ctrl, sizeof(*ctrl));
- if (WARN_ON(IS_ERR(ctrl)))
- return PTR_ERR(ctrl);
+ ret = nvkm_gsp_rm_ctrl_push(&gsp->internal.device.subdevice, &ctrl, sizeof(*ctrl));
+ if (WARN_ON(ret)) {
+ nvkm_gsp_rm_ctrl_done(&gsp->internal.device.subdevice, ctrl);
+ return ret;
+ }
for (unsigned i = 0; i < ctrl->tableLen; i++) {
enum nvkm_subdev_type type;
@@ -1048,7 +1068,7 @@ r535_gsp_rpc_set_registry(struct nvkm_gsp *gsp)
char *strings;
int str_offset;
int i;
- size_t rpc_size = sizeof(*rpc) + sizeof(rpc->entries[0]) * NV_GSP_REG_NUM_ENTRIES;
+ size_t rpc_size = struct_size(rpc, entries, NV_GSP_REG_NUM_ENTRIES);
/* add strings + null terminator */
for (i = 0; i < NV_GSP_REG_NUM_ENTRIES; i++)
@@ -1101,16 +1121,12 @@ r535_gsp_acpi_caps(acpi_handle handle, CAPS_METHOD_DATA *caps)
if (!obj)
return;
- printk(KERN_ERR "nvop: obj type %d\n", obj->type);
- printk(KERN_ERR "nvop: obj len %d\n", obj->buffer.length);
-
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
return;
caps->status = 0;
caps->optimusCaps = *(u32 *)obj->buffer.pointer;
- printk(KERN_ERR "nvop: caps %08x\n", caps->optimusCaps);
ACPI_FREE(obj);
@@ -1137,9 +1153,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
if (!obj)
return;
- printk(KERN_ERR "jt: obj type %d\n", obj->type);
- printk(KERN_ERR "jt: obj len %d\n", obj->buffer.length);
-
if (WARN_ON(obj->type != ACPI_TYPE_BUFFER) ||
WARN_ON(obj->buffer.length != 4))
return;
@@ -1148,7 +1161,6 @@ r535_gsp_acpi_jt(acpi_handle handle, JT_METHOD_DATA *jt)
jt->jtCaps = *(u32 *)obj->buffer.pointer;
jt->jtRevId = (jt->jtCaps & 0xfff00000) >> 20;
jt->bSBIOSCaps = 0;
- printk(KERN_ERR "jt: caps %08x rev:%04x\n", jt->jtCaps, jt->jtRevId);
ACPI_FREE(obj);
@@ -1159,7 +1171,9 @@ static void
r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
MUX_METHOD_DATA_ELEMENT *part)
{
- acpi_handle iter = NULL, handle_mux;
+ union acpi_object mux_arg = { ACPI_TYPE_INTEGER };
+ struct acpi_object_list input = { 1, &mux_arg };
+ acpi_handle iter = NULL, handle_mux = NULL;
acpi_status status;
unsigned long long value;
@@ -1181,14 +1195,18 @@ r535_gsp_acpi_mux_id(acpi_handle handle, u32 id, MUX_METHOD_DATA_ELEMENT *mode,
if (!handle_mux)
return;
- status = acpi_evaluate_integer(handle_mux, "MXDM", NULL, &value);
+ /* I -think- 0 means "acquire" according to nvidia's driver source */
+ input.pointer->integer.type = ACPI_TYPE_INTEGER;
+ input.pointer->integer.value = 0;
+
+ status = acpi_evaluate_integer(handle_mux, "MXDM", &input, &value);
if (ACPI_SUCCESS(status)) {
mode->acpiId = id;
mode->mode = value;
mode->status = 0;
}
- status = acpi_evaluate_integer(handle_mux, "MXDS", NULL, &value);
+ status = acpi_evaluate_integer(handle_mux, "MXDS", &input, &value);
if (ACPI_SUCCESS(status)) {
part->acpiId = id;
part->mode = value;
@@ -1234,8 +1252,8 @@ r535_gsp_acpi_dod(acpi_handle handle, DOD_METHOD_DATA *dod)
dod->acpiIdListLen += sizeof(dod->acpiIdList[0]);
}
- printk(KERN_ERR "_DOD: ok! len:%d\n", dod->acpiIdListLen);
dod->status = 0;
+ kfree(output.pointer);
}
#endif
@@ -1379,6 +1397,13 @@ r535_gsp_msg_post_event(void *priv, u32 fn, void *repv, u32 repc)
return 0;
}
+/**
+ * r535_gsp_msg_run_cpu_sequencer() -- process I/O commands from the GSP
+ *
+ * The GSP sequencer is a list of I/O commands that the GSP can send to
+ * the driver to perform for various purposes. The most common usage is to
+ * perform a special mid-initialization reset.
+ */
static int
r535_gsp_msg_run_cpu_sequencer(void *priv, u32 fn, void *repv, u32 repc)
{
@@ -1718,6 +1743,23 @@ r535_gsp_libos_id8(const char *name)
return id;
}
+/**
+ * create_pte_array() - creates a PTE array of a physically contiguous buffer
+ * @ptes: pointer to the array
+ * @addr: base address of physically contiguous buffer (GSP_PAGE_SIZE aligned)
+ * @size: size of the buffer
+ *
+ * GSP-RM sometimes expects physically-contiguous buffers to have an array of
+ * "PTEs" for each page in that buffer. Although in theory that allows for
+ * the buffer to be physically discontiguous, GSP-RM does not currently
+ * support that.
+ *
+ * In this case, the PTEs are DMA addresses of each page of the buffer. Since
+ * the buffer is physically contiguous, calculating all the PTEs is simple
+ * math.
+ *
+ * See memdescGetPhysAddrsForGpu()
+ */
static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
{
unsigned int num_pages = DIV_ROUND_UP_ULL(size, GSP_PAGE_SIZE);
@@ -1727,6 +1769,35 @@ static void create_pte_array(u64 *ptes, dma_addr_t addr, size_t size)
ptes[i] = (u64)addr + (i << GSP_PAGE_SHIFT);
}
+/**
+ * r535_gsp_libos_init() -- create the libos arguments structure
+ *
+ * The logging buffers are byte queues that contain encoded printf-like
+ * messages from GSP-RM. They need to be decoded by a special application
+ * that can parse the buffers.
+ *
+ * The 'loginit' buffer contains logs from early GSP-RM init and
+ * exception dumps. The 'logrm' buffer contains the subsequent logs. Both are
+ * written to directly by GSP-RM and can be any multiple of GSP_PAGE_SIZE.
+ *
+ * The physical address map for the log buffer is stored in the buffer
+ * itself, starting with offset 1. Offset 0 contains the "put" pointer.
+ *
+ * The GSP only understands 4K pages (GSP_PAGE_SIZE), so even if the kernel is
+ * configured for a larger page size (e.g. 64K pages), we need to give
+ * the GSP an array of 4K pages. Fortunately, since the buffer is
+ * physically contiguous, it's simple math to calculate the addresses.
+ *
+ * The buffers must be a multiple of GSP_PAGE_SIZE. GSP-RM also currently
+ * ignores the @kind field for LOGINIT, LOGINTR, and LOGRM, but expects the
+ * buffers to be physically contiguous anyway.
+ *
+ * The memory allocated for the arguments must remain until the GSP sends the
+ * init_done RPC.
+ *
+ * See _kgspInitLibosLoggingStructures (allocates memory for buffers)
+ * See kgspSetupLibosInitArgs_IMPL (creates pLibosInitArgs[] array)
+ */
static int
r535_gsp_libos_init(struct nvkm_gsp *gsp)
{
@@ -1837,6 +1908,35 @@ nvkm_gsp_radix3_dtor(struct nvkm_gsp *gsp, struct nvkm_gsp_radix3 *rx3)
nvkm_gsp_mem_dtor(gsp, &rx3->mem[i]);
}
+/**
+ * nvkm_gsp_radix3_sg - build a radix3 table from a S/G list
+ *
+ * The GSP uses a three-level page table, called radix3, to map the firmware.
+ * Each 64-bit "pointer" in the table is either the bus address of an entry in
+ * the next table (for levels 0 and 1) or the bus address of the next page in
+ * the GSP firmware image itself.
+ *
+ * Level 0 contains a single entry in one page that points to the first page
+ * of level 1.
+ *
+ * Level 1, since it's also only one page in size, contains up to 512 entries,
+ * one for each page in Level 2.
+ *
+ * Level 2 can be up to 512 pages in size, and each of those entries points to
+ * the next page of the firmware image. Since there can be up to 512*512
+ * pages, that limits the size of the firmware to 512*512*GSP_PAGE_SIZE = 1GB.
+ *
+ * Internally, the GSP has its window into system memory, but the base
+ * physical address of the aperture is not 0. In fact, it varies depending on
+ * the GPU architecture. Since the GPU is a PCI device, this window is
+ * accessed via DMA and is therefore bound by IOMMU translation. The end
+ * result is that GSP-RM must translate the bus addresses in the table to GSP
+ * physical addresses. All this should happen transparently.
+ *
+ * Returns 0 on success, or negative error code
+ *
+ * See kgspCreateRadix3_IMPL
+ */
static int
nvkm_gsp_radix3_sg(struct nvkm_device *device, struct sg_table *sgt, u64 size,
struct nvkm_gsp_radix3 *rx3)
@@ -2106,7 +2206,9 @@ r535_gsp_oneinit(struct nvkm_gsp *gsp)
r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_MMU_FAULT_QUEUED,
r535_gsp_msg_mmu_fault_queued, gsp);
r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_OS_ERROR_LOG, r535_gsp_msg_os_error_log, gsp);
-
+ r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_PERF_BRIDGELESS_INFO_UPDATE, NULL, NULL);
+ r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_UCODE_LIBOS_PRINT, NULL, NULL);
+ r535_gsp_msg_ntfy_add(gsp, NV_VGPU_MSG_EVENT_GSP_SEND_USER_SHARED_DATA, NULL, NULL);
ret = r535_gsp_rm_boot_ctor(gsp);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 1b811d6972a1..201022ae9214 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -49,14 +49,14 @@
#include <subdev/mmu.h>
struct gk20a_instobj {
- struct nvkm_memory memory;
+ struct nvkm_instobj base;
struct nvkm_mm_node *mn;
struct gk20a_instmem *imem;
/* CPU mapping */
u32 *vaddr;
};
-#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, memory)
+#define gk20a_instobj(p) container_of((p), struct gk20a_instobj, base.memory)
/*
* Used for objects allocated using the DMA API
@@ -148,7 +148,7 @@ gk20a_instobj_iommu_recycle_vaddr(struct gk20a_instobj_iommu *obj)
list_del(&obj->vaddr_node);
vunmap(obj->base.vaddr);
obj->base.vaddr = NULL;
- imem->vaddr_use -= nvkm_memory_size(&obj->base.memory);
+ imem->vaddr_use -= nvkm_memory_size(&obj->base.base.memory);
nvkm_debug(&imem->base.subdev, "vaddr used: %x/%x\n", imem->vaddr_use,
imem->vaddr_max);
}
@@ -283,7 +283,7 @@ gk20a_instobj_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
{
struct gk20a_instobj *node = gk20a_instobj(memory);
struct nvkm_vmm_map map = {
- .memory = &node->memory,
+ .memory = &node->base.memory,
.offset = offset,
.mem = node->mn,
};
@@ -391,8 +391,8 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
return -ENOMEM;
*_node = &node->base;
- nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.memory);
- node->base.memory.ptrs = &gk20a_instobj_ptrs;
+ nvkm_memory_ctor(&gk20a_instobj_func_dma, &node->base.base.memory);
+ node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
node->base.vaddr = dma_alloc_attrs(dev, npages << PAGE_SHIFT,
&node->handle, GFP_KERNEL,
@@ -438,8 +438,8 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
*_node = &node->base;
node->dma_addrs = (void *)(node->pages + npages);
- nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.memory);
- node->base.memory.ptrs = &gk20a_instobj_ptrs;
+ nvkm_memory_ctor(&gk20a_instobj_func_iommu, &node->base.base.memory);
+ node->base.base.memory.ptrs = &gk20a_instobj_ptrs;
/* Allocate backing memory */
for (i = 0; i < npages; i++) {
@@ -533,7 +533,7 @@ gk20a_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
else
ret = gk20a_instobj_ctor_dma(imem, size >> PAGE_SHIFT,
align, &node);
- *pmemory = node ? &node->memory : NULL;
+ *pmemory = node ? &node->base.memory : NULL;
if (ret)
return ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
index e34bc6076401..8379e72d77ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu102.c
@@ -31,7 +31,7 @@ tu102_vmm_flush(struct nvkm_vmm *vmm, int depth)
type |= 0x00000001; /* PAGE_ALL */
if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR]))
- type |= 0x00000004; /* HUB_ONLY */
+ type |= 0x00000006; /* HUB_ONLY | ALL PDB (hack) */
mutex_lock(&vmm->mmu->mutex);