Diffstat (limited to 'drivers/gpu/drm/xe/xe_exec_queue.c')
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c  |  134
1 file changed, 99 insertions(+), 35 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index d098d2dd1b2d..fee22358cc09 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -8,6 +8,7 @@
#include <linux/nospec.h>
#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
#include <drm/drm_file.h>
#include <uapi/drm/xe_drm.h>
@@ -16,6 +17,7 @@
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
+#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_migrate.h"
@@ -23,6 +25,7 @@
#include "xe_ring_ops_types.h"
#include "xe_trace.h"
#include "xe_vm.h"
+#include "xe_pxp.h"
enum xe_exec_queue_sched_prop {
XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
@@ -36,6 +39,8 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
+ if (xe_exec_queue_uses_pxp(q))
+ xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
if (q->vm)
xe_vm_put(q->vm);
@@ -68,6 +73,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
q->gt = gt;
q->class = hwe->class;
q->width = width;
+ q->msix_vec = XE_IRQ_DEFAULT_MSIX;
q->logical_mask = logical_mask;
q->fence_irq = &gt->fence_irq[hwe->class];
q->ring_ops = gt->ring_ops[hwe->class];
@@ -75,6 +81,7 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
INIT_LIST_HEAD(&q->lr.link);
INIT_LIST_HEAD(&q->multi_gt_link);
INIT_LIST_HEAD(&q->hw_engine_group_link);
+ INIT_LIST_HEAD(&q->pxp.link);
q->sched_props.timeslice_us = hwe->eclass->sched_props.timeslice_us;
q->sched_props.preempt_timeout_us =
@@ -107,35 +114,37 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
- struct xe_vm *vm = q->vm;
int i, err;
+ u32 flags = 0;
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- return err;
+ /*
+ * PXP workloads executing on RCS or CCS must run in isolation (i.e. no
+ * other workload can use the EUs at the same time). On MTL this is done
+ * by setting the RUNALONE bit in the LRC, while starting on Xe2 there
+ * is a dedicated bit for it.
+ */
+ if (xe_exec_queue_uses_pxp(q) &&
+ (q->class == XE_ENGINE_CLASS_RENDER || q->class == XE_ENGINE_CLASS_COMPUTE)) {
+ if (GRAPHICS_VER(gt_to_xe(q->gt)) >= 20)
+ flags |= XE_LRC_CREATE_PXP;
+ else
+ flags |= XE_LRC_CREATE_RUNALONE;
}
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
+ q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
- goto err_unlock;
+ goto err_lrc;
}
}
- if (vm)
- xe_vm_unlock(vm);
-
err = q->ops->init(q);
if (err)
goto err_lrc;
return 0;
-err_unlock:
- if (vm)
- xe_vm_unlock(vm);
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_put(q->lrc[i]);
@@ -150,6 +159,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
struct xe_exec_queue *q;
int err;
+ /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
+ xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
+
q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
extensions);
if (IS_ERR(q))
@@ -159,12 +171,26 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (err)
goto err_post_alloc;
+ /*
+ * We can only add the queue to the PXP list after the init is complete,
+ * because the PXP termination can call exec_queue_kill and that will
+ * go bad if the queue is only half-initialized. This means that we
+ * can't do it when we handle the PXP extension in __xe_exec_queue_alloc
+ * and we need to do it here instead.
+ */
+ if (xe_exec_queue_uses_pxp(q)) {
+ err = xe_pxp_exec_queue_add(xe->pxp, q);
+ if (err)
+ goto err_post_alloc;
+ }
+
return q;
err_post_alloc:
__xe_exec_queue_free(q);
return ERR_PTR(err);
}
+ALLOW_ERROR_INJECTION(xe_exec_queue_create, ERRNO);
struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
struct xe_vm *vm,
@@ -240,12 +266,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
return q;
}
+ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
struct xe_exec_queue *eq, *next;
+ if (xe_exec_queue_uses_pxp(q))
+ xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
+
xe_exec_queue_last_fence_put_unlocked(q);
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
@@ -260,8 +290,17 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
{
int i;
+ /*
+ * Before releasing our ref to lrc and xef, accumulate our run ticks
+ * and wakeup any waiters.
+ */
+ xe_exec_queue_update_run_ticks(q);
+ if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
+ wake_up_var(&q->xef->exec_queue.pending_removal);
+
for (i = 0; i < q->width; ++i)
xe_lrc_put(q->lrc[i]);
+
__xe_exec_queue_free(q);
}
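For context, a minimal sketch of the consumer side that the pending_removal accounting above pairs with. The real waiter lives outside this file (in the run-ticks reporting path), so both the placement and the helper name are assumptions; the sketch only relies on the xef->exec_queue.pending_removal counter visible in this hunk and on the standard wait_var_event()/wake_up_var() primitives from <linux/wait_bit.h>:

/* Sketch only: hypothetical helper, assuming the struct xe_file layout above. */
static void wait_for_pending_queue_removals(struct xe_file *xef)
{
	/*
	 * pending_removal is incremented at destroy-ioctl time and dropped
	 * (with a wake_up_var()) in xe_exec_queue_fini() above, so this
	 * returns once all in-flight teardowns have flushed their run ticks.
	 */
	wait_var_event(&xef->exec_queue.pending_removal,
		       !atomic_read(&xef->exec_queue.pending_removal));
}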
@@ -392,6 +431,22 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
return 0;
}
+static int
+exec_queue_set_pxp_type(struct xe_device *xe, struct xe_exec_queue *q, u64 value)
+{
+ if (value == DRM_XE_PXP_TYPE_NONE)
+ return 0;
+
+ /* we only support HWDRM sessions right now */
+ if (XE_IOCTL_DBG(xe, value != DRM_XE_PXP_TYPE_HWDRM))
+ return -EINVAL;
+
+ if (!xe_pxp_is_enabled(xe->pxp))
+ return -ENODEV;
+
+ return xe_pxp_exec_queue_set_type(xe->pxp, q, DRM_XE_PXP_TYPE_HWDRM);
+}
+
typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
u64 value);
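To make the new PXP property concrete, here is a hedged userspace sketch (not part of this patch) that creates a render exec queue bound to an HWDRM session. It assumes libdrm's drmIoctl(), the kernel uAPI header for the struct layouts, and a made-up helper name; error handling is minimal:

#include <errno.h>
#include <stdint.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

/* Hypothetical helper: returns the new exec_queue_id or a negative errno. */
static int create_pxp_exec_queue(int fd, uint32_t vm_id, uint16_t gt_id)
{
	struct drm_xe_engine_class_instance eci = {
		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
		.engine_instance = 0,
		.gt_id = gt_id,
	};
	/* Request an HWDRM session via the SET_PROPERTY extension chain. */
	struct drm_xe_ext_set_property pxp = {
		.base.name = DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY,
		.property = DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE,
		.value = DRM_XE_PXP_TYPE_HWDRM,
	};
	struct drm_xe_exec_queue_create create = {
		.extensions = (uintptr_t)&pxp,
		.width = 1,
		.num_placements = 1,
		.vm_id = vm_id,
		.instances = (uintptr_t)&eci,
	};

	if (drmIoctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		return -errno;	/* e.g. -ENODEV when PXP is disabled, per the check above */

	return create.exec_queue_id;
}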
@@ -399,6 +454,7 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
[DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
+ [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE] = exec_queue_set_pxp_type,
};
static int exec_queue_user_ext_set_property(struct xe_device *xe,
@@ -410,7 +466,7 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
int err;
u32 idx;
- err = __copy_from_user(&ext, address, sizeof(ext));
+ err = copy_from_user(&ext, address, sizeof(ext));
if (XE_IOCTL_DBG(xe, err))
return -EFAULT;
@@ -418,7 +474,8 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
ARRAY_SIZE(exec_queue_set_property_funcs)) ||
XE_IOCTL_DBG(xe, ext.pad) ||
XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
- ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
+ ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PXP_TYPE))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
@@ -448,7 +505,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
if (XE_IOCTL_DBG(xe, ext_number >= MAX_USER_EXTENSIONS))
return -E2BIG;
- err = __copy_from_user(&ext, address, sizeof(ext));
+ err = copy_from_user(&ext, address, sizeof(ext));
if (XE_IOCTL_DBG(xe, err))
return -EFAULT;
@@ -470,7 +527,7 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
return 0;
}
-static u32 calc_validate_logical_mask(struct xe_device *xe, struct xe_gt *gt,
+static u32 calc_validate_logical_mask(struct xe_device *xe,
struct drm_xe_engine_class_instance *eci,
u16 width, u16 num_placements)
{
@@ -532,15 +589,15 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
u64_to_user_ptr(args->instances);
struct xe_hw_engine *hwe;
struct xe_vm *vm;
- struct xe_gt *gt;
struct xe_tile *tile;
struct xe_exec_queue *q = NULL;
u32 logical_mask;
+ u32 flags = 0;
u32 id;
u32 len;
int err;
- if (XE_IOCTL_DBG(xe, args->flags) ||
+ if (XE_IOCTL_DBG(xe, args->flags & ~DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT) ||
XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
return -EINVAL;
@@ -548,15 +605,17 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, !len || len > XE_HW_ENGINE_MAX_INSTANCE))
return -EINVAL;
- err = __copy_from_user(eci, user_eci,
- sizeof(struct drm_xe_engine_class_instance) *
- len);
+ err = copy_from_user(eci, user_eci,
+ sizeof(struct drm_xe_engine_class_instance) * len);
if (XE_IOCTL_DBG(xe, err))
return -EFAULT;
if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
return -EINVAL;
+ if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
+ flags |= EXEC_QUEUE_FLAG_LOW_LATENCY;
+
if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
if (XE_IOCTL_DBG(xe, args->width != 1) ||
XE_IOCTL_DBG(xe, args->num_placements != 1) ||
@@ -565,8 +624,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
for_each_tile(tile, xe, id) {
struct xe_exec_queue *new;
- u32 flags = EXEC_QUEUE_FLAG_VM;
+ flags |= EXEC_QUEUE_FLAG_VM;
if (id)
flags |= EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD;
@@ -585,8 +644,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
&q->multi_gt_link);
}
} else {
- gt = xe_device_get_gt(xe, eci[0].gt_id);
- logical_mask = calc_validate_logical_mask(xe, gt, eci,
+ logical_mask = calc_validate_logical_mask(xe, eci,
args->width,
args->num_placements);
if (XE_IOCTL_DBG(xe, !logical_mask))
@@ -613,7 +671,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
}
q = xe_exec_queue_create(xe, vm, logical_mask,
- args->width, hwe, 0,
+ args->width, hwe, flags,
args->extensions);
up_read(&vm->lock);
xe_vm_put(vm);
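Similarly, a hedged sketch of the other uAPI addition wired up in these hunks: DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT, now the only bit accepted in the create ioctl's flags. It reuses the eci/vm_id setup from the earlier sketch, so treat it as illustrative only:

	/* Sketch only: any other bit in .flags is rejected with -EINVAL. */
	struct drm_xe_exec_queue_create create = {
		.flags = DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT,
		.width = 1,
		.num_placements = 1,
		.vm_id = vm_id,
		.instances = (uintptr_t)&eci,
	};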
@@ -756,20 +814,21 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q)
*/
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
- struct xe_file *xef;
+ struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
- u32 old_ts, new_ts;
+ u64 old_ts, new_ts;
+ int idx;
/*
- * Jobs that are run during driver load may use an exec_queue, but are
- * not associated with a user xe file, so avoid accumulating busyness
- * for kernel specific work.
+ * Jobs that are executed by kernel doesn't have a corresponding xe_file
+ * and thus are not accounted.
*/
- if (!q->vm || !q->vm->xef)
+ if (!q->xef)
return;
- xef = q->vm->xef;
-
+ /* Synchronize with unbind while holding the xe file open */
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return;
/*
* Only sample the first LRC. For parallel submission, all of them are
* scheduled together and we compensate that below by multiplying by
@@ -780,7 +839,9 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
*/
lrc = q->lrc[0];
new_ts = xe_lrc_update_timestamp(lrc, &old_ts);
- xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+ q->xef->run_ticks[q->class] += (new_ts - old_ts) * q->width;
+
+ drm_dev_exit(idx);
}
/**
@@ -820,7 +881,10 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
mutex_lock(&xef->exec_queue.lock);
q = xa_erase(&xef->exec_queue.xa, args->exec_queue_id);
+ if (q)
+ atomic_inc(&xef->exec_queue.pending_removal);
mutex_unlock(&xef->exec_queue.lock);
+
if (XE_IOCTL_DBG(xe, !q))
return -ENOENT;