diff options
Diffstat (limited to 'drivers/gpu/drm/xe/xe_oa.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_oa.c | 1144 |
1 files changed, 663 insertions, 481 deletions
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2804f14f8f29..fb842fa0552e 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -12,11 +12,12 @@ #include <drm/drm_managed.h> #include <uapi/drm/xe_drm.h> +#include <generated/xe_wa_oob.h> + #include "abi/guc_actions_slpc_abi.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_engine_regs.h" #include "regs/xe_gt_regs.h" -#include "regs/xe_lrc_layout.h" #include "regs/xe_oa_regs.h" #include "xe_assert.h" #include "xe_bb.h" @@ -28,7 +29,6 @@ #include "xe_gt_mcr.h" #include "xe_gt_printk.h" #include "xe_guc_pc.h" -#include "xe_lrc.h" #include "xe_macros.h" #include "xe_mmio.h" #include "xe_oa.h" @@ -36,11 +36,23 @@ #include "xe_pm.h" #include "xe_sched_job.h" #include "xe_sriov.h" +#include "xe_sync.h" +#include "xe_wa.h" #define DEFAULT_POLL_FREQUENCY_HZ 200 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ) #define XE_OA_UNIT_INVALID U32_MAX +enum xe_oa_submit_deps { + XE_OA_SUBMIT_NO_DEPS, + XE_OA_SUBMIT_ADD_DEPS, +}; + +enum xe_oa_user_extn_from { + XE_OA_USER_EXTN_FROM_OPEN, + XE_OA_USER_EXTN_FROM_CONFIG, +}; + struct xe_oa_reg { struct xe_reg addr; u32 value; @@ -63,13 +75,8 @@ struct xe_oa_config { struct rcu_head rcu; }; -struct flex { - struct xe_reg reg; - u32 offset; - u32 value; -}; - struct xe_oa_open_param { + struct xe_file *xef; u32 oa_unit_id; bool sample; u32 metric_set; @@ -81,6 +88,11 @@ struct xe_oa_open_param { struct xe_exec_queue *exec_q; struct xe_hw_engine *hwe; bool no_preempt; + struct drm_xe_sync __user *syncs_user; + int num_syncs; + struct xe_sync_entry *syncs; + size_t oa_buffer_size; + int wait_num_reports; }; struct xe_oa_config_bo { @@ -90,6 +102,17 @@ struct xe_oa_config_bo { struct xe_bb *bb; }; +struct xe_oa_fence { + /* @base: dma fence base */ + struct dma_fence base; + /* @lock: lock for the fence */ + spinlock_t lock; + /* @work: work to signal @base */ + struct delayed_work work; + /* @cb: 
callback to schedule @work */ + struct dma_fence_cb cb; +}; + #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x static const struct xe_oa_format oa_formats[] = { @@ -162,10 +185,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se return oa_config; } -static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo) +static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence) { xe_oa_config_put(oa_bo->oa_config); - xe_bb_free(oa_bo->bb, NULL); + xe_bb_free(oa_bo->bb, last_fence); kfree(oa_bo); } @@ -176,7 +199,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) { - return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) & + return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) & OAG_OATAILPTR_MASK; } @@ -214,11 +237,9 @@ static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); + u32 tail, hw_tail, partial_report_size, available; int report_size = stream->oa_buffer.format->size; - u32 tail, hw_tail; unsigned long flags; - bool pollin; - u32 partial_report_size; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); @@ -262,12 +283,12 @@ static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) stream->oa_buffer.tail = tail; - pollin = xe_oa_circ_diff(stream, stream->oa_buffer.tail, - stream->oa_buffer.head) >= report_size; + available = xe_oa_circ_diff(stream, stream->oa_buffer.tail, stream->oa_buffer.head); + stream->pollin = available >= stream->wait_num_reports * report_size; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - return pollin; + return stream->pollin; } static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) @@ -275,10 +296,8 @@ static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer 
*hrtimer) struct xe_oa_stream *stream = container_of(hrtimer, typeof(*stream), poll_check_timer); - if (xe_oa_buffer_check_unlocked(stream)) { - stream->pollin = true; + if (xe_oa_buffer_check_unlocked(stream)) wake_up(&stream->poll_wq); - } hrtimer_forward_now(hrtimer, ns_to_ktime(stream->poll_period_ns)); @@ -366,7 +385,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(stream->gt, oaheadptr, + xe_mmio_write32(&stream->gt->mmio, oaheadptr, (head + gtt_offset) & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = head; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); @@ -378,21 +397,30 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) { u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; + int size_exponent = __ffs(stream->oa_buffer.bo->size); + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; + struct xe_mmio *mmio = &stream->gt->mmio; unsigned long flags; + /* + * If oa buffer size is more than 16MB (exponent greater than 24), the + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. + */ + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0); + xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr, gtt_offset & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = 0; /* * PRM says: "This MMIO must be set before the OATAILPTR register and after the * OAHEADPTR register. 
This is to enable proper functionality of the overflow bit". */ - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf); + xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr, gtt_offset & OAG_OATAILPTR_MASK); /* Mark that we need updated tail pointer to read from */ @@ -424,6 +452,12 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream) return val; } +static u32 __oactrl_used_bits(struct xe_oa_stream *stream) +{ + return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? + OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; +} + static void xe_oa_enable(struct xe_oa_stream *stream) { const struct xe_oa_format *format = stream->oa_buffer.format; @@ -444,21 +478,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream) stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) val |= OAG_OACONTROL_OA_PES_DISAG_EN; - xe_mmio_write32(stream->gt, regs->oa_ctrl, val); + xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val); } static void xe_oa_disable(struct xe_oa_stream *stream) { - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0); - if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl, + struct xe_mmio *mmio = &stream->gt->mmio; + + xe_mmio_rmw32(mmio, __oa_regs(stream)->oa_ctrl, __oactrl_used_bits(stream), 0); + if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl, OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, "wait for OA to be disabled timed out\n"); if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { /* <= XE_METEORLAKE except XE_PVC */ - xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1); - if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) + xe_mmio_write32(mmio, OA_TLB_INV_CR, 1); + if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false)) drm_err(&stream->oa->xe->drm, 
"wait for OA tlb invalidate timed out\n"); } @@ -481,7 +517,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, size_t count, size_t *offset) { /* Only clear our bits to avoid side-effects */ - stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, + stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status, OASTATUS_RELEVANT_BITS, 0); /* * Signal to userspace that there is non-zero OA status to read via @@ -515,6 +551,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf, mutex_unlock(&stream->stream_lock); } while (!offset && !ret); } else { + xe_oa_buffer_check_unlocked(stream); mutex_lock(&stream->stream_lock); ret = __xe_oa_read(stream, buf, count, &offset); mutex_unlock(&stream->stream_lock); @@ -567,32 +604,60 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait) return ret; } -static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb) +static void xe_oa_lock_vma(struct xe_exec_queue *q) { + if (q->vm) { + down_read(&q->vm->lock); + xe_vm_lock(q->vm, false); + } +} + +static void xe_oa_unlock_vma(struct xe_exec_queue *q) +{ + if (q->vm) { + xe_vm_unlock(q->vm); + up_read(&q->vm->lock); + } +} + +static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, + struct xe_bb *bb) +{ + struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; struct xe_sched_job *job; struct dma_fence *fence; - long timeout; int err = 0; - /* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */ - job = xe_bb_create_job(stream->k_exec_q, bb); + xe_oa_lock_vma(q); + + job = xe_bb_create_job(q, bb); if (IS_ERR(job)) { err = PTR_ERR(job); goto exit; } + job->ggtt = true; + + if (deps == XE_OA_SUBMIT_ADD_DEPS) { + for (int i = 0; i < stream->num_syncs && !err; i++) + err = xe_sync_entry_add_deps(&stream->syncs[i], job); + if (err) { + drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err); + goto 
err_put_job; + } + } xe_sched_job_arm(job); fence = dma_fence_get(&job->drm.s_fence->finished); xe_sched_job_push(job); - timeout = dma_fence_wait_timeout(fence, false, HZ); - dma_fence_put(fence); - if (timeout < 0) - err = timeout; - else if (!timeout) - err = -ETIME; + xe_oa_unlock_vma(q); + + return fence; +err_put_job: + xe_sched_job_put(job); exit: - return err; + xe_oa_unlock_vma(q); + return ERR_PTR(err); } static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) @@ -636,57 +701,34 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream) xe_oa_config_put(stream->oa_config); llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) - free_oa_config_bo(oa_bo); -} - -static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc, - struct xe_bb *bb, const struct flex *flex, u32 count) -{ - u32 offset = xe_bo_ggtt_addr(lrc->bo); - - do { - bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); - bb->cs[bb->len++] = offset + flex->offset * sizeof(u32); - bb->cs[bb->len++] = 0; - bb->cs[bb->len++] = flex->value; - - } while (flex++, --count); + free_oa_config_bo(oa_bo, stream->last_fence); + dma_fence_put(stream->last_fence); } -static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc, - const struct flex *flex, u32 count) +static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) { + struct dma_fence *fence; struct xe_bb *bb; int err; - bb = xe_bb_new(stream->gt, 4 * count, false); + bb = xe_bb_new(stream->gt, 2 * count + 1, false); if (IS_ERR(bb)) { err = PTR_ERR(bb); goto exit; } - xe_oa_store_flex(stream, lrc, bb, flex, count); - - err = xe_oa_submit_bb(stream, bb); - xe_bb_free(bb, NULL); -exit: - return err; -} - -static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri) -{ - struct xe_bb *bb; - int err; + write_cs_mi_lri(bb, reg_lri, count); - bb = xe_bb_new(stream->gt, 3, 
false); - if (IS_ERR(bb)) { - err = PTR_ERR(bb); - goto exit; + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto free_bb; } + xe_bb_free(bb, fence); + dma_fence_put(fence); - write_cs_mi_lri(bb, reg_lri, 1); - - err = xe_oa_submit_bb(stream, bb); + return 0; +free_bb: xe_bb_free(bb, NULL); exit: return err; @@ -695,70 +737,55 @@ exit: static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAR_OACONTROL, + oacontrol, + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE), + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) }, }; - struct xe_oa_reg reg_lri = { OAR_OACONTROL, oacontrol }; - int err; - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) { const struct xe_oa_format *format = stream->oa_buffer.format; - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 regs_offset = xe_lrc_regs_offset(lrc) / sizeof(u32); u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | (enable ? 
OAR_OACONTROL_COUNTER_ENABLE : 0); - struct flex regs_context[] = { + struct xe_oa_reg reg_lri[] = { { OACTXCONTROL(stream->hwe->mmio_base), - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] + 1, enable ? OA_COUNTER_RESUME : 0, }, { + OAC_OACONTROL, + oacontrol + }, + { RING_CONTEXT_CONTROL(stream->hwe->mmio_base), - regs_offset + CTX_CONTEXT_CONTROL, - _MASKED_BIT_ENABLE(CTX_CTRL_OAC_CONTEXT_ENABLE) | + _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, + enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), }, }; - struct xe_oa_reg reg_lri = { OAC_OACONTROL, oacontrol }; - int err; /* Set ccs select to enable programming of OAC_OACONTROL */ - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream)); + xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl, + __oa_ccs_select(stream)); - /* Modify stream hwe context image with regs_context */ - err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0], - regs_context, ARRAY_SIZE(regs_context)); - if (err) - return err; - - /* Apply reg_lri using LRI */ - return xe_oa_load_with_lri(stream, &reg_lri); + return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); } static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) @@ -785,20 +812,18 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) { + struct xe_mmio *mmio = &stream->gt->mmio; u32 sqcnt1; - /* - * Wa_1508761755:xehpsdv, dg2 - * Enable thread stall DOP gating and EU DOP gating. - */ - if (stream->oa->xe->info.platform == XE_DG2) { + /* Enable thread stall DOP gating and EU DOP gating. 
*/ + if (XE_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); } - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, oag_configure_mmio_trigger(stream, false)); /* disable the context save/restore or OAR counters */ @@ -806,13 +831,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) xe_oa_configure_oa_context(stream, false); /* Make sure we disable noa to save power. */ - xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0); + xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0); sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); /* Reset PMON Enable to save power. */ - xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0); + xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0); } static void xe_oa_stream_destroy(struct xe_oa_stream *stream) @@ -832,7 +857,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_oa_free_oa_buffer(stream); - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); xe_pm_runtime_put(stream->oa->xe); /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ @@ -840,17 +865,15 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); xe_oa_free_configs(stream); + xe_file_put(stream->xef); } -static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) +static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) { struct xe_bo *bo; - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); - bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, + size, ttm_bo_type_kernel, 
XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); if (IS_ERR(bo)) return PTR_ERR(bo); @@ -910,11 +933,62 @@ out: return oa_bo; } +static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence) +{ + dma_fence_put(stream->last_fence); + stream->last_fence = dma_fence_get(fence); +} + +static void xe_oa_fence_work_fn(struct work_struct *w) +{ + struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work); + + /* Signal fence to indicate new OA configuration is active */ + dma_fence_signal(&ofence->base); + dma_fence_put(&ofence->base); +} + +static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + /* Additional empirical delay needed for NOA programming after registers are written */ +#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 + + struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); + + INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); + queue_delayed_work(system_unbound_wq, &ofence->work, + usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); + dma_fence_put(fence); +} + +static const char *xe_oa_get_driver_name(struct dma_fence *fence) +{ + return "xe_oa"; +} + +static const char *xe_oa_get_timeline_name(struct dma_fence *fence) +{ + return "unbound"; +} + +static const struct dma_fence_ops xe_oa_fence_ops = { + .get_driver_name = xe_oa_get_driver_name, + .get_timeline_name = xe_oa_get_timeline_name, +}; + static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) { #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 struct xe_oa_config_bo *oa_bo; - int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US; + struct xe_oa_fence *ofence; + int i, err, num_signal = 0; + struct dma_fence *fence; + + ofence = kzalloc(sizeof(*ofence), GFP_KERNEL); + if (!ofence) { + err = -ENOMEM; + goto exit; + } oa_bo = xe_oa_alloc_config_buffer(stream, config); if (IS_ERR(oa_bo)) { @@ -922,11 +996,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config goto exit; } - err 
= xe_oa_submit_bb(stream, oa_bo->bb); + /* Emit OA configuration batch */ + fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb); + if (IS_ERR(fence)) { + err = PTR_ERR(fence); + goto exit; + } - /* Additional empirical delay needed for NOA programming after registers are written */ - usleep_range(us, 2 * us); + /* Point of no return: initialize and set fence to signal */ + spin_lock_init(&ofence->lock); + dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0); + + for (i = 0; i < stream->num_syncs; i++) { + if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) + num_signal++; + xe_sync_entry_signal(&stream->syncs[i], &ofence->base); + } + + /* Additional dma_fence_get in case we dma_fence_wait */ + if (!num_signal) + dma_fence_get(&ofence->base); + + /* Update last fence too before adding callback */ + xe_oa_update_last_fence(stream, fence); + + /* Add job fence callback to schedule work to signal ofence->base */ + err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb); + xe_gt_assert(stream->gt, !err || err == -ENOENT); + if (err == -ENOENT) + xe_oa_config_cb(fence, &ofence->cb); + + /* If nothing needs to be signaled we wait synchronously */ + if (!num_signal) { + dma_fence_wait(&ofence->base, false); + dma_fence_put(&ofence->base); + } + + /* Done with syncs */ + for (i = 0; i < stream->num_syncs; i++) + xe_sync_entry_cleanup(&stream->syncs[i]); + kfree(stream->syncs); + + return 0; exit: + kfree(ofence); return err; } @@ -938,17 +1051,24 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); } +static u32 oag_buf_size_select(const struct xe_oa_stream *stream) +{ + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, + stream->oa_buffer.bo->size > SZ_16M ? 
+ OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); +} + static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) { + struct xe_mmio *mmio = &stream->gt->mmio; u32 oa_debug, sqcnt1; int ret; /* - * Wa_1508761755:xehpsdv, dg2 * EU NOA signals behave incorrectly if EU clock gating is enabled. * Disable thread stall DOP gating and EU DOP gating. */ - if (stream->oa->xe->info.platform == XE_DG2) { + if (XE_WA(stream->gt, 1508761755)) { xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN, _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2, @@ -966,12 +1086,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug, + xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, _MASKED_BIT_ENABLE(oa_debug) | oag_report_ctx_switches(stream) | + oag_buf_size_select(stream) | oag_configure_mmio_trigger(stream, true)); - xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? + xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? (OAG_OAGLBCTXCTRL_COUNTER_RESUME | OAG_OAGLBCTXCTRL_TIMER_ENABLE | REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, @@ -985,7 +1106,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) sqcnt1 = SQCNT1_PMON_ENABLE | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? 
SQCNT1_OABPC : 0); - xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1); + xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1); /* Configure OAR/OAC */ if (stream->exec_q) { @@ -997,6 +1118,288 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) return xe_oa_emit_oa_config(stream, stream->oa_config); } +static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) +{ + u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); + u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); + u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); + u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); + int idx; + + for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { + const struct xe_oa_format *f = &oa->oa_formats[idx]; + + if (counter_size == f->counter_size && bc_report == f->bc_report && + type == f->type && counter_sel == f->counter_select) { + *name = idx; + return 0; + } + } + + return -EINVAL; +} + +static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (value >= oa->oa_unit_ids) { + drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); + return -EINVAL; + } + param->oa_unit_id = value; + return 0; +} + +static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->sample = value; + return 0; +} + +static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->metric_set = value; + return 0; +} + +static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + int ret = decode_oa_format(oa, value, &param->oa_format); + + if (ret) { + drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); + return ret; + } + return 0; +} + +static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ +#define OA_EXPONENT_MAX 31 + + if (value > 
OA_EXPONENT_MAX) { + drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); + return -EINVAL; + } + param->period_exponent = value; + return 0; +} + +static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->disabled = value; + return 0; +} + +static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->exec_queue_id = value; + return 0; +} + +static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->engine_instance = value; + return 0; +} + +static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->no_preempt = value; + return 0; +} + +static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->num_syncs = value; + return 0; +} + +static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + param->syncs_user = u64_to_user_ptr(value); + return 0; +} + +static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); + return -EINVAL; + } + param->oa_buffer_size = value; + return 0; +} + +static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + if (!value) { + drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n", value); + return -EINVAL; + } + param->wait_num_reports = value; + return 0; +} + +static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param) +{ + return -EINVAL; +} + +typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, + struct xe_oa_open_param *param); +static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = { + 
[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports, +}; + +static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { + [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, + [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, + [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, + [DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval, +}; + +static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 extension, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_ext_set_property 
ext; + int err; + u32 idx; + + err = copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) != + ARRAY_SIZE(xe_oa_set_property_funcs_config)); + + if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || + XE_IOCTL_DBG(oa->xe, ext.pad)) + return -EINVAL; + + idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); + + if (from == XE_OA_USER_EXTN_FROM_CONFIG) + return xe_oa_set_property_funcs_config[idx](oa, ext.value, param); + else + return xe_oa_set_property_funcs_open[idx](oa, ext.value, param); +} + +typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from, + u64 extension, struct xe_oa_open_param *param); +static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { + [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, +}; + +#define MAX_USER_EXTENSIONS 16 +static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension, + int ext_number, struct xe_oa_open_param *param) +{ + u64 __user *address = u64_to_user_ptr(extension); + struct drm_xe_user_extension ext; + int err; + u32 idx; + + if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) + return -E2BIG; + + err = copy_from_user(&ext, address, sizeof(ext)); + if (XE_IOCTL_DBG(oa->xe, err)) + return -EFAULT; + + if (XE_IOCTL_DBG(oa->xe, ext.pad) || + XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) + return -EINVAL; + + idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); + err = xe_oa_user_extension_funcs[idx](oa, from, extension, param); + if (XE_IOCTL_DBG(oa->xe, err)) + return err; + + if (ext.next_extension) + return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param); + + return 0; +} + +static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param) +{ + int ret, num_syncs, 
num_ufence = 0; + + if (param->num_syncs && !param->syncs_user) { + drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n"); + ret = -EINVAL; + goto exit; + } + + if (param->num_syncs) { + param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); + if (!param->syncs) { + ret = -ENOMEM; + goto exit; + } + } + + for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { + ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs], + &param->syncs_user[num_syncs], 0); + if (ret) + goto err_syncs; + + if (xe_sync_is_ufence(&param->syncs[num_syncs])) + num_ufence++; + } + + if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { + ret = -EINVAL; + goto err_syncs; + } + + return 0; + +err_syncs: + while (num_syncs--) + xe_sync_entry_cleanup(&param->syncs[num_syncs]); + kfree(param->syncs); +exit: + return ret; +} + static void xe_oa_stream_enable(struct xe_oa_stream *stream) { stream->pollin = false; @@ -1090,36 +1493,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream) static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) { - struct drm_xe_ext_set_property ext; + struct xe_oa_open_param param = {}; long ret = stream->oa_config->id; struct xe_oa_config *config; int err; - err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext)); - if (XE_IOCTL_DBG(stream->oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) || - XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) || - XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET)) - return -EINVAL; + err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param); + if (err) + return err; - config = xe_oa_get_oa_config(stream->oa, ext.value); + config = xe_oa_get_oa_config(stream->oa, param.metric_set); if (!config) return -ENODEV; - if (config != stream->oa_config) { - err = xe_oa_emit_oa_config(stream, config); - if 
(!err) - config = xchg(&stream->oa_config, config); - else - ret = err; + param.xef = stream->xef; + err = xe_oa_parse_syncs(stream->oa, &param); + if (err) + goto err_config_put; + + stream->num_syncs = param.num_syncs; + stream->syncs = param.syncs; + + err = xe_oa_emit_oa_config(stream, config); + if (!err) { + config = xchg(&stream->oa_config, config); + drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n", + stream->oa_config->uuid); } +err_config_put: xe_oa_config_put(config); - return ret; + return err ?: ret; } static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) @@ -1145,7 +1550,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) { - struct drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; void __user *uaddr = (void __user *)arg; if (copy_to_user(uaddr, &info, sizeof(info))) @@ -1206,9 +1611,11 @@ static int xe_oa_release(struct inode *inode, struct file *file) struct xe_oa_stream *stream = file->private_data; struct xe_gt *gt = stream->gt; + xe_pm_runtime_get(gt_to_xe(gt)); mutex_lock(&gt->oa.gt_lock); xe_oa_destroy_locked(stream); mutex_unlock(&gt->oa.gt_lock); + xe_pm_runtime_put(gt_to_xe(gt)); /* Release the reference the OA stream kept on the driver */ drm_dev_put(&gt_to_xe(gt)->drm); @@ -1229,7 +1636,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) } /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); return -EINVAL; } @@ -1267,86 +1674,12 @@ static const struct file_operations xe_oa_fops = { .mmap = xe_oa_mmap, }; -static bool engine_supports_mi_query(struct
xe_hw_engine *hwe) -{ - return hwe->class == XE_ENGINE_CLASS_RENDER || - hwe->class == XE_ENGINE_CLASS_COMPUTE; -} - -static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end) -{ - u32 idx = *offset; - u32 len = min(MI_LRI_LEN(state[idx]) + idx, end); - bool found = false; - - idx++; - for (; idx < len; idx += 2) { - if (state[idx] == reg) { - found = true; - break; - } - } - - *offset = idx; - return found; -} - -#define IS_MI_LRI_CMD(x) (REG_FIELD_GET(MI_OPCODE, (x)) == \ - REG_FIELD_GET(MI_OPCODE, MI_LOAD_REGISTER_IMM)) - -static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg) -{ - struct xe_lrc *lrc = stream->exec_q->lrc[0]; - u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) + - lrc->ring.size) / sizeof(u32); - u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32); - u32 *state = (u32 *)lrc->bo->vmap.vaddr; - - if (drm_WARN_ON(&stream->oa->xe->drm, !state)) - return U32_MAX; - - for (; offset < len; ) { - if (IS_MI_LRI_CMD(state[offset])) { - /* - * We expect reg-value pairs in MI_LRI command, so - * MI_LRI_LEN() should be even - */ - drm_WARN_ON(&stream->oa->xe->drm, - MI_LRI_LEN(state[offset]) & 0x1); - - if (xe_oa_find_reg_in_lri(state, reg, &offset, len)) - break; - } else { - offset++; - } - } - - return offset < len ? offset : U32_MAX; -} - -static int xe_oa_set_ctx_ctrl_offset(struct xe_oa_stream *stream) -{ - struct xe_reg reg = OACTXCONTROL(stream->hwe->mmio_base); - u32 offset = stream->oa->ctx_oactxctrl_offset[stream->hwe->class]; - - /* Do this only once. Failure is stored as offset of U32_MAX */ - if (offset) - goto exit; - - offset = xe_oa_context_image_offset(stream, reg.addr); - stream->oa->ctx_oactxctrl_offset[stream->hwe->class] = offset; - - drm_dbg(&stream->oa->xe->drm, "%s oa ctx control at 0x%08x dword offset\n", - stream->hwe->name, offset); -exit: - return offset && offset != U32_MAX ? 
0 : -ENODEV; -} - static int xe_oa_stream_init(struct xe_oa_stream *stream, struct xe_oa_open_param *param) { struct xe_oa_unit *u = param->hwe->oa_unit; struct xe_gt *gt = param->hwe->gt; + unsigned int fw_ref; int ret; stream->exec_q = param->exec_q; @@ -1356,9 +1689,14 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; stream->sample = param->sample; - stream->periodic = param->period_exponent > 0; + stream->periodic = param->period_exponent >= 0; stream->period_exponent = param->period_exponent; stream->no_preempt = param->no_preempt; + stream->wait_num_reports = param->wait_num_reports; + + stream->xef = xe_file_get(param->xef); + stream->num_syncs = param->num_syncs; + stream->syncs = param->syncs; /* * For Xe2+, when overrun mode is enabled, there are no partial reports at the end @@ -1368,20 +1706,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, if (GRAPHICS_VER(stream->oa->xe) >= 20 && stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) stream->oa_buffer.circ_size = - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + param->oa_buffer_size - + param->oa_buffer_size % stream->oa_buffer.format->size; else - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; - - if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { - /* If we don't find the context offset, just return error */ - ret = xe_oa_set_ctx_ctrl_offset(stream); - if (ret) { - drm_err(&stream->oa->xe->drm, - "xe_oa_set_ctx_ctrl_offset failed for %s\n", - stream->hwe->name); - goto exit; - } - } + stream->oa_buffer.circ_size = param->oa_buffer_size; stream->oa_config = xe_oa_get_oa_config(stream->oa, param->metric_set); if (!stream->oa_config) { @@ -1391,12 +1719,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, } /* - * Wa_1509372804:pvc - * * GuC reset of engines causes OA to lose configuration * state. Prevent this by overriding GUCRC mode. 
*/ - if (stream->oa->xe->info.platform == XE_PVC) { + if (XE_WA(stream->gt, 1509372804)) { ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc, SLPC_GUCRC_MODE_GUCRC_NO_RC6); if (ret) @@ -1407,9 +1733,13 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, /* Take runtime pm ref and forcewake to disable RC6 */ xe_pm_runtime_get(stream->oa->xe); - XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL); + if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) { + ret = -ETIMEDOUT; + goto err_fw_put; + } - ret = xe_oa_alloc_oa_buffer(stream); + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); if (ret) goto err_fw_put; @@ -1434,8 +1764,8 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, WRITE_ONCE(u->exclusive_stream, stream); - hrtimer_init(&stream->poll_check_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - stream->poll_check_timer.function = xe_oa_poll_check_timer_cb; + hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); init_waitqueue_head(&stream->poll_wq); spin_lock_init(&stream->oa_buffer.ptr_lock); @@ -1449,13 +1779,14 @@ err_put_k_exec_q: err_free_oa_buf: xe_oa_free_oa_buffer(stream); err_fw_put: - XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); + xe_force_wake_put(gt_to_fw(gt), fw_ref); xe_pm_runtime_put(stream->oa->xe); if (stream->override_gucrc) xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc)); err_free_configs: xe_oa_free_configs(stream); exit: + xe_file_put(stream->xef); return ret; } @@ -1523,23 +1854,14 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt) { u32 reg, shift; - /* - * Wa_18013179988:dg2 - * Wa_14015568240:pvc - * Wa_14015846243:mtl - */ - switch (gt_to_xe(gt)->info.platform) { - case XE_DG2: - case XE_PVC: - case XE_METEORLAKE: + if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) { xe_pm_runtime_get(gt_to_xe(gt)); - reg = xe_mmio_read32(gt, RPM_CONFIG0); + reg =
xe_mmio_read32(&gt->mmio, RPM_CONFIG0); xe_pm_runtime_put(gt_to_xe(gt)); shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); return gt->info.reference_clock << (3 - shift); - - default: + } else { return gt->info.reference_clock; } } @@ -1565,27 +1887,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type) } } -static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) -{ - u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); - u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); - u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); - u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); - int idx; - - for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { - const struct xe_oa_format *f = &oa->oa_formats[idx]; - - if (counter_size == f->counter_size && bc_report == f->bc_report && - type == f->type && counter_sel == f->counter_select) { - *name = idx; - return 0; - } - } - - return -EINVAL; -} - /** * xe_oa_unit_id - Return OA unit ID for a hardware engine * @hwe: @xe_hw_engine @@ -1632,155 +1933,6 @@ out: return ret; } -static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - if (value >= oa->oa_unit_ids) { - drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value); - return -EINVAL; - } - param->oa_unit_id = value; - return 0; -} - -static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->sample = value; - return 0; -} - -static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->metric_set = value; - return 0; -} - -static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - int ret = decode_oa_format(oa, value, &param->oa_format); - - if (ret) { - drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value); - return ret; - } -
return 0; -} - -static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ -#define OA_EXPONENT_MAX 31 - - if (value > OA_EXPONENT_MAX) { - drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX); - return -EINVAL; - } - param->period_exponent = value; - return 0; -} - -static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->disabled = value; - return 0; -} - -static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->exec_queue_id = value; - return 0; -} - -static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->engine_instance = value; - return 0; -} - -static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param) -{ - param->no_preempt = value; - return 0; -} - -typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value, - struct xe_oa_open_param *param); -static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = { - [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id, - [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa, - [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set, - [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format, - [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent, - [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled, - [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id, - [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance, - [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, -}; - -static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_ext_set_property ext; - int err; - u32 idx; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if 
(XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) || - XE_IOCTL_DBG(oa->xe, ext.pad)) - return -EINVAL; - - idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs)); - return xe_oa_set_property_funcs[idx](oa, ext.value, param); -} - -typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension, - struct xe_oa_open_param *param); -static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = { - [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property, -}; - -#define MAX_USER_EXTENSIONS 16 -static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number, - struct xe_oa_open_param *param) -{ - u64 __user *address = u64_to_user_ptr(extension); - struct drm_xe_user_extension ext; - int err; - u32 idx; - - if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) - return -E2BIG; - - err = __copy_from_user(&ext, address, sizeof(ext)); - if (XE_IOCTL_DBG(oa->xe, err)) - return -EFAULT; - - if (XE_IOCTL_DBG(oa->xe, ext.pad) || - XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) - return -EINVAL; - - idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); - err = xe_oa_user_extension_funcs[idx](oa, extension, param); - if (XE_IOCTL_DBG(oa->xe, err)) - return err; - - if (ext.next_extension) - return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param); - - return 0; -} - /** * xe_oa_stream_open_ioctl - Opens an OA stream * @dev: @drm_device @@ -1806,7 +1958,9 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f return -ENODEV; } - ret = xe_oa_user_extensions(oa, data, 0, &param); + param.xef = xef; + param.period_exponent = -1; + ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param); if (ret) return ret; @@ -1815,8 +1969,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f if (XE_IOCTL_DBG(oa->xe,
!param.exec_q)) return -ENOENT; - if (param.exec_q->width > 1) - drm_dbg(&oa->xe->drm, "exec_q->width > 1, programming only exec_q->lrc[0]\n"); + if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) + return -EOPNOTSUPP; } /* @@ -1860,7 +2014,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f goto err_exec_q; } - if (param.period_exponent > 0) { + if (param.period_exponent >= 0) { u64 oa_period, oa_freq_hz; /* Requesting samples from OAG buffer is a privileged operation */ @@ -1874,11 +2028,35 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); } + if (!param.oa_buffer_size) + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; + + if (!param.wait_num_reports) + param.wait_num_reports = 1; + if (param.wait_num_reports > param.oa_buffer_size / f->size) { + drm_dbg(&oa->xe->drm, "wait_num_reports %d\n", param.wait_num_reports); + ret = -EINVAL; + goto err_exec_q; + } + + ret = xe_oa_parse_syncs(oa, &param); + if (ret) + goto err_exec_q; + mutex_lock(&param.hwe->gt->oa.gt_lock); ret = xe_oa_stream_open_ioctl_locked(oa, &param); mutex_unlock(&param.hwe->gt->oa.gt_lock); + if (ret < 0) + goto err_sync_cleanup; + + return ret; + +err_sync_cleanup: + while (param.num_syncs--) + xe_sync_entry_cleanup(&param.syncs[param.num_syncs]); + kfree(param.syncs); err_exec_q: - if (ret < 0 && param.exec_q) + if (param.exec_q) xe_exec_queue_put(param.exec_q); return ret; } @@ -1978,6 +2156,7 @@ static const struct xe_mmio_range xe2_oa_mux_regs[] = { { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ + { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ { .start = 0x13000, .end = 0x137FC
}, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ @@ -2042,6 +2221,7 @@ addr_err: kfree(oa_regs); return ERR_PTR(err); } +ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO); static ssize_t show_dynamic_id(struct kobject *kobj, struct kobj_attribute *attr, @@ -2101,7 +2281,7 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi return -EACCES; } - err = __copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); + err = copy_from_user(&param, u64_to_user_ptr(data), sizeof(param)); if (XE_IOCTL_DBG(oa->xe, err)) return -EFAULT; @@ -2234,36 +2414,36 @@ err_unlock: return ret; } +static void xe_oa_unregister(void *arg) +{ + struct xe_oa *oa = arg; + + if (!oa->metrics_kobj) + return; + + kobject_put(oa->metrics_kobj); + oa->metrics_kobj = NULL; +} + /** * xe_oa_register - Xe OA registration * @xe: @xe_device * * Exposes the metrics sysfs directory upon completion of module initialization */ -void xe_oa_register(struct xe_device *xe) +int xe_oa_register(struct xe_device *xe) { struct xe_oa *oa = &xe->oa; if (!oa->xe) - return; + return 0; oa->metrics_kobj = kobject_create_and_add("metrics", &xe->drm.primary->kdev->kobj); -} - -/** - * xe_oa_unregister - Xe OA de-registration - * @xe: @xe_device - */ -void xe_oa_unregister(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - if (!oa->metrics_kobj) - return; + return -ENOMEM; - kobject_put(oa->metrics_kobj); - oa->metrics_kobj = NULL; + return devm_add_action_or_reset(xe->drm.dev, xe_oa_unregister, oa); } static u32 num_oa_units_per_gt(struct xe_gt *gt) @@ -2348,8 +2528,10 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt) u->type = DRM_XE_OA_UNIT_TYPE_OAM; } + xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0); + /* Ensure MMIO trigger remains disabled till there is a stream */ - xe_mmio_write32(gt, u->regs.oa_debug, + xe_mmio_write32(&gt->mmio, u->regs.oa_debug, oag_configure_mmio_trigger(NULL, false)); /* Set oa_unit_ids now to ensure ids remain contiguous */ @@ -2450,6 +2632,27 @@ static void
xe_oa_init_supported_formats(struct xe_oa *oa) } } +static int destroy_config(int id, void *p, void *data) +{ + xe_oa_config_put(p); + + return 0; +} + +static void xe_oa_fini(void *arg) +{ + struct xe_device *xe = arg; + struct xe_oa *oa = &xe->oa; + + if (!oa->xe) + return; + + idr_for_each(&oa->metrics_idr, destroy_config, oa); + idr_destroy(&oa->metrics_idr); + + oa->xe = NULL; +} + /** * xe_oa_init - OA initialization during device probe * @xe: @xe_device @@ -2481,31 +2684,10 @@ int xe_oa_init(struct xe_device *xe) } xe_oa_init_supported_formats(oa); - return 0; -exit: - oa->xe = NULL; - return ret; -} -static int destroy_config(int id, void *p, void *data) -{ - xe_oa_config_put(p); - return 0; -} - -/** - * xe_oa_fini - OA de-initialization during device remove - * @xe: @xe_device - */ -void xe_oa_fini(struct xe_device *xe) -{ - struct xe_oa *oa = &xe->oa; - - if (!oa->xe) - return; - - idr_for_each(&oa->metrics_idr, destroy_config, oa); - idr_destroy(&oa->metrics_idr); + return devm_add_action_or_reset(xe->drm.dev, xe_oa_fini, xe); +exit: oa->xe = NULL; + return ret; } |