94 files changed, 4488 insertions, 1889 deletions
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index dc850b4b6e21..26a22f529e9b 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -1716,11 +1716,15 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - if (config->funcs->atomic_check) + if (config->funcs->atomic_check) { ret = config->funcs->atomic_check(state->dev, state); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", + state, ret); + return ret; + } + } if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 9bee52a949a9..4c6adae23e18 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -18,6 +18,10 @@ subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -67,11 +71,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -154,7 +158,8 @@ i915-y += dvo_ch7017.o \ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ - selftests/i915_selftest.o + selftests/i915_selftest.o \ + selftests/igt_flush_test.o # virtual gpu code i915-y += i915_vgpu.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index d85939bd7b47..718ca08f9575 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -813,15 +813,31 @@ static inline bool is_force_nonpriv_mmio(unsigned int offset) } static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) + unsigned int offset, unsigned int index, char *cmd) { struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data = cmd_val(s, index + 1); + unsigned int data; + u32 ring_base; + u32 nopid; + struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + + if (!strcmp(cmd, "lri")) + data = cmd_val(s, index + 1); + else { + gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", + offset, cmd); + return -EINVAL; + } + + ring_base = dev_priv->engine[s->ring_id]->mmio_base; + nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && + data != nopid) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EPERM; + patch_value(s, cmd_ptr(s, index), nopid); + return 0; } return 0; } @@ -869,7 +885,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EINVAL; if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + force_nonpriv_reg_handler(s, offset, index, cmd)) return -EPERM; if (offset == i915_mmio_reg_offset(DERRMR) || @@ -1604,7 +1620,8 @@ static int 
batch_buffer_needs_scan(struct parser_exec_state *s) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8)) + if (cmd_val(s, 0) & (1 << 8) && + !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) return 0; } return 1; @@ -1618,6 +1635,8 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; *bb_size = 0; @@ -1629,18 +1648,22 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } do { - if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + if (copy_gma_to_hva(s->vgpu, mm, gma, gma + 4, &cmd) < 0) return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } @@ -1666,6 +1689,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) unsigned long gma = 0; unsigned long bb_size; int ret = 0; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + unsigned long gma_start_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); @@ -1680,8 +1706,24 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; + bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; + + /* the gma_start_offset stores the batch buffer's start gma's + * offset relative to page boundary. so for non-privileged batch + * buffer, the shadowed gem object holds exactly the same page + * layout as original gem object. This is for the convience of + * replacing the whole non-privilged batch buffer page to this + * shadowed one in PPGTT at the same gma address. (this replacing + * action is not implemented yet now, but may be necessary in + * future). + * for prileged batch buffer, we just change start gma address to + * that of shadowed page. + */ + if (bb->ppgtt) + gma_start_offset = gma & ~I915_GTT_PAGE_MASK; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); + roundup(bb_size + gma_start_offset, PAGE_SIZE)); if (IS_ERR(bb->obj)) { ret = PTR_ERR(bb->obj); goto err_free_bb; @@ -1702,9 +1744,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->clflush &= ~CLFLUSH_BEFORE; } - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(s->vgpu, mm, gma, gma + bb_size, - bb->va); + bb->va + gma_start_offset); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); ret = -EFAULT; @@ -1730,7 +1772,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). 
*/ - s->ip_va = bb->va; + s->ip_va = bb->va + gma_start_offset; s->ip_gma = gma; return 0; err_unmap: @@ -2469,15 +2511,18 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } s->info = info; trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, - cmd_length(s), s->buf_type); + cmd_length(s), s->buf_type, s->buf_addr_type, + s->workload, info->name); if (info->handler) { ret = info->handler(s); diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index f7d0078eb61b..2ec89bcb59f1 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -124,6 +124,68 @@ static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff); +static int +vgpu_scan_nonprivbb_get(void *data, u64 *val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + *val = vgpu->scan_nonprivbb; + return 0; +} + +/* + * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning + * of non-privileged batch buffer. e.g. + * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer + * on engine 0 and 1. + */ +static int +vgpu_scan_nonprivbb_set(void *data, u64 val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + enum intel_engine_id id; + char buf[128], *s; + int len; + + val &= (1 << I915_NUM_ENGINES) - 1; + + if (vgpu->scan_nonprivbb == val) + return 0; + + if (!val) + goto done; + + len = sprintf(buf, + "gvt: vgpu %d turns on non-privileged batch buffers scanning on Engines:", + vgpu->id); + + s = buf + len; + + for (id = 0; id < I915_NUM_ENGINES; id++) { + struct intel_engine_cs *engine; + + engine = dev_priv->engine[id]; + if (engine && (val & (1 << id))) { + len = snprintf(s, 4, "%d, ", engine->id); + s += len; + } else + val &= ~(1 << id); + } + + if (val) + sprintf(s, "low performance expected."); + + pr_warn("%s\n", buf); + +done: + vgpu->scan_nonprivbb = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, + vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, + "0x%llx\n"); + /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU * @vgpu: a vGPU @@ -151,6 +213,11 @@ int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) if (!ent) return -ENOMEM; + ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, + vgpu, &vgpu_scan_nonprivbb_fops); + if (!ent) + return -ENOMEM; + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index efacd8abbedc..05d15a095310 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -99,7 +99,6 @@ struct intel_vgpu_fence { struct intel_vgpu_mmio { void *vreg; void *sreg; - bool disable_warn_untrack; }; #define INTEL_GVT_MAX_BAR_NUM 4 @@ -226,6 +225,7 @@ struct intel_vgpu { struct completion vblank_done; + u32 scan_nonprivbb; }; /* validating GM healthy status*/ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index a33c1c3e4a21..4b6532fb789a 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -191,6 +191,8 @@ 
static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int max_fence = vgpu_fence_sz(vgpu); if (fence_num >= max_fence) { + gvt_vgpu_err("access oob fence reg %d/%d\n", + fence_num, max_fence); /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -200,11 +202,6 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - if (!vgpu->mmio.disable_warn_untrack) { - gvt_vgpu_err("found oob fence register access\n"); - gvt_vgpu_err("total fence %d, access fence %d\n", - max_fence, fence_num); - } memset(p_data, 0, bytes); return -EINVAL; } @@ -477,22 +474,28 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 reg_nonpriv = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + u32 ring_base; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret = -EINVAL; - if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", - vgpu->id, offset, bytes); + if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) { + gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, ring_id, offset, bytes); return ret; } - if (in_whitelist(reg_nonpriv)) { + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (in_whitelist(reg_nonpriv) || + reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) { ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", - vgpu->id, reg_nonpriv); - } - return ret; + } else + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", + vgpu->id, reg_nonpriv, offset); + + return 0; } static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, @@ -3092,9 +3095,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, */ mmio_info = find_mmio_info(gvt, offset); if (!mmio_info) { - if (!vgpu->mmio.disable_warn_untrack) - gvt_vgpu_err("untracked MMIO %08x len %d\n", - offset, bytes); + gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); goto default_rw; } diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 11b71b33f1c0..e4960aff68bd 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -244,8 +244,6 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; - - vgpu->mmio.disable_warn_untrack = false; } else { #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) /* only reset the engine related, so starting with 0x44200 diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index a5bac83d53a9..0f949554d118 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -448,7 +448,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) { - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 75b7bc7b344c..d053cbe1dc94 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ 
b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -53,7 +53,6 @@ struct vgpu_sched_data { bool active; ktime_t sched_in_time; - ktime_t sched_out_time; ktime_t sched_time; ktime_t left_ts; ktime_t allocated_ts; @@ -66,17 +65,22 @@ struct gvt_sched_data { struct hrtimer timer; unsigned long period; struct list_head lru_runq_head; + ktime_t expire_time; }; -static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time) { ktime_t delta_ts; - struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + struct vgpu_sched_data *vgpu_data; - delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + if (!vgpu || vgpu == vgpu->gvt->idle_vgpu) + return; - vgpu_data->sched_time += delta_ts; - vgpu_data->left_ts -= delta_ts; + vgpu_data = vgpu->sched_data; + delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time); + vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts); + vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts); + vgpu_data->sched_in_time = cur_time; } #define GVT_TS_BALANCE_PERIOD_MS 100 @@ -150,11 +154,7 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) } cur_time = ktime_get(); - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - vgpu_data->sched_out_time = cur_time; - vgpu_update_timeslice(scheduler->current_vgpu); - } + vgpu_update_timeslice(scheduler->current_vgpu, cur_time); vgpu_data = scheduler->next_vgpu->sched_data; vgpu_data->sched_in_time = cur_time; @@ -226,17 +226,22 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; - static uint64_t timer_check; + ktime_t cur_time; mutex_lock(&gvt->lock); + cur_time = ktime_get(); if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request)) { - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + if (cur_time >= sched_data->expire_time) { gvt_balance_timeslice(sched_data); + sched_data->expire_time = ktime_add_ms( + cur_time, GVT_TS_BALANCE_PERIOD_MS); + } } clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); + vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 638abe84857c..c2d183b91500 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,7 +58,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -97,7 +97,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (!workload || !reg_state || workload->ring_id != RCS) + if (workload->ring_id != RCS) return; if (save) { @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -283,7 +283,7 @@ static int 
shadow_context_status_change(struct notifier_block *nb, static void shadow_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -389,7 +389,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. */ - ring = engine->context_pin(engine, shadow_ctx); + ring = intel_context_pin(shadow_ctx, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); @@ -403,7 +403,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_scan: @@ -437,7 +437,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -452,12 +452,6 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) int ret; list_for_each_entry(bb, &workload->shadow_bb, list) { - bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); - if (IS_ERR(bb->vma)) { - ret = PTR_ERR(bb->vma); - goto err; - } - /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va * is only updated into ring_scan_buffer, not real ring address * allocated in later copy_workload_to_ring_buffer. pls be noted @@ -469,25 +463,53 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + bb->bb_offset; - /* relocate shadow batch buffer */ - bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); - if (gmadr_bytes == 8) - bb->bb_start_cmd_va[2] = 0; + if (bb->ppgtt) { + /* for non-priv bb, scan&shadow is only for + * debugging purpose, so the content of shadow bb + * is the same as original bb. Therefore, + * here, rather than switch to shadow bb's gma + * address, we directly use original batch buffer's + * gma address, and send original bb to hardware + * directly + */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + } else { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, + NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - /* No one is going to touch shadow bb from now on. */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; - ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); - if (ret) - goto err; + /* No one is going to touch shadow bb from now on. 
*/ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } - i915_gem_obj_finish_shmem_access(bb->obj); - bb->accessing = false; + ret = i915_gem_object_set_to_gtt_domain(bb->obj, + false); + if (ret) + goto err; - i915_vma_move_to_active(bb->vma, workload->req, 0); + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); + } } return 0; err: @@ -504,7 +526,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_submission *s = &workload->vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -666,7 +688,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); if (ret) { - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); goto out; } @@ -749,7 +771,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -876,7 +898,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, s->shadow_ctx); + intel_context_unpin(s->shadow_ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1134,9 +1156,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); - if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) - s->shadow_ctx->priority = INT_MAX; - bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 486ed57a4ad1..6c644782193e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,6 +125,7 @@ struct intel_vgpu_shadow_bb { unsigned int clflush; bool accessing; unsigned long bb_offset; + bool ppgtt; }; #define workload_q_head(vgpu, ring_id) \ diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 82093f1e8612..1fd64202d74e 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -224,19 +224,25 @@ TRACE_EVENT(oos_sync, TP_printk("%s", __entry->buf) ); +#define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, - TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, - u32 buf_type), + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, + void *workload, char *cmd_name), - TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, + buf_addr_type, workload, cmd_name), TP_STRUCT__entry( __field(u8, vgpu_id) __field(u8, ring_id) __field(u32, ip_gma) __field(u32, buf_type) + __field(u32, buf_addr_type) __field(u32, cmd_len) + __field(void*, workload) __dynamic_array(u32, raw_cmd, cmd_len) + __array(char, cmd_name, 
GVT_CMD_STR_LEN) ), TP_fast_assign( @@ -244,17 +250,25 @@ TRACE_EVENT(gvt_command, __entry->ring_id = ring_id; __entry->ip_gma = ip_gma; __entry->buf_type = buf_type; + __entry->buf_addr_type = buf_addr_type; __entry->cmd_len = cmd_len; + __entry->workload = workload; + snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name); memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); ), - TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", + TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n", __entry->vgpu_id, __entry->ring_id, + __entry->buf_addr_type, __entry->buf_type, __entry->ip_gma, - __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) + __entry->cmd_name, + __entry->cmd_len, + __print_array(__get_dynamic_array(raw_cmd), + __entry->cmd_len, 4), + __entry->workload) ); #define GVT_TEMP_STR_LEN 10 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2e6652a9bb9e..13e7b9e4a6e6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -377,16 +377,19 @@ static void print_batch_pool_stats(struct seq_file *m, print_file_stats(m, "[k]batch pool", stats); } -static int per_file_ctx_stats(int id, void *ptr, void *data) +static int per_file_ctx_stats(int idx, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; - int n; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce = to_intel_context(ctx, engine); - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { - if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state->obj, data); - if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); + if (ce->state) + per_file_stats(0, ce->state->obj, data); + if (ce->ring) + per_file_stats(0, ce->ring->vma->obj, data); } return 0; @@ -1340,10 +1343,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? 
%s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -1960,7 +1962,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); seq_printf(m, "%s: ", engine->name); if (ce->state) @@ -2603,6 +2606,26 @@ static const char *psr2_live_status(u32 val) return "unknown"; } +static const char *psr_sink_status(u8 val) +{ + static const char * const sink_status[] = { + "inactive", + "transition to active, capture and display", + "active, display from RFB", + "active, capture and display on sink device timings", + "transition to inactive, capture and display, timing re-sync", + "reserved", + "reserved", + "sink internal error" + }; + + val &= DP_PSR_SINK_STATE_MASK; + if (val < ARRAY_SIZE(sink_status)) + return sink_status[val]; + + return "unknown"; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2684,12 +2707,61 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", psr2, psr2_live_status(psr2)); } + + if (dev_priv->psr.enabled) { + struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; + u8 val; + + if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1) + seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, + psr_sink_status(val)); + } mutex_unlock(&dev_priv->psr.lock); + if (READ_ONCE(dev_priv->psr.debug)) { + seq_printf(m, "Last attempted entry at: %lld\n", + dev_priv->psr.last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", + dev_priv->psr.last_exit); + } + + intel_runtime_pm_put(dev_priv); + return 0; +} + +static int +i915_edp_psr_debug_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val)); + + intel_runtime_pm_get(dev_priv); + intel_psr_irq_control(dev_priv, !!val); intel_runtime_pm_put(dev_priv); + return 0; } +static int +i915_edp_psr_debug_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + *val = READ_ONCE(dev_priv->psr.debug); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, + i915_edp_psr_debug_get, i915_edp_psr_debug_set, + "%llu\n"); + static int i915_sink_crc(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -3296,6 +3368,28 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0); seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1); seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll); + seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0); + seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1); + seq_printf(m, " mg_refclkin_ctl: 0x%08x\n", + pll->state.hw_state.mg_refclkin_ctl); + seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n", + pll->state.hw_state.mg_clktop2_coreclkctl1); + seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n", + pll->state.hw_state.mg_clktop2_hsclkctl); + seq_printf(m, " mg_pll_div0: 0x%08x\n", + pll->state.hw_state.mg_pll_div0); + seq_printf(m, " mg_pll_div1: 0x%08x\n", + pll->state.hw_state.mg_pll_div1); + seq_printf(m, " mg_pll_lf: 0x%08x\n", + pll->state.hw_state.mg_pll_lf); + seq_printf(m, " mg_pll_frac_lock: 0x%08x\n", + 
pll->state.hw_state.mg_pll_frac_lock); + seq_printf(m, " mg_pll_ssc: 0x%08x\n", + pll->state.hw_state.mg_pll_ssc); + seq_printf(m, " mg_pll_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_bias); + seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_tdc_coldst_bias); } drm_modeset_unlock_all(dev); @@ -3304,24 +3398,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { - int i; - int ret; - struct intel_engine_cs *engine; struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; struct i915_workarounds *workarounds = &dev_priv->workarounds; - enum intel_engine_id id; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + int i; intel_runtime_pm_get(dev_priv); seq_printf(m, "Workarounds applied: %d\n", workarounds->count); - for_each_engine(engine, dev_priv, id) - seq_printf(m, "HW whitelist count for %s: %d\n", - engine->name, workarounds->hw_whitelist_count[id]); for (i = 0; i < workarounds->count; ++i) { i915_reg_t addr; u32 mask, value, read; @@ -3337,7 +3420,6 @@ static int i915_wa_registers(struct seq_file *m, void *unused) } intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -4177,119 +4259,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, "0x%08llx\n"); static int -i915_max_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); - return 0; -} - -static int -i915_max_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go above the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->max_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, - i915_max_freq_get, i915_max_freq_set, - "%llu\n"); - -static int -i915_min_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); - return 0; -} - -static int -i915_min_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go below the set value. 
- */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || - val > hw_max || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->min_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, - i915_min_freq_get, i915_min_freq_set, - "%llu\n"); - -static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4850,8 +4819,6 @@ static const struct i915_debugfs_files { const struct file_operations *fops; } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, - {"i915_max_freq", &i915_max_freq_fops}, - {"i915_min_freq", &i915_min_freq_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, {"i915_ring_test_irq", &i915_ring_test_irq_fops}, @@ -4874,7 +4841,8 @@ static const struct i915_debugfs_files { {"i915_guc_log_relay", &i915_guc_log_relay_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, {"i915_ipc_status", &i915_ipc_status_fops}, - {"i915_drrs_ctl", &i915_drrs_ctl_fops} + {"i915_drrs_ctl", &i915_drrs_ctl_fops}, + {"i915_edp_psr_debug", &i915_edp_psr_debug_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index c471a7528a50..9c449b8d8eab 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -101,7 +101,13 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, __builtin_return_address(0), &vaf); if (is_error && !shown_bug_once) { - dev_notice(kdev, "%s", FDO_BUG_MSG); + /* + * Ask the user to file a bug report for the error, except + * if they may have caused the bug by fiddling with unsafe + * module parameters. + */ + if (!test_taint(TAINT_USER)) + dev_notice(kdev, "%s", FDO_BUG_MSG); shown_bug_once = true; } @@ -2468,10 +2474,13 @@ static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. + * + * This can fail to turn off the rc6 if the GPU is stuck after a failed + * reset and we are trying to force the machine to sleep. 
*/ if (vlv_wait_for_pw_status(dev_priv, mask, val)) - DRM_ERROR("timeout waiting for GT wells to go %s\n", - onoff(wait_for_on)); + DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n", + onoff(wait_for_on)); } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e50d9589d6e3..34c125e2d90c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,9 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" #include "i915_gpu_error.h" #include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -84,8 +85,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180413" -#define DRIVER_TIMESTAMP 1523611258 +#define DRIVER_DATE "20180514" +#define DRIVER_TIMESTAMP 1526300884 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -609,6 +610,9 @@ struct i915_psr { bool has_hw_tracking; bool psr2_enabled; u8 sink_sync_latency; + bool debug; + ktime_t last_entry_attempt; + ktime_t last_exit; void (*enable_source)(struct intel_dp *, const struct intel_crtc_state *); @@ -1069,6 +1073,7 @@ struct intel_vbt_data { } edp; struct { + bool enable; bool full_link; bool require_aux_wakeup; int idle_frames; @@ -1185,6 +1190,7 @@ struct skl_ddb_allocation { /* packed/y */ struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + u8 enabled_slices; /* GEN11 has configurable 2 slices */ }; struct skl_ddb_values { @@ -1297,7 +1303,6 @@ struct i915_wa_reg { struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; u32 count; - u32 hw_whitelist_count[I915_NUM_ENGINES]; }; struct i915_virtual_gpu { @@ -2056,8 +2061,11 @@ struct drm_i915_private { void (*cleanup_engine)(struct intel_engine_cs *engine); struct list_head timelines; - struct i915_gem_timeline global_timeline; + + struct list_head active_rings; + struct list_head closed_vma; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing @@ -2462,6 +2470,15 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) +#define ICL_REVID_A0 0x0 +#define ICL_REVID_A2 0x1 +#define ICL_REVID_B0 0x3 +#define ICL_REVID_B2 0x4 +#define ICL_REVID_C0 0x5 + +#define IS_ICL_REVID(p, since, until) \ + (IS_ICELAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -3159,7 +3176,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int priority); + const struct i915_sched_attr *attr); #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check @@ -3228,16 +3245,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline struct intel_timeline * -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_address_space *vm; - - vm = ctx->ppgtt ? 
&ctx->ppgtt->base : &ctx->i915->ggtt.base; - return &vm->timeline.engine[engine->id]; -} - int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 4c9d2a6f7d28..0a2070112b66 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -141,6 +141,7 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) { lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); if (!i915->gt.awake) return I915_EPOCH_INVALID; @@ -161,9 +162,10 @@ static u32 __i915_gem_park(struct drm_i915_private *i915) synchronize_irq(i915->drm.irq); intel_engines_park(i915); - i915_gem_timelines_park(i915); + i915_timelines_park(i915); i915_pmu_gt_parked(i915); + i915_vma_parked(i915); i915->gt.awake = false; @@ -564,7 +566,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void __fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -575,13 +578,16 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(rq, prio); + engine->schedule(rq, attr); rcu_read_unlock(); + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -589,16 +595,16 @@ static void fence_set_priority(struct dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_priority(array->fences[i], attr); } else { - __fence_set_priority(fence, prio); + __fence_set_priority(fence, attr); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + const struct i915_sched_attr *attr) { struct dma_fence *excl; @@ -613,7 +619,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -623,7 +629,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_priority(excl, attr); dma_fence_put(excl); } return 0; @@ -2974,8 +2980,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. 
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; @@ -2986,7 +2992,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return active; } @@ -3107,15 +3113,15 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline; + struct i915_timeline *timeline = request->timeline; unsigned long flags; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline->lock, flags); - spin_lock(&timeline->lock); + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); - list_for_each_entry_continue(request, &engine->timeline->requests, link) + list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->ctx == hung_ctx) skip_request(request); @@ -3123,7 +3129,7 @@ static void engine_skip_context(struct i915_request *request) skip_request(request); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } /* Returns the request if it was guilty of the hang */ @@ -3180,11 +3186,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); request = list_prev_entry(request, link); - if (&request->link == &engine->timeline->requests) + if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } } @@ -3232,7 +3238,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); /* * Ostensibily, we always want a context loaded for powersaving, @@ -3297,10 +3303,10 @@ static void nop_complete_submit_request(struct i915_request *request) request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline->lock, flags); + spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline->lock, flags); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -3310,7 +3316,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) GEM_TRACE("start\n"); - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); for_each_engine(engine, i915, id) @@ -3369,10 +3375,10 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * (lockless) lookup doesn't try and wait upon the request as we * reset it. 
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); i915_gem_reset_finish_engine(engine); } @@ -3384,8 +3390,7 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) bool i915_gem_unset_wedged(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl; - int i; + struct i915_timeline *tl; lockdep_assert_held(&i915->drm.struct_mutex); if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) @@ -3404,29 +3409,27 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ list_for_each_entry(tl, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct i915_request *rq; + struct i915_request *rq; - rq = i915_gem_active_peek(&tl->engine[i].last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; - /* - * We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). 
+ */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; } i915_retire_requests(i915); GEM_BUG_ON(i915->gt.active_requests); @@ -3731,17 +3734,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) +static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); - if (ret) - return ret; - } - - return 0; + return i915_gem_active_wait(&tl->last_request, flags); } static int wait_for_engines(struct drm_i915_private *i915) @@ -3759,30 +3754,37 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - int ret; - /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; if (flags & I915_WAIT_LOCKED) { - struct i915_gem_timeline *tl; + struct i915_timeline *tl; + int err; lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; + err = wait_for_timeline(tl, flags); + if (err) + return err; } i915_retire_requests(i915); - ret = wait_for_engines(i915); + return wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - } + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return ret; + for_each_engine(engine, i915, id) { + err = wait_for_timeline(&engine->timeline, flags); + if (err) + return err; + } + + return 0; + } } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4796,7 +4798,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, &obj->vma_list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_close(vma); + i915_vma_destroy(vma); } GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); @@ -4951,7 +4953,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != kernel_context); } } @@ -5289,7 +5291,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { struct i915_vma *state; - state = ctx->engine[id].state; + state = to_intel_context(ctx, engine)->state; if (!state) continue; @@ -5597,12 +5599,9 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); - err = i915_gem_timeline_init__global(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (err) - goto err_priorities; + INIT_LIST_HEAD(&dev_priv->gt.active_rings); + INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); @@ -5623,8 +5622,6 @@ int i915_gem_init_early(struct drm_i915_private *dev_priv) return 0; -err_priorities: - kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -5645,11 +5642,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) 
GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); - mutex_unlock(&dev_priv->drm.struct_mutex); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index deaf78d2ae8b..525920404ede 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -30,6 +30,9 @@ struct drm_i915_private; #ifdef CONFIG_DRM_I915_DEBUG_GEM + +#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) + #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \ __func__, __LINE__, __stringify(condition)); \ @@ -45,6 +48,9 @@ struct drm_i915_private; #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else + +#define GEM_SHOW_DEBUG() (0) + #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 9b3834a846e8..33f8a4b3c981 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -117,15 +117,15 @@ static void lut_close(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - int i; + unsigned int n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); i915_ppgtt_put(ctx->ppgtt); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_context *ce = &ctx->engine[i]; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; if (!ce->state) continue; @@ -281,7 +281,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; - ctx->priority = I915_PRIORITY_NORMAL; + ctx->sched.priority = I915_PRIORITY_NORMAL; INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -431,7 +431,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - ctx->priority = prio; + ctx->sched.priority = prio; ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -521,7 +521,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) if (!engine->last_retired_context) continue; - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = NULL; } } @@ -577,19 +577,29 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static struct i915_request * +last_request_on_engine(struct i915_timeline *timeline, + struct intel_engine_cs *engine) { - struct i915_gem_timeline *timeline; + struct i915_request *rq; - list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { - struct intel_timeline *tl; + if (timeline == &engine->timeline) + return NULL; - if (timeline == &engine->i915->gt.global_timeline) - continue; + rq = i915_gem_active_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); + if (rq && rq->engine == engine) + return rq; + + return NULL; +} - tl = 
&timeline->engine[engine->id]; - if (i915_gem_active_peek(&tl->last_request, - &engine->i915->drm.struct_mutex)) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + if (last_request_on_engine(timeline, engine)) return false; } @@ -599,7 +609,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; enum intel_engine_id id; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -619,11 +629,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, - &dev_priv->drm.struct_mutex); + prev = last_request_on_engine(timeline, engine); if (prev) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, @@ -753,7 +760,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: - args->value = ctx->priority; + args->value = ctx->sched.priority; break; default: ret = -EINVAL; @@ -826,7 +833,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, !capable(CAP_SYS_NICE)) ret = -EPERM; else - ctx->priority = priority; + ctx->sched.priority = priority; } break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 7854262ddfd9..ace3b129c189 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -137,18 +137,7 @@ struct i915_gem_context { */ u32 user_handle; - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. 
- */ - int priority; + struct i915_sched_attr sched; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; @@ -160,7 +149,7 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - } engine[I915_NUM_ENGINES]; + } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; @@ -267,6 +256,34 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_ring * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + engine->context_unpin(engine, ctx); +} + /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c74f5df3fb5a..f627a8c47c58 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -762,7 +762,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) } /* transfer ref to ctx */ - vma->open_count++; + if (!vma->open_count++) + i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); list_add(&lut->ctx_link, &eb->ctx->handles_list); lut->ctx = eb->ctx; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 21d72f695adb..996ab2ad6c45 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma); static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { - /* Note that as an uncached mmio write, this should flush the + /* + * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -1161,6 +1162,27 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr[idx.pde] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. 
+ */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = pte_encode | vma->vm->scratch_page.daddr; + vaddr = kmap_atomic_px(pd->page_table[idx.pde]); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } } vma->page_sizes.gtt |= page_size; @@ -2111,8 +2133,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv, const char *name) { - i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; @@ -2129,7 +2149,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) if (pagevec_count(&vm->free_pages)) vm_free_pages_release(vm, true); - i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); } @@ -2140,15 +2159,15 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); /* * To support 64K PTEs we need to first enable the use of the @@ -2222,6 +2241,12 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, void i915_ppgtt_close(struct i915_address_space *vm) { + GEM_BUG_ON(vm->closed); + vm->closed = true; +} + +static void ppgtt_destroy_vma(struct i915_address_space *vm) +{ struct list_head *phases[] = { &vm->active_list, &vm->inactive_list, @@ -2229,15 +2254,12 @@ void i915_ppgtt_close(struct i915_address_space *vm) NULL, }, **phase; - GEM_BUG_ON(vm->closed); vm->closed = true; - for (phase = phases; *phase; phase++) { struct i915_vma *vma, *vn; list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!i915_vma_is_closed(vma)) - i915_vma_close(vma); + i915_vma_destroy(vma); } } @@ -2248,7 +2270,8 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound and destroyed */ + ppgtt_destroy_vma(&ppgtt->base); + GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); @@ -2417,11 +2440,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, for_each_sgt_dma(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); - wmb(); - - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. 
*/ ggtt->invalidate(vm->i915); } @@ -2459,11 +2480,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; for_each_sgt_dma(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - wmb(); - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -3325,14 +3345,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - if (INTEL_GEN(dev_priv) >= 9) { - size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) size = chv_get_total_gtt_size(snb_gmch_ctl); - } else { + else size = gen8_get_total_gtt_size(snb_gmch_ctl); - } ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; ggtt->base.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6efc017e8bb3..aec4f73574f4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,10 +38,9 @@ #include <linux/mm.h> #include <linux/pagevec.h> -#include "i915_gem_timeline.h" - #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_64K BIT(16) @@ -257,7 +256,6 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; - struct i915_gem_timeline timeline; struct drm_i915_private *i915; struct device *dma; /* Every address space belongs to a struct file - except for the global @@ -344,6 +342,7 @@ struct i915_address_space { void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index af915d041281..ad949cc30928 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -51,6 +51,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, if (!drm_mm_initialized(&dev_priv->mm.stolen)) return -ENODEV; + /* WaSkipStolenMemoryFirstPage:bdw+ */ + if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + start = 4096; + mutex_lock(&dev_priv->mm.stolen_lock); ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size, alignment, 0, @@ -343,7 +347,6 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -435,17 +438,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) (u64)resource_size(&dev_priv->dsm) >> 10, ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - stolen_usable_start = 0; - /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8) - stolen_usable_start = 4096; - dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; + resource_size(&dev_priv->dsm) - reserved_total; /* Basic memrange allocator for stolen space. 
*/ - drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - dev_priv->stolen_usable_size); + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c deleted file mode 100644 index e9fd87604067..000000000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_syncmap.h" - -static void __intel_timeline_init(struct intel_timeline *tl, - struct i915_gem_timeline *parent, - u64 context, - struct lock_class_key *lockclass, - const char *lockname) -{ - tl->fence_context = context; - tl->common = parent; - spin_lock_init(&tl->lock); - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - i915_syncmap_init(&tl->sync); -} - -static void __intel_timeline_fini(struct intel_timeline *tl) -{ - GEM_BUG_ON(!list_empty(&tl->requests)); - - i915_syncmap_free(&tl->sync); -} - -static int __i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name, - struct lock_class_key *lockclass, - const char *lockname) -{ - unsigned int i; - u64 fences; - - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. 
- */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->i915 = i915; - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); - if (!timeline->name) - return -ENOMEM; - - list_add(&timeline->link, &i915->gt.timelines); - - /* Called during early_init before we know how many engines there are */ - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_init(&timeline->engine[i], - timeline, fences++, - lockclass, lockname); - - return 0; -} - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, timeline, name, - &class, "&timeline->lock"); -} - -int i915_gem_timeline_init__global(struct drm_i915_private *i915) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, - &i915->gt.global_timeline, - "[execution]", - &class, "&global_timeline->lock"); -} - -/** - * i915_gem_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_gem_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gem_timeline *timeline; - int i; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. 
- */ - i915_syncmap_free(&tl->sync); - } - } -} - -void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) -{ - int i; - - lockdep_assert_held(&timeline->i915->drm.struct_mutex); - - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_fini(&timeline->engine[i]); - - list_del(&timeline->link); - kfree(timeline->name); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_gem_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index d596a8302ca3..854bd51b9478 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index effaf982b19b..df234dc23274 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -404,16 +404,17 @@ static const char *bannable(const struct drm_i915_error_context *ctx) static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq, + const unsigned long epoch) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->priority, - jiffies_to_msecs(jiffies - erq->jiffies), - erq->head, erq->tail); + erq->context, erq->seqno, erq->sched_attr.priority, + jiffies_to_msecs(erq->jiffies - epoch), + erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, @@ -422,12 +423,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m, { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, bannable(ctx), + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee, + const unsigned long epoch) { int n; @@ -497,14 +499,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); err_printf(m, " hangcheck action: %s\n", hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n", + err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, - jiffies_to_msecs(jiffies - ee->hangcheck_timestamp)); + ee->hangcheck_timestamp == epoch ? 
"; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n]); + error_print_request(m, " ", &ee->execlist[n], epoch); } error_print_context(m, " Active context: ", &ee->context); @@ -650,6 +653,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", + error->capture, + jiffies_to_msecs(jiffies - error->capture), + jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -710,7 +718,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].engine_id != -1) - error_print_engine(m, &error->engine[i]); + error_print_engine(m, &error->engine[i], error->epoch); } for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { @@ -769,7 +777,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, dev_priv->engine[i]->name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", &ee->requests[j]); + error_print_request(m, " ", + &ee->requests[j], + error->epoch); } if (IS_ERR(ee->waiters)) { @@ -1278,10 +1288,11 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->priotree.priority; + erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; + erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; @@ -1299,7 +1310,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) + list_for_each_entry_from(request, &engine->timeline.requests, link) count++; if (!count) return; @@ -1312,7 +1323,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) { + list_for_each_entry_from(request, &engine->timeline.requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in @@ -1372,7 +1383,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; - e->priority = ctx->priority; + e->sched_attr = ctx->sched; e->ban_score = atomic_read(&ctx->ban_score); e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); @@ -1472,7 +1483,8 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - request->ctx->engine[i].state); + to_intel_context(request->ctx, + engine)->state); error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1735,6 +1747,22 @@ static void capture_params(struct i915_gpu_state *error) #undef DUP } +static unsigned long capture_find_epoch(const struct i915_gpu_state *error) +{ + unsigned long epoch = error->capture; + int i; + + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + const struct drm_i915_error_engine *ee 
= &error->engine[i]; + + if (ee->hangcheck_stalled && + time_before(ee->hangcheck_timestamp, epoch)) + epoch = ee->hangcheck_timestamp; + } + + return epoch; +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1743,6 +1771,7 @@ static int capture(void *data) error->boottime = ktime_get_boottime(); error->uptime = ktime_sub(ktime_get(), error->i915->gt.last_init_time); + error->capture = jiffies; capture_params(error); capture_gen_state(error); @@ -1756,6 +1785,8 @@ static int capture(void *data) error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); + error->epoch = capture_find_epoch(error); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index c05b6034d718..dac0f8c4c1cf 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -20,6 +20,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_params.h" +#include "i915_scheduler.h" struct drm_i915_private; struct intel_overlay_error_state; @@ -30,6 +31,8 @@ struct i915_gpu_state { ktime_t time; ktime_t boottime; ktime_t uptime; + unsigned long capture; + unsigned long epoch; struct drm_i915_private *i915; @@ -122,11 +125,11 @@ struct i915_gpu_state { pid_t pid; u32 handle; u32 hw_id; - int priority; int ban_score; int active; int guilty; bool bannable; + struct i915_sched_attr sched_attr; } context; struct drm_i915_error_object { @@ -147,11 +150,12 @@ struct i915_gpu_state { long jiffies; pid_t pid; u32 context; - int priority; int ban_score; u32 seqno; + u32 start; u32 head; u32 tail; + struct i915_sched_attr sched_attr; } *requests, execlist[EXECLIST_MAX_PORTS]; unsigned int num_ports; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b03d18561b55..f9bc3aaa90d0 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -247,9 +247,9 @@ static u32 gen11_gt_engine_identity(struct drm_i915_private * const i915, const unsigned int bank, const unsigned int bit); -static bool gen11_reset_one_iir(struct drm_i915_private * const i915, - const unsigned int bank, - const unsigned int bit) +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit) { void __iomem * const regs = i915->regs; u32 dw; @@ -2464,6 +2464,13 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); + if (de_iir & DE_EDP_PSR_INT_HSW) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + } + if (de_iir & DE_AUX_CHANNEL_A_IVB) dp_aux_irq_handler(dev_priv); @@ -2593,11 +2600,25 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (master_ctl & GEN8_DE_MISC_IRQ) { iir = I915_READ(GEN8_DE_MISC_IIR); if (iir) { + bool found = false; + I915_WRITE(GEN8_DE_MISC_IIR, iir); ret = IRQ_HANDLED; - if (iir & GEN8_DE_MISC_GSE) + + if (iir & GEN8_DE_MISC_GSE) { intel_opregion_asle_intr(dev_priv); - else + found = true; + } + + if (iir & GEN8_DE_EDP_PSR) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + found = true; + } + + if (!found) DRM_ERROR("Unexpected DE Misc interrupt\n"); } else @@ -3348,6 +3369,11 @@ static void ironlake_irq_reset(struct drm_device *dev) if (IS_GEN7(dev_priv)) I915_WRITE(GEN7_ERR_INT, 0xffffffff); 
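The error-state hunks above replace the old "N ms ago" timestamps with values measured from a capture epoch: error->capture records jiffies when the capture worker runs, and capture_find_epoch() pulls the epoch back to the oldest stalled hangcheck timestamp so every engine and request time is reported against the same reference. A minimal standalone sketch of that arithmetic, under the assumption that the struct and helper names below are hypothetical (only jiffies, time_before() and jiffies_to_msecs() are real kernel interfaces):

#include <linux/types.h>
#include <linux/jiffies.h>

struct sample_event {
	unsigned long stamp;	/* jiffies when the event was recorded */
	bool stalled;
};

/*
 * Mirrors the capture_find_epoch() idea: the epoch is the oldest stalled
 * timestamp, defaulting to the capture time itself when nothing stalled.
 */
static unsigned long sample_find_epoch(const struct sample_event *ev,
				       int count, unsigned long capture)
{
	unsigned long epoch = capture;
	int i;

	for (i = 0; i < count; i++)
		if (ev[i].stalled && time_before(ev[i].stamp, epoch))
			epoch = ev[i].stamp;

	return epoch;
}

/*
 * An event recorded at 'stamp' is then printed as
 * jiffies_to_msecs(stamp - epoch), i.e. "ms after epoch", rather than an
 * ever-shifting "ms ago" relative to the current jiffies.
 */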
+ if (IS_HASWELL(dev_priv)) { + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + } + gen5_gt_irq_reset(dev_priv); ibx_irq_reset(dev_priv); @@ -3386,6 +3412,9 @@ static void gen8_irq_reset(struct drm_device *dev) gen8_gt_irq_reset(dev_priv); + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) @@ -3762,6 +3791,12 @@ static int ironlake_irq_postinstall(struct drm_device *dev) DE_DP_A_HOTPLUG); } + if (IS_HASWELL(dev_priv)) { + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + display_mask |= DE_EDP_PSR_INT_HSW; + } + dev_priv->irq_mask = ~display_mask; ibx_irq_pre_postinstall(dev); @@ -3872,7 +3907,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) uint32_t de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; - u32 de_misc_masked = GEN8_DE_MISC_GSE; + u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR; enum pipe pipe; if (INTEL_GEN(dev_priv) >= 9) { @@ -3897,6 +3932,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 08108ce5be21..66ea3552c63e 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -164,6 +164,9 @@ i915_param_named_unsafe(guc_firmware_path, charp, 0400, i915_param_named_unsafe(huc_firmware_path, charp, 0400, "HuC firmware path to use instead of the default one"); +i915_param_named_unsafe(dmc_firmware_path, charp, 0400, + "DMC firmware path to use instead of the default one"); + i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c96360398072..6684025b7af8 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -51,6 +51,7 @@ struct drm_printer; param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ + param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index bfc906cd4e5e..019bd2d073ad 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1234,7 +1234,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... 
*/ - ring = engine->context_pin(engine, stream->ctx); + ring = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -1246,7 +1246,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * with gen8+ and execlists */ dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); } return 0; @@ -1271,7 +1271,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + intel_context_unpin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1695,7 +1695,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; struct i915_request *rq; int ret; @@ -1716,15 +1716,11 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - GFP_KERNEL); + i915_request_await_dma_fence(rq, &prev->fence); } i915_request_add(rq); @@ -1759,6 +1755,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; @@ -1789,7 +1786,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. 
*/ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = &ctx->engine[RCS]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ @@ -1963,11 +1960,19 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN7_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } static void gen8_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN8_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } /** diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index fb106026a1f4..f11bb213ec07 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3840,6 +3840,7 @@ enum { #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define MSCUNIT_CLKGATE_DIS (1 << 10) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) @@ -3847,6 +3848,9 @@ enum { #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) +#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define CGPSF_CLKGATE_DIS (1 << 3) + /* * Display engine regs */ @@ -4026,6 +4030,13 @@ enum { #define EDP_PSR_TP1_TIME_0us (3<<4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 +/* Bspec claims those aren't shifted but stay at 0x64800 */ +#define EDP_PSR_IMR _MMIO(0x64834) +#define EDP_PSR_IIR _MMIO(0x64838) +#define EDP_PSR_ERROR(trans) (1 << (((trans) * 8 + 10) & 31)) +#define EDP_PSR_POST_EXIT(trans) (1 << (((trans) * 8 + 9) & 31)) +#define EDP_PSR_PRE_ENTRY(trans) (1 << (((trans) * 8 + 8) & 31)) + #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) #define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) #define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) @@ -4088,6 +4099,29 @@ enum { #define EDP_PSR2_IDLE_FRAME_MASK 0xf #define EDP_PSR2_IDLE_FRAME_SHIFT 0 +#define _PSR_EVENT_TRANS_A 0x60848 +#define _PSR_EVENT_TRANS_B 0x61848 +#define _PSR_EVENT_TRANS_C 0x62848 +#define _PSR_EVENT_TRANS_D 0x63848 +#define _PSR_EVENT_TRANS_EDP 0x6F848 +#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) +#define PSR_EVENT_PSR2_DISABLED (1 << 16) +#define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) +#define PSR_EVENT_SU_CRC_FIFO_UNDERRUN (1 << 14) +#define PSR_EVENT_GRAPHICS_RESET (1 << 12) +#define PSR_EVENT_PCH_INTERRUPT (1 << 11) +#define PSR_EVENT_MEMORY_UP (1 << 10) +#define PSR_EVENT_FRONT_BUFFER_MODIFY (1 << 9) +#define PSR_EVENT_WD_TIMER_EXPIRE (1 << 8) +#define PSR_EVENT_PIPE_REGISTERS_UPDATE (1 << 6) +#define PSR_EVENT_REGISTER_UPDATE (1 << 5) +#define PSR_EVENT_HDCP_ENABLE (1 << 4) +#define PSR_EVENT_KVMR_SESSION_ENABLE (1 << 3) +#define PSR_EVENT_VBI_ENABLE (1 << 2) +#define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) +#define PSR_EVENT_PSR_DISABLE (1 << 0) + #define EDP_PSR2_STATUS _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) #define EDP_PSR2_STATUS_STATE_SHIFT 28 @@ -6377,9 +6411,9 @@ enum { #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ -#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define 
PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) -#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) #define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) @@ -6474,6 +6508,9 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c +#define SKL_DDB_ENTRY_MASK 0x3FF +#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) #define _PLANE_BUF_CFG_2(pipe) \ @@ -6837,6 +6874,7 @@ enum { #define DE_PCH_EVENT_IVB (1<<28) #define DE_DP_A_HOTPLUG_IVB (1<<27) #define DE_AUX_CHANNEL_A_IVB (1<<26) +#define DE_EDP_PSR_INT_HSW (1<<19) #define DE_SPRITEC_FLIP_DONE_IVB (1<<14) #define DE_PLANEC_FLIP_DONE_IVB (1<<13) #define DE_PIPEC_VBLANK_IVB (1<<10) @@ -6961,6 +6999,7 @@ enum { #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) #define GEN8_DE_MISC_GSE (1 << 27) +#define GEN8_DE_EDP_PSR (1 << 19) #define GEN8_PCU_ISR _MMIO(0x444e0) #define GEN8_PCU_IMR _MMIO(0x444e4) @@ -7191,18 +7230,22 @@ enum { #define GEN7_L3CNTLREG3 _MMIO(0xB024) #define GEN7_L3_CHICKEN_MODE_REGISTER _MMIO(0xB030) -#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xB114) +#define GEN11_I2M_WRITE_DISABLE (1 << 28) #define GEN7_L3SQCREG4 _MMIO(0xb034) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) #define GEN8_L3SQCREG4 _MMIO(0xb118) -#define GEN8_LQSC_RO_PERF_DIS (1<<27) -#define GEN8_LQSC_FLUSH_COHERENT_LINES (1<<21) +#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) +#define GEN8_LQSC_RO_PERF_DIS (1 << 27) +#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) +#define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) @@ -7216,6 +7259,9 @@ enum { #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) +#define GEN9_WM_CHICKEN3 _MMIO(0x5588) +#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) @@ -8214,8 +8260,30 @@ enum { #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1<<6) -#define GEN8_GARBCNTL _MMIO(0xB004) -#define GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) +#define GEN8_GARBCNTL _MMIO(0xB004) +#define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) +#define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) +#define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) +#define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) + +#define GEN11_GLBLINVL _MMIO(0xB404) +#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) +#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) + +#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define DFR_DISABLE (1 << 9) + +#define GEN11_GACB_PERF_CTRL _MMIO(0x4B80) +#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) +#define GEN11_HASH_CTRL_BIT0 (1 << 0) +#define GEN11_HASH_CTRL_BIT4 (1 << 12) + +#define GEN11_LSN_UNSLCVC _MMIO(0xB43C) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) + +#define GAMW_ECO_DEV_RW_IA_REG _MMIO(0x4080) 
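The new SKL_DDB_ENTRY_MASK / ICL_DDB_ENTRY_MASK / DDB_ENTRY_END_SHIFT definitions widen the DDB allocation fields for ICL (11 bits instead of 10). As a rough sketch of how a PLANE_BUF_CFG value would decode with them, assuming the start-in-the-low-bits, end-at-bit-16 layout these masks imply (the struct and helper names here are hypothetical, not from this patch):

#include <linux/types.h>

struct ddb_span {
	u16 start;
	u16 end;
};

static void decode_ddb_span(u32 reg, u32 mask, struct ddb_span *span)
{
	/* mask is SKL_DDB_ENTRY_MASK (0x3FF) or ICL_DDB_ENTRY_MASK (0x7FF) */
	span->start = reg & mask;
	span->end = (reg >> DDB_ENTRY_END_SHIFT) & mask;
}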
+#define GAMW_ECO_DEV_CTX_RELOAD_DISABLE (1 << 7) /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ @@ -8724,6 +8792,12 @@ enum skl_power_gate { #define PORT_CLK_SEL_NONE (7<<29) #define PORT_CLK_SEL_MASK (7<<29) +/* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */ +#define DDI_CLK_SEL(port) PORT_CLK_SEL(port) +#define DDI_CLK_SEL_NONE (0x0 << 28) +#define DDI_CLK_SEL_MG (0x8 << 28) +#define DDI_CLK_SEL_MASK (0xF << 28) + /* Transcoder clock selection */ #define _TRANS_CLK_SEL_A 0x46140 #define _TRANS_CLK_SEL_B 0x46144 @@ -8854,6 +8928,7 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) +#define DPCLKA_CFGCR0_ICL _MMIO(0x164280) #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ (port)+10)) #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ @@ -8870,10 +8945,141 @@ enum skl_power_gate { #define PLL_POWER_STATE (1 << 26) #define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) +#define _MG_PLL1_ENABLE 0x46030 +#define _MG_PLL2_ENABLE 0x46034 +#define _MG_PLL3_ENABLE 0x46038 +#define _MG_PLL4_ENABLE 0x4603C +/* Bits are the same as DPLL0_ENABLE */ +#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ + _MG_PLL2_ENABLE) + +#define _MG_REFCLKIN_CTL_PORT1 0x16892C +#define _MG_REFCLKIN_CTL_PORT2 0x16992C +#define _MG_REFCLKIN_CTL_PORT3 0x16A92C +#define _MG_REFCLKIN_CTL_PORT4 0x16B92C +#define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) +#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) + +#define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 +#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) +#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) +#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) + +#define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 +#define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) +#define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) +#define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) +#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) +#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) + +#define _MG_PLL_DIV0_PORT1 0x168A00 +#define _MG_PLL_DIV0_PORT2 0x169A00 +#define _MG_PLL_DIV0_PORT3 0x16AA00 +#define _MG_PLL_DIV0_PORT4 0x16BA00 +#define MG_PLL_DIV0_FRACNEN_H (1 << 30) +#define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) +#define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) + +#define _MG_PLL_DIV1_PORT1 0x168A04 +#define _MG_PLL_DIV1_PORT2 0x169A04 +#define _MG_PLL_DIV1_PORT3 0x16AA04 +#define _MG_PLL_DIV1_PORT4 0x16BA04 +#define MG_PLL_DIV1_IREF_NDIVRATIO(x) ((x) << 16) +#define MG_PLL_DIV1_DITHER_DIV_1 (0 << 12) +#define MG_PLL_DIV1_DITHER_DIV_2 (1 << 12) +#define MG_PLL_DIV1_DITHER_DIV_4 (2 << 12) +#define MG_PLL_DIV1_DITHER_DIV_8 (3 << 12) +#define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) +#define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) +#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, 
\ + _MG_PLL_DIV1_PORT2) + +#define _MG_PLL_LF_PORT1 0x168A08 +#define _MG_PLL_LF_PORT2 0x169A08 +#define _MG_PLL_LF_PORT3 0x16AA08 +#define _MG_PLL_LF_PORT4 0x16BA08 +#define MG_PLL_LF_TDCTARGETCNT(x) ((x) << 24) +#define MG_PLL_LF_AFCCNTSEL_256 (0 << 20) +#define MG_PLL_LF_AFCCNTSEL_512 (1 << 20) +#define MG_PLL_LF_GAINCTRL(x) ((x) << 16) +#define MG_PLL_LF_INT_COEFF(x) ((x) << 8) +#define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) +#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) + +#define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C +#define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C +#define _MG_PLL_FRAC_LOCK_PORT3 0x16AA0C +#define _MG_PLL_FRAC_LOCK_PORT4 0x16BA0C +#define MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 (1 << 18) +#define MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 (1 << 16) +#define MG_PLL_FRAC_LOCK_LOCKTHRESH(x) ((x) << 11) +#define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) +#define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) +#define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) +#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) + +#define _MG_PLL_SSC_PORT1 0x168A10 +#define _MG_PLL_SSC_PORT2 0x169A10 +#define _MG_PLL_SSC_PORT3 0x16AA10 +#define _MG_PLL_SSC_PORT4 0x16BA10 +#define MG_PLL_SSC_EN (1 << 28) +#define MG_PLL_SSC_TYPE(x) ((x) << 26) +#define MG_PLL_SSC_STEPLENGTH(x) ((x) << 16) +#define MG_PLL_SSC_STEPNUM(x) ((x) << 10) +#define MG_PLL_SSC_FLLEN (1 << 9) +#define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) +#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) + +#define _MG_PLL_BIAS_PORT1 0x168A14 +#define _MG_PLL_BIAS_PORT2 0x169A14 +#define _MG_PLL_BIAS_PORT3 0x16AA14 +#define _MG_PLL_BIAS_PORT4 0x16BA14 +#define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) +#define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) +#define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) +#define MG_PLL_BIAS_BIASCAL_EN (1 << 15) +#define MG_PLL_BIAS_CTRIM(x) ((x) << 8) +#define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) +#define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) +#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) + +#define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT3 0x16AA18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT4 0x16BA18 +#define MG_PLL_TDC_COLDST_IREFINT_EN (1 << 27) +#define MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x) ((x) << 17) +#define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) +#define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) +#define MG_PLL_TDC_TDCSEL(x) ((x) << 0) +#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) + #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 #define DPLL_CFGCR0_HDMI_MODE (1 << 30) #define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) #define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) #define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) #define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) @@ -8907,8 +9113,19 @@ enum skl_power_gate { #define DPLL_CFGCR1_PDIV_5 (4 << 2) #define DPLL_CFGCR1_PDIV_7 (8 << 2) #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) +#define _ICL_DPLL0_CFGCR0 0x164000 +#define _ICL_DPLL1_CFGCR0 0x164080 +#define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ + _ICL_DPLL1_CFGCR0) + +#define _ICL_DPLL0_CFGCR1 
0x164004 +#define _ICL_DPLL1_CFGCR1 0x164084 +#define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ + _ICL_DPLL1_CFGCR1) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9680,6 +9897,13 @@ enum skl_power_gate { #define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ #define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ #define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ +/* Media decoder 2 MOCS registers */ +#define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) + +#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) +#define PMFLUSHDONE_LNICRSDROP (1 << 20) +#define PMFLUSH_GAPL3UNBLOCK (1 << 21) +#define PMFLUSHDONE_LNEBLK (1 << 22) /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9ca9c24b4421..8928894dd9c7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->common->name; + return to_request(fence)->timeline->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -125,22 +125,22 @@ i915_dependency_free(struct drm_i915_private *i915, } static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) +__i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) { INIT_LIST_HEAD(&dep->dfs_link); list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); + list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->flags = flags; } static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) +i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) { struct i915_dependency *dep; @@ -148,16 +148,18 @@ i915_priotree_add_dependency(struct drm_i915_private *i915, if (!dep) return -ENOMEM; - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + __i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC); return 0; } static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) { - struct i915_dependency *dep, *next; + struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&pt->link)); + GEM_BUG_ON(!list_empty(&node->link)); /* * Everyone we depended upon (the fences we wait to be signaled) @@ -165,8 +167,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) * However, retirement is run independently on each timeline and * so we may be called out-of-order. 
*/ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); @@ -175,8 +177,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->signal_link); @@ -186,17 +188,18 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } static void -i915_priotree_init(struct i915_priotree *pt) +i915_sched_node_init(struct i915_sched_node *node) { - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; + struct i915_timeline *timeline; enum intel_engine_id id; int ret; @@ -211,34 +214,33 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; - GEM_TRACE("%s seqno %d (current %d) -> %d\n", engine->name, - tl->seqno, + engine->timeline.seqno, intel_engine_get_seqno(engine), seqno); - if (!i915_seqno_passed(seqno, tl->seqno)) { + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! 
*/ GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); + engine->timeline.seqno = seqno; } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); + + i915->gt.request_serial = seqno; + return 0; } @@ -255,18 +257,22 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static int reserve_engine(struct intel_engine_cs *engine) +static int reserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; int ret; - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + /* + * Reservation is fine until we may need to wrap around + * + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. + */ + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } @@ -277,15 +283,11 @@ static int reserve_engine(struct intel_engine_cs *engine) return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - + GEM_BUG_ON(!i915->gt.active_requests); if (!--i915->gt.active_requests) i915_gem_park(i915); - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -296,6 +298,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void advance_ring(struct i915_request *request) { + struct intel_ring *ring = request->ring; unsigned int tail; /* @@ -307,7 +310,8 @@ static void advance_ring(struct i915_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); + if (list_is_last(&request->ring_link, &ring->request_list)) { /* * We may race here with execlists resubmitting this request * as we retire it. The resubmission will move the ring->tail @@ -317,12 +321,13 @@ static void advance_ring(struct i915_request *request) * noops - they are safe to be replayed on a reset. 
*/ tail = READ_ONCE(request->tail); + list_del(&ring->active_link); } else { tail = request->postfix; } - list_del(&request->ring_link); + list_del_init(&request->ring_link); - request->ring->head = tail; + ring->head = tail; } static void free_capture_list(struct i915_request *request) @@ -338,31 +343,84 @@ static void free_capture_list(struct i915_request *request) } } +static void __retire_engine_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", + __func__, engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(engine)); + + GEM_BUG_ON(!i915_request_completed(rq)); + + local_irq_disable(); + + spin_lock(&engine->timeline.lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); + list_del_init(&rq->link); + spin_unlock(&engine->timeline.lock); + + spin_lock(&rq->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + dma_fence_signal_locked(&rq->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + intel_engine_cancel_signaling(rq); + if (rq->waitboost) { + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + } + spin_unlock(&rq->lock); + + local_irq_enable(); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + intel_context_unpin(engine->last_retired_context, engine); + engine->last_retired_context = rq->ctx; +} + +static void __retire_engine_upto(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct i915_request *tmp; + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline.requests, + typeof(*tmp), link); + + GEM_BUG_ON(tmp->engine != engine); + __retire_engine_request(engine, tmp); + } while (tmp != rq); +} + static void i915_request_retire(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", - engine->name, + request->engine->name, request->fence.context, request->fence.seqno, request->global_seqno, - intel_engine_get_seqno(engine)); + intel_engine_get_seqno(request->engine)); lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); trace_i915_request_retire(request); - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); advance_ring(request); - free_capture_list(request); /* @@ -398,65 +456,53 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); + intel_context_unpin(request->ctx, request->engine); - /* - * The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. 
However, since we - * cannot take the required locks at i915_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; + __retire_engine_upto(request->engine, request); - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); - i915_priotree_fini(request->i915, &request->priotree); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *tmp; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + rq->engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(rq->engine)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->link)) + if (list_empty(&rq->ring_link)) return; do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); + tmp = list_first_entry(&ring->request_list, + typeof(*tmp), ring_link); i915_request_retire(tmp); } while (tmp != rq); } -static u32 timeline_get_seqno(struct intel_timeline *tl) +static u32 timeline_get_seqno(struct i915_timeline *tl) { return ++tl->seqno; } static void move_to_timeline(struct i915_request *request, - struct intel_timeline *timeline) + struct i915_timeline *timeline) { - GEM_BUG_ON(request->timeline == request->engine->timeline); - lockdep_assert_held(&request->engine->timeline->lock); + GEM_BUG_ON(request->timeline == &request->engine->timeline); + lockdep_assert_held(&request->engine->timeline.lock); - spin_lock(&request->timeline->lock); + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); list_move_tail(&request->link, &timeline->requests); spin_unlock(&request->timeline->lock); } @@ -469,15 +515,15 @@ void __i915_request_submit(struct i915_request *request) GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", engine->name, request->fence.context, request->fence.seqno, - engine->timeline->seqno + 1, + engine->timeline.seqno + 1, intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(engine->timeline); + seqno = timeline_get_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -492,7 +538,7 @@ void __i915_request_submit(struct i915_request *request) request->ring->vaddr + request->postfix); /* Transfer from per-context onto the global per-engine timeline */ - move_to_timeline(request, engine->timeline); + move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); @@ -505,11 +551,11 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. 
*/ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) @@ -523,17 +569,17 @@ void __i915_request_unsubmit(struct i915_request *request) intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), request->global_seqno)); - engine->timeline->seqno--; + engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -560,11 +606,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static int __i915_sw_fence_call @@ -635,12 +681,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = engine->context_pin(engine, ctx); + ring = intel_context_pin(ctx, engine); if (IS_ERR(ring)) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -648,10 +694,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unreserve; - /* Move the oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&engine->timeline->requests, - typeof(*rq), link); - if (rq && i915_request_completed(rq)) + /* Move our oldest request to the slab-cache (if not in use!) 
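__i915_request_unsubmit() only ever unwinds the most recent submission: it asserts request->global_seqno == engine->timeline.seqno and then simply decrements the timeline seqno. A toy model of that allocate/rollback pairing, with made-up names, showing why the unwind has to be LIFO:

#include <assert.h>
#include <stdint.h>

struct toy_timeline {
        uint32_t seqno;         /* last global seqno handed out */
};

static uint32_t submit(struct toy_timeline *tl)
{
        return ++tl->seqno;     /* allocate the next global seqno */
}

/* Unwinding is only legal for the most recently submitted request. */
static void unsubmit(struct toy_timeline *tl, uint32_t global_seqno)
{
        assert(global_seqno == tl->seqno);
        tl->seqno--;            /* the seqno can be reused by the next submit */
}

int main(void)
{
        struct toy_timeline tl = { 0 };
        uint32_t a = submit(&tl);       /* 1 */
        uint32_t b = submit(&tl);       /* 2 */

        unsubmit(&tl, b);               /* must pop in LIFO order: b first */
        unsubmit(&tl, a);
        assert(tl.seqno == 0);
        return 0;
}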
*/ + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ring->request_list) && + i915_request_completed(rq)) i915_request_retire(rq); /* @@ -711,8 +757,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(rq->timeline == engine->timeline); + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + rq->timeline = ring->timeline; + GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -725,13 +776,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); init_waitqueue_head(&rq->execute); - i915_priotree_init(&rq->priotree); - - INIT_LIST_HEAD(&rq->active_list); - rq->i915 = i915; - rq->engine = engine; - rq->ctx = ctx; - rq->ring = ring; + i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; @@ -768,6 +813,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unwind; + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(rq->ctx, engine); + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; @@ -777,14 +825,14 @@ err_unwind: /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); return ERR_PTR(ret); } @@ -800,9 +848,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); + ret = i915_sched_node_add_dependency(to->i915, + &to->sched, + &from->sched); if (ret < 0) return ret; } @@ -880,7 +928,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(rq->timeline, fence)) + i915_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -894,7 +942,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - intel_timeline_sync_set(rq->timeline, fence); + i915_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -971,7 +1019,7 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; struct i915_request *prev; u32 *cs; int err; @@ -1033,10 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) 
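i915_request_await_dma_fence() squashes redundant waits: a fence is skipped if the request's timeline has already waited on that fence context at an equal or later seqno (i915_timeline_sync_is_later()), and each accepted wait is recorded with i915_timeline_sync_set(). A minimal sketch of that "remember the newest seqno seen per fence context" idea, using a tiny linear map instead of the driver's i915_syncmap:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define MAX_CONTEXTS 8

/* Tiny stand-in for the per-timeline sync map: newest seqno per context. */
struct sync_map {
        uint64_t context[MAX_CONTEXTS];
        uint32_t seqno[MAX_CONTEXTS];
        int count;
};

static int seqno_passed(uint32_t a, uint32_t b)
{
        return (int32_t)(a - b) >= 0;
}

/* Have we already waited on this context at or beyond @seqno? */
static int sync_is_later(const struct sync_map *map, uint64_t ctx, uint32_t seqno)
{
        for (int i = 0; i < map->count; i++)
                if (map->context[i] == ctx)
                        return seqno_passed(map->seqno[i], seqno);
        return 0;
}

/* Record the latest fence we decided to wait on for this context. */
static void sync_set(struct sync_map *map, uint64_t ctx, uint32_t seqno)
{
        for (int i = 0; i < map->count; i++) {
                if (map->context[i] == ctx) {
                        if (!seqno_passed(map->seqno[i], seqno))
                                map->seqno[i] = seqno;
                        return;
                }
        }
        assert(map->count < MAX_CONTEXTS);
        map->context[map->count] = ctx;
        map->seqno[map->count] = seqno;
        map->count++;
}

int main(void)
{
        struct sync_map map;

        memset(&map, 0, sizeof(map));
        sync_set(&map, 42, 10);                 /* waited on ctx 42 up to seqno 10 */
        assert(sync_is_later(&map, 42, 5));     /* older fence: wait can be squashed */
        assert(!sync_is_later(&map, 42, 11));   /* newer fence: must still wait */
        assert(!sync_is_later(&map, 7, 1));     /* unknown context: must wait */
        return 0;
}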
i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); if (engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); + __i915_sched_node_add_dependency(&request->sched, + &prev->sched, + &request->dep, + 0); } spin_lock_irq(&timeline->lock); @@ -1047,6 +1095,8 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); + if (list_is_first(&request->ring_link, &ring->request_list)) + list_add(&ring->active_link, &request->i915->gt.active_rings); request->emitted_jiffies = jiffies; /* @@ -1060,12 +1110,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, request->ctx->priority); + engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); - - local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ @@ -1354,38 +1403,30 @@ complete: return timeout; } -static void engine_retire_requests(struct intel_engine_cs *engine) +static void ring_retire_requests(struct intel_ring *ring) { struct i915_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) + &ring->request_list, ring_link) { + if (!i915_request_completed(request)) break; - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) i915_request_retire(request); + } } void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - for_each_engine(engine, i915, id) - engine_retire_requests(engine); + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) + ring_retire_requests(ring); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7d6eb82eeb91..eddbd4245cb3 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,13 +28,16 @@ #include <linux/dma-fence.h> #include "i915_gem.h" +#include "i915_scheduler.h" #include "i915_sw_fence.h" +#include "i915_scheduler.h" #include <uapi/drm/i915_drm.h> struct drm_file; struct drm_i915_gem_object; struct i915_request; +struct i915_timeline; struct intel_wait { struct rb_node node; @@ -48,44 +51,6 @@ struct intel_signal_node { struct list_head link; }; -struct i915_dependency { - struct i915_priotree *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -/* - * "People assume that time is a strict progression of cause to effect, but - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big - * ball of wibbly-wobbly, timey-wimey ... stuff." 
-The Doctor, 2015 - * - * Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - */ -struct i915_priotree { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - int priority; -}; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; @@ -131,7 +96,7 @@ struct i915_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct intel_signal_node signaling; /* @@ -154,7 +119,7 @@ struct i915_request { * to retirement), i.e. bidirectional dependency information for the * request not tied to individual fences. */ - struct i915_priotree priotree; + struct i915_sched_node sched; struct i915_dependency dep; /** @@ -343,10 +308,10 @@ static inline bool i915_request_started(const struct i915_request *rq) seqno - 1); } -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) { const struct i915_request *rq = - container_of(pt, const struct i915_request, priotree); + container_of(node, const struct i915_request, sched); return i915_request_completed(rq); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h new file mode 100644 index 000000000000..70a42220358d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_SCHEDULER_H_ +#define _I915_SCHEDULER_H_ + +#include <linux/bitops.h> + +#include <uapi/drm/i915_drm.h> + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! + * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. 
we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + * + * There is no active component to the "scheduler". As we know the dependency + * DAG of each request, we are able to insert it into a sorted queue when it + * is ready, and are able to reorder its portion of the graph to accommodate + * dynamic priority changes. + */ +struct i915_sched_node { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + struct i915_sched_attr attr; +}; + +struct i915_dependency { + struct i915_sched_node *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +#endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c new file mode 100644 index 000000000000..4667cc08c416 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "i915_timeline.h" +#include "i915_syncmap.h" + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->name = name; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + + timeline->fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&timeline->lock); + + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); +} + +/** + * i915_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_timelines_park(struct drm_i915_private *i915) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. 
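The old i915_priotree becomes i915_sched_node, embedded in each request, with signalers/waiters lists and an i915_sched_attr that currently carries just a priority; i915_sched_node_signaled() recovers the enclosing request from the node with container_of(). A simplified sketch of that embed-and-recover pattern, with toy types and a hand-rolled container_of macro:

#include <assert.h>
#include <stddef.h>

/* Simplified scheduling attributes: just a priority, as in i915_sched_attr. */
struct sched_attr {
        int priority;
};

/* Simplified node embedded in each request, as i915_sched_node is. */
struct sched_node {
        struct sched_attr attr;
};

struct request {
        unsigned int seqno;
        int completed;
        struct sched_node sched;        /* embedded, not pointed to */
};

/* Recover the enclosing request from a pointer to its embedded node. */
#define node_to_request(node) \
        ((struct request *)((char *)(node) - offsetof(struct request, sched)))

static int sched_node_signaled(struct sched_node *node)
{
        return node_to_request(node)->completed;
}

int main(void)
{
        struct request rq = { .seqno = 1, .completed = 1 };

        assert(node_to_request(&rq.sched) == &rq);
        assert(sched_node_signaled(&rq.sched));
        return 0;
}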
+ */ + i915_syncmap_free(&timeline->sync); + } +} + +void i915_timeline_fini(struct i915_timeline *timeline) +{ + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + list_del(&timeline->link); +} + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_timeline *timeline; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + i915_timeline_init(i915, timeline, name); + kref_init(&timeline->kref); + + return timeline; +} + +void __i915_timeline_free(struct kref *kref) +{ + struct i915_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + i915_timeline_fini(timeline); + kfree(timeline); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 33e01bf6aa36..dc2a4632faa7 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -22,27 +22,20 @@ * */ -#ifndef I915_GEM_TIMELINE_H -#define I915_GEM_TIMELINE_H +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H #include <linux/list.h> +#include <linux/kref.h> #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" -struct i915_gem_timeline; - -struct intel_timeline { +struct i915_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. - */ - u32 inflight_seqnos; - spinlock_t lock; /** @@ -77,47 +70,57 @@ struct intel_timeline { */ u32 global_sync[I915_NUM_ENGINES]; - struct i915_gem_timeline *common; -}; - -struct i915_gem_timeline { struct list_head link; - - struct drm_i915_private *i915; const char *name; - struct intel_timeline engine[I915_NUM_ENGINES]; + struct kref kref; }; -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *tl, - const char *name); -int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_park(struct drm_i915_private *i915); -void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name); +void i915_timeline_fini(struct i915_timeline *tl); + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name); -static inline int __intel_timeline_sync_set(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline struct i915_timeline * +i915_timeline_get(struct i915_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __i915_timeline_free(struct kref *kref); +static inline void i915_timeline_put(struct i915_timeline *timeline) +{ + kref_put(&timeline->kref, __i915_timeline_free); +} + +static inline int __i915_timeline_sync_set(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_set(&tl->sync, context, seqno); } -static inline int intel_timeline_sync_set(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline int i915_timeline_sync_set(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); } -static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, - u64 context, u32 seqno) +static 
inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_is_later(&tl->sync, context, seqno); } -static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_park(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 408827bf5d96..8cc3a256f29d 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -679,45 +679,68 @@ DEFINE_EVENT(i915_request, i915_request_execute, TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_request_hw, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, hw_id) - __field(u32, ring) - __field(u32, ctx) - __field(u32, seqno) - __field(u32, global_seqno) - __field(u32, port) - ), - - TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; - __entry->ring = rq->engine->id; - __entry->ctx = rq->fence.context; - __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; - __entry->port = port; - ), - - TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->hw_id, __entry->ring, - __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->port) -); +TRACE_EVENT(i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, port) + __field(u32, prio) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->prio = rq->sched.attr.priority; + __entry->port = port; + ), -DEFINE_EVENT(i915_request_hw, i915_request_in, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->prio, __entry->global_seqno, + __entry->port) ); -DEFINE_EVENT(i915_request, i915_request_out, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) +TRACE_EVENT(i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, completed) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->completed = i915_request_completed(rq); + ), + + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->completed) ); + #else #if 
!defined(TRACE_HEADER_MULTI_READ) static inline void @@ -811,42 +834,6 @@ DEFINE_EVENT(i915_request, i915_request_wait_end, TP_ARGS(rq) ); -TRACE_EVENT(i915_flip_request, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - -TRACE_EVENT(i915_flip_complete, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - TRACE_EVENT_CONDITION(i915_reg_rw, TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 0695717522ea..00165ad55fb3 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr) #include <linux/list.h> +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return head->next == list; +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4bda3bd29bf5..9324d476e0a7 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -46,8 +46,6 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -232,7 +230,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, if (!vma) vma = vma_create(obj, vm, view); - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; @@ -684,13 +681,43 @@ err_unpin: return ret; } -static void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_close(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + /* + * We defer actually closing, unbinding and destroying the VMA until + * the next idle point, or if the object is freed in the meantime. By + * postponing the unbind, we allow for it to be resurrected by the + * client, avoiding the work required to rebind the VMA. This is + * advantageous for DRI, where the client/server pass objects + * between themselves, temporarily opening a local VMA to the + * object, and then closing it again. The same object is then reused + * on the next frame (or two, depending on the depth of the swap queue) + * causing us to rebind the VMA once more. This ends up being a lot + * of wasted work for the steady state. 
+ */ + list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma); +} + +void i915_vma_reopen(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (vma->flags & I915_VMA_CLOSED) { + vma->flags &= ~I915_VMA_CLOSED; + list_del(&vma->closed_link); + } +} + +static void __i915_vma_destroy(struct i915_vma *vma) { int i; GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) @@ -699,6 +726,7 @@ static void i915_vma_destroy(struct i915_vma *vma) list_del(&vma->obj_link); list_del(&vma->vm_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -706,15 +734,30 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } -void i915_vma_close(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (i915_vma_is_closed(vma)) + list_del(&vma->closed_link); + + WARN_ON(i915_vma_unbind(vma)); + __i915_vma_destroy(vma); +} + +void i915_vma_parked(struct drm_i915_private *i915) +{ + struct i915_vma *vma, *next; - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + GEM_BUG_ON(!i915_vma_is_closed(vma)); + i915_vma_destroy(vma); + } + + GEM_BUG_ON(!list_empty(&i915->gt.closed_vma)); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -804,7 +847,7 @@ int i915_vma_unbind(struct i915_vma *vma) return -EBUSY; if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + return 0; GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -841,10 +884,6 @@ int i915_vma_unbind(struct i915_vma *vma) i915_vma_remove(vma); -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8c5022095418..fc4294cfaa91 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -119,6 +119,8 @@ struct i915_vma { /** This vma's place in the eviction list */ struct list_head evict_link; + struct list_head closed_link; + /** * Used for performing relocations during execbuffer insertion. 
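The request and VMA tracking in this series leans heavily on the kernel's intrusive, circular list_head, and the list_is_first() helper added to i915_utils.h above is just head->next == entry, the mirror of list_is_last(). A standalone sketch of the two tests over a minimal list; list_init() and list_add_tail() here are simplified stand-ins, not the kernel's implementations:

#include <assert.h>

/* Minimal circular doubly-linked list, modelled on the kernel's list_head. */
struct list_head {
        struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
        head->next = head;
        head->prev = head;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
        entry->prev = head->prev;
        entry->next = head;
        head->prev->next = entry;
        head->prev = entry;
}

static int list_is_first(const struct list_head *entry, const struct list_head *head)
{
        return head->next == entry;     /* oldest element sits right after the head */
}

static int list_is_last(const struct list_head *entry, const struct list_head *head)
{
        return entry->next == head;     /* newest element points back at the head */
}

int main(void)
{
        struct list_head head, a, b;

        list_init(&head);
        list_add_tail(&a, &head);       /* a is oldest */
        list_add_tail(&b, &head);       /* b is newest */

        assert(list_is_first(&a, &head) && !list_is_first(&b, &head));
        assert(list_is_last(&b, &head) && !list_is_last(&a, &head));
        return 0;
}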
*/ @@ -285,6 +287,8 @@ void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); +void i915_vma_reopen(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); @@ -408,6 +412,8 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } +void i915_vma_parked(struct drm_i915_private *i915); + #define for_each_until(cond) if (cond) break; else /** diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7481ce85746b..6d068786eb41 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -183,11 +183,16 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ } /* FIXME pre-g4x don't work like this */ - if (intel_state->base.visible) + if (state->visible) crtc_state->active_planes |= BIT(intel_plane->id); else crtc_state->active_planes &= ~BIT(intel_plane->id); + if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) + crtc_state->nv12_planes |= BIT(intel_plane->id); + else + crtc_state->nv12_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(old_crtc_state, &crtc_state->base, old_plane_state, diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index 702d3fab97fc..54270bdde100 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -530,6 +530,7 @@ parse_driver_features(struct drm_i915_private *dev_priv, */ if (!driver->drrs_enabled) dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; + dev_priv->vbt.psr.enable = driver->psr_enabled; } static void diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 671a6d61e29d..18e643df523e 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -82,7 +82,7 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.fake_irq); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* The timer persists in case we cannot enable interrupts, + /* + * The timer persists in case we cannot enable interrupts, * or if we have previously seen seqno/interrupt incoherency * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). 
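The fake-irq timer described here only keeps polling while it is still needed: the hunk that follows re-arms the slower hangcheck timer once the missed-irq flag has been cleared for the engine, and otherwise keeps kicking waiters every jiffy. A small decision-function sketch of that re-arm policy; the interval constants are made up for illustration:

#include <assert.h>

/* Illustrative intervals only: one tick vs. the longer hangcheck timeout. */
#define NEXT_JIFFY      1
#define HANGCHECK_DELAY 1000

/*
 * Decide how to re-arm after a fake-interrupt poll:
 * return 0 if nothing should be re-armed (no breadcrumb waiters left),
 * otherwise the delay until the next timer should fire.
 */
static int fake_irq_rearm(int irq_armed, int missed_irq)
{
        if (!irq_armed)
                return 0;               /* no waiters, let the timer die */
        if (!missed_irq)
                return HANGCHECK_DELAY; /* fake-irq disabled: back to hangcheck */
        return NEXT_JIFFY;              /* keep kicking waiters every jiffy */
}

int main(void)
{
        assert(fake_irq_rearm(0, 1) == 0);
        assert(fake_irq_rearm(1, 0) == HANGCHECK_DELAY);
        assert(fake_irq_rearm(1, 1) == NEXT_JIFFY);
        return 0;
}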
* Here the worker will wake up every jiffie in order to kick the @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) if (!b->irq_armed) return; + /* If the user has disabled the fake-irq, restore the hangchecking */ + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + mod_timer(&b->fake_irq, jiffies + 1); } @@ -831,8 +838,8 @@ static void cancel_fake_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -840,15 +847,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - cancel_fake_irq(engine); spin_lock_irq(&b->irq_lock); + /* + * Leave the fake_irq timer enabled (if it is running), but clear the + * bit so that it turns itself off on its next wake up and goes back + * to the long hangcheck interval if still required. + */ + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + if (b->irq_enabled) irq_enable(engine); else irq_disable(engine); - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + /* + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the * GPU is active and may have already executed the MI_USER_INTERRUPT * before the CPU is ready to receive. However, the engine is currently * idle (we haven't started it yet), there is no possibility for a @@ -857,9 +871,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (b->irq_armed) - enable_fake_irq(b); - spin_unlock_irq(&b->irq_lock); } diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index 32d24c69da3c..704ddb4d3ca7 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -2302,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int skl_dpll0_vco(struct intel_atomic_state *intel_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + int vco, i; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + if (!crtc_state->base.enable) + continue; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + continue; + + /* + * DPLL0 VCO may need to be adjusted to get the correct + * clock for eDP. This will affect cdclk as well. 
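As this comment notes, DPLL0's VCO may need adjusting for eDP, and the new skl_dpll0_vco() below picks the 8.64 GHz VCO whenever an enabled eDP CRTC's port clock divides down to 108 or 216 MHz, falling back to 8.1 GHz otherwise. A compact restatement of just that selection rule, with frequencies in kHz to match the driver's units:

#include <assert.h>

/* Pick the DPLL0 VCO (kHz) needed for a given eDP port clock (kHz). */
static int edp_dpll0_vco(int port_clock)
{
        switch (port_clock / 2) {
        case 108000:
        case 216000:
                return 8640000; /* these eDP link clocks need the 8.64 GHz VCO */
        default:
                return 8100000;
        }
}

int main(void)
{
        assert(edp_dpll0_vco(216000) == 8640000);
        assert(edp_dpll0_vco(432000) == 8640000);
        assert(edp_dpll0_vco(270000) == 8100000);
        return 0;
}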
+ */ + switch (crtc_state->port_clock / 2) { + case 108000: + case 216000: + vco = 8640000; + break; + default: + vco = 8100000; + break; + } + } + + return vco; +} + static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int min_cdclk, cdclk, vco; @@ -2312,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - vco = intel_state->cdclk.logical.vco; - if (!vco) - vco = dev_priv->skl_preferred_vco_freq; + vco = skl_dpll0_vco(intel_state); /* * FIXME should also account for plane ratio diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index f9550ea46c26..cf9b600cca79 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -298,7 +298,10 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_CANNONLAKE(dev_priv)) { + if (csr->fw_path == i915_modparams.dmc_firmware_path) { + /* Bypass version check for firmware override. */ + required_version = csr->version; + } else if (IS_CANNONLAKE(dev_priv)) { required_version = CNL_CSR_VERSION_REQUIRED; } else if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; @@ -453,7 +456,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_CANNONLAKE(dev_priv)) + if (i915_modparams.dmc_firmware_path) + csr->fw_path = i915_modparams.dmc_firmware_path; + else if (IS_CANNONLAKE(dev_priv)) csr->fw_path = I915_CSR_CNL; else if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 92cb26b18a9b..b98ac0541f19 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -870,6 +870,45 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } +static const struct icl_combo_phy_ddi_buf_trans * +icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, + int type, int *n_entries) +{ + u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; + + if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); + return icl_combo_phy_ddi_translations_edp_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); + return icl_combo_phy_ddi_translations_edp_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); + return icl_combo_phy_ddi_translations_edp_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } else { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); + return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } +} + static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { int n_entries, level, default_entry; @@ -1013,6 +1052,25 @@ static 
uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) } } +static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, + const struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return DDI_CLK_SEL_NONE; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return DDI_CLK_SEL_MG; + } +} + /* Starting with Haswell, different DDI ports can work in FDI mode for * connection to the PCH-located connectors. For this, it is necessary to train * both the DDI port and PCH receiver for the desired DDI buffer settings. @@ -2025,7 +2083,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum port port = encoder->port; int n_entries; - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port == PORT_A || port == PORT_B) + icl_get_combo_buf_trans(dev_priv, port, encoder->type, + &n_entries); + else + n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); + } else if (IS_CANNONLAKE(dev_priv)) { if (encoder->type == INTEL_OUTPUT_EDP) cnl_get_buf_trans_edp(dev_priv, &n_entries); else @@ -2182,6 +2246,146 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); } +static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + u32 n_entries, val; + int ln; + + ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, + &n_entries); + if (!ddi_translations) + return; + + if (level >= n_entries) { + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + level = n_entries - 1; + } + + /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~RTERM_SELECT_MASK; + val |= RTERM_SELECT(0x6); + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW5 */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + /* Set DisableTap2 and DisableTap3 if MIPI DSI + * Clear DisableTap2 and DisableTap3 for all other Ports + */ + if (type == INTEL_OUTPUT_DSI) { + val |= TAP2_DISABLE; + val |= TAP3_DISABLE; + } else { + val &= ~TAP2_DISABLE; + val &= ~TAP3_DISABLE; + } + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW2 */ + val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | + RCOMP_SCALAR_MASK); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + /* Program Rcomp scalar for every table entry */ + val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); + + /* Program PORT_TX_DW4 */ + /* We cannot write to GRP. It would overwrite individual loadgen. 
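Each step of the ICL vswing programming follows the same shape: read the register, clear the field masks, OR in the new values, and write back, with PORT_TX_DW4 written per lane because a group write would clobber the per-lane loadgen bits. A generic sketch of that read-modify-write pattern over a pretend register file; reg_read()/reg_write() stand in for the MMIO accessors:

#include <assert.h>
#include <stdint.h>

/* Pretend MMIO space: just an array indexed by register offset. */
static uint32_t regs[16];

static uint32_t reg_read(unsigned int offset)
{
        return regs[offset];
}

static void reg_write(unsigned int offset, uint32_t val)
{
        regs[offset] = val;
}

/* Read-modify-write one field: clear @mask, then set @value within it. */
static void reg_rmw(unsigned int offset, uint32_t mask, uint32_t value)
{
        uint32_t val = reg_read(offset);

        val &= ~mask;
        val |= value & mask;
        reg_write(offset, val);
}

int main(void)
{
        reg_write(3, 0xffff00ff);
        reg_rmw(3, 0x0000ff00, 0x00002a00);     /* update bits 15:8 only */
        assert(reg_read(3) == 0xffff2aff);
        return 0;
}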
*/ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | + CURSOR_COEFF_MASK); + val |= ddi_translations[level].dw4_scaling; + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } +} + +static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + int width = 0; + int rate = 0; + u32 val; + int ln = 0; + + if (type == INTEL_OUTPUT_HDMI) { + width = 4; + /* Rate is always < than 6GHz for HDMI */ + } else { + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; + } + + /* + * 1. If port type is eDP or DP, + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, + * else clear to 0b. + */ + val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + if (type == INTEL_OUTPUT_HDMI) + val &= ~COMMON_KEEPER_EN; + else + val |= COMMON_KEEPER_EN; + I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); + + /* 2. Program loadgen select */ + /* + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) + */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~LOADGEN_SELECT; + + if ((rate <= 600000 && width == 4 && ln >= 1) || + (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { + val |= LOADGEN_SELECT; + } + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } + + /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ + val = I915_READ(ICL_PORT_CL_DW5(port)); + val |= SUS_CLOCK_CONFIG; + I915_WRITE(ICL_PORT_CL_DW5(port), val); + + /* 4. Clear training enable to change swing values */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* 5. Program swing and de-emphasis */ + icl_ddi_combo_vswing_program(dev_priv, level, port, type); + + /* 6. 
Set training enable to trigger update */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val |= TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); +} + +static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level, + enum intel_output_type type) +{ + enum port port = encoder->port; + + if (port == PORT_A || port == PORT_B) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + /* Not Implemented Yet */ + WARN_ON(1); +} + static uint32_t translate_signal_level(int signal_levels) { int i; @@ -2213,7 +2417,9 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2234,6 +2440,69 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct intel_shared_dpll *pll = crtc_state->shared_dpll; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *conn_state; + struct drm_connector *conn; + int i; + + for_each_new_connector_in_state(old_state, conn, conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + enum port port = encoder->port; + uint32_t val; + + if (conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + + val = I915_READ(DPCLKA_CFGCR0_ICL); + WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0); + + if (port == PORT_A || port == PORT_B) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); + } + + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + mutex_unlock(&dev_priv->dpll_lock); + } +} + +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + enum port port = encoder->port; + + if (old_conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + I915_WRITE(DPCLKA_CFGCR0_ICL, + I915_READ(DPCLKA_CFGCR0_ICL) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + mutex_unlock(&dev_priv->dpll_lock); + } +} + static void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_shared_dpll *pll) { @@ -2246,7 +2515,11 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, mutex_lock(&dev_priv->dpll_lock); - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), + icl_pll_to_ddi_pll_sel(encoder, pll)); + } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. 
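Step 2 of the combo-PHY vswing sequence above programs LOADGEN_SELECT per lane from the link rate and lane count: lanes 1-3 for four-lane configs at or below the threshold, lanes 1-2 for one- or two-lane configs, and no lanes above it. The predicate below restates that condition as a pure function, keeping the driver's kHz units and the 600000 threshold that corresponds to the "6 GHz" boundary in the comment:

#include <assert.h>

/* Loadgen select rule from the sequence above; lanes are numbered 0-3. */
static int loadgen_select(int rate, int width, int lane)
{
        if (rate <= 600000 && width == 4 && lane >= 1)
                return 1;
        if (rate <= 600000 && width < 4 && (lane == 1 || lane == 2))
                return 1;
        return 0;
}

int main(void)
{
        /* 4 lanes at a low rate: LN0=0, LN1=1, LN2=1, LN3=1 */
        assert(!loadgen_select(270000, 4, 0));
        assert(loadgen_select(270000, 4, 3));

        /* 2 lanes at a low rate: LN0=0, LN1=1, LN2=1, LN3=0 */
        assert(loadgen_select(270000, 2, 2));
        assert(!loadgen_select(270000, 2, 3));

        /* Above the threshold: all lanes clear */
        assert(!loadgen_select(810000, 4, 1));
        return 0;
}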
*/ val = I915_READ(DPCLKA_CFGCR0); val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); @@ -2284,14 +2557,18 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); + } else if (IS_CANNONLAKE(dev_priv)) { I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) + } else if (IS_GEN9_BC(dev_priv)) { I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port)); - else if (INTEL_GEN(dev_priv) < 9) + } else if (INTEL_GEN(dev_priv) < 9) { I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + } } static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, @@ -2316,7 +2593,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2347,7 +2626,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index a32ba72c514e..0fd13df424cf 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -848,7 +848,7 @@ void intel_device_info_runtime_init(struct intel_device_info *info) gen9_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) == 10) gen10_sseu_info_init(dev_priv); - else if (INTEL_INFO(dev_priv)->gen >= 11) + else if (INTEL_GEN(dev_priv) >= 11) gen11_sseu_info_init(dev_priv); /* Initialize command stream timestamp frequency */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 020900e08d42..ad588d564198 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -88,6 +88,22 @@ static const uint32_t skl_primary_formats[] = { DRM_FORMAT_VYUY, }; +static const uint32_t skl_pri_planar_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -489,9 +505,21 @@ static const struct intel_limit intel_limits_bxt = { }; static void +skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS); + else + I915_WRITE(CHICKEN_PIPESL_1(pipe), 0); +} + +static void skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable) { - 
if (IS_SKYLAKE(dev_priv)) + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) return; if (enable) @@ -3090,6 +3118,29 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, return 0; } +static int +skl_check_nv12_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + /* Display WA #1106 */ + if (plane_state->base.rotation != + (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) && + plane_state->base.rotation != DRM_MODE_ROTATE_270) + return 0; + + /* + * src coordinates are rotated here. + * We check height but report it as width + */ + if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) { + DRM_DEBUG_KMS("src width must be multiple " + "of 4 for rotated NV12\n"); + return -EINVAL; + } + + return 0; +} + static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -3173,6 +3224,9 @@ int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, * the main surface setup depends on it. */ if (fb->format->format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_surface(crtc_state, plane_state); + if (ret) + return ret; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -3623,11 +3677,15 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; u32 plane_color_ctl = 0; - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + if (INTEL_GEN(dev_priv) < 11) { + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); @@ -4790,8 +4848,7 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, } if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 && - (src_h < SKL_MIN_YUV_420_SRC_H || (src_w % 4) != 0 || - (src_h % 4) != 0)) { + (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { DRM_DEBUG_KMS("NV12: src dimensions not met\n"); return -EINVAL; } @@ -5138,6 +5195,22 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s return !old_crtc_state->ips_enabled; } +static bool needs_nv12_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->nv12_planes) + return false; + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + + if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || + IS_CANNONLAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); @@ -5162,7 +5235,6 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (old_primary_state) { struct drm_plane_state *new_primary_state = drm_atomic_get_new_plane_state(old_state, primary); - struct drm_framebuffer *fb = new_primary_state->fb; intel_fbc_post_update(crtc); @@ -5170,14 +5242,13 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) (needs_modeset(&pipe_config->base) || !old_primary_state->visible)) intel_post_enable_primary(&crtc->base, pipe_config); + } - /* Display WA 827 */ - if 
((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || - IS_CANNONLAKE(dev_priv)) { - if (fb && fb->format->format == DRM_FORMAT_NV12) - skl_wa_clkgate(dev_priv, crtc->pipe, false); - } - + /* Display WA 827 */ + if (needs_nv12_wa(dev_priv, old_crtc_state) && + !needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, false); + skl_wa_528(dev_priv, crtc->pipe, false); } } @@ -5202,14 +5273,6 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct intel_plane_state *new_primary_state = intel_atomic_get_new_plane_state(old_intel_state, to_intel_plane(primary)); - struct drm_framebuffer *fb = new_primary_state->base.fb; - - /* Display WA 827 */ - if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || - IS_CANNONLAKE(dev_priv)) { - if (fb && fb->format->format == DRM_FORMAT_NV12) - skl_wa_clkgate(dev_priv, crtc->pipe, true); - } intel_fbc_pre_update(crtc, pipe_config, new_primary_state); /* @@ -5221,6 +5284,13 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } + /* Display WA 827 */ + if (!needs_nv12_wa(dev_priv, old_crtc_state) && + needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, true); + skl_wa_528(dev_priv, crtc->pipe, true); + } + /* * Vblank time updates from the shadow to live plane control register * are blocked if the memory self-refresh mode is active at that @@ -5555,6 +5625,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (intel_crtc->config->shared_dpll) intel_enable_shared_dpll(intel_crtc); + if (INTEL_GEN(dev_priv) >= 11) + icl_map_plls_to_ports(crtc, pipe_config, old_state); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -5752,6 +5825,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); + + if (INTEL_GEN(dev_priv) >= 11) + icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -11143,39 +11219,42 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -#define PIPE_CONF_CHECK_X(name) \ +#define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected 0x%08x, found 0x%08x)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_I(name) \ +#define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_BOOL(name) \ +#define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %s, found %s)\n", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) /* * Checks state where we only read out the enabling, but not the entire * state itself (like full infoframes or ELD for audio). These states * require a full modeset on bootup to fix up. 
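The PIPE_CONF_CHECK_* macros in this hunk all gain do { ... } while (0) wrappers so that a multi-statement macro expands to exactly one statement and stays safe after an unbraced if. A minimal illustration of the hazard the change removes, using a throwaway macro pair rather than the driver's own macros:

#include <assert.h>

static int checks;

/* Unsafe: expands to two statements, so an unbraced if only guards the first. */
#define CHECK_BAD(cond)         \
        if (!(cond))            \
                return -1;      \
        checks++

/* Safe: the do/while(0) wrapper makes the whole body a single statement. */
#define CHECK_GOOD(cond) do {   \
        if (!(cond))            \
                return -1;      \
        checks++;               \
} while (0)

static int verify_bad(int enabled)
{
        if (enabled)
                CHECK_BAD(1);   /* checks++ escapes the if (enabled) guard */
        return 0;
}

static int verify_good(int enabled)
{
        if (enabled)
                CHECK_GOOD(1);  /* nothing happens unless enabled */
        return 0;
}

int main(void)
{
        checks = 0;
        verify_bad(0);
        assert(checks == 1);    /* counted a check that should have been skipped */

        checks = 0;
        verify_good(0);
        assert(checks == 0);
        return 0;
}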
*/ -#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ @@ -11184,18 +11263,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_P(name) \ +#define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %p, found %p)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_M_N(name) \ +#define PIPE_CONF_CHECK_M_N(name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ adjust)) { \ @@ -11213,14 +11294,15 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) /* This is required for BDW+ where there is only one set of registers for * switching between high and low RR. * This macro can be used whenever a comparison has to be made between one * hw state and multiple sw state variables. */ -#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \ +#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name, adjust) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ @@ -11245,9 +11327,10 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_FLAGS(name, mask) \ +#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ pipe_config_err(adjust, __stringify(name), \ "(%x) (expected %i, found %i)\n", \ @@ -11255,16 +11338,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, current_config->name & (mask), \ pipe_config->name & (mask)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \ +#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -11373,6 +11458,16 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.pll9); PIPE_CONF_CHECK_X(dpll_hw_state.pll10); PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -11436,6 +11531,11 @@ static void verify_wm_state(struct drm_crtc *crtc, skl_ddb_get_hw_state(dev_priv, &hw_ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; + if (INTEL_GEN(dev_priv) >= 11) + if (hw_ddb.enabled_slices != 
sw_ddb->enabled_slices) + DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n", + sw_ddb->enabled_slices, + hw_ddb.enabled_slices); /* planes */ for_each_universal_plane(dev_priv, pipe, plane) { hw_plane_wm = &hw_wm.planes[plane]; @@ -12242,6 +12342,8 @@ static void skl_update_crtcs(struct drm_atomic_state *state) bool progress; enum pipe pipe; int i; + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u8 required_slices = intel_state->wm_results.ddb.enabled_slices; const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; @@ -12250,6 +12352,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (new_crtc_state->active) entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; + /* If 2nd DBuf slice required, enable it here */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); + /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations @@ -12300,6 +12406,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) progress = true; } } while (progress); + + /* If 2nd DBuf slice is no more required disable it */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); } static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) @@ -12763,6 +12873,15 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } +static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_DISPLAY, + }; + + i915_gem_object_wait_priority(obj, 0, &attr); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12839,7 +12958,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + fb_obj_bump_render_priority(obj); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); @@ -13115,6 +13234,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -13322,6 +13442,30 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, return pipe == PIPE_A && plane_id == PLANE_PRIMARY; } +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id == PLANE_PRIMARY) { + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) && + !IS_GEMINILAKE(dev_priv)) + return false; + } else if (plane_id >= PLANE_SPRITE0) { + if (plane_id == PLANE_CURSOR) + return false; + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) { + if (plane_id != PLANE_SPRITE0) + return false; + } else { + if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C || + IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + } + } + return true; +} + static struct intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13382,8 +13526,13 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->check_plane = intel_check_primary_plane; if (INTEL_GEN(dev_priv) 
>= 9) { - intel_primary_formats = skl_primary_formats; - num_formats = ARRAY_SIZE(skl_primary_formats); + if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) { + intel_primary_formats = skl_pri_planar_formats; + num_formats = ARRAY_SIZE(skl_pri_planar_formats); + } else { + intel_primary_formats = skl_primary_formats; + num_formats = ARRAY_SIZE(skl_primary_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY)) modifiers = skl_format_modifiers_ccs; @@ -14204,6 +14353,20 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } break; + case DRM_FORMAT_NV12: + if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || + mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { + DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); + goto err; + } + if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || + IS_BROXTON(dev_priv)) { + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, + &format_name)); + goto err; + } + break; default: DRM_DEBUG_KMS("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); @@ -14216,6 +14379,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); + if (fb->format->format == DRM_FORMAT_NV12 && + (fb->width < SKL_MIN_YUV_420_SRC_W || + fb->height < SKL_MIN_YUV_420_SRC_H || + (fb->width % 4) != 0 || (fb->height % 4) != 0)) { + DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); + return -EINVAL; + } + for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -15270,6 +15441,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index 4e7418b345bc..2ef31617614a 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -218,6 +218,10 @@ struct intel_link_m_n { for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ for_each_if((__mask) & BIT(__p)) +#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ + for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ + for_each_if ((__mask) & (1 << (__t))) + #define for_each_universal_plane(__dev_priv, __pipe, __p) \ for ((__p) = 0; \ (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 62f82c4298ac..dde92e4af5d3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1647,9 +1647,17 @@ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, } } +struct link_config_limits { + int min_clock, max_clock; + int min_lane_count, max_lane_count; + int min_bpp, max_bpp; +}; + static int intel_dp_compute_bpp(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; bpp = pipe_config->pipe_bpp; @@ -1658,13 +1666,16 @@ static int intel_dp_compute_bpp(struct 
intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); - /* For DP Compliance we override the computed bpp for the pipe */ - if (intel_dp->compliance.test_data.bpc != 0) { - pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; - pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; - DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", - pipe_config->pipe_bpp); + if (intel_dp_is_edp(intel_dp)) { + /* Get bpp from vbt only for panels that dont have bpp in edid */ + if (intel_connector->base.display_info.bpc == 0 && + dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { + DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", + dev_priv->vbt.edp.bpp); + bpp = dev_priv->vbt.edp.bpp; + } } + return bpp; } @@ -1685,6 +1696,142 @@ static bool intel_edp_compare_alt_mode(struct drm_display_mode *m1, return bres; } +/* Adjust link config limits based on compliance test requests. */ +static void +intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct link_config_limits *limits) +{ + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + int bpp = 3 * intel_dp->compliance.test_data.bpc; + + limits->min_bpp = limits->max_bpp = bpp; + pipe_config->dither_force_disable = bpp == 6 * 3; + + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp); + } + + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + int index; + + /* Validate the compliance test data since max values + * might have changed due to link train fallback. + */ + if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, + intel_dp->compliance.test_lane_count)) { + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + limits->min_clock = limits->max_clock = index; + limits->min_lane_count = limits->max_lane_count = + intel_dp->compliance.test_lane_count; + } + } +} + +/* Optimize link config in order: max bpp, min clock, min lanes */ +static bool +intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + const struct link_config_limits *limits) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int bpp, clock, lane_count; + int mode_rate, link_clock, link_avail; + + for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, + bpp); + + for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { + for (lane_count = limits->min_lane_count; + lane_count <= limits->max_lane_count; + lane_count <<= 1) { + link_clock = intel_dp->common_rates[clock]; + link_avail = intel_dp_max_data_rate(link_clock, + lane_count); + + if (mode_rate <= link_avail) { + pipe_config->lane_count = lane_count; + pipe_config->pipe_bpp = bpp; + pipe_config->port_clock = link_clock; + + return true; + } + } + } + } + + return false; +} + +static bool +intel_dp_compute_link_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct link_config_limits limits; + int common_len; + + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_link_rate); + + /* No common link rates between source and sink */ + WARN_ON(common_len <= 0); + 
+ limits.min_clock = 0; + limits.max_clock = common_len - 1; + + limits.min_lane_count = 1; + limits.max_lane_count = intel_dp_max_lane_count(intel_dp); + + limits.min_bpp = 6 * 3; + limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + + if (intel_dp_is_edp(intel_dp)) { + /* + * Use the maximum clock and number of lanes the eDP panel + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. + */ + limits.min_lane_count = limits.max_lane_count; + limits.min_clock = limits.max_clock; + } + + intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); + + DRM_DEBUG_KMS("DP link computation with max lane count %i " + "max rate %d max bpp %d pixel clock %iKHz\n", + limits.max_lane_count, + intel_dp->common_rates[limits.max_clock], + limits.max_bpp, adjusted_mode->crtc_clock); + + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits)) + return false; + + DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n", + pipe_config->lane_count, pipe_config->port_clock, + pipe_config->pipe_bpp); + + DRM_DEBUG_KMS("DP link rate required %i available %i\n", + intel_dp_link_required(adjusted_mode->crtc_clock, + pipe_config->pipe_bpp), + intel_dp_max_data_rate(pipe_config->port_clock, + pipe_config->lane_count)); + + return true; +} + bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -1698,27 +1845,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int lane_count, clock; - int min_lane_count = 1; - int max_lane_count = intel_dp_max_lane_count(intel_dp); - /* Conveniently, the link BW constants become indices with a shift...*/ - int min_clock = 0; - int max_clock; - int bpp, mode_rate; - int link_avail, link_clock; - int common_len; - uint8_t link_bw, rate_select; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_LIMITED_M_N); - common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_link_rate); - - /* No common link rates between source and sink */ - WARN_ON(common_len <= 0); - - max_clock = common_len - 1; - if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -1744,6 +1873,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 9) { int ret; + ret = skl_update_scaler_crtc(pipe_config); if (ret) return ret; @@ -1764,75 +1894,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; - /* Use values requested by Compliance Test Request */ - if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - int index; - - /* Validate the compliance test data since max values - * might have changed due to link train fallback. 
- */ - if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, - intel_dp->compliance.test_lane_count)) { - index = intel_dp_rate_index(intel_dp->common_rates, - intel_dp->num_common_rates, - intel_dp->compliance.test_link_rate); - if (index >= 0) - min_clock = max_clock = index; - min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; - } - } - DRM_DEBUG_KMS("DP link computation with max lane count %i " - "max bw %d pixel clock %iKHz\n", - max_lane_count, intel_dp->common_rates[max_clock], - adjusted_mode->crtc_clock); - - /* Walk through all bpp values. Luckily they're all nicely spaced with 2 - * bpc in between. */ - bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp)) { - - /* Get bpp from vbt only for panels that dont have bpp in edid */ - if (intel_connector->base.display_info.bpc == 0 && - (dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) { - DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", - dev_priv->vbt.edp.bpp); - bpp = dev_priv->vbt.edp.bpp; - } - - /* - * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The panels are generally - * designed to support only a single clock and lane - * configuration, and typically these values correspond to the - * native resolution of the panel. - */ - min_lane_count = max_lane_count; - min_clock = max_clock; - } - - for (; bpp >= 6*3; bpp -= 2*3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (clock = min_clock; clock <= max_clock; clock++) { - for (lane_count = min_lane_count; - lane_count <= max_lane_count; - lane_count <<= 1) { - - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - goto found; - } - } - } - } - - return false; + if (!intel_dp_compute_link_config(encoder, pipe_config)) + return false; -found: if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: @@ -1840,7 +1904,7 @@ found: * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry */ pipe_config->limited_color_range = - bpp != 18 && + pipe_config->pipe_bpp != 18 && drm_default_rgb_quant_range(adjusted_mode) == HDMI_QUANTIZATION_RANGE_LIMITED; } else { @@ -1848,21 +1912,7 @@ found: intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } - pipe_config->lane_count = lane_count; - - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = intel_dp->common_rates[clock]; - - intel_dp_compute_rate(intel_dp, pipe_config->port_clock, - &link_bw, &rate_select); - - DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", - link_bw, rate_select, pipe_config->lane_count, - pipe_config->port_clock, bpp); - DRM_DEBUG_KMS("DP link bw required %i available %i\n", - mode_rate, link_avail); - - intel_link_compute_m_n(bpp, lane_count, + intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, @@ -1871,31 +1921,12 @@ found: if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { pipe_config->has_drrs = true; - intel_link_compute_m_n(bpp, lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - reduce_m_n); - } - - /* - * DPLL0 VCO may need to be adjusted to get the correct - * clock for eDP. This will affect cdclk as well. 
- */ - if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { - int vco; - - switch (pipe_config->port_clock / 2) { - case 108000: - case 216000: - vco = 8640000; - break; - default: - vco = 8100000; - break; - } - - to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; + intel_link_compute_m_n(pipe_config->pipe_bpp, + pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, + &pipe_config->dp_m2_n2, + reduce_m_n); } if (!HAS_DDI(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index f59b59bb0a21..3fcaa98b9055 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -139,6 +139,11 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) intel_dp_compute_rate(intel_dp, intel_dp->link_rate, &link_bw, &rate_select); + if (link_bw) + DRM_DEBUG_KMS("Using LINK_BW_SET value %02x\n", link_bw); + else + DRM_DEBUG_KMS("Using LINK_RATE_SET value %02x\n", rate_select); + /* Write the link configuration data */ link_config[0] = link_bw; link_config[1] = intel_dp->lane_count; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index d5e114e9660b..383fbc15113d 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -2218,6 +2218,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; + uint32_t ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2250,8 +2251,17 @@ cnl_ddi_calculate_wrpll(int clock, cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, - dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); + ref_clock = dev_priv->cdclk.hw.ref; + + /* + * For ICL, the spec states: if reference frequency is 38.4, use 19.2 + * because the DPLL automatically divides that by 2. + */ + if (IS_ICELAKE(dev_priv) && ref_clock == 38400) + ref_clock = 19200; + + cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv, + kdiv); return true; } @@ -2399,6 +2409,644 @@ static const struct intel_dpll_mgr cnl_pll_mgr = { .dump_hw_state = cnl_dump_hw_state, }; +/* + * These values are already adjusted: they're the bits we write to the + * registers, not the logical values.
+ */ +static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = { + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x195, .dco_fraction = 0x0000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +/* Also used for 38.4 MHz values. */ +static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = { + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1FA, .dco_fraction = 0x2000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock, + struct skl_wrpll_params *pll_params) +{ + const struct skl_wrpll_params *params; + + params = dev_priv->cdclk.hw.ref == 24000 ? 
+ icl_dp_combo_pll_24MHz_values : + icl_dp_combo_pll_19_2MHz_values; + + switch (clock) { + case 540000: + *pll_params = params[0]; + break; + case 270000: + *pll_params = params[1]; + break; + case 162000: + *pll_params = params[2]; + break; + case 324000: + *pll_params = params[3]; + break; + case 216000: + *pll_params = params[4]; + break; + case 432000: + *pll_params = params[5]; + break; + case 648000: + *pll_params = params[6]; + break; + case 810000: + *pll_params = params[7]; + break; + default: + MISSING_CASE(clock); + return false; + } + + return true; +} + +static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + uint32_t cfgcr0, cfgcr1; + struct skl_wrpll_params pll_params = { 0 }; + bool ret; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params); + else + ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params); + + if (!ret) + return false; + + cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | + pll_params.dco_integer; + + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | + DPLL_CFGCR1_KDIV(pll_params.kdiv) | + DPLL_CFGCR1_PDIV(pll_params.pdiv) | + DPLL_CFGCR1_CENTRAL_FREQ_8400; + + pll_state->cfgcr0 = cfgcr0; + pll_state->cfgcr1 = cfgcr1; + return true; +} + +static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +{ + return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +} + +static enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +{ + return port - PORT_C + DPLL_ID_ICL_MGPLL1; +} + +static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, + uint32_t *target_dco_khz, + struct intel_dpll_hw_state *state) +{ + uint32_t dco_min_freq, dco_max_freq; + int div1_vals[] = {7, 5, 3, 2}; + unsigned int i; + int div2; + + dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000; + dco_max_freq = is_dp ? 8100000 : 10000000; + + for (i = 0; i < ARRAY_SIZE(div1_vals); i++) { + int div1 = div1_vals[i]; + + for (div2 = 10; div2 > 0; div2--) { + int dco = div1 * div2 * clock_khz * 5; + int a_divratio, tlinedrv, inputsel, hsdiv; + + if (dco < dco_min_freq || dco > dco_max_freq) + continue; + + if (div2 >= 2) { + a_divratio = is_dp ? 10 : 5; + tlinedrv = 2; + } else { + a_divratio = 5; + tlinedrv = 0; + } + inputsel = is_dp ? 0 : 1; + + switch (div1) { + default: + MISSING_CASE(div1); + case 2: + hsdiv = 0; + break; + case 3: + hsdiv = 1; + break; + case 5: + hsdiv = 2; + break; + case 7: + hsdiv = 3; + break; + } + + *target_dco_khz = dco; + + state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1); + + state->mg_clktop2_coreclkctl1 = + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio); + + state->mg_clktop2_hsclkctl = + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2); + + return true; + } + } + + return false; +} + +/* + * The specification for this function uses real numbers, so the math had to be + * adapted to integer-only calculation, that's why it looks so different. 
+ */ +static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int refclk_khz = dev_priv->cdclk.hw.ref; + uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + uint32_t iref_ndiv, iref_trim, iref_pulse_w; + uint32_t prop_coeff, int_coeff; + uint32_t tdc_targetcnt, feedfwgain; + uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; + uint64_t tmp; + bool use_ssc = false; + bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + + if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, + pll_state)) { + DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); + return false; + } + + m1div = 2; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", + clock); + return false; + } + } + m2div_rem = dco_khz % (refclk_khz * m1div); + + tmp = (uint64_t)m2div_rem * (1 << 22); + do_div(tmp, refclk_khz * m1div); + m2div_frac = tmp; + + switch (refclk_khz) { + case 19200: + iref_ndiv = 1; + iref_trim = 28; + iref_pulse_w = 1; + break; + case 24000: + iref_ndiv = 1; + iref_trim = 25; + iref_pulse_w = 2; + break; + case 38400: + iref_ndiv = 2; + iref_trim = 28; + iref_pulse_w = 1; + break; + default: + MISSING_CASE(refclk_khz); + return false; + } + + /* + * tdc_res = 0.000003 + * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5) + * + * The multiplication by 1000 is due to refclk MHz to KHz conversion. It + * was supposed to be a division, but we rearranged the operations of + * the formula to avoid early divisions so we don't multiply the + * rounding errors. + * + * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which + * we also rearrange to work with integers. + * + * The 0.5 transformed to 5 results in a multiplication by 10 and the + * last division by 10. + */ + tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10; + + /* + * Here we divide dco_khz by 10 in order to allow the dividend to fit in + * 32 bits. That's not a problem since we round the division down + * anyway. + */ + feedfwgain = (use_ssc || m2div_rem > 0) ? + m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0; + + if (dco_khz >= 9000000) { + prop_coeff = 5; + int_coeff = 10; + } else { + prop_coeff = 4; + int_coeff = 8; + } + + if (use_ssc) { + tmp = (uint64_t)dco_khz * 47 * 32; + do_div(tmp, refclk_khz * m1div * 10000); + ssc_stepsize = tmp; + + tmp = (uint64_t)dco_khz * 1000; + ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); + } else { + ssc_stepsize = 0; + ssc_steplen = 0; + } + ssc_steplog = 4; + + pll_state->mg_pll_div0 = (m2div_rem > 0 ? 
MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; + + if (refclk_khz != 38400) { + pll_state->mg_pll_tdc_coldst_bias |= + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + } + + return true; +} + +static struct intel_shared_dpll * +icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + struct intel_dpll_hw_state pll_state = {}; + enum port port = encoder->port; + enum intel_dpll_id min, max; + int clock = crtc_state->port_clock; + bool ret; + + switch (port) { + case PORT_A: + case PORT_B: + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder, clock, + &pll_state); + break; + case PORT_C: + case PORT_D: + case PORT_E: + case PORT_F: + min = icl_port_to_mg_pll_id(port); + max = min; + ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, + &pll_state); + break; + default: + MISSING_CASE(port); + return NULL; + } + + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + crtc_state->dpll_hw_state = pll_state; + + pll = intel_find_shared_dpll(crtc, crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + + return pll; +} + +static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) +{ + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return CNL_DPLL_ENABLE(id); + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + } +} + +static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + uint32_t val; + enum port port; + bool ret = false; + + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + val = I915_READ(icl_pll_id_to_enable_reg(id)); + if (!(val & PLL_ENABLE)) + goto out; + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + 
hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + port = icl_mg_pll_id_to_port(id); + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + hw_state->mg_clktop2_hsclkctl = + I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + break; + default: + MISSING_CASE(id); + } + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + return ret; +} + +static void icl_dpll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); + I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); + POSTING_READ(ICL_DPLL_CFGCR1(id)); +} + +static void icl_mg_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum port port = icl_mg_pll_id_to_port(pll->info->id); + + I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), + hw_state->mg_clktop2_coreclkctl1); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), + hw_state->mg_pll_tdc_coldst_bias); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +} + +static void icl_pll_enable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + val = I915_READ(enable_reg); + val |= PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, + PLL_POWER_STATE, 1)) + DRM_ERROR("PLL %d Power not enabled\n", id); + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + icl_dpll_write(dev_priv, pll); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + icl_mg_pll_write(dev_priv, pll); + break; + default: + MISSING_CASE(id); + } + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothing here. + */ + + val = I915_READ(enable_reg); + val |= PLL_ENABLE; + I915_WRITE(enable_reg, val); + + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK, + 1)) /* 600us actually.
*/ + DRM_ERROR("PLL %d not locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ +} + +static void icl_pll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + /* The first steps are done by intel_ddi_post_disable(). */ + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothign here. + */ + + val = I915_READ(enable_reg); + val &= ~PLL_ENABLE; + I915_WRITE(enable_reg, val); + + /* Timeout is actually 1us. */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1)) + DRM_ERROR("PLL %d locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ + + val = I915_READ(enable_reg); + val &= ~PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0, + 1)) + DRM_ERROR("PLL %d Power not disabled\n", id); +} + +static void icl_dump_hw_state(struct drm_i915_private *dev_priv, + struct intel_dpll_hw_state *hw_state) +{ + DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, " + "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, " + "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, " + "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, " + "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, " + "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n", + hw_state->cfgcr0, hw_state->cfgcr1, + hw_state->mg_refclkin_ctl, + hw_state->mg_clktop2_coreclkctl1, + hw_state->mg_clktop2_hsclkctl, + hw_state->mg_pll_div0, + hw_state->mg_pll_div1, + hw_state->mg_pll_lf, + hw_state->mg_pll_frac_lock, + hw_state->mg_pll_ssc, + hw_state->mg_pll_bias, + hw_state->mg_pll_tdc_coldst_bias); +} + +static const struct intel_shared_dpll_funcs icl_pll_funcs = { + .enable = icl_pll_enable, + .disable = icl_pll_disable, + .get_hw_state = icl_pll_get_hw_state, +}; + +static const struct dpll_info icl_plls[] = { + { "DPLL 0", &icl_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &icl_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr icl_pll_mgr = { + .dpll_info = icl_plls, + .get_dpll = icl_get_dpll, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -2412,7 +3060,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dpll_mgr = &icl_pll_mgr; + else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; else if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index 4febfaa90bde..7a0cd564a9ee 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -103,6 +103,32 @@ enum intel_dpll_id { * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3 */ DPLL_ID_SKL_DPLL3 = 3, + + + /** + * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 + */ + DPLL_ID_ICL_DPLL0 = 0, + /** + * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 + */ + DPLL_ID_ICL_DPLL1 = 1, + /** + * 
@DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) + */ + DPLL_ID_ICL_MGPLL1 = 2, + /** + * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 1 port 2 (D) + */ + DPLL_ID_ICL_MGPLL2 = 3, + /** + * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 1 port 3 (E) + */ + DPLL_ID_ICL_MGPLL3 = 4, + /** + * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 1 port 4 (F) + */ + DPLL_ID_ICL_MGPLL4 = 5, }; #define I915_NUM_PLLS 6 @@ -135,6 +161,21 @@ struct intel_dpll_hw_state { /* bxt */ uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; + + /* + * ICL uses the following, already defined: + * uint32_t cfgcr0, cfgcr1; + */ + uint32_t mg_refclkin_ctl; + uint32_t mg_clktop2_coreclkctl1; + uint32_t mg_clktop2_hsclkctl; + uint32_t mg_pll_div0; + uint32_t mg_pll_div1; + uint32_t mg_pll_lf; + uint32_t mg_pll_frac_lock; + uint32_t mg_pll_ssc; + uint32_t mg_pll_bias; + uint32_t mg_pll_tdc_coldst_bias; }; /** diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6bd7bff0947a..d7dbca1aabff 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -56,6 +56,8 @@ for (;;) { \ const bool expired__ = ktime_after(ktime_get_raw(), end__); \ OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret__ = 0; \ break; \ @@ -96,6 +98,8 @@ u64 now = local_clock(); \ if (!(ATOMIC)) \ preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret = 0; \ break; \ @@ -140,6 +144,10 @@ #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) +#define KBps(x) (1000 * (x)) +#define MBps(x) KBps(1000 * (x)) +#define GBps(x) ((u64)1000 * MBps((x))) + /* * Display related stuff */ @@ -882,6 +890,7 @@ struct intel_crtc_state { /* bitmask of visible planes (enum plane_id) */ u8 active_planes; + u8 nv12_planes; /* HDMI scrambling status */ bool hdmi_scrambling; @@ -1329,6 +1338,9 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit); void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); @@ -1398,6 +1410,12 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, bool enable); +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); @@ -1580,8 +1598,6 @@ void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); void gen9_enable_dc5(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); -void skl_enable_dc6(struct drm_i915_private *dev_priv); -void skl_disable_dc6(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); @@ -1901,6 +1917,8 @@ void intel_psr_single_frame_update(struct drm_i915_private 
*dev_priv, unsigned frontbuffer_bits); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1924,6 +1942,8 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices); static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) @@ -2062,6 +2082,8 @@ bool skl_plane_get_hw_state(struct intel_plane *plane); bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id); bool intel_format_is_yuv(uint32_t format); +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 91c07b0c8db9..4d6ffa7b3e7b 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -647,6 +647,11 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* prepare count */ prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); + if (prepare_cnt > PREPARE_CNT_MAX) { + DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt); + prepare_cnt = PREPARE_CNT_MAX; + } + /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, @@ -662,32 +667,29 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num) exit_zero_cnt += 1; + if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt); + exit_zero_cnt = EXIT_ZERO_CNT_MAX; + } + /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( (tclk_prepare_clkzero - ths_prepare_ns) * ui_den, ui_num * mul); + if (clk_zero_cnt > CLK_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt); + clk_zero_cnt = CLK_ZERO_CNT_MAX; + } + /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); - if (prepare_cnt > PREPARE_CNT_MAX || - exit_zero_cnt > EXIT_ZERO_CNT_MAX || - clk_zero_cnt > CLK_ZERO_CNT_MAX || - trail_cnt > TRAIL_CNT_MAX) - DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n"); - - if (prepare_cnt > PREPARE_CNT_MAX) - prepare_cnt = PREPARE_CNT_MAX; - - if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) - exit_zero_cnt = EXIT_ZERO_CNT_MAX; - - if (clk_zero_cnt > CLK_ZERO_CNT_MAX) - clk_zero_cnt = CLK_ZERO_CNT_MAX; - - if (trail_cnt > TRAIL_CNT_MAX) + if (trail_cnt > TRAIL_CNT_MAX) { + DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt); trail_cnt = TRAIL_CNT_MAX; + } /* B080 */ intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 | diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 1a8370779bbb..6bfd7e3ed152 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -306,7 +306,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do 
here, execute in order of dependencies */ engine->schedule = NULL; - spin_lock_init(&engine->stats.lock); + seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -451,11 +451,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) -{ - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; -} - static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { i915_gem_batch_pool_init(&engine->batch_pool, engine); @@ -463,21 +458,10 @@ static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) static bool csb_force_mmio(struct drm_i915_private *i915) { - /* - * IOMMU adds unpredictable latency causing the CSB write (from the - * GPU into the HWSP) to only be visible some time after the interrupt - * (missed breadcrumb syndrome). - */ - if (intel_vtd_active()) - return true; - /* Older GVT emulation depends upon intercepting CSB mmio */ if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) return true; - if (IS_CANNONLAKE(i915)) - return true; - return false; } @@ -507,8 +491,9 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { + i915_timeline_init(engine->i915, &engine->timeline, engine->name); + intel_engine_init_execlist(engine); - intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); @@ -541,8 +526,6 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) goto err_unref; engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unref: @@ -636,9 +619,6 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unpin: @@ -690,7 +670,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); + ring = intel_context_pin(engine->i915->kernel_context, engine); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -699,8 +679,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * we can interrupt the engine at any time. 
*/ if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); + ring = intel_context_pin(engine->i915->preempt_context, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto err_unpin_kernel; @@ -724,9 +703,9 @@ err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + intel_context_unpin(engine->i915->preempt_context, engine); err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->kernel_context, engine); return ret; } @@ -754,8 +733,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_gem_object_put(engine->default_state); if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->preempt_context, engine); + intel_context_unpin(engine->i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -1008,7 +989,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) return rq->ctx == kernel_context; else @@ -1081,6 +1062,8 @@ void intel_engines_unpark(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { if (engine->unpark) engine->unpark(engine); + + intel_engine_init_hangcheck(engine); } } @@ -1113,17 +1096,35 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80]; + int x = 0; - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), name); } @@ -1266,8 +1267,9 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, rq: ", - idx, count); + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1289,11 +1291,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) 
{ + const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; + struct i915_request *rq, *last; struct rb_node *rb; + int count; if (header) { va_list ap; @@ -1306,12 +1310,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1320,14 +1323,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1359,18 +1362,49 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) - print_request(m, rq, "\t\tE "); + spin_lock_irq(&engine->timeline.lock); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) - print_request(m, rq, "\t\tQ "); + list_for_each_entry(rq, &p->requests, sched.link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tQ "); + else + last = rq; + } } - spin_unlock_irq(&engine->timeline->lock); + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tQ "); + } + + spin_unlock_irq(&engine->timeline.lock); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1435,7 +1469,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) return -ENODEV; tasklet_disable(&execlists->tasklet); - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = 
-EBUSY; @@ -1459,7 +1493,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); tasklet_enable(&execlists->tasklet); return err; @@ -1488,12 +1522,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) { + unsigned int seq; ktime_t total; - unsigned long flags; - spin_lock_irqsave(&engine->stats.lock, flags); - total = __intel_engine_get_busy_time(engine); - spin_unlock_irqrestore(&engine->stats.lock, flags); + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); return total; } @@ -1511,13 +1546,13 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); WARN_ON_ONCE(engine->stats.enabled == 0); if (--engine->stats.enabled == 0) { engine->stats.total = __intel_engine_get_busy_time(engine); engine->stats.active = 0; } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index a00a59a7d9ec..116f4ccf1bbd 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -203,26 +203,6 @@ void intel_guc_fini(struct intel_guc *guc) guc_shared_data_destroy(guc); } -static u32 get_gt_type(struct drm_i915_private *dev_priv) -{ - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); - - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} - static u32 get_log_control_flags(void) { u32 level = i915_modparams.guc_log_level; @@ -255,10 +235,6 @@ void intel_guc_init_params(struct intel_guc *guc) memset(params, 0, sizeof(params)); - params[GUC_CTL_DEVICE_INFO] |= - (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); - /* * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one * second. This ARAR is calculated by: diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index 334cb5202e1c..dcaa3fb71765 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -121,7 +121,8 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. 
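The intel_engine_get_busy_time() change above converts engine->stats.lock from a spinlock to a seqlock, so readers sample the busy-time counters without ever blocking the submission path: writers bump the sequence around their update and readers retry if it moved. A minimal userspace analogue of that retry loop, with a hand-rolled sequence counter standing in for the kernel seqlock_t (names and types here are illustrative only, not the i915 ones):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct engine_stats {
	atomic_uint seq;	/* even: stable, odd: update in progress */
	uint64_t total_ns;	/* accumulated busy time */
};

static void stats_add(struct engine_stats *s, uint64_t delta_ns)
{
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_acquire);
	s->total_ns += delta_ns;
	atomic_fetch_add_explicit(&s->seq, 1, memory_order_release);
}

static uint64_t stats_read(struct engine_stats *s)
{
	unsigned int begin;
	uint64_t total;

	do {	/* retry if a writer raced with us, like read_seqretry() */
		begin = atomic_load_explicit(&s->seq, memory_order_acquire);
		total = s->total_ns;
	} while ((begin & 1) ||
		 atomic_load_explicit(&s->seq, memory_order_acquire) != begin);

	return total;
}

int main(void)
{
	struct engine_stats s = { .seq = 0, .total_ns = 0 };

	stats_add(&s, 1000);
	printf("busy time: %llu ns\n", (unsigned long long)stats_read(&s));
	return 0;
}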
*/ - kernel_ctx_vma = dev_priv->kernel_context->engine[RCS].state; + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, + dev_priv->engine[RCS])->state; blob->ads.golden_context_lrca = intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index d73673f5d30c..0867ba76d445 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,9 +23,6 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GUC_CORE_FAMILY_GEN9 12 -#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff - #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -82,8 +79,6 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GT_TYPE_SHIFT 0 -#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 97121230656c..2feb65096966 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -124,9 +124,17 @@ static int reserve_doorbell(struct intel_guc_client *client) return 0; } +static bool has_doorbell(struct intel_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + static void unreserve_doorbell(struct intel_guc_client *client) { - GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + GEM_BUG_ON(!has_doorbell(client)); __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); client->doorbell_id = GUC_DOORBELL_INVALID; @@ -184,14 +192,6 @@ static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; @@ -207,7 +207,6 @@ static void __destroy_doorbell(struct intel_guc_client *client) struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; @@ -224,6 +223,9 @@ static int create_doorbell(struct intel_guc_client *client) { int ret; + if (WARN_ON(!has_doorbell(client))) + return -ENODEV; /* internal setup error, should never happen */ + __update_doorbell_desc(client, client->doorbell_id); __create_doorbell(client); @@ -362,7 +364,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -659,7 +661,7 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline int port_prio(const struct execlist_port *port) @@ -667,7 +669,7 @@ static inline int port_prio(const struct execlist_port *port) return rq_prio(port_request(port)); } -static 
void guc_dequeue(struct intel_engine_cs *engine) +static bool __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -677,7 +679,8 @@ static void guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - spin_lock_irq(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); + rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -692,13 +695,13 @@ static void guc_dequeue(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_PREEMPT); queue_work(engine->i915->guc.preempt_wq, &preempt_work->work); - goto unlock; + return false; } } port++; if (port_isset(port)) - goto unlock; + return false; } GEM_BUG_ON(port_isset(port)); @@ -706,11 +709,11 @@ static void guc_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { if (last && rq->ctx != last->ctx) { if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -719,7 +722,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) port++; } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -736,19 +739,34 @@ static void guc_dequeue(struct intel_engine_cs *engine) done: execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; - if (submit) { + if (submit) port_assign(port, last); + if (last) execlists_user_begin(execlists, execlists->port); - guc_submit(engine); - } /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + return submit; +} + +static void guc_dequeue(struct intel_engine_cs *engine) +{ + unsigned long flags; + bool submit; + + local_irq_save(flags); + + spin_lock(&engine->timeline.lock); + submit = __guc_dequeue(engine); + spin_unlock(&engine->timeline.lock); + + if (submit) + guc_submit(engine); + + local_irq_restore(flags); } static void guc_submission_tasklet(unsigned long data) @@ -990,7 +1008,8 @@ static void guc_fill_preempt_context(struct intel_guc *guc) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &client->owner->engine[id]; + struct intel_context *ce = + to_intel_context(client->owner, engine); u32 addr = intel_hws_preempt_done_address(engine); u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index fd0ffb8328d0..d47e346bd49e 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -356,7 +356,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer("hangcheck"); intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -452,6 +452,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; } 
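Both guc_dequeue() here and execlists_dequeue() later in the series are split the same way: the request queue is walked and the ports are filled while holding engine->timeline.lock, and the actual hardware submission (guc_submit() or the ELSP write) happens only after the lock is dropped. Reduced to a self-contained sketch, with a pthread mutex and a printf standing in for the real lock and the doorbell write:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct engine {
	pthread_mutex_t lock;	/* stands in for engine->timeline.lock */
	int queued;		/* stands in for the priority tree */
};

/* Caller holds e->lock; only decides whether there is work to submit. */
static bool __dequeue(struct engine *e)
{
	if (!e->queued)
		return false;
	e->queued--;
	return true;
}

static void dequeue(struct engine *e)
{
	bool submit;

	pthread_mutex_lock(&e->lock);
	submit = __dequeue(e);
	pthread_mutex_unlock(&e->lock);

	if (submit)		/* the slow part runs outside the lock */
		printf("ring the doorbell\n");
}

int main(void)
{
	struct engine e = { .lock = PTHREAD_MUTEX_INITIALIZER, .queued = 1 };

	dequeue(&e);
	return 0;
}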
void intel_hangcheck_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index c7c85134a84a..15434cad5430 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -177,7 +177,7 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, @@ -185,7 +185,8 @@ static inline bool need_preempt(const struct intel_engine_cs *engine, int prio) { return (intel_engine_has_preemption(engine) && - __execlists_need_preempt(prio, rq_prio(last))); + __execlists_need_preempt(prio, rq_prio(last)) && + !i915_request_completed(last)); } /** @@ -223,7 +224,7 @@ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -257,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, } static struct i915_priolist * -lookup_priolist(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +lookup_priolist(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; @@ -330,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, + &engine->timeline.requests, link) { if (i915_request_completed(rq)) return; @@ -344,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != last_prio) { last_prio = rq_prio(rq); - p = lookup_priolist(engine, &rq->priotree, last_prio); + p = lookup_priolist(engine, last_prio); } - list_add(&rq->priotree.link, &p->requests); + GEM_BUG_ON(p->priority != rq_prio(rq)); + list_add(&rq->sched.link, &p->requests); } } @@ -356,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_irq(&engine->timeline->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static inline void @@ -397,10 +400,11 @@ execlists_context_schedule_in(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq) +execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, status); + trace_i915_request_out(rq); } static void @@ -414,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; + struct intel_context *ce = to_intel_context(rq->ctx, 
rq->engine); struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -523,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = - &engine->i915->preempt_context->engine[engine->id]; + to_intel_context(engine->i915->preempt_context, engine); unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -552,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void execlists_dequeue(struct intel_engine_cs *engine) +static bool __execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -562,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; + lockdep_assert_held(&engine->timeline.lock); + /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -583,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -598,7 +603,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) - goto unlock; + return false; /* * If we write to ELSP a second time before the HW has had @@ -608,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the HW to indicate that it has had a chance to respond. */ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - goto unlock; + return false; if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - goto unlock; + return false; } /* @@ -637,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * priorities of the ports haven't been switch. */ if (port_count(&port[1])) - goto unlock; + return false; /* * WaIdleLiteRestore:bdw,skl @@ -654,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { /* * Can we combine this request with the current port? 
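A large share of the churn in intel_lrc.c, and in the GuC and ringbuffer code above, is the replacement of open-coded &ctx->engine[engine->id] lookups with the to_intel_context(ctx, engine) helper introduced elsewhere in this series. The pattern is simply a static inline accessor that funnels every per-engine context lookup through one place; a simplified stand-in (struct and field names below are not the i915 ones) looks like:

#include <stdio.h>

#define NUM_ENGINES 4

struct context_state { int pin_count; };

struct gem_context {
	/* one slot per engine; only touched through the accessor */
	struct context_state engine_state[NUM_ENGINES];
};

struct engine { int id; };

static inline struct context_state *
to_context_state(struct gem_context *ctx, const struct engine *engine)
{
	return &ctx->engine_state[engine->id];
}

int main(void)
{
	struct gem_context ctx = { 0 };
	struct engine rcs = { .id = 0 };

	to_context_state(&ctx, &rcs)->pin_count++;
	printf("pin_count=%d\n", ctx.engine_state[rcs.id].pin_count);
	return 0;
}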
* It has to be the same context/ringbuffer and not @@ -674,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -688,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (ctx_single_port_submission(last->ctx) || ctx_single_port_submission(rq->ctx)) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -701,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); last = rq; @@ -742,13 +747,25 @@ done: /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); - - if (submit) { + /* Re-evaluate the executing context setup after each preemptive kick */ + if (last) execlists_user_begin(execlists, execlists->port); + + return submit; +} + +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + unsigned long flags; + bool submit; + + spin_lock_irqsave(&engine->timeline.lock, flags); + submit = __execlists_dequeue(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + if (submit) execlists_submit_ports(engine); - } GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); @@ -771,12 +788,10 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) intel_engine_get_seqno(rq->engine)); GEM_BUG_ON(!execlists->active); - intel_engine_context_out(rq->engine); - - execlists_context_status_change(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); + execlists_context_schedule_out(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_request_put(rq); @@ -789,22 +804,9 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) static void clear_gtiir(struct intel_engine_cs *engine) { - static const u8 gtiir[] = { - [RCS] = 0, - [BCS] = 0, - [VCS] = 1, - [VCS2] = 1, - [VECS] = 3, - }; struct drm_i915_private *dev_priv = engine->i915; int i; - /* TODO: correctly reset irqs for gen11 */ - if (WARN_ON_ONCE(INTEL_GEN(engine->i915) >= 11)) - return; - - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - /* * Clear any pending interrupt state. * @@ -812,13 +814,50 @@ static void clear_gtiir(struct intel_engine_cs *engine) * double buffered, and so if we only reset it once there may * still be an interrupt pending. 
*/ - for (i = 0; i < 2; i++) { - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + if (INTEL_GEN(dev_priv) >= 11) { + static const struct { + u8 bank; + u8 bit; + } gen11_gtiir[] = { + [RCS] = {0, GEN11_RCS0}, + [BCS] = {0, GEN11_BCS}, + [_VCS(0)] = {1, GEN11_VCS(0)}, + [_VCS(1)] = {1, GEN11_VCS(1)}, + [_VCS(2)] = {1, GEN11_VCS(2)}, + [_VCS(3)] = {1, GEN11_VCS(3)}, + [_VECS(0)] = {1, GEN11_VECS(0)}, + [_VECS(1)] = {1, GEN11_VECS(1)}, + }; + unsigned long irqflags; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir)); + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + for (i = 0; i < 2; i++) { + gen11_reset_one_iir(dev_priv, + gen11_gtiir[engine->id].bank, + gen11_gtiir[engine->id].bit); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } else { + static const u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + engine->irq_keep_mask); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & engine->irq_keep_mask); - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); } - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & - engine->irq_keep_mask); } static void reset_irq(struct intel_engine_cs *engine) @@ -868,10 +907,10 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists_cancel_port_requests(execlists); reset_irq(engine); - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -882,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) while (rb) { struct i915_priolist *p = to_priolist(rb); - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { - INIT_LIST_HEAD(&rq->priotree.link); + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + INIT_LIST_HEAD(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); @@ -903,7 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); } @@ -969,6 +1008,7 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, @@ -1079,8 +1119,8 @@ static void execlists_submission_tasklet(unsigned long data) */ GEM_BUG_ON(!i915_request_completed(rq)); - execlists_context_schedule_out(rq); - trace_i915_request_out(rq); + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); i915_request_put(rq); GEM_TRACE("%s completed ctx=%d\n", @@ -1116,10 +1156,11 @@ static void execlists_submission_tasklet(unsigned long data) } static void queue_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); + list_add_tail(&node->link, + &lookup_priolist(engine, prio)->requests); } static void __submit_queue(struct intel_engine_cs *engine, int prio) @@ 
-1140,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); - queue_request(engine, &request->priotree, rq_prio(request)); + queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); - GEM_BUG_ON(list_empty(&request->priotree.link)); + GEM_BUG_ON(list_empty(&request->sched.link)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *sched_to_request(struct i915_sched_node *node) { - return container_of(pt, struct i915_request, priotree); + return container_of(node, struct i915_request, sched); } static struct intel_engine_cs * -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = pt_to_request(pt)->engine; + struct intel_engine_cs *engine = sched_to_request(node)->engine; GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline->lock); - spin_lock(&engine->timeline->lock); + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); } return engine; } -static void execlists_schedule(struct i915_request *request, int prio) +static void execlists_schedule(struct i915_request *request, + const struct i915_sched_attr *attr) { - struct intel_engine_cs *engine; + struct i915_priolist *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; struct i915_dependency *dep, *p; struct i915_dependency stack; + const int prio = attr->priority; LIST_HEAD(dfs); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); @@ -1183,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->priotree.priority)) + if (prio <= READ_ONCE(request->sched.attr.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(&request->i915->drm.struct_mutex); - stack.signaler = &request->priotree; + stack.signaler = &request->sched; list_add(&stack.dfs_link, &dfs); /* * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: - * static void update_priorities(struct i915_priotree *pt, prio) { - * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * queue_request(pt); + * queue_request(node); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1211,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * last element in the list is the request we must execute first. 
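The comment above explains why execlists_schedule() builds the list of signalers iteratively rather than recursing: the dependency chain can be arbitrarily deep and recursion risks overrunning the kernel stack. The same idea in a self-contained toy, using a fixed-size array as the work list and bumping priorities only once the list is complete (the structures are simplified stand-ins for i915_sched_node and i915_dependency):

#include <stdio.h>

#define MAX_NODES 8

struct node {
	int prio;
	int nr_deps;
	struct node *deps[MAX_NODES];	/* requests this node waits on */
};

static void bump_priority(struct node *root, int prio)
{
	struct node *dfs[MAX_NODES];
	int head = 0, tail = 0;

	dfs[tail++] = root;
	while (head < tail) {		/* collect, never recurse */
		struct node *n = dfs[head++];
		int i;

		for (i = 0; i < n->nr_deps; i++)
			if (n->deps[i]->prio < prio && tail < MAX_NODES)
				dfs[tail++] = n->deps[i];
	}

	/* dependencies must run first, so they are raised as well */
	while (tail--)
		if (dfs[tail]->prio < prio)
			dfs[tail]->prio = prio;
}

int main(void)
{
	struct node a = { .prio = 0 };
	struct node b = { .prio = 0, .nr_deps = 1, .deps = { &a } };

	bump_priority(&b, 5);
	printf("a=%d b=%d\n", a.prio, b.prio);
	return 0;
}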
*/ list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; /* * Within an engine, there can be no cycle, but we may @@ -1219,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio) * (redundant dependencies are not eliminated) and across * engines. */ - list_for_each_entry(p, &pt->signalers_list, signal_link) { + list_for_each_entry(p, &node->signalers_list, signal_link) { GEM_BUG_ON(p == dep); /* no cycles! */ - if (i915_priotree_signaled(p->signaler)) + if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < pt->priority); - if (prio > READ_ONCE(p->signaler->priority)) + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) list_move_tail(&p->dfs_link, &dfs); } } @@ -1237,40 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->priotree.link)); - request->priotree.priority = prio; + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&request->sched.link)); + request->sched.attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); } + last = NULL; engine = request->engine; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; INIT_LIST_HEAD(&dep->dfs_link); - engine = pt_lock_engine(pt, engine); + engine = sched_lock_engine(node, engine); - if (prio <= pt->priority) + if (prio <= node->attr.priority) continue; - pt->priority = prio; - if (!list_empty(&pt->link)) { - __list_del_entry(&pt->link); - queue_request(engine, pt, prio); + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = lookup_priolist(engine, prio); + last = engine; + } + GEM_BUG_ON(pl->priority != prio); + list_move_tail(&node->link, &pl->requests); } if (prio > engine->execlists.queue_priority && - i915_sw_fence_done(&pt_to_request(pt)->submit)) + i915_sw_fence_done(&sched_to_request(node)->submit)) __submit_queue(engine, prio); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) @@ -1300,7 +1349,7 @@ static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; @@ -1353,7 +1402,7 @@ err: static void execlists_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1372,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - struct intel_context *ce = 
&request->ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(request->ctx, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1633,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return -EINVAL; switch (INTEL_GEN(engine->i915)) { + case 11: + return 0; case 10: wa_bb_fn[0] = gen10_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1744,9 +1795,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be @@ -1769,9 +1818,7 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); return 0; } @@ -1780,8 +1827,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; unsigned long flags; + u32 *regs; GEM_TRACE("%s request global=%x, current=%d\n", engine->name, request ? request->global_seqno : 0, @@ -1803,9 +1850,9 @@ static void reset_common_ring(struct intel_engine_cs *engine, reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline->lock); + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); @@ -1831,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - ce = &request->ctx->engine[engine->id]; - execlists_init_reg_state(ce->lrc_reg_state, - request->ctx, engine, ce->ring); + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; + if (engine->default_state) { + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR(defaults)) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + defaults + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + i915_gem_object_unpin_map(engine->default_state); + } + } + execlists_init_reg_state(regs, request->ctx, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); + regs[CTX_RING_HEAD + 1] = request->postfix; request->ring->head = request->postfix; intel_ring_update_space(request->ring); @@ -1899,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(rq, 4); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1928,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq, (flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); return 0; @@ -2070,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2086,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2272,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine) } engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) + if (engine->i915->preempt_context) { + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + engine->execlists.preempt_complete_status = - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + upper_32_bits(ce->lrc_desc); + } return 0; @@ -2528,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx, defaults = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB); - if (IS_ERR(defaults)) - return PTR_ERR(defaults); + if (IS_ERR(defaults)) { + ret = PTR_ERR(defaults); + goto err_unpin_ctx; + } memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); @@ -2547,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); +err_unpin_ctx: i915_gem_object_unpin_map(ctx_obj); - - return 0; + return ret; } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; + struct i915_timeline *timeline; int ret; if (ce->state) @@ -2575,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); - return PTR_ERR(ctx_obj); + ret = PTR_ERR(ctx_obj); + goto error_deref_obj; } vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); @@ -2585,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->ring_size); + timeline = i915_timeline_create(ctx->i915, ctx->name); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); + i915_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -2627,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; 
+ struct intel_context *ce = + to_intel_context(ctx, engine); u32 *reg; if (!ce->state) diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 59d7b86012e9..4ec7d8dd13c8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -108,7 +108,7 @@ static inline uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - return ctx->engine[engine->id].lrc_desc; + return to_intel_context(ctx, engine)->lrc_desc; } #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d35d2d50f595..8691c86f579c 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) + + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c0b34b7943b9..9f0bd6a4cb79 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,8 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || + IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -217,6 +218,8 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) return GEN9_VEBOX_MOCS(index); case VCS2: return GEN9_MFX1_MOCS(index); + case VCS3: + return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); return INVALID_MMIO_REG; diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 4f367c16e9e5..39a4e4edda07 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -766,13 +766,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) { int i; - for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++) - if (!strcmp(buf, pipe_crc_objects[i])) { - *o = i; - return 0; - } + i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); + if (i < 0) + return i; - return -EINVAL; + *o = i; + return 0; } static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, @@ -798,13 +797,12 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return 0; } - for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) - if (!strcmp(buf, pipe_crc_sources[i])) { - *s = i; - return 0; - } + i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf); + if (i < 0) + return i; - return -EINVAL; + *s = i; + return 0; } static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4baab858e442..b85229e153c4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3567,6 +3567,23 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } +static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) +{ + u8 enabled_slices; + + /* Slice 1 
will always be enabled */ + enabled_slices = 1; + + /* Gen prior to GEN11 have only one DBuf slice */ + if (INTEL_GEN(dev_priv) < 11) + return enabled_slices; + + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE) + enabled_slices++; + + return enabled_slices; +} + /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. @@ -3754,9 +3771,42 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) return true; } +static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + const int num_active, + struct skl_ddb_allocation *ddb) +{ + const struct drm_display_mode *adjusted_mode; + u64 total_data_bw; + u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; + + WARN_ON(ddb_size == 0); + + if (INTEL_GEN(dev_priv) < 11) + return ddb_size - 4; /* 4 blocks for bypass path allocation */ + + adjusted_mode = &cstate->base.adjusted_mode; + total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode); + + /* + * 12GB/s is maximum BW supported by single DBuf slice. + */ + if (total_data_bw >= GBps(12) || num_active > 1) { + ddb->enabled_slices = 2; + } else { + ddb->enabled_slices = 1; + ddb_size /= 2; + } + + return ddb_size; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + struct skl_ddb_allocation *ddb, struct skl_ddb_entry *alloc, /* out */ int *num_active /* out */) { @@ -3779,11 +3829,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, else *num_active = hweight32(dev_priv->active_crtcs); - ddb_size = INTEL_INFO(dev_priv)->ddb_size; - WARN_ON(ddb_size == 0); - - if (INTEL_GEN(dev_priv) < 11) - ddb_size -= 4; /* 4 blocks for bypass path allocation */ + ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, + *num_active, ddb); /* * If the state doesn't change the active CRTC's, then there's @@ -3817,10 +3864,18 @@ static unsigned int skl_cursor_allocation(int num_active) return 8; } -static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) +static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, + struct skl_ddb_entry *entry, u32 reg) { - entry->start = reg & 0x3ff; - entry->end = (reg >> 16) & 0x3ff; + u16 mask; + + if (INTEL_GEN(dev_priv) >= 11) + mask = ICL_DDB_ENTRY_MASK; + else + mask = SKL_DDB_ENTRY_MASK; + entry->start = reg & mask; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + if (entry->end) entry->end += 1; } @@ -3837,7 +3892,8 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, /* Cursor doesn't support NV12/planar, so no extra calculation needed */ if (plane_id == PLANE_CURSOR) { val = I915_READ(CUR_BUF_CFG(pipe)); - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); return; } @@ -3856,10 +3912,13 @@ skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); if (fourcc == DRM_FORMAT_NV12) { - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val2); - skl_ddb_entry_init_from_hw(&ddb->uv_plane[pipe][plane_id], val); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val2); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->uv_plane[pipe][plane_id], val); } else { - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); + 
skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); } } @@ -3870,6 +3929,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, memset(ddb, 0, sizeof(*ddb)); + ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum intel_display_power_domain power_domain; enum plane_id plane_id; @@ -4242,7 +4303,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } - skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); + total_data_rate = skl_get_total_relative_data_rate(cstate, + plane_data_rate, + uv_plane_data_rate); + skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb, + alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; @@ -4277,9 +4342,6 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * * FIXME: we may not allocate every single block here. */ - total_data_rate = skl_get_total_relative_data_rate(cstate, - plane_data_rate, - uv_plane_data_rate); if (total_data_rate == 0) return 0; @@ -5088,6 +5150,7 @@ skl_copy_ddb_for_pipe(struct skl_ddb_values *dst, sizeof(dst->ddb.uv_plane[pipe])); memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], sizeof(dst->ddb.plane[pipe])); + dst->ddb.enabled_slices = src->ddb.enabled_slices; } static void @@ -5472,8 +5535,12 @@ void skl_wm_get_hw_state(struct drm_device *dev) /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } else { - /* Easy/common case; just sanitize DDB now if everything off */ - memset(ddb, 0, sizeof(*ddb)); + /* + * Easy/common case; just sanitize DDB now if everything off + * Keep dbuf slice info intact + */ + memset(ddb->plane, 0, sizeof(ddb->plane)); + memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane)); } } @@ -8597,6 +8664,13 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } +static void icl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. 
Enable to reduce Sampler power */ + I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN, + I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9123,7 +9197,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dev_priv->display.init_clock_gating = icl_init_clock_gating; + else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 69a5b276f4d8..db27f2faa1de 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -93,6 +93,114 @@ static void psr_aux_io_power_put(struct intel_dp *intel_dp) intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); } +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) +{ + u32 debug_mask, mask; + + /* No PSR interrupts on VLV/CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return; + + mask = EDP_PSR_ERROR(TRANSCODER_EDP); + debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) | + EDP_PSR_PRE_ENTRY(TRANSCODER_EDP); + + if (INTEL_GEN(dev_priv) >= 8) { + mask |= EDP_PSR_ERROR(TRANSCODER_A) | + EDP_PSR_ERROR(TRANSCODER_B) | + EDP_PSR_ERROR(TRANSCODER_C); + + debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) | + EDP_PSR_PRE_ENTRY(TRANSCODER_A) | + EDP_PSR_POST_EXIT(TRANSCODER_B) | + EDP_PSR_PRE_ENTRY(TRANSCODER_B) | + EDP_PSR_POST_EXIT(TRANSCODER_C) | + EDP_PSR_PRE_ENTRY(TRANSCODER_C); + } + + if (debug) + mask |= debug_mask; + + WRITE_ONCE(dev_priv->psr.debug, debug); + I915_WRITE(EDP_PSR_IMR, ~mask); +} + +static void psr_event_print(u32 val, bool psr2_enabled) +{ + DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val); + if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n"); + if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled) + DRM_DEBUG_KMS("\tPSR2 disabled\n"); + if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n"); + if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n"); + if (val & PSR_EVENT_GRAPHICS_RESET) + DRM_DEBUG_KMS("\tGraphics reset\n"); + if (val & PSR_EVENT_PCH_INTERRUPT) + DRM_DEBUG_KMS("\tPCH interrupt\n"); + if (val & PSR_EVENT_MEMORY_UP) + DRM_DEBUG_KMS("\tMemory up\n"); + if (val & PSR_EVENT_FRONT_BUFFER_MODIFY) + DRM_DEBUG_KMS("\tFront buffer modification\n"); + if (val & PSR_EVENT_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR watchdog timer expired\n"); + if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE) + DRM_DEBUG_KMS("\tPIPE registers updated\n"); + if (val & PSR_EVENT_REGISTER_UPDATE) + DRM_DEBUG_KMS("\tRegister updated\n"); + if (val & PSR_EVENT_HDCP_ENABLE) + DRM_DEBUG_KMS("\tHDCP enabled\n"); + if (val & PSR_EVENT_KVMR_SESSION_ENABLE) + DRM_DEBUG_KMS("\tKVMR session enabled\n"); + if (val & PSR_EVENT_VBI_ENABLE) + DRM_DEBUG_KMS("\tVBI enabled\n"); + if (val & PSR_EVENT_LPSP_MODE_EXIT) + DRM_DEBUG_KMS("\tLPSP mode exited\n"); + if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled) + DRM_DEBUG_KMS("\tPSR disabled\n"); +} + +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) +{ + u32 transcoders = BIT(TRANSCODER_EDP); + enum transcoder cpu_transcoder; + ktime_t time_ns = ktime_get(); + + if 
(INTEL_GEN(dev_priv) >= 8) + transcoders |= BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C); + + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + /* FIXME: Exit PSR and link train manually when this happens. */ + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) + DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); + + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } + + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; + + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); + } + } + } +} + static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp) { uint8_t psr_caps = 0; @@ -400,9 +508,8 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { - val |= EDP_Y_COORDINATE_VALID | EDP_Y_COORDINATE_ENABLE; - } + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + val |= EDP_Y_COORDINATE_ENABLE; val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); @@ -604,7 +711,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP); + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); } } @@ -1065,9 +1173,12 @@ void intel_psr_init(struct drm_i915_private *dev_priv) if (!dev_priv->psr.sink_support) return; - /* Per platform default: all disabled. */ - if (i915_modparams.enable_psr == -1) + if (i915_modparams.enable_psr == -1) { + i915_modparams.enable_psr = dev_priv->vbt.psr.enable; + + /* Per platform default: all disabled. */ i915_modparams.enable_psr = 0; + } /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 757bb0990c07..8f19349a6055 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -558,7 +558,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(request->ctx, + engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -618,9 +619,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - ret = intel_whitelist_workarounds_apply(engine); - if (ret) - return ret; + intel_whitelist_workarounds_apply(engine); /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN(dev_priv, 4, 6)) @@ -698,17 +697,17 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Mark all submitted requests as skipped. 
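intel_psr_irq_control() above always leaves the per-transcoder error bits unmasked and only unmasks the entry/exit bits when debugging is requested, then writes the complement of the mask to EDP_PSR_IMR. A compact sketch of that mask construction; the bit layout below is invented for the example, the real bits come from EDP_PSR_ERROR(), EDP_PSR_PRE_ENTRY() and EDP_PSR_POST_EXIT():

#include <stdint.h>
#include <stdio.h>

/* Placeholder bit positions, three per transcoder. */
#define PSR_ERROR(t)		(1u << ((t) * 3 + 0))
#define PSR_PRE_ENTRY(t)	(1u << ((t) * 3 + 1))
#define PSR_POST_EXIT(t)	(1u << ((t) * 3 + 2))

static uint32_t psr_irq_mask(int num_transcoders, int debug)
{
	uint32_t mask = 0, debug_mask = 0;
	int t;

	for (t = 0; t < num_transcoders; t++) {
		mask |= PSR_ERROR(t);			/* always reported */
		debug_mask |= PSR_PRE_ENTRY(t) | PSR_POST_EXIT(t);
	}

	if (debug)
		mask |= debug_mask;

	/* the IMR write is ~mask: a set IMR bit masks the interrupt */
	return ~mask;
}

int main(void)
{
	printf("IMR without debug: 0x%08x\n", psr_irq_mask(4, 0));
	printf("IMR with debug:    0x%08x\n", psr_irq_mask(4, 1));
	return 0;
}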
*/ - list_for_each_entry(request, &engine->timeline->requests, link) { + list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1067,7 +1066,6 @@ err: void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!list_empty(&ring->request_list)); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1119,19 +1117,24 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size) { struct intel_ring *ring; struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); + ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1162,12 +1165,13 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + i915_timeline_put(ring->timeline); kfree(ring); } -static int context_pin(struct i915_gem_context *ctx) +static int context_pin(struct intel_context *ce) { - struct i915_vma *vma = ctx->engine[RCS].state; + struct i915_vma *vma = ce->state; int ret; /* @@ -1258,7 +1262,7 @@ static struct intel_ring * intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1280,7 +1284,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ctx); + ret = context_pin(ce); if (ret) goto err; @@ -1301,7 +1305,7 @@ err: static void intel_ring_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1320,6 +1324,7 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; + struct i915_timeline *timeline; int err; intel_engine_setup_common(engine); @@ -1328,7 +1333,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + timeline = i915_timeline_create(engine->i915, engine->name); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; @@ -1429,7 +1441,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, 
engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1520,7 +1532,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_ctx->engine[engine->id].state && + if (to_intel_context(to_ctx, engine)->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1568,7 +1580,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1719,22 +1731,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - u32 *cs; + int num_dwords; + void *cs; + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); - while (num_dwords--) - *cs++ = MI_NOOP; - + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); intel_ring_advance(rq, cs); + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 717041640135..010750e8ee44 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,17 +3,19 @@ #define _INTEL_RINGBUFFER_H_ #include <linux/hashtable.h> +#include <linux/seqlock.h> #include "i915_gem_batch_pool.h" -#include "i915_gem_timeline.h" #include "i915_reg.h" #include "i915_pmu.h" #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #include "intel_gpu_commands.h" struct drm_printer; +struct i915_sched_attr; #define I915_CMD_HASH_ORDER 9 @@ -127,7 +129,9 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; + struct i915_timeline *timeline; struct list_head request_list; + struct list_head active_link; u32 head; u32 tail; @@ -334,7 +338,8 @@ struct intel_engine_cs { u32 mmio_base; struct intel_ring *buffer; - struct intel_timeline *timeline; + + struct i915_timeline timeline; struct drm_i915_gem_object *default_state; @@ -460,7 +465,8 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct i915_request *request, int priority); + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); /* * Cancel all requests on the hardware, or queued for execution. @@ -593,7 +599,7 @@ struct intel_engine_cs { /** * @lock: Lock protecting the below fields. */ - spinlock_t lock; + seqlock_t lock; /** * @enabled: Reference count indicating number of listeners. 
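intel_ring_cacheline_align() above now emits its padding with one memset64() of paired MI_NOOPs instead of a dword-at-a-time loop; the new GEM_BUG_ON(num_dwords & 1) records the invariant that the count is even, so the pairs always work out. The padding amount is just the dword distance from the ring's emit offset to the next cacheline, which is easy to check in isolation (CACHELINE_BYTES is 64 here; the offsets are made-up examples):

#include <assert.h>
#include <stdio.h>

#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / 4)	/* 4 bytes per dword */

/* Dwords of MI_NOOP padding needed to bring "emit" (a byte offset into
 * the ring) up to the next cacheline boundary. */
static unsigned int cacheline_pad_dwords(unsigned int emit)
{
	unsigned int partial = (emit & (CACHELINE_BYTES - 1)) / 4;

	return partial ? CACHELINE_DWORDS - partial : 0;
}

int main(void)
{
	assert(cacheline_pad_dwords(0) == 0);	/* already aligned */
	assert(cacheline_pad_dwords(8) == 14);	/* two dwords into the line */
	assert(cacheline_pad_dwords(56) == 2);
	printf("pad for emit=40: %u dwords\n", cacheline_pad_dwords(40));
	return 0;
}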
*/ @@ -764,7 +770,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); @@ -882,7 +890,7 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->seqno); + return READ_ONCE(engine->timeline.seqno); } void intel_engine_get_instdone(struct intel_engine_cs *engine, @@ -1062,7 +1070,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { if (engine->stats.active++ == 0) @@ -1070,7 +1078,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) GEM_BUG_ON(engine->stats.active == 0); } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) @@ -1080,7 +1088,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { ktime_t last; @@ -1107,7 +1115,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) } } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } int intel_enable_engine_stats(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 66de4b2dc8b7..53a6eaa9671a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -542,6 +542,29 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) dev_priv->csr.dc_state = val; } +/** + * gen9_set_dc_state - set target display C power state + * @dev_priv: i915 device instance + * @state: target DC power state + * - DC_STATE_DISABLE + * - DC_STATE_EN_UPTO_DC5 + * - DC_STATE_EN_UPTO_DC6 + * - DC_STATE_EN_DC9 + * + * Signal to DMC firmware/HW the target DC power state passed in @state. + * DMC/HW can turn off individual display clocks and power rails when entering + * a deeper DC power state (higher in number) and turns these back when exiting + * that state to a shallower power state (lower in number). The HW will decide + * when to actually enter a given state on an on-demand basis, for instance + * depending on the active state of display pipes. The state of display + * registers backed by affected power rails are saved/restored as needed. + * + * Based on the above enabling a deeper DC power state is asynchronous wrt. + * enabling it. Disabling a deeper power state is synchronous: for instance + * setting %DC_STATE_DISABLE won't complete until all HW resources are turned + * back on and register state is restored. This is guaranteed by the MMIO write + * to DC_STATE_EN blocking until the state is restored. 
+ */ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) { uint32_t val; @@ -635,7 +658,7 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void skl_enable_dc6(struct drm_i915_private *dev_priv) +static void skl_enable_dc6(struct drm_i915_private *dev_priv) { assert_can_enable_dc6(dev_priv); @@ -649,13 +672,6 @@ void skl_enable_dc6(struct drm_i915_private *dev_priv) gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); } -void skl_disable_dc6(struct drm_i915_private *dev_priv) -{ - DRM_DEBUG_KMS("Disabling DC6\n"); - - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); -} - static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -2626,32 +2642,69 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +static inline +bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + i915_reg_t reg, bool enable) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + u32 val, status; + val = I915_READ(reg); + val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); + I915_WRITE(reg, val); + POSTING_READ(reg); udelay(10); - if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) - DRM_ERROR("DBuf power enable timeout\n"); + status = I915_READ(reg) & DBUF_POWER_STATE; + if ((enable && !status) || (!enable && status)) { + DRM_ERROR("DBus power %s timeout!\n", + enable ? "enable" : "disable"); + return false; + } + return true; +} + +static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +{ + intel_dbuf_slice_set(dev_priv, DBUF_CTL, true); } static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + intel_dbuf_slice_set(dev_priv, DBUF_CTL, false); +} - udelay(10); +static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 11) + return 1; + return 2; +} - if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) - DRM_ERROR("DBuf power disable timeout!\n"); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices) +{ + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u32 val; + bool ret; + + if (req_slices > intel_dbuf_max_slices(dev_priv)) { + DRM_ERROR("Invalid number of dbuf slices requested\n"); + return; + } + + if (req_slices == hw_enabled_slices || req_slices == 0) + return; + + val = I915_READ(DBUF_CTL_S2); + if (req_slices > hw_enabled_slices) + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); + else + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false); + + if (ret) + dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices; } -/* - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when - * needed and keep it disabled as much as possible. 
- */ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) { I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST); @@ -2663,6 +2716,8 @@ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power enable timeout\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 2; } static void icl_dbuf_disable(struct drm_i915_private *dev_priv) @@ -2676,6 +2731,8 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power disable timeout!\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 0; } static void icl_mbus_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 970015dcc6f1..ee23613f9fd4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -131,7 +131,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) if (scanline < min || scanline > max) break; - if (timeout <= 0) { + if (!timeout) { DRM_ERROR("Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; @@ -1011,6 +1011,7 @@ intel_check_sprite_plane(struct intel_plane *plane, src->y2 = (src_y + src_h) << 16; if (intel_format_is_yuv(fb->format->format) && + fb->format->format != DRM_FORMAT_NV12 && (src_x % 2 || src_w % 2)) { DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", src_x, src_w); @@ -1179,6 +1180,19 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; +static uint32_t skl_planar_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -1273,6 +1287,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -1372,8 +1387,14 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->disable_plane = skl_disable_plane; intel_plane->get_hw_state = skl_plane_get_hw_state; - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); + if (skl_plane_has_planar(dev_priv, pipe, + PLANE_SPRITE0 + plane)) { + plane_formats = skl_planar_formats; + num_plane_formats = ARRAY_SIZE(skl_planar_formats); + } else { + plane_formats = skl_plane_formats; + num_plane_formats = ARRAY_SIZE(skl_plane_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane)) modifiers = skl_plane_format_modifiers_ccs; diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index dc33b12394de..87910aa83267 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -30,7 +30,7 @@ struct drm_i915_private; struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ -#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" enum intel_uc_fw_status { INTEL_UC_FIRMWARE_FAIL = -1, diff --git 
a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index d6e20f0f4c28..448293eb638d 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -139,7 +139,9 @@ fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, * in the hope that the original ack will be delivered along with * the fallback ack. * - * This workaround is described in HSDES #1604254524 + * This workaround is described in HSDES #1604254524 and it's known as: + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl + * although the name is a bit misleading. */ pass = 1; @@ -1394,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11) { int i; - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c index bbbf4ed4aa97..2df3538ceba5 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.c +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -270,6 +270,10 @@ static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + return 0; } @@ -441,6 +445,27 @@ static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) return 0; } +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* Wa_1604370585:icl (pre-prod) + * Formerly known as WaPushConstantDereferenceHoldDisable + */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); + + /* WaForceEnableNonCoherent:icl + * This is not the same workaround as in early Gen9 platforms, where + * lacking this could cause system hangs, but coherency performance + * overhead is high and only a few compute workloads really need it + * (the register is whitelisted in hardware now, so UMDs can opt in + * for coherency if they have a good reason). + */ + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + + return 0; +} + int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) { int err = 0; @@ -465,6 +490,8 @@ int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) err = cfl_ctx_workarounds_init(dev_priv); else if (IS_CANNONLAKE(dev_priv)) err = cnl_ctx_workarounds_init(dev_priv); + else if (IS_ICELAKE(dev_priv)) + err = icl_ctx_workarounds_init(dev_priv); else MISSING_CASE(INTEL_GEN(dev_priv)); if (err) @@ -663,6 +690,90 @@ static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); } +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. 
Enable for better image quality */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); + + /* WaInPlaceDecompressionHang:icl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaPipelineFlushCoherentLines:icl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* Wa_1405543622:icl + * Formerly known as WaGAPZPriorityScheme + */ + I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) | + GEN11_ARBITRATION_PRIO_ORDER_MASK); + + /* Wa_1604223664:icl + * Formerly known as WaL3BankAddressHashing + */ + I915_WRITE(GEN8_GARBCNTL, + (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) | + GEN11_HASH_CTRL_EXCL_BIT0); + I915_WRITE(GEN11_GLBLINVL, + (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) | + GEN11_BANK_HASH_ADDR_EXCL_BIT0); + + /* WaModifyGamTlbPartitioning:icl */ + I915_WRITE(GEN11_GACB_PERF_CTRL, + (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) | + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + + /* Wa_1405733216:icl + * Formerly known as WaDisableCleanEvicts + */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN11_LQSC_CLEAN_EVICT_DISABLE); + + /* Wa_1405766107:icl + * Formerly known as WaCL2SFHalfMaxAlloc + */ + I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) | + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); + + /* Wa_220166154:icl + * Formerly known as WaDisCtxReload + */ + I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) | + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); + + /* Wa_1405779004:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | + MSCUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl */ + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) | + GWUNIT_CLKGATE_DIS); + + /* Wa_1604302699:icl */ + I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER, + I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) | + GEN11_I2M_WRITE_DISABLE); + + /* Wa_1406838659:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + I915_WRITE(INF_UNIT_LEVEL_CLKGATE, + I915_READ(INF_UNIT_LEVEL_CLKGATE) | + CGPSF_CLKGATE_DIS); + + /* WaForwardProgressSoftReset:icl */ + I915_WRITE(GEN10_SCRATCH_LNCF2, + I915_READ(GEN10_SCRATCH_LNCF2) | + PMFLUSHDONE_LNICRSDROP | + PMFLUSH_GAPL3UNBLOCK | + PMFLUSHDONE_LNEBLK); +} + void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) < 8) @@ -683,174 +794,156 @@ void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) cfl_gt_workarounds_apply(dev_priv); else if (IS_CANNONLAKE(dev_priv)) cnl_gt_workarounds_apply(dev_priv); + else if (IS_ICELAKE(dev_priv)) + icl_gt_workarounds_apply(dev_priv); else MISSING_CASE(INTEL_GEN(dev_priv)); } -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const unsigned int index = wa->hw_whitelist_count[engine->id]; +struct whitelist { + i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; + unsigned int count; + u32 nopid; +}; - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; +static void whitelist_reg(struct whitelist *w, i915_reg_t 
reg) +{ + if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) + return; - return 0; + w->reg[w->count++] = reg; } -static int bdw_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void bdw_whitelist_build(struct whitelist *w) { - return 0; } -static int chv_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void chv_whitelist_build(struct whitelist *w) { - return 0; } -static int gen9_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void gen9_whitelist_build(struct whitelist *w) { - int ret; - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; + whitelist_reg(w, GEN8_CS_CHICKEN1); /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_HDC_CHICKEN1); } -static int skl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void skl_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_L3SQCREG4); } -static int bxt_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void bxt_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; - - return 0; + gen9_whitelist_build(w); } -static int kbl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void kbl_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN8_L3SQCREG4); } -static int glk_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void glk_whitelist_build(struct whitelist *w) { - int ret; - - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; + gen9_whitelist_build(w); /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. 
*/ - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); - if (ret) - return ret; - - return 0; + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); } -static int cfl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void cfl_whitelist_build(struct whitelist *w) { - int ret; + gen9_whitelist_build(w); +} - ret = gen9_whitelist_workarounds_apply(engine); - if (ret) - return ret; +static void cnl_whitelist_build(struct whitelist *w) +{ + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); +} - return 0; +static void icl_whitelist_build(struct whitelist *w) +{ } -static int cnl_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static struct whitelist *whitelist_build(struct intel_engine_cs *engine, + struct whitelist *w) { - int ret; + struct drm_i915_private *i915 = engine->i915; - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; + GEM_BUG_ON(engine->id != RCS); - return 0; + w->count = 0; + w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); + + if (INTEL_GEN(i915) < 8) + return NULL; + else if (IS_BROADWELL(i915)) + bdw_whitelist_build(w); + else if (IS_CHERRYVIEW(i915)) + chv_whitelist_build(w); + else if (IS_SKYLAKE(i915)) + skl_whitelist_build(w); + else if (IS_BROXTON(i915)) + bxt_whitelist_build(w); + else if (IS_KABYLAKE(i915)) + kbl_whitelist_build(w); + else if (IS_GEMINILAKE(i915)) + glk_whitelist_build(w); + else if (IS_COFFEELAKE(i915)) + cfl_whitelist_build(w); + else if (IS_CANNONLAKE(i915)) + cnl_whitelist_build(w); + else if (IS_ICELAKE(i915)) + icl_whitelist_build(w); + else + MISSING_CASE(INTEL_GEN(i915)); + + return w; } -int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +static void whitelist_apply(struct intel_engine_cs *engine, + const struct whitelist *w) { struct drm_i915_private *dev_priv = engine->i915; - int err = 0; + const u32 base = engine->mmio_base; + unsigned int i; - WARN_ON(engine->id != RCS); + if (!w) + return; - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); - if (INTEL_GEN(dev_priv) < 8) - err = 0; - else if (IS_BROADWELL(dev_priv)) - err = bdw_whitelist_workarounds_apply(engine); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_whitelist_workarounds_apply(engine); - else if (IS_SKYLAKE(dev_priv)) - err = skl_whitelist_workarounds_apply(engine); - else if (IS_BROXTON(dev_priv)) - err = bxt_whitelist_workarounds_apply(engine); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_whitelist_workarounds_apply(engine); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_whitelist_workarounds_apply(engine); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_whitelist_workarounds_apply(engine); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_whitelist_workarounds_apply(engine); - else - MISSING_CASE(INTEL_GEN(dev_priv)); - if (err) - return err; + for (i = 0; i < w->count; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(w->reg[i])); - DRM_DEBUG_DRIVER("%s: Number of whitelist w/a: %d\n", engine->name, - dev_priv->workarounds.hw_whitelist_count[engine->id]); - return 0; + /* And clear the rest just in case of garbage */ + for (; i < RING_MAX_NONPRIV_SLOTS; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); } + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +{ + struct whitelist w; + + 
whitelist_apply(engine, whitelist_build(engine, &w)); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_workarounds.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h index d9b0cc5afb4a..b11d0623e626 100644 --- a/drivers/gpu/drm/i915/intel_workarounds.h +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -12,6 +12,6 @@ int intel_ctx_workarounds_emit(struct i915_request *rq); void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); -int intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); #endif diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 05bbef363fff..91c72911be3c 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1091,7 +1091,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_close(vma); + i915_vma_destroy(vma); return err; } @@ -1757,6 +1757,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) goto out_unlock; } + if (ctx->ppgtt) + ctx->ppgtt->base.scrub_64K = true; + err = i915_subtests(tests, ctx); out_unlock: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ecaed50d0b9..ddb03f009232 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -23,6 +23,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_drm.h" #include "huge_gem_object.h" @@ -411,6 +412,8 @@ static int igt_ctx_exec(void *arg) } out_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 8bf6aa573226..a00e2bd08bce 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,6 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) +selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 3000e6a7d82d..19f1c6a5c8fb 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -1,25 +1,7 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #include "../i915_selftest.h" @@ -35,21 +17,21 @@ struct __igt_sync { bool set; }; -static int __igt_sync(struct intel_timeline *tl, +static int __igt_sync(struct i915_timeline *tl, u64 ctx, const struct __igt_sync *p, const char *name) { int ret; - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", name, p->name, ctx, p->seqno, yesno(p->expected)); return -EINVAL; } if (p->set) { - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); if (ret) return ret; } @@ -77,37 +59,31 @@ static int igt_sync(void *arg) { "unwrap", UINT_MAX, true, false }, {}, }, *p; - struct intel_timeline *tl; + struct i915_timeline tl; int order, offset; int ret = -ENODEV; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - + mock_timeline_init(&tl, 0); for (p = pass; p->name; p++) { for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; - ret = __igt_sync(tl, ctx, p, "1"); + ret = __igt_sync(&tl, ctx, p, "1"); if (ret) goto out; } } } - mock_timeline_destroy(tl); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_fini(&tl); + mock_timeline_init(&tl, 0); for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; for (p = pass; p->name; p++) { - ret = __igt_sync(tl, ctx, p, "2"); + ret = __igt_sync(&tl, ctx, p, "2"); if (ret) goto out; } @@ -115,7 +91,7 @@ static int igt_sync(void *arg) } out: - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return ret; } @@ -127,15 +103,13 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { struct rnd_state prng; - struct intel_timeline *tl; + struct i915_timeline tl; unsigned long end_time, count; u64 prng32_1M; ktime_t kt; int order, last_order; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Lookups from cache are very fast and so the random number generation * and the loop itself becomes a significant factor in the per-iteration @@ -167,7 +141,7 @@ static int bench_sync(void *arg) do { u64 id = i915_prandom_u64_state(&prng); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); @@ -182,8 +156,8 @@ static int bench_sync(void *arg) while (end_time--) { u64 id = i915_prandom_u64_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, 0)) { - mock_timeline_destroy(tl); + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); pr_err("Lookup of %llu failed\n", id); return -EINVAL; } @@ -193,19 +167,17 @@ static int bench_sync(void *arg) pr_info("%s: %lu random lookups, %lluns/lookup\n", __func__, count, (long 
long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark setting the first N (in order) contexts */ count = 0; kt = ktime_get(); end_time = jiffies + HZ/10; do { - __intel_timeline_sync_set(tl, count++, 0); + __i915_timeline_sync_set(&tl, count++, 0); } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); pr_info("%s: %lu in-order insertions, %lluns/insert\n", @@ -215,9 +187,9 @@ static int bench_sync(void *arg) end_time = count; kt = ktime_get(); while (end_time--) { - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return -EINVAL; } } @@ -225,12 +197,10 @@ static int bench_sync(void *arg) pr_info("%s: %lu in-order lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark searching for a random context id and maybe changing it */ prandom_seed_state(&prng, i915_selftest.random_seed); @@ -241,8 +211,8 @@ static int bench_sync(void *arg) u32 id = random_engine(&prng); u32 seqno = prandom_u32_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, seqno)) - __intel_timeline_sync_set(tl, id, seqno); + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) + __i915_timeline_sync_set(&tl, id, seqno); count++; } while (!time_after(jiffies, end_time)); @@ -250,7 +220,7 @@ static int bench_sync(void *arg) kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); /* Benchmark searching for a known context id and changing the seqno */ @@ -258,9 +228,7 @@ static int bench_sync(void *arg) ({ int tmp = last_order; last_order = order; order += tmp; })) { unsigned int mask = BIT(order) - 1; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); count = 0; kt = ktime_get(); @@ -272,8 +240,8 @@ static int bench_sync(void *arg) */ u64 id = (u64)(count & mask) << order; - __intel_timeline_sync_is_later(tl, id, 0); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_is_later(&tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); @@ -281,7 +249,7 @@ static int bench_sync(void *arg) pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", __func__, count, order, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb89e301b602..e90f97236e50 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -81,7 +81,7 @@ checked_vma_instance(struct drm_i915_gem_object *obj, } if (i915_vma_compare(vma, vm, view)) { - pr_err("i915_vma_compare failed with create parmaters!\n"); + pr_err("i915_vma_compare failed with create parameters!\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c new file mode 100644 index 
000000000000..0d06f559243f --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); + + GEM_TRACE("%pS timed out.\n", w->symbol); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + if (flags & I915_WAIT_LOCKED && + i915_gem_switch_to_kernel_context(i915)) { + pr_err("Failed to switch back to kernel context; declaring wedged\n"); + i915_gem_set_wedged(i915); + } + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h new file mode 100644 index 000000000000..63e009927c43 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef IGT_FLUSH_TEST_H +#define IGT_FLUSH_TEST_H + +struct drm_i915_private; + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); + +#endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 46580026c7fc..d6926e7820e5 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -412,10 +412,11 @@ static int igt_wakeup(void *arg) * that they are ready for the next test. We wait until all * threads are complete and waiting for us (i.e. not a seqno). 
*/ - err = wait_var_event_timeout(&done, !atomic_read(&done), 10 * HZ); - if (err) { + if (!wait_var_event_timeout(&done, + !atomic_read(&done), 10 * HZ)) { pr_err("Timed out waiting for %d remaining waiters\n", atomic_read(&done)); + err = -ETIMEDOUT; break; } diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 24f913f26a7b..438e0b045a2c 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -26,10 +26,13 @@ #include "../i915_selftest.h" #include "i915_random.h" +#include "igt_flush_test.h" #include "mock_context.h" #include "mock_drm.h" +#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ + struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; @@ -251,61 +254,6 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); - - GEM_TRACE("%pS timed out.\n", w->symbol); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; -} - static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -319,7 +267,7 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->i915, I915_WAIT_LOCKED); } static bool wait_until_running(struct hang *h, struct i915_request *rq) @@ -454,6 +402,11 @@ static int igt_global_reset(void *arg) return err; } +static bool wait_for_idle(struct intel_engine_cs *engine) +{ + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; +} + static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { struct intel_engine_cs *engine; @@ -481,6 +434,13 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (active && !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); @@ -542,13 +502,26 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) err = -EINVAL; break; } + + if (!wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -628,7 +601,7 @@ static int active_engine(void *data) } if (arg->flags & TEST_PRIORITY) - ctx[idx]->priority = + ctx[idx]->sched.priority = i915_prandom_u32_max_state(512, &prng); rq[idx] = i915_request_get(new); @@ -683,7 +656,7 @@ static int __igt_reset_engines(struct drm_i915_private *i915, return err; if (flags & TEST_PRIORITY) - h.ctx->priority = 1024; + h.ctx->sched.priority = 1024; } for_each_engine(engine, i915, id) { @@ -696,6 +669,13 @@ static int __igt_reset_engines(struct drm_i915_private *i915, !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", + engine->name, test_name); + err = -EIO; + break; + } + memset(threads, 0, sizeof(threads)); for_each_engine(other, i915, tmp) { struct task_struct *tsk; @@ -772,6 +752,20 @@ static int __igt_reset_engines(struct drm_i915_private *i915, i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); i915_request_put(rq); } + + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to idle after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("i915_reset_engine(%s:%s): %lu resets\n", @@ -826,7 +820,7 @@ unwind: if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -981,7 +975,7 @@ static int wait_for_others(struct drm_i915_private *i915, if (engine == exclude) continue; - if (wait_for(intel_engine_is_idle(engine), 10)) + if (!wait_for_idle(engine)) return -EIO; } @@ -1120,7 +1114,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(i915, 
I915_WAIT_LOCKED); if (err) break; } @@ -1232,7 +1226,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, i915); mutex_lock(&i915->drm.struct_mutex); - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); i915_modparams.enable_hangcheck = saved_hangcheck; diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c index 0481e2e01146..1b8a07125150 100644 --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -5,6 +5,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_context.h" @@ -168,61 +169,6 @@ static u32 hws_seqno(const struct spinner *spin, const struct i915_request *rq) return READ_ONCE(*seqno); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); - - GEM_TRACE("%pS timed out.\n", w->symbol); - GEM_TRACE_DUMP(); - - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? 
-EIO : 0; -} - static void spinner_end(struct spinner *spin) { *spin->batch = MI_BATCH_BUFFER_END; @@ -295,7 +241,7 @@ static int live_sanitycheck(void *arg) } spinner_end(&spin); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx; } @@ -307,7 +253,7 @@ err_ctx: err_spin: spinner_fini(&spin); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -335,12 +281,12 @@ static int live_preempt(void *arg) ctx_hi = kernel_context(i915); if (!ctx_hi) goto err_spin_lo; - ctx_hi->priority = I915_CONTEXT_MAX_USER_PRIORITY; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; ctx_lo = kernel_context(i915); if (!ctx_lo) goto err_ctx_hi; - ctx_lo->priority = I915_CONTEXT_MIN_USER_PRIORITY; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; for_each_engine(engine, i915, id) { struct i915_request *rq; @@ -380,7 +326,7 @@ static int live_preempt(void *arg) spinner_end(&spin_hi); spinner_end(&spin_lo); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx_lo; } @@ -396,7 +342,7 @@ err_spin_lo: err_spin_hi: spinner_fini(&spin_hi); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; } @@ -407,6 +353,7 @@ static int live_late_preempt(void *arg) struct i915_gem_context *ctx_hi, *ctx_lo; struct spinner spin_hi, spin_lo; struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; enum intel_engine_id id; int err = -ENOMEM; @@ -458,7 +405,8 @@ static int live_late_preempt(void *arg) goto err_wedged; } - engine->schedule(rq, I915_PRIORITY_MAX); + attr.priority = I915_PRIORITY_MAX; + engine->schedule(rq, &attr); if (!wait_for_spinner(&spin_hi, rq)) { pr_err("High priority context failed to preempt the low priority context\n"); @@ -468,7 +416,7 @@ static int live_late_preempt(void *arg) spinner_end(&spin_hi); spinner_end(&spin_lo); - if (flush_test(i915, I915_WAIT_LOCKED)) { + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { err = -EIO; goto err_ctx_lo; } @@ -484,7 +432,7 @@ err_spin_lo: err_spin_hi: spinner_fini(&spin_hi); err_unlock: - flush_test(i915, I915_WAIT_LOCKED); + igt_flush_test(i915, I915_WAIT_LOCKED); mutex_unlock(&i915->drm.struct_mutex); return err; @@ -503,5 +451,9 @@ int intel_execlists_live_selftests(struct drm_i915_private *i915) SUBTEST(live_preempt), SUBTEST(live_late_preempt), }; + + if (!HAS_EXECLISTS(i915)) + return 0; + return i915_subtests(tests, i915); } diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c new file mode 100644 index 000000000000..17444a3abbb9 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -0,0 +1,291 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "mock_context.h" + +static struct drm_i915_gem_object * +read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *result; + struct i915_request *rq; + struct i915_vma *vma; + const u32 base = engine->mmio_base; + u32 srm, *cs; + int err; + int i; + + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(result)) + return result; + + i915_gem_object_set_cache_level(result, I915_CACHE_LLC); + + cs = i915_gem_object_pin_map(result, I915_MAP_WB); + if 
(IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + memset(cs, 0xc5, PAGE_SIZE); + i915_gem_object_unpin_map(result); + + vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_obj; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + + i915_gem_object_get(result); + i915_gem_object_set_active_reference(result); + + __i915_request_add(rq, true); + i915_vma_unpin(vma); + + return result; + +err_req: + i915_request_add(rq); +err_pin: + i915_vma_unpin(vma); +err_obj: + i915_gem_object_put(result); + return ERR_PTR(err); +} + +static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) +{ + return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; +} + +static void print_results(const struct whitelist *w, const u32 *results) +{ + unsigned int i; + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = results[i]; + + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", + i, expected, actual); + } +} + +static int check_whitelist(const struct whitelist *w, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *results; + u32 *vaddr; + int err; + int i; + + results = read_nonprivs(ctx, engine); + if (IS_ERR(results)) + return PTR_ERR(results); + + err = i915_gem_object_set_to_cpu_domain(results, false); + if (err) + goto out_put; + + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = vaddr[i]; + + if (expected != actual) { + print_results(w, vaddr); + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", + i, expected, actual); + + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(results); +out_put: + i915_gem_object_put(results); + return err; +} + +static int do_device_reset(struct intel_engine_cs *engine) +{ + i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL); + return 0; +} + +static int do_engine_reset(struct intel_engine_cs *engine) +{ + return i915_reset_engine(engine, NULL); +} + +static int switch_to_scratch_context(struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + rq = i915_request_alloc(engine, ctx); + kernel_context_close(ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; +} + +static int check_whitelist_across_reset(struct intel_engine_cs *engine, + int (*reset)(struct intel_engine_cs *), + const struct whitelist *w, + const char *name) +{ + struct 
i915_gem_context *ctx; + int err; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *before* %s reset!\n", name); + goto out; + } + + err = switch_to_scratch_context(engine); + if (err) + goto out; + + err = reset(engine); + if (err) { + pr_err("%s reset failed\n", name); + goto out; + } + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Whitelist not preserved in context across %s reset!\n", + name); + goto out; + } + + kernel_context_close(ctx); + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", + name); + goto out; + } + +out: + kernel_context_close(ctx); + return err; +} + +static int live_reset_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS]; + struct i915_gpu_error *error = &i915->gpu_error; + struct whitelist w; + int err = 0; + + /* If we reset the gpu, we should not lose the RING_NONPRIV */ + + if (!engine) + return 0; + + if (!whitelist_build(engine, &w)) + return 0; + + pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count); + + set_bit(I915_RESET_BACKOFF, &error->flags); + set_bit(I915_RESET_ENGINE + engine->id, &error->flags); + + if (intel_has_reset_engine(i915)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, &w, + "engine"); + if (err) + goto out; + } + + if (intel_has_gpu_reset(i915)) { + err = check_whitelist_across_reset(engine, + do_device_reset, &w, + "device"); + if (err) + goto out; + } + +out: + clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); + clear_bit(I915_RESET_BACKOFF, &error->flags); + return err; +} + +int intel_workarounds_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_reset_whitelist), + }; + int err; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 78a89efa1119..26bf29d97007 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -25,6 +25,11 @@ #include "mock_engine.h" #include "mock_request.h" +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -71,14 +76,21 @@ static struct intel_ring * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_get(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!ce->pin_count++) + i915_gem_context_get(ctx); + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_put(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!--ce->pin_count) + i915_gem_context_put(ctx); } static int mock_request_alloc(struct i915_request *request) @@ -125,7 +137,7 @@ static void mock_submit_request(struct i915_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct intel_ring *ring; + struct mock_ring *ring; 
 	BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz);
@@ -133,14 +145,25 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	if (!ring)
 		return NULL;
 
-	ring->size = sz;
-	ring->effective_size = sz;
-	ring->vaddr = (void *)(ring + 1);
+	i915_timeline_init(engine->i915, &ring->timeline, engine->name);
+
+	ring->base.size = sz;
+	ring->base.effective_size = sz;
+	ring->base.vaddr = (void *)(ring + 1);
+	ring->base.timeline = &ring->timeline;
 
-	INIT_LIST_HEAD(&ring->request_list);
-	intel_ring_update_space(ring);
+	INIT_LIST_HEAD(&ring->base.request_list);
+	intel_ring_update_space(&ring->base);
 
-	return ring;
+	return &ring->base;
+}
+
+static void mock_ring_free(struct intel_ring *base)
+{
+	struct mock_ring *ring = container_of(base, typeof(*ring), base);
+
+	i915_timeline_fini(&ring->timeline);
+	kfree(ring);
 }
 
 struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
@@ -155,12 +178,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	if (!engine)
 		return NULL;
 
-	engine->base.buffer = mock_ring(&engine->base);
-	if (!engine->base.buffer) {
-		kfree(engine);
-		return NULL;
-	}
-
 	/* minimal engine setup for requests */
 	engine->base.i915 = i915;
 	snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
@@ -174,9 +191,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	engine->base.emit_breadcrumb = mock_emit_breadcrumb;
 	engine->base.submit_request = mock_submit_request;
 
-	engine->base.timeline =
-		&i915->gt.global_timeline.engine[engine->base.id];
-
+	i915_timeline_init(i915, &engine->base.timeline, engine->base.name);
 	intel_engine_init_breadcrumbs(&engine->base);
 	engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */
 
@@ -185,7 +200,17 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
 	timer_setup(&engine->hw_delay, hw_delay_complete, 0);
 	INIT_LIST_HEAD(&engine->hw_queue);
 
+	engine->base.buffer = mock_ring(&engine->base);
+	if (!engine->base.buffer)
+		goto err_breadcrumbs;
+
 	return &engine->base;
+
+err_breadcrumbs:
+	intel_engine_fini_breadcrumbs(&engine->base);
+	i915_timeline_fini(&engine->base.timeline);
+	kfree(engine);
+	return NULL;
 }
 
 void mock_engine_flush(struct intel_engine_cs *engine)
@@ -217,10 +242,12 @@ void mock_engine_free(struct intel_engine_cs *engine)
 	GEM_BUG_ON(timer_pending(&mock->hw_delay));
 
 	if (engine->last_retired_context)
-		engine->context_unpin(engine, engine->last_retired_context);
+		intel_context_unpin(engine->last_retired_context, engine);
+
+	mock_ring_free(engine->buffer);
 
 	intel_engine_fini_breadcrumbs(engine);
+	i915_timeline_fini(&engine->timeline);
 
-	kfree(engine->buffer);
 	kfree(engine);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
index e6d4b882599a..94baedfa0f74 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c
@@ -44,6 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915)
 		mock_engine_flush(engine);
 
 	i915_retire_requests(i915);
+	GEM_BUG_ON(i915->gt.active_requests);
 }
 
 static void mock_device_release(struct drm_device *dev)
@@ -72,8 +73,8 @@ static void mock_device_release(struct drm_device *dev)
 
 	mutex_lock(&i915->drm.struct_mutex);
 	mock_fini_ggtt(i915);
-	i915_gem_timeline_fini(&i915->gt.global_timeline);
 	mutex_unlock(&i915->drm.struct_mutex);
+	WARN_ON(!list_empty(&i915->gt.timelines));
 
 	destroy_workqueue(i915->wq);
 
@@ -223,26 +224,25 @@ struct drm_i915_private *mock_gem_device(void)
 	if (!i915->priorities)
 		goto err_dependencies;
 
-	mutex_lock(&i915->drm.struct_mutex);
 	INIT_LIST_HEAD(&i915->gt.timelines);
-	err = i915_gem_timeline_init__global(i915);
-	if (err) {
-		mutex_unlock(&i915->drm.struct_mutex);
-		goto err_priorities;
-	}
+	INIT_LIST_HEAD(&i915->gt.active_rings);
+	INIT_LIST_HEAD(&i915->gt.closed_vma);
+
+	mutex_lock(&i915->drm.struct_mutex);
 
 	mock_init_ggtt(i915);
-	mutex_unlock(&i915->drm.struct_mutex);
 
 	mkwrite_device_info(i915)->ring_mask = BIT(0);
 	i915->engine[RCS] = mock_engine(i915, "mock", RCS);
 	if (!i915->engine[RCS])
-		goto err_priorities;
+		goto err_unlock;
 
 	i915->kernel_context = mock_context(i915, NULL);
 	if (!i915->kernel_context)
 		goto err_engine;
 
+	mutex_unlock(&i915->drm.struct_mutex);
+
 	WARN_ON(i915_gemfs_init(i915));
 
 	return i915;
@@ -250,7 +250,8 @@ struct drm_i915_private *mock_gem_device(void)
 err_engine:
 	for_each_engine(engine, i915, id)
 		mock_engine_free(engine);
-err_priorities:
+err_unlock:
+	mutex_unlock(&i915->drm.struct_mutex);
 	kmem_cache_destroy(i915->priorities);
 err_dependencies:
 	kmem_cache_destroy(i915->dependencies);
diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
index e96873f96116..36c112088940 100644
--- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
@@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915,
 
 	INIT_LIST_HEAD(&ppgtt->base.global_link);
 	drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total);
-	i915_gem_timeline_init(i915, &ppgtt->base.timeline, name);
 
 	ppgtt->base.clear_range = nop_clear_range;
 	ppgtt->base.insert_page = mock_insert_page;
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c
index 47b1f47c5812..dcf3b16f5a07 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.c
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c
@@ -1,45 +1,28 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017-2018 Intel Corporation
  */
 
+#include "../i915_timeline.h"
+
 #include "mock_timeline.h"
 
-struct intel_timeline *mock_timeline(u64 context)
+void mock_timeline_init(struct i915_timeline *timeline, u64 context)
 {
-	static struct lock_class_key class;
-	struct intel_timeline *tl;
+	timeline->fence_context = context;
+
+	spin_lock_init(&timeline->lock);
 
-	tl = kzalloc(sizeof(*tl), GFP_KERNEL);
-	if (!tl)
-		return NULL;
+	init_request_active(&timeline->last_request, NULL);
+	INIT_LIST_HEAD(&timeline->requests);
 
-	__intel_timeline_init(tl, NULL, context, &class, "mock");
+	i915_syncmap_init(&timeline->sync);
 
-	return tl;
+	INIT_LIST_HEAD(&timeline->link);
 }
 
-void mock_timeline_destroy(struct intel_timeline *tl)
+void mock_timeline_fini(struct i915_timeline *timeline)
 {
-	__intel_timeline_fini(tl);
-	kfree(tl);
+	i915_timeline_fini(timeline);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h
index c27ff4639b8b..b6deaa61110d 100644
--- a/drivers/gpu/drm/i915/selftests/mock_timeline.h
+++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h
@@ -1,33 +1,15 @@
 /*
- * Copyright © 2017 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
+ * SPDX-License-Identifier: MIT
  *
+ * Copyright © 2017-2018 Intel Corporation
  */
 
 #ifndef __MOCK_TIMELINE__
 #define __MOCK_TIMELINE__
 
-#include "../i915_gem_timeline.h"
+struct i915_timeline;
 
-struct intel_timeline *mock_timeline(u64 context);
-void mock_timeline_destroy(struct intel_timeline *tl);
+void mock_timeline_init(struct i915_timeline *timeline, u64 context);
+void mock_timeline_fini(struct i915_timeline *timeline);
 
 #endif /* !__MOCK_TIMELINE__ */
diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
index 70f0c2535b87..bab70ff6e78b 100644
--- a/include/drm/i915_pciids.h
+++ b/include/drm/i915_pciids.h
@@ -349,6 +349,7 @@
 #define INTEL_KBL_GT2_IDS(info)	\
 	INTEL_VGA_DEVICE(0x5916, info), /* ULT GT2 */ \
 	INTEL_VGA_DEVICE(0x5917, info), /* Mobile GT2 */ \
+	INTEL_VGA_DEVICE(0x591C, info), /* ULX GT2 */ \
 	INTEL_VGA_DEVICE(0x5921, info), /* ULT GT2F */ \
 	INTEL_VGA_DEVICE(0x591E, info), /* ULX GT2 */ \
 	INTEL_VGA_DEVICE(0x5912, info), /* DT GT2 */ \
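
Illustrative note (not part of the patch): the mock_timeline conversion above replaces the heap-allocating mock_timeline()/mock_timeline_destroy() pair with mock_timeline_init()/mock_timeline_fini(), which operate on a caller-provided struct i915_timeline. A minimal sketch of how a selftest might use the new helpers follows; it relies only on the declarations shown in the diff, and the function name igt_mock_timeline_example is hypothetical.

/* Hypothetical usage sketch of the embedded-timeline API; not from the series. */
#include "../i915_timeline.h"	/* full definition of struct i915_timeline */
#include "mock_timeline.h"

static int igt_mock_timeline_example(void *arg)
{
	struct i915_timeline tl;

	/* No allocation: the caller owns the storage, init only fills it in. */
	mock_timeline_init(&tl, 1);	/* arbitrary fence context for the mock */

	/* ... queue and retire mock requests against &tl here ... */

	/* Tear down per-timeline state via i915_timeline_fini(). */
	mock_timeline_fini(&tl);
	return 0;
}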